diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-08-21 10:42:25 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-08-24 09:09:51 +0200 |
commit | 63084c1988e3c8c0858fd747485f250a2160f434 (patch) | |
tree | 466a6ccbdc5469fc497a49f3b94fe5448b3b7765 /freebsd | |
parent | Update rtems-bsd-kernel-namespace.h (diff) | |
download | rtems-libbsd-63084c1988e3c8c0858fd747485f250a2160f434.tar.bz2 |
IPFW(4): Remove FreeBSD import
This firewall was not ported to RTEMS and is just dead code which may
make trouble during FreeBSD baseline updates. It also increased the
compile-time of the library for nothing.
Update #3472.
Diffstat (limited to 'freebsd')
36 files changed, 0 insertions, 30326 deletions
diff --git a/freebsd/sys/netinet6/ip_fw_nat64.h b/freebsd/sys/netinet6/ip_fw_nat64.h deleted file mode 100644 index a5c38b2a..00000000 --- a/freebsd/sys/netinet6/ip_fw_nat64.h +++ /dev/null @@ -1,154 +0,0 @@ -/*- - * Copyright (c) 2015 Yandex LLC - * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> - * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _NETINET6_IP_FW_NAT64_H_ -#define _NETINET6_IP_FW_NAT64_H_ - -struct ipfw_nat64stl_stats { - uint64_t opcnt64; /* 6to4 of packets translated */ - uint64_t opcnt46; /* 4to6 of packets translated */ - uint64_t ofrags; /* number of fragments generated */ - uint64_t ifrags; /* number of fragments received */ - uint64_t oerrors; /* number of output errors */ - uint64_t noroute4; - uint64_t noroute6; - uint64_t noproto; /* Protocol not supported */ - uint64_t nomem; /* mbuf allocation filed */ - uint64_t dropped; /* dropped due to some errors */ -}; - -struct ipfw_nat64lsn_stats { - uint64_t opcnt64; /* 6to4 of packets translated */ - uint64_t opcnt46; /* 4to6 of packets translated */ - uint64_t ofrags; /* number of fragments generated */ - uint64_t ifrags; /* number of fragments received */ - uint64_t oerrors; /* number of output errors */ - uint64_t noroute4; - uint64_t noroute6; - uint64_t noproto; /* Protocol not supported */ - uint64_t nomem; /* mbuf allocation filed */ - uint64_t dropped; /* dropped due to some errors */ - - uint64_t nomatch4; /* No addr/port match */ - uint64_t jcalls; /* Number of job handler calls */ - uint64_t jrequests; /* Number of job requests */ - uint64_t jhostsreq; /* Number of job host requests */ - uint64_t jportreq; /* Number of portgroup requests */ - uint64_t jhostfails; /* Number of failed host allocs */ - uint64_t jportfails; /* Number of failed portgroup allocs */ - uint64_t jreinjected; /* Number of packets reinjected to q */ - uint64_t jmaxlen; /* Max queue length reached */ - uint64_t jnomem; /* No memory to alloc queue item */ - - uint64_t screated; /* Number of states created */ - uint64_t sdeleted; /* Number of states deleted */ - uint64_t spgcreated; /* Number of portgroups created */ - uint64_t spgdeleted; /* Number of portgroups deleted */ - uint64_t hostcount; /* Number of hosts */ - uint64_t tcpchunks; /* Number of TCP chunks */ - uint64_t udpchunks; /* Number of UDP chunks */ - 
uint64_t icmpchunks; /* Number of ICMP chunks */ - - uint64_t _reserved[4]; -}; - -#define NAT64_LOG 0x0001 /* Enable logging via BPF */ - -typedef struct _ipfw_nat64stl_cfg { - char name[64]; /* NAT name */ - ipfw_obj_ntlv ntlv6; /* object name tlv */ - ipfw_obj_ntlv ntlv4; /* object name tlv */ - struct in6_addr prefix6; /* NAT64 prefix */ - uint8_t plen6; /* Prefix length */ - uint8_t set; /* Named instance set [0..31] */ - uint8_t spare[2]; - uint32_t flags; -} ipfw_nat64stl_cfg; - -/* - * NAT64LSN default configuration values - */ -#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */ -#define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */ -#define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. */ -#define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */ -#define NAT64LSN_TCP_FIN_AGE 180 /* State's TTL after FIN/RST received */ -#define NAT64LSN_UDP_AGE 120 /* TTL for UDP states */ -#define NAT64LSN_ICMP_AGE 60 /* TTL for ICMP states */ -#define NAT64LSN_HOST_AGE 3600 /* TTL for stale host entry */ -#define NAT64LSN_PG_AGE 900 /* TTL for stale ports groups */ - -typedef struct _ipfw_nat64lsn_cfg { - char name[64]; /* NAT name */ - uint32_t flags; - uint32_t max_ports; /* Max ports per client */ - uint32_t agg_prefix_len; /* Prefix length to count */ - uint32_t agg_prefix_max; /* Max hosts per agg prefix */ - struct in_addr prefix4; - uint16_t plen4; /* Prefix length */ - uint16_t plen6; /* Prefix length */ - struct in6_addr prefix6; /* NAT64 prefix */ - uint32_t jmaxlen; /* Max jobqueue length */ - uint16_t min_port; /* Min port group # to use */ - uint16_t max_port; /* Max port group # to use */ - uint16_t nh_delete_delay;/* Stale host delete delay */ - uint16_t pg_delete_delay;/* Stale portgroup delete delay */ - uint16_t st_syn_ttl; /* TCP syn expire */ - uint16_t st_close_ttl; /* TCP fin expire */ - uint16_t st_estab_ttl; /* TCP established expire */ - uint16_t st_udp_ttl; /* UDP expire */ - uint16_t 
st_icmp_ttl; /* ICMP expire */ - uint8_t set; /* Named instance set [0..31] */ - uint8_t spare; -} ipfw_nat64lsn_cfg; - -typedef struct _ipfw_nat64lsn_state { - struct in_addr daddr; /* Remote IPv4 address */ - uint16_t dport; /* Remote destination port */ - uint16_t aport; /* Local alias port */ - uint16_t sport; /* Source port */ - uint8_t flags; /* State flags */ - uint8_t spare[3]; - uint16_t idle; /* Last used time */ -} ipfw_nat64lsn_state; - -typedef struct _ipfw_nat64lsn_stg { - uint64_t next_idx; /* next state index */ - struct in_addr alias4; /* IPv4 alias address */ - uint8_t proto; /* protocol */ - uint8_t flags; - uint16_t spare; - struct in6_addr host6; /* Bound IPv6 host */ - uint32_t count; /* Number of states */ - uint32_t spare2; -} ipfw_nat64lsn_stg; - -#endif /* _NETINET6_IP_FW_NAT64_H_ */ - diff --git a/freebsd/sys/netinet6/ip_fw_nptv6.h b/freebsd/sys/netinet6/ip_fw_nptv6.h deleted file mode 100644 index e2357eff..00000000 --- a/freebsd/sys/netinet6/ip_fw_nptv6.h +++ /dev/null @@ -1,51 +0,0 @@ -/*- - * Copyright (c) 2016 Yandex LLC - * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _NETINET6_IP_FW_NPTV6_H_ -#define _NETINET6_IP_FW_NPTV6_H_ - -struct ipfw_nptv6_stats { - uint64_t in2ex; /* Int->Ext packets translated */ - uint64_t ex2in; /* Ext->Int packets translated */ - uint64_t dropped; /* dropped due to some errors */ - uint64_t reserved[5]; -}; - -typedef struct _ipfw_nptv6_cfg { - char name[64]; /* NPTv6 instance name */ - struct in6_addr internal; /* NPTv6 internal prefix */ - struct in6_addr external; /* NPTv6 external prefix */ - uint8_t plen; /* Prefix length */ - uint8_t set; /* Named instance set [0..31] */ - uint8_t spare[2]; - uint32_t flags; -} ipfw_nptv6_cfg; - -#endif /* _NETINET6_IP_FW_NPTV6_H_ */ - diff --git a/freebsd/sys/netpfil/ipfw/dn_aqm.h b/freebsd/sys/netpfil/ipfw/dn_aqm.h deleted file mode 100644 index d01e98eb..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_aqm.h +++ /dev/null @@ -1,167 +0,0 @@ -/*- - * Copyright (C) 2016 Centre for Advanced Internet Architectures, - * Swinburne University of Technology, Melbourne, Australia. - * Portions of this code were made possible in part by a gift from - * The Comcast Innovation Fund. - * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * API for writing an Active Queue Management algorithm for Dummynet - * - * $FreeBSD$ - */ - -#ifndef _IP_DN_AQM_H -#define _IP_DN_AQM_H - - -/* NOW is the current time in millisecond*/ -#define NOW ((dn_cfg.curr_time * tick) / 1000) - -#define AQM_UNOW (dn_cfg.curr_time * tick) -#define AQM_TIME_1US ((aqm_time_t)(1)) -#define AQM_TIME_1MS ((aqm_time_t)(1000)) -#define AQM_TIME_1S ((aqm_time_t)(AQM_TIME_1MS * 1000)) - -/* aqm time allows to store up to 4294 seconds */ -typedef uint32_t aqm_time_t; -typedef int32_t aqm_stime_t; - -#define DN_AQM_MTAG_TS 55345 - -/* Macro for variable bounding */ -#define BOUND_VAR(x,l,h) ((x) > (h)? (h) : ((x) > (l)? (x) : (l))) - -/* sysctl variable to count number of dropped packets */ -extern unsigned long io_pkt_drop; - -/* - * Structure for holding data and function pointers that together represent a - * AQM algorithm. 
- */ - struct dn_aqm { -#define DN_AQM_NAME_MAX 50 - char name[DN_AQM_NAME_MAX]; /* name of AQM algorithm */ - uint32_t type; /* AQM type number */ - - /* Methods implemented by AQM algorithm: - * - * enqueue enqueue packet 'm' on queue 'q'. - * Return 0 on success, 1 on drop. - * - * dequeue dequeue a packet from queue 'q'. - * Return a packet, NULL if no packet available. - * - * config configure AQM algorithm - * If required, this function should allocate space to store - * the configurations and set 'fs->aqmcfg' to point to this space. - * 'dn_extra_parms' includes array of parameters send - * from ipfw userland command. - * Return 0 on success, non-zero otherwise. - * - * deconfig deconfigure AQM algorithm. - * The allocated configuration memory space should be freed here. - * Return 0 on success, non-zero otherwise. - * - * init initialise AQM status variables of queue 'q' - * This function is used to allocate space and init AQM status for a - * queue and q->aqm_status to point to this space. - * Return 0 on success, non-zero otherwise. - * - * cleanup cleanup AQM status variables of queue 'q' - * The allocated memory space for AQM status should be freed here. - * Return 0 on success, non-zero otherwise. - * - * getconfig retrieve AQM configurations - * This function is used to return AQM parameters to userland - * command. The function should fill 'dn_extra_parms' struct with - * the AQM configurations using 'par' array. 
- * - */ - - int (*enqueue)(struct dn_queue *, struct mbuf *); - struct mbuf * (*dequeue)(struct dn_queue *); - int (*config)(struct dn_fsk *, struct dn_extra_parms *ep, int); - int (*deconfig)(struct dn_fsk *); - int (*init)(struct dn_queue *); - int (*cleanup)(struct dn_queue *); - int (*getconfig)(struct dn_fsk *, struct dn_extra_parms *); - - int ref_count; /*Number of queues instances in the system */ - int cfg_ref_count; /*Number of AQM instances in the system */ - SLIST_ENTRY (dn_aqm) next; /* Next AQM in the list */ -}; - -/* Helper function to update queue and scheduler statistics. - * negative len + drop -> drop - * negative len -> dequeue - * positive len -> enqueue - * positive len + drop -> drop during enqueue - */ -__inline static void -update_stats(struct dn_queue *q, int len, int drop) -{ - int inc = 0; - struct dn_flow *sni; - struct dn_flow *qni; - - sni = &q->_si->ni; - qni = &q->ni; - - if (len < 0) - inc = -1; - else if(len > 0) - inc = 1; - - if (drop) { - qni->drops++; - sni->drops++; - io_pkt_drop++; - } else { - /*update queue stats */ - qni->length += inc; - qni->len_bytes += len; - - /*update scheduler instance stats */ - sni->length += inc; - sni->len_bytes += len; - } - /* tot_pkts is updated in dn_enqueue function */ -} - - -/* kernel module related function */ -int -dn_aqm_modevent(module_t mod, int cmd, void *arg); - -#define DECLARE_DNAQM_MODULE(name, dnaqm) \ - static moduledata_t name##_mod = { \ - #name, dn_aqm_modevent, dnaqm \ - }; \ - DECLARE_MODULE(name, name##_mod, \ - SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \ - MODULE_DEPEND(name, dummynet, 3, 3, 3) - -#endif diff --git a/freebsd/sys/netpfil/ipfw/dn_aqm_codel.h b/freebsd/sys/netpfil/ipfw/dn_aqm_codel.h deleted file mode 100644 index f5618e76..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_aqm_codel.h +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Codel - The Controlled-Delay Active Queue Management algorithm. 
- * - * $FreeBSD$ - * - * Copyright (C) 2016 Centre for Advanced Internet Architectures, - * Swinburne University of Technology, Melbourne, Australia. - * Portions of this code were made possible in part by a gift from - * The Comcast Innovation Fund. - * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> - * - * Copyright (C) 2011-2014 Kathleen Nichols <nichols@pollere.com>. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * o Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * - * o Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * o The names of the authors may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * Alternatively, provided that this notice is retained in full, this - * software may be distributed under the terms of the GNU General Public - * License ("GPL") version 2, in which case the provisions of the GPL - * apply INSTEAD OF those given above. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _IP_DN_AQM_CODEL_H -#define _IP_DN_AQM_CODEL_H - - -// XXX How to choose MTAG? -#define FIX_POINT_BITS 16 - -enum { - CODEL_ECN_ENABLED = 1 -}; - -/* Codel parameters */ -struct dn_aqm_codel_parms { - aqm_time_t target; - aqm_time_t interval; - uint32_t flags; -}; - -/* codel status variables */ -struct codel_status { - uint32_t count; /* number of dropped pkts since entering drop state */ - uint16_t dropping; /* dropping state */ - aqm_time_t drop_next_time; /* time for next drop */ - aqm_time_t first_above_time; /* time for first ts over target we observed */ - uint16_t isqrt; /* last isqrt for control low */ - uint16_t maxpkt_size; /* max packet size seen so far */ -}; - -struct mbuf *codel_extract_head(struct dn_queue *, aqm_time_t *); -aqm_time_t control_law(struct codel_status *, - struct dn_aqm_codel_parms *, aqm_time_t ); - -__inline static struct mbuf * -codel_dodequeue(struct dn_queue *q, aqm_time_t now, uint16_t *ok_to_drop) -{ - struct mbuf * m; - struct dn_aqm_codel_parms *cprms; - struct codel_status *cst; - aqm_time_t pkt_ts, sojourn_time; - - *ok_to_drop = 0; - m = codel_extract_head(q, &pkt_ts); - - cst = q->aqm_status; - - if (m == NULL) { - /* queue is empty - we can't be above target */ - cst->first_above_time= 0; - return m; - } - - cprms = q->fs->aqmcfg; - - /* To span a large range of bandwidths, CoDel runs two - * different AQMs in parallel. 
One is sojourn-time-based - * and takes effect when the time to send an MTU-sized - * packet is less than target. The 1st term of the "if" - * below does this. The other is backlog-based and takes - * effect when the time to send an MTU-sized packet is >= - * target. The goal here is to keep the output link - * utilization high by never allowing the queue to get - * smaller than the amount that arrives in a typical - * interarrival time (MTU-sized packets arriving spaced - * by the amount of time it takes to send such a packet on - * the bottleneck). The 2nd term of the "if" does this. - */ - sojourn_time = now - pkt_ts; - if (sojourn_time < cprms->target || q->ni.len_bytes <= cst->maxpkt_size) { - /* went below - stay below for at least interval */ - cst->first_above_time = 0; - } else { - if (cst->first_above_time == 0) { - /* just went above from below. if still above at - * first_above_time, will say it's ok to drop. */ - cst->first_above_time = now + cprms->interval; - } else if (now >= cst->first_above_time) { - *ok_to_drop = 1; - } - } - return m; -} - -/* - * Dequeue a packet from queue 'q' - */ -__inline static struct mbuf * -codel_dequeue(struct dn_queue *q) -{ - struct mbuf *m; - struct dn_aqm_codel_parms *cprms; - struct codel_status *cst; - aqm_time_t now; - uint16_t ok_to_drop; - - cst = q->aqm_status;; - cprms = q->fs->aqmcfg; - now = AQM_UNOW; - - m = codel_dodequeue(q, now, &ok_to_drop); - if (cst->dropping) { - if (!ok_to_drop) { - /* sojourn time below target - leave dropping state */ - cst->dropping = false; - } - /* - * Time for the next drop. Drop current packet and dequeue - * next. If the dequeue doesn't take us out of dropping - * state, schedule the next drop. A large backlog might - * result in drop rates so high that the next drop should - * happen now, hence the 'while' loop. 
- */ - while (now >= cst->drop_next_time && cst->dropping) { - - /* mark the packet */ - if (cprms->flags & CODEL_ECN_ENABLED && ecn_mark(m)) { - cst->count++; - /* schedule the next mark. */ - cst->drop_next_time = control_law(cst, cprms, - cst->drop_next_time); - return m; - } - - /* drop the packet */ - update_stats(q, 0, 1); - FREE_PKT(m); - m = codel_dodequeue(q, now, &ok_to_drop); - - if (!ok_to_drop) { - /* leave dropping state */ - cst->dropping = false; - } else { - cst->count++; - /* schedule the next drop. */ - cst->drop_next_time = control_law(cst, cprms, - cst->drop_next_time); - } - } - /* If we get here we're not in dropping state. The 'ok_to_drop' - * return from dodequeue means that the sojourn time has been - * above 'target' for 'interval' so enter dropping state. - */ - } else if (ok_to_drop) { - - /* if ECN option is disabled or the packet cannot be marked, - * drop the packet and extract another. - */ - if (!(cprms->flags & CODEL_ECN_ENABLED) || !ecn_mark(m)) { - update_stats(q, 0, 1); - FREE_PKT(m); - m = codel_dodequeue(q, now, &ok_to_drop); - } - - cst->dropping = true; - - /* If min went above target close to when it last went - * below, assume that the drop rate that controlled the - * queue on the last cycle is a good starting point to - * control it now. ('drop_next' will be at most 'interval' - * later than the time of the last drop so 'now - drop_next' - * is a good approximation of the time from the last drop - * until now.) - */ - cst->count = (cst->count > 2 && ((aqm_stime_t)now - - (aqm_stime_t)cst->drop_next_time) < 8* cprms->interval)? 
- cst->count - 2 : 1; - /* we don't have to set initial guess for Newton's method isqrt as - * we initilaize isqrt in control_law function when count == 1 */ - cst->drop_next_time = control_law(cst, cprms, now); - } - - return m; -} - -#endif diff --git a/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h b/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h deleted file mode 100644 index 7512d327..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_aqm_pie.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * PIE - Proportional Integral controller Enhanced AQM algorithm. - * - * $FreeBSD$ - * - * Copyright (C) 2016 Centre for Advanced Internet Architectures, - * Swinburne University of Technology, Melbourne, Australia. - * Portions of this code were made possible in part by a gift from - * The Comcast Innovation Fund. - * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef _IP_DN_AQM_PIE_H -#define _IP_DN_AQM_PIE_H - -#define DN_AQM_PIE 2 -#define PIE_DQ_THRESHOLD_BITS 14 -/* 2^14 =16KB */ -#define PIE_DQ_THRESHOLD (1L << PIE_DQ_THRESHOLD_BITS) -#define MEAN_PKTSIZE 800 - -/* 31-bits because random() generates range from 0->(2**31)-1 */ -#define PIE_PROB_BITS 31 -#define PIE_MAX_PROB ((1LL<<PIE_PROB_BITS) -1) - -/* for 16-bits, we have 3-bits for integer part and 13-bits for fraction */ -#define PIE_FIX_POINT_BITS 13 -#define PIE_SCALE (1L<<PIE_FIX_POINT_BITS) - - -/* PIE options */ -enum { - PIE_ECN_ENABLED =1, - PIE_CAPDROP_ENABLED = 2, - PIE_ON_OFF_MODE_ENABLED = 4, - PIE_DEPRATEEST_ENABLED = 8, - PIE_DERAND_ENABLED = 16 -}; - -/* PIE parameters */ -struct dn_aqm_pie_parms { - aqm_time_t qdelay_ref; /* AQM Latency Target (default: 15ms) */ - aqm_time_t tupdate; /* a period to calculate drop probability (default:15ms) */ - aqm_time_t max_burst; /* AQM Max Burst Allowance (default: 150ms) */ - uint16_t max_ecnth; /*AQM Max ECN Marking Threshold (default: 10%) */ - uint16_t alpha; /* (default: 1/8) */ - uint16_t beta; /* (default: 1+1/4) */ - uint32_t flags; /* PIE options */ -}; - -/* PIE status variables */ -struct pie_status{ - struct callout aqm_pie_callout; - aqm_time_t burst_allowance; - uint32_t drop_prob; - aqm_time_t current_qdelay; - aqm_time_t qdelay_old; - uint64_t accu_prob; - aqm_time_t measurement_start; - aqm_time_t avg_dq_time; - uint32_t dq_count; - uint32_t sflags; - struct 
dn_aqm_pie_parms *parms; /* pointer to PIE configurations */ - /* pointer to parent queue of FQ-PIE sub-queues, or queue of owner fs. */ - struct dn_queue *pq; - struct mtx lock_mtx; - uint32_t one_third_q_size; /* 1/3 of queue size, for speed optization */ -}; - -enum { - ENQUE = 1, - DROP, - MARKECN -}; - -/* PIE current state */ -enum { - PIE_ACTIVE = 1, - PIE_INMEASUREMENT = 2 -}; - -/* - * Check if eneque should drop packet to control delay or not based on - * PIe algorithm. - * return DROP if it is time to drop or ENQUE otherwise. - * This function is used by PIE and FQ-PIE. - */ -__inline static int -drop_early(struct pie_status *pst, uint32_t qlen) -{ - struct dn_aqm_pie_parms *pprms; - - pprms = pst->parms; - - /* queue is not congested */ - - if ((pst->qdelay_old < (pprms->qdelay_ref >> 1) - && pst->drop_prob < PIE_MAX_PROB / 5 ) - || qlen <= 2 * MEAN_PKTSIZE) - return ENQUE; - - - if (pst->drop_prob == 0) - pst->accu_prob = 0; - - /* increment accu_prob */ - if (pprms->flags & PIE_DERAND_ENABLED) - pst->accu_prob += pst->drop_prob; - - /* De-randomize option - * if accu_prob < 0.85 -> enqueue - * if accu_prob>8.5 ->drop - * between 0.85 and 8.5 || !De-randomize --> drop on prob - * - * (0.85 = 17/20 ,8.5 = 17/2) - */ - if (pprms->flags & PIE_DERAND_ENABLED) { - if(pst->accu_prob < (uint64_t) (PIE_MAX_PROB * 17 / 20)) - return ENQUE; - if( pst->accu_prob >= (uint64_t) (PIE_MAX_PROB * 17 / 2)) - return DROP; - } - - if (random() < pst->drop_prob) { - pst->accu_prob = 0; - return DROP; - } - - return ENQUE; -} - -#endif diff --git a/freebsd/sys/netpfil/ipfw/dn_heap.h b/freebsd/sys/netpfil/ipfw/dn_heap.h deleted file mode 100644 index bbc11c14..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_heap.h +++ /dev/null @@ -1,191 +0,0 @@ -/*- - * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - 
* are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * Binary heap and hash tables, header file - * - * $FreeBSD$ - */ - -#ifndef _IP_DN_HEAP_H -#define _IP_DN_HEAP_H - -#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) -#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) - -/* - * This module implements a binary heap supporting random extraction. - * - * A heap entry contains an uint64_t key and a pointer to object. - * DN_KEY_LT(a,b) returns true if key 'a' is smaller than 'b' - * - * The heap is a struct dn_heap plus a dynamically allocated - * array of dn_heap_entry entries. 'size' represents the size of - * the array, 'elements' count entries in use. The topmost - * element has the smallest key. - * The heap supports ordered insert, and extract from the top. 
- * To extract an object from the middle of the heap, we the object - * must reserve an 'int32_t' to store the position of the object - * in the heap itself, and the location of this field must be - * passed as an argument to heap_init() -- use -1 if the feature - * is not used. - */ -struct dn_heap_entry { - uint64_t key; /* sorting key, smallest comes first */ - void *object; /* object pointer */ -}; - -struct dn_heap { - int size; /* the size of the array */ - int elements; /* elements in use */ - int ofs; /* offset in the object of heap index */ - struct dn_heap_entry *p; /* array of "size" entries */ -}; - -enum { - HEAP_SCAN_DEL = 1, - HEAP_SCAN_END = 2, -}; - -/* - * heap_init() reinitializes the heap setting the size and the offset - * of the index for random extraction (use -1 if not used). - * The 'elements' counter is set to 0. - * - * SET_HEAP_OFS() indicates where, in the object, is stored the index - * for random extractions from the heap. - * - * heap_free() frees the memory associated to a heap. - * - * heap_insert() adds a key-pointer pair to the heap - * - * HEAP_TOP() returns a pointer to the top element of the heap, - * but makes no checks on its existence (XXX should we change ?) - * - * heap_extract() removes the entry at the top, returning the pointer. - * (the key should have been read before). - * - * heap_scan() invokes a callback on each entry of the heap. - * The callback can return a combination of HEAP_SCAN_DEL and - * HEAP_SCAN_END. HEAP_SCAN_DEL means the current element must - * be removed, and HEAP_SCAN_END means to terminate the scan. - * heap_scan() returns the number of elements removed. - * Because the order is not guaranteed, we should use heap_scan() - * only as a last resort mechanism. 
- */ -#define HEAP_TOP(h) ((h)->p) -#define SET_HEAP_OFS(h, n) do { (h)->ofs = n; } while (0) -int heap_init(struct dn_heap *h, int size, int ofs); -int heap_insert(struct dn_heap *h, uint64_t key1, void *p); -void heap_extract(struct dn_heap *h, void *obj); -void heap_free(struct dn_heap *h); -int heap_scan(struct dn_heap *, int (*)(void *, uintptr_t), uintptr_t); - -/*------------------------------------------------------ - * This module implements a generic hash table with support for - * running callbacks on the entire table. To avoid allocating - * memory during hash table operations, objects must reserve - * space for a link field. XXX if the heap is moderately full, - * an SLIST suffices, and we can tolerate the cost of a hash - * computation on each removal. - * - * dn_ht_init() initializes the table, setting the number of - * buckets, the offset of the link field, the main callbacks. - * Callbacks are: - * - * hash(key, flags, arg) called to return a bucket index. - * match(obj, key, flags, arg) called to determine if key - * matches the current 'obj' in the heap - * newh(key, flags, arg) optional, used to allocate a new - * object during insertions. - * - * dn_ht_free() frees the heap or unlink elements. - * DNHT_REMOVE unlink elements, 0 frees the heap. - * You need two calls to do both. - * - * dn_ht_find() is the main lookup function, which can also be - * used to insert or delete elements in the hash table. - * The final 'arg' is passed to all callbacks. - * - * dn_ht_scan() is used to invoke a callback on all entries of - * the heap, or possibly on just one bucket. The callback - * is invoked with a pointer to the object, and must return - * one of DNHT_SCAN_DEL or DNHT_SCAN_END to request the - * removal of the object from the heap and the end of the - * scan, respectively. - * - * dn_ht_scan_bucket() is similar to dn_ht_scan(), except that it scans - * only the specific bucket of the table. 
The bucket is a in-out - * parameter and return a valid bucket number if the original - * is invalid. - * - * A combination of flags can be used to modify the operation - * of the dn_ht_find(), and of the callbacks: - * - * DNHT_KEY_IS_OBJ means the key is the object pointer. - * It is usually of interest for the hash and match functions. - * - * DNHT_MATCH_PTR during a lookup, match pointers instead - * of calling match(). Normally used when removing specific - * entries. Does not imply KEY_IS_OBJ as the latter _is_ used - * by the match function. - * - * DNHT_INSERT insert the element if not found. - * Calls new() to allocates a new object unless - * DNHT_KEY_IS_OBJ is set. - * - * DNHT_UNIQUE only insert if object not found. - * XXX should it imply DNHT_INSERT ? - * - * DNHT_REMOVE remove objects if we find them. - */ -struct dn_ht; /* should be opaque */ - -struct dn_ht *dn_ht_init(struct dn_ht *, int buckets, int ofs, - uint32_t (*hash)(uintptr_t, int, void *), - int (*match)(void *, uintptr_t, int, void *), - void *(*newh)(uintptr_t, int, void *)); -void dn_ht_free(struct dn_ht *, int flags); - -void *dn_ht_find(struct dn_ht *, uintptr_t, int, void *); -int dn_ht_scan(struct dn_ht *, int (*)(void *, void *), void *); -int dn_ht_scan_bucket(struct dn_ht *, int * , int (*)(void *, void *), void *); -int dn_ht_entries(struct dn_ht *); - -enum { /* flags values. 
- * first two are returned by the scan callback to indicate - * to delete the matching element or to end the scan - */ - DNHT_SCAN_DEL = 0x0001, - DNHT_SCAN_END = 0x0002, - DNHT_KEY_IS_OBJ = 0x0004, /* key is the obj pointer */ - DNHT_MATCH_PTR = 0x0008, /* match by pointer, not match() */ - DNHT_INSERT = 0x0010, /* insert if not found */ - DNHT_UNIQUE = 0x0020, /* report error if already there */ - DNHT_REMOVE = 0x0040, /* remove on find or dn_ht_free */ -}; - -#endif /* _IP_DN_HEAP_H */ diff --git a/freebsd/sys/netpfil/ipfw/dn_sched.h b/freebsd/sys/netpfil/ipfw/dn_sched.h deleted file mode 100644 index ab32771b..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_sched.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2010 Riccardo Panicucci, Luigi Rizzo, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * The API to write a packet scheduling algorithm for dummynet. - * - * $FreeBSD$ - */ - -#ifndef _DN_SCHED_H -#define _DN_SCHED_H - -#define DN_MULTIQUEUE 0x01 -/* - * Descriptor for a scheduling algorithm. - * Contains all function pointers for a given scheduler - * This is typically created when a module is loaded, and stored - * in a global list of schedulers. - */ -struct dn_alg { - uint32_t type; /* the scheduler type */ - const char *name; /* scheduler name */ - uint32_t flags; /* DN_MULTIQUEUE if supports multiple queues */ - - /* - * The following define the size of 3 optional data structures - * that may need to be allocated at runtime, and are appended - * to each of the base data structures: scheduler, sched.inst, - * and queue. We don't have a per-flowset structure. - */ - /* + parameters attached to the template, e.g. - * default queue sizes, weights, quantum size, and so on; - */ - size_t schk_datalen; - - /* + per-instance parameters, such as timestamps, - * containers for queues, etc; - */ - size_t si_datalen; - - size_t q_datalen; /* per-queue parameters (e.g. S,F) */ - - /* - * Methods implemented by the scheduler: - * enqueue enqueue packet 'm' on scheduler 's', queue 'q'. - * q is NULL for !MULTIQUEUE. - * Return 0 on success, 1 on drop (packet consumed anyways). 
- * Note that q should be interpreted only as a hint - * on the flow that the mbuf belongs to: while a - * scheduler will normally enqueue m into q, it is ok - * to leave q alone and put the mbuf elsewhere. - * This function is called in two cases: - * - when a new packet arrives to the scheduler; - * - when a scheduler is reconfigured. In this case the - * call is issued by the new_queue callback, with a - * non empty queue (q) and m pointing to the first - * mbuf in the queue. For this reason, the function - * should internally check for (m != q->mq.head) - * before calling dn_enqueue(). - * - * dequeue Called when scheduler instance 's' can - * dequeue a packet. Return NULL if none are available. - * XXX what about non work-conserving ? - * - * config called on 'sched X config ...', normally writes - * in the area of size sch_arg - * - * destroy called on 'sched delete', frees everything - * in sch_arg (other parts are handled by more specific - * functions) - * - * new_sched called when a new instance is created, e.g. - * to create the local queue for !MULTIQUEUE, set V or - * copy parameters for WFQ, and so on. - * - * free_sched called when deleting an instance, cleans - * extra data in the per-instance area. - * - * new_fsk called when a flowset is linked to a scheduler, - * e.g. to validate parameters such as weights etc. - * free_fsk when a flowset is unlinked from a scheduler. - * (probably unnecessary) - * - * new_queue called to set the per-queue parameters, - * e.g. S and F, adjust sum of weights in the parent, etc. - * - * The new_queue callback is normally called from when - * creating a new queue. In some cases (such as a - * scheduler change or reconfiguration) it can be called - * with a non empty queue. In this case, the queue - * In case of non empty queue, the new_queue callback could - * need to call the enqueue function. In this case, - * the callback should eventually call enqueue() passing - * as m the first element in the queue. 
- * - * free_queue actions related to a queue removal, e.g. undo - * all the above. If the queue has data in it, also remove - * from the scheduler. This can e.g. happen during a reconfigure. - */ - int (*enqueue)(struct dn_sch_inst *, struct dn_queue *, - struct mbuf *); - struct mbuf * (*dequeue)(struct dn_sch_inst *); - - int (*config)(struct dn_schk *); - int (*destroy)(struct dn_schk*); - int (*new_sched)(struct dn_sch_inst *); - int (*free_sched)(struct dn_sch_inst *); - int (*new_fsk)(struct dn_fsk *f); - int (*free_fsk)(struct dn_fsk *f); - int (*new_queue)(struct dn_queue *q); - int (*free_queue)(struct dn_queue *q); -#ifdef NEW_AQM - /* Getting scheduler extra parameters */ - int (*getconfig)(struct dn_schk *, struct dn_extra_parms *); -#endif - - /* run-time fields */ - int ref_count; /* XXX number of instances in the system */ - SLIST_ENTRY(dn_alg) next; /* Next scheduler in the list */ -}; - -/* MSVC does not support initializers so we need this ugly macro */ -#ifdef _WIN32 -#define _SI(fld) -#else -#define _SI(fld) fld -#endif - -/* - * Additionally, dummynet exports some functions and macros - * to be used by schedulers: - */ - -void dn_free_pkts(struct mbuf *mnext); -int dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop); -/* bound a variable between min and max */ -int ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg); - -/* - * Extract the head of a queue, update stats. Must be the very last - * thing done on a dequeue as the queue itself may go away. 
- */ -static __inline struct mbuf* -dn_dequeue(struct dn_queue *q) -{ - struct mbuf *m = q->mq.head; - if (m == NULL) - return NULL; -#ifdef NEW_AQM - /* Call AQM dequeue function */ - if (q->fs->aqmfp && q->fs->aqmfp->dequeue ) - return q->fs->aqmfp->dequeue(q); -#endif - q->mq.head = m->m_nextpkt; - q->mq.count--; - - /* Update stats for the queue */ - q->ni.length--; - q->ni.len_bytes -= m->m_pkthdr.len; - if (q->_si) { - q->_si->ni.length--; - q->_si->ni.len_bytes -= m->m_pkthdr.len; - } - if (q->ni.length == 0) /* queue is now idle */ - q->q_time = dn_cfg.curr_time; - return m; -} - -int dn_sched_modevent(module_t mod, int cmd, void *arg); - -#define DECLARE_DNSCHED_MODULE(name, dnsched) \ - static moduledata_t name##_mod = { \ - #name, dn_sched_modevent, dnsched \ - }; \ - DECLARE_MODULE(name, name##_mod, \ - SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); \ - MODULE_DEPEND(name, dummynet, 3, 3, 3) -#endif /* _DN_SCHED_H */ diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.h b/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.h deleted file mode 100644 index 4b65781e..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel.h +++ /dev/null @@ -1,167 +0,0 @@ -/*- - * Copyright (C) 2016 Centre for Advanced Internet Architectures, - * Swinburne University of Technology, Melbourne, Australia. - * Portions of this code were made possible in part by a gift from - * The Comcast Innovation Fund. - * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * FQ_Codel Structures and helper functions - * - * $FreeBSD$ - */ - -#ifndef _IP_DN_SCHED_FQ_CODEL_H -#define _IP_DN_SCHED_FQ_CODEL_H - -/* list of queues */ -STAILQ_HEAD(fq_codel_list, fq_codel_flow) ; - -/* fq_codel parameters including codel */ -struct dn_sch_fq_codel_parms { - struct dn_aqm_codel_parms ccfg; /* CoDel Parameters */ - /* FQ_CODEL Parameters */ - uint32_t flows_cnt; /* number of flows */ - uint32_t limit; /* hard limit of fq_codel queue size*/ - uint32_t quantum; -}; /* defaults */ - -/* flow (sub-queue) stats */ -struct flow_stats { - uint64_t tot_pkts; /* statistics counters */ - uint64_t tot_bytes; - uint32_t length; /* Queue length, in packets */ - uint32_t len_bytes; /* Queue length, in bytes */ - uint32_t drops; -}; - -/* A flow of packets (sub-queue).*/ -struct fq_codel_flow { - struct mq mq; /* list of packets */ - struct flow_stats stats; /* statistics */ - int deficit; - int active; /* 1: flow is active (in a list) */ - struct codel_status cst; - STAILQ_ENTRY(fq_codel_flow) flowchain; -}; - -/* extra fq_codel scheduler configurations */ -struct fq_codel_schk { - struct dn_sch_fq_codel_parms cfg; -}; - -/* fq_codel scheduler instance */ -struct fq_codel_si 
{ - struct dn_sch_inst _si; /* standard scheduler instance */ - struct dn_queue main_q; /* main queue is after si directly */ - - struct fq_codel_flow *flows; /* array of flows (queues) */ - uint32_t perturbation; /* random value */ - struct fq_codel_list newflows; /* list of new queues */ - struct fq_codel_list oldflows; /* list of old queues */ -}; - -/* Helper function to update queue&main-queue and scheduler statistics. - * negative len + drop -> drop - * negative len -> dequeue - * positive len -> enqueue - * positive len + drop -> drop during enqueue - */ -__inline static void -fq_update_stats(struct fq_codel_flow *q, struct fq_codel_si *si, int len, - int drop) -{ - int inc = 0; - - if (len < 0) - inc = -1; - else if (len > 0) - inc = 1; - - if (drop) { - si->main_q.ni.drops ++; - q->stats.drops ++; - si->_si.ni.drops ++; - io_pkt_drop ++; - } - - if (!drop || (drop && len < 0)) { - /* Update stats for the main queue */ - si->main_q.ni.length += inc; - si->main_q.ni.len_bytes += len; - - /*update sub-queue stats */ - q->stats.length += inc; - q->stats.len_bytes += len; - - /*update scheduler instance stats */ - si->_si.ni.length += inc; - si->_si.ni.len_bytes += len; - } - - if (inc > 0) { - si->main_q.ni.tot_bytes += len; - si->main_q.ni.tot_pkts ++; - - q->stats.tot_bytes +=len; - q->stats.tot_pkts++; - - si->_si.ni.tot_bytes +=len; - si->_si.ni.tot_pkts ++; - } - -} - -/* extract the head of fq_codel sub-queue */ -__inline static struct mbuf * -fq_codel_extract_head(struct fq_codel_flow *q, aqm_time_t *pkt_ts, struct fq_codel_si *si) -{ - struct mbuf *m = q->mq.head; - - if (m == NULL) - return m; - q->mq.head = m->m_nextpkt; - - fq_update_stats(q, si, -m->m_pkthdr.len, 0); - - if (si->main_q.ni.length == 0) /* queue is now idle */ - si->main_q.q_time = dn_cfg.curr_time; - - /* extract packet timestamp*/ - struct m_tag *mtag; - mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL); - if (mtag == NULL){ - D("timestamp tag is not found!"); - *pkt_ts 
= 0; - } else { - *pkt_ts = *(aqm_time_t *)(mtag + 1); - m_tag_delete(m,mtag); - } - - return m; -} - - -#endif diff --git a/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h b/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h deleted file mode 100644 index da663dc8..00000000 --- a/freebsd/sys/netpfil/ipfw/dn_sched_fq_codel_helper.h +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Codel - The Controlled-Delay Active Queue Management algorithm. - * - * $FreeBSD$ - * - * Copyright (C) 2016 Centre for Advanced Internet Architectures, - * Swinburne University of Technology, Melbourne, Australia. - * Portions of this code were made possible in part by a gift from - * The Comcast Innovation Fund. - * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> - * - * Copyright (C) 2011-2014 Kathleen Nichols <nichols@pollere.com>. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * o Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification. - * - * o Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * o The names of the authors may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * Alternatively, provided that this notice is retained in full, this - * software may be distributed under the terms of the GNU General Public - * License ("GPL") version 2, in which case the provisions of the GPL - * apply INSTEAD OF those given above. 
- - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _IP_DN_SCHED_FQ_CODEL_HELPER_H -#define _IP_DN_SCHED_FQ_CODEL_HELPER_H - -__inline static struct mbuf * -fqc_dodequeue(struct fq_codel_flow *q, aqm_time_t now, uint16_t *ok_to_drop, - struct fq_codel_si *si) -{ - struct mbuf * m; - struct fq_codel_schk *schk = (struct fq_codel_schk *)(si->_si.sched+1); - aqm_time_t pkt_ts, sojourn_time; - - *ok_to_drop = 0; - m = fq_codel_extract_head(q, &pkt_ts, si); - - if (m == NULL) { - /*queue is empty - we can't be above target*/ - q->cst.first_above_time= 0; - return m; - } - - /* To span a large range of bandwidths, CoDel runs two - * different AQMs in parallel. One is sojourn-time-based - * and takes effect when the time to send an MTU-sized - * packet is less than target. The 1st term of the "if" - * below does this. The other is backlog-based and takes - * effect when the time to send an MTU-sized packet is >= - * target. The goal here is to keep the output link - * utilization high by never allowing the queue to get - * smaller than the amount that arrives in a typical - * interarrival time (MTU-sized packets arriving spaced - * by the amount of time it takes to send such a packet on - * the bottleneck). 
The 2nd term of the "if" does this. - */ - sojourn_time = now - pkt_ts; - if (sojourn_time < schk->cfg.ccfg.target || q->stats.len_bytes <= q->cst.maxpkt_size) { - /* went below - stay below for at least interval */ - q->cst.first_above_time = 0; - } else { - if (q->cst.first_above_time == 0) { - /* just went above from below. if still above at - * first_above_time, will say it's ok to drop. */ - q->cst.first_above_time = now + schk->cfg.ccfg.interval; - } else if (now >= q->cst.first_above_time) { - *ok_to_drop = 1; - } - } - return m; -} - -/* Codel dequeue function */ -__inline static struct mbuf * -fqc_codel_dequeue(struct fq_codel_flow *q, struct fq_codel_si *si) -{ - struct mbuf *m; - struct dn_aqm_codel_parms *cprms; - struct codel_status *cst; - aqm_time_t now; - uint16_t ok_to_drop; - struct fq_codel_schk *schk = (struct fq_codel_schk *)(si->_si.sched+1); - - cst = &q->cst; - cprms = &schk->cfg.ccfg; - - now = AQM_UNOW; - m = fqc_dodequeue(q, now, &ok_to_drop, si); - - if (cst->dropping) { - if (!ok_to_drop) { - /* sojourn time below target - leave dropping state */ - cst->dropping = false; - } - - /* Time for the next drop. Drop current packet and dequeue - * next. If the dequeue doesn't take us out of dropping - * state, schedule the next drop. A large backlog might - * result in drop rates so high that the next drop should - * happen now, hence the 'while' loop. - */ - while (now >= cst->drop_next_time && cst->dropping) { - - /* mark the packet */ - if (cprms->flags & CODEL_ECN_ENABLED && ecn_mark(m)) { - cst->count++; - /* schedule the next mark. */ - cst->drop_next_time = control_law(cst, cprms, cst->drop_next_time); - return m; - } - - /* drop the packet */ - fq_update_stats(q, si, 0, 1); - m_freem(m); - m = fqc_dodequeue(q, now, &ok_to_drop, si); - - if (!ok_to_drop) { - /* leave dropping state */ - cst->dropping = false; - } else { - cst->count++; - /* schedule the next drop. 
*/ - cst->drop_next_time = control_law(cst, cprms, cst->drop_next_time); - } - } - /* If we get here we're not in dropping state. The 'ok_to_drop' - * return from dodequeue means that the sojourn time has been - * above 'target' for 'interval' so enter dropping state. - */ - } else if (ok_to_drop) { - - /* if ECN option is disabled or the packet cannot be marked, - * drop the packet and extract another. - */ - if (!(cprms->flags & CODEL_ECN_ENABLED) || !ecn_mark(m)) { - fq_update_stats(q, si, 0, 1); - m_freem(m); - m = fqc_dodequeue(q, now, &ok_to_drop,si); - } - - cst->dropping = true; - - /* If min went above target close to when it last went - * below, assume that the drop rate that controlled the - * queue on the last cycle is a good starting point to - * control it now. ('drop_next' will be at most 'interval' - * later than the time of the last drop so 'now - drop_next' - * is a good approximation of the time from the last drop - * until now.) - */ - cst->count = (cst->count > 2 && ((aqm_stime_t)now - - (aqm_stime_t)cst->drop_next_time) < 8* cprms->interval)? cst->count - 2 : 1; - - /* we don't have to set initial guess for Newton's method isqrt as - * we initilaize isqrt in control_law function when count == 1 */ - cst->drop_next_time = control_law(cst, cprms, now); - } - - return m; -} - -#endif diff --git a/freebsd/sys/netpfil/ipfw/ip_dn_private.h b/freebsd/sys/netpfil/ipfw/ip_dn_private.h deleted file mode 100644 index 2fce1366..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_dn_private.h +++ /dev/null @@ -1,463 +0,0 @@ -/*- - * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * internal dummynet APIs. - * - * $FreeBSD$ - */ - -#ifndef _IP_DN_PRIVATE_H -#define _IP_DN_PRIVATE_H - -/* debugging support - * use ND() to remove debugging, D() to print a line, - * DX(level, ...) to print above a certain level - * If you redefine D() you are expected to redefine all. - */ -#ifndef D -#define ND(fmt, ...) do {} while (0) -#define D1(fmt, ...) do {} while (0) -#define D(fmt, ...) printf("%-10s " fmt "\n", \ - __FUNCTION__, ## __VA_ARGS__) -#define DX(lev, fmt, ...) 
do { \ - if (dn_cfg.debug > lev) D(fmt, ## __VA_ARGS__); } while (0) -#endif - -MALLOC_DECLARE(M_DUMMYNET); - -#ifndef __linux__ -#define div64(a, b) ((int64_t)(a) / (int64_t)(b)) -#endif - -#define DN_LOCK_INIT() do { \ - mtx_init(&dn_cfg.uh_mtx, "dn_uh", NULL, MTX_DEF); \ - mtx_init(&dn_cfg.bh_mtx, "dn_bh", NULL, MTX_DEF); \ - } while (0) -#define DN_LOCK_DESTROY() do { \ - mtx_destroy(&dn_cfg.uh_mtx); \ - mtx_destroy(&dn_cfg.bh_mtx); \ - } while (0) -#if 0 /* not used yet */ -#define DN_UH_RLOCK() mtx_lock(&dn_cfg.uh_mtx) -#define DN_UH_RUNLOCK() mtx_unlock(&dn_cfg.uh_mtx) -#define DN_UH_WLOCK() mtx_lock(&dn_cfg.uh_mtx) -#define DN_UH_WUNLOCK() mtx_unlock(&dn_cfg.uh_mtx) -#define DN_UH_LOCK_ASSERT() mtx_assert(&dn_cfg.uh_mtx, MA_OWNED) -#endif - -#define DN_BH_RLOCK() mtx_lock(&dn_cfg.uh_mtx) -#define DN_BH_RUNLOCK() mtx_unlock(&dn_cfg.uh_mtx) -#define DN_BH_WLOCK() mtx_lock(&dn_cfg.uh_mtx) -#define DN_BH_WUNLOCK() mtx_unlock(&dn_cfg.uh_mtx) -#define DN_BH_LOCK_ASSERT() mtx_assert(&dn_cfg.uh_mtx, MA_OWNED) - -SLIST_HEAD(dn_schk_head, dn_schk); -SLIST_HEAD(dn_sch_inst_head, dn_sch_inst); -SLIST_HEAD(dn_fsk_head, dn_fsk); -SLIST_HEAD(dn_queue_head, dn_queue); -SLIST_HEAD(dn_alg_head, dn_alg); - -#ifdef NEW_AQM -SLIST_HEAD(dn_aqm_head, dn_aqm); /* for new AQMs */ -#endif - -struct mq { /* a basic queue of packets*/ - struct mbuf *head, *tail; - int count; -}; - -static inline void -set_oid(struct dn_id *o, int type, int len) -{ - o->type = type; - o->len = len; - o->subtype = 0; -} - -/* - * configuration and global data for a dummynet instance - * - * When a configuration is modified from userland, 'id' is incremented - * so we can use the value to check for stale pointers. 
- */ -struct dn_parms { - uint32_t id; /* configuration version */ - - /* defaults (sysctl-accessible) */ - int red_lookup_depth; - int red_avg_pkt_size; - int red_max_pkt_size; - int hash_size; - int max_hash_size; - long byte_limit; /* max queue sizes */ - long slot_limit; - - int io_fast; - int debug; - - /* timekeeping */ - struct timeval prev_t; /* last time dummynet_tick ran */ - struct dn_heap evheap; /* scheduled events */ - - /* counters of objects -- used for reporting space */ - int schk_count; - int si_count; - int fsk_count; - int queue_count; - - /* ticks and other stuff */ - uint64_t curr_time; - /* flowsets and schedulers are in hash tables, with 'hash_size' - * buckets. fshash is looked up at every packet arrival - * so better be generous if we expect many entries. - */ - struct dn_ht *fshash; - struct dn_ht *schedhash; - /* list of flowsets without a scheduler -- use sch_chain */ - struct dn_fsk_head fsu; /* list of unlinked flowsets */ - struct dn_alg_head schedlist; /* list of algorithms */ -#ifdef NEW_AQM - struct dn_aqm_head aqmlist; /* list of AQMs */ -#endif - - /* Store the fs/sch to scan when draining. The value is the - * bucket number of the hash table. Expire can be disabled - * with net.inet.ip.dummynet.expire=0, or it happens every - * expire ticks. - **/ - int drain_fs; - int drain_sch; - uint32_t expire; - uint32_t expire_cycle; /* tick count */ - - int init_done; - - /* if the upper half is busy doing something long, - * can set the busy flag and we will enqueue packets in - * a queue for later processing. - */ - int busy; - struct mq pending; - -#ifdef _KERNEL - /* - * This file is normally used in the kernel, unless we do - * some userland tests, in which case we do not need a mtx. - * uh_mtx arbitrates between system calls and also - * protects fshash, schedhash and fsunlinked. - * These structures are readonly for the lower half. 
- * bh_mtx protects all other structures which may be - * modified upon packet arrivals - */ -#if defined( __linux__ ) || defined( _WIN32 ) - spinlock_t uh_mtx; - spinlock_t bh_mtx; -#else - struct mtx uh_mtx; - struct mtx bh_mtx; -#endif - -#endif /* _KERNEL */ -}; - -/* - * Delay line, contains all packets on output from a link. - * Every scheduler instance has one. - */ -struct delay_line { - struct dn_id oid; - struct dn_sch_inst *si; - struct mq mq; -}; - -/* - * The kernel side of a flowset. It is linked in a hash table - * of flowsets, and in a list of children of their parent scheduler. - * qht is either the queue or (if HAVE_MASK) a hash table queues. - * Note that the mask to use is the (flow_mask|sched_mask), which - * changes as we attach/detach schedulers. So we store it here. - * - * XXX If we want to add scheduler-specific parameters, we need to - * put them in external storage because the scheduler may not be - * available when the fsk is created. - */ -struct dn_fsk { /* kernel side of a flowset */ - struct dn_fs fs; - SLIST_ENTRY(dn_fsk) fsk_next; /* hash chain for fshash */ - - struct ipfw_flow_id fsk_mask; - - /* qht is a hash table of queues, or just a single queue - * a bit in fs.flags tells us which one - */ - struct dn_ht *qht; - struct dn_schk *sched; /* Sched we are linked to */ - SLIST_ENTRY(dn_fsk) sch_chain; /* list of fsk attached to sched */ - - /* bucket index used by drain routine to drain queues for this - * flowset - */ - int drain_bucket; - /* Parameter realted to RED / GRED */ - /* original values are in dn_fs*/ - int w_q ; /* queue weight (scaled) */ - int max_th ; /* maximum threshold for queue (scaled) */ - int min_th ; /* minimum threshold for queue (scaled) */ - int max_p ; /* maximum value for p_b (scaled) */ - - u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ - u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ - u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ - u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ 
- u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ - u_int lookup_depth ; /* depth of lookup table */ - int lookup_step ; /* granularity inside the lookup table */ - int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ - int avg_pkt_size ; /* medium packet size */ - int max_pkt_size ; /* max packet size */ -#ifdef NEW_AQM - struct dn_aqm *aqmfp; /* Pointer to AQM functions */ - void *aqmcfg; /* configuration parameters for AQM */ -#endif -}; - -/* - * A queue is created as a child of a flowset unless it belongs to - * a !MULTIQUEUE scheduler. It is normally in a hash table in the - * flowset. fs always points to the parent flowset. - * si normally points to the sch_inst, unless the flowset has been - * detached from the scheduler -- in this case si == NULL and we - * should not enqueue. - */ -struct dn_queue { - struct dn_flow ni; /* oid, flow_id, stats */ - struct mq mq; /* packets queue */ - struct dn_sch_inst *_si; /* owner scheduler instance */ - SLIST_ENTRY(dn_queue) q_next; /* hash chain list for qht */ - struct dn_fsk *fs; /* parent flowset. */ - - /* RED parameters */ - int avg; /* average queue length est. (scaled) */ - int count; /* arrivals since last RED drop */ - int random; /* random value (scaled) */ - uint64_t q_time; /* start of queue idle time */ -#ifdef NEW_AQM - void *aqm_status; /* per-queue status variables*/ -#endif - -}; - -/* - * The kernel side of a scheduler. Contains the userland config, - * a link, pointer to extra config arguments from command line, - * kernel flags, and a pointer to the scheduler methods. - * It is stored in a hash table, and holds a list of all - * flowsets and scheduler instances. - * XXX sch must be at the beginning, see schk_hash(). 
- */ -struct dn_schk { - struct dn_sch sch; - struct dn_alg *fp; /* Pointer to scheduler functions */ - struct dn_link link; /* The link, embedded */ - struct dn_profile *profile; /* delay profile, if any */ - struct dn_id *cfg; /* extra config arguments */ - - SLIST_ENTRY(dn_schk) schk_next; /* hash chain for schedhash */ - - struct dn_fsk_head fsk_list; /* all fsk linked to me */ - struct dn_fsk *fs; /* Flowset for !MULTIQUEUE */ - - /* bucket index used by the drain routine to drain the scheduler - * instance for this flowset. - */ - int drain_bucket; - - /* Hash table of all instances (through sch.sched_mask) - * or single instance if no mask. Always valid. - */ - struct dn_ht *siht; -}; - - -/* - * Scheduler instance. - * Contains variables and all queues relative to a this instance. - * This struct is created a runtime. - */ -struct dn_sch_inst { - struct dn_flow ni; /* oid, flowid and stats */ - SLIST_ENTRY(dn_sch_inst) si_next; /* hash chain for siht */ - struct delay_line dline; - struct dn_schk *sched; /* the template */ - int kflags; /* DN_ACTIVE */ - - int64_t credit; /* bits I can transmit (more or less). */ - uint64_t sched_time; /* time link was scheduled in ready_heap */ - uint64_t idle_time; /* start of scheduler instance idle time */ - - /* q_count is the number of queues that this instance is using. - * The counter is incremented or decremented when - * a reference from the queue is created or deleted. - * It is used to make sure that a scheduler instance can be safely - * deleted by the drain routine. See notes below. - */ - int q_count; - -}; - -/* - * NOTE about object drain. - * The system will automatically (XXX check when) drain queues and - * scheduler instances when they are idle. - * A queue is idle when it has no packets; an instance is idle when - * it is not in the evheap heap, and the corresponding delay line is empty. 
- * A queue can be safely deleted when it is idle because of the scheduler - * function xxx_free_queue() will remove any references to it. - * An instance can be only deleted when no queues reference it. To be sure - * of that, a counter (q_count) stores the number of queues that are pointing - * to the instance. - * - * XXX - * Order of scan: - * - take all flowset in a bucket for the flowset hash table - * - take all queues in a bucket for the flowset - * - increment the queue bucket - * - scan next flowset bucket - * Nothing is done if a bucket contains no entries. - * - * The same schema is used for sceduler instances - */ - - -/* kernel-side flags. Linux has DN_DELETE in fcntl.h - */ -enum { - /* 1 and 2 are reserved for the SCAN flags */ - DN_DESTROY = 0x0004, /* destroy */ - DN_DELETE_FS = 0x0008, /* destroy flowset */ - DN_DETACH = 0x0010, - DN_ACTIVE = 0x0020, /* object is in evheap */ - DN_F_DLINE = 0x0040, /* object is a delay line */ - DN_DEL_SAFE = 0x0080, /* delete a queue only if no longer needed - * by scheduler */ - DN_QHT_IS_Q = 0x0100, /* in flowset, qht is a single queue */ -}; - -extern struct dn_parms dn_cfg; -//VNET_DECLARE(struct dn_parms, _base_dn_cfg); -//#define dn_cfg VNET(_base_dn_cfg) - -int dummynet_io(struct mbuf **, int , struct ip_fw_args *); -void dummynet_task(void *context, int pending); -void dn_reschedule(void); - -struct dn_queue *ipdn_q_find(struct dn_fsk *, struct dn_sch_inst *, - struct ipfw_flow_id *); -struct dn_sch_inst *ipdn_si_find(struct dn_schk *, struct ipfw_flow_id *); - -/* - * copy_range is a template for requests for ranges of pipes/queues/scheds. - * The number of ranges is variable and can be derived by o.len. - * As a default, we use a small number of entries so that the struct - * fits easily on the stack and is sufficient for most common requests. 
- */ -#define DEFAULT_RANGES 5 -struct copy_range { - struct dn_id o; - uint32_t r[ 2 * DEFAULT_RANGES ]; -}; - -struct copy_args { - char **start; - char *end; - int flags; - int type; - struct copy_range *extra; /* extra filtering */ -}; - -struct sockopt; -int ip_dummynet_compat(struct sockopt *sopt); -int dummynet_get(struct sockopt *sopt, void **compat); -int dn_c_copy_q (void *_ni, void *arg); -int dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq); -int dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq); -int dn_compat_copy_queue(struct copy_args *a, void *_o); -int dn_compat_copy_pipe(struct copy_args *a, void *_o); -int copy_data_helper_compat(void *_o, void *_arg); -int dn_compat_calc_size(void); -int do_config(void *p, int l); - -/* function to drain idle object */ -void dn_drain_scheduler(void); -void dn_drain_queue(void); - -#ifdef NEW_AQM -int ecn_mark(struct mbuf* m); - -/* moved from ip_dn_io.c to here to be available for AQMs modules*/ -static inline void -mq_append(struct mq *q, struct mbuf *m) -{ -#ifdef USERSPACE - // buffers from netmap need to be copied - // XXX note that the routine is not expected to fail - ND("append %p to %p", m, q); - if (m->m_flags & M_STACK) { - struct mbuf *m_new; - void *p; - int l, ofs; - - ofs = m->m_data - m->__m_extbuf; - // XXX allocate - MGETHDR(m_new, M_NOWAIT, MT_DATA); - ND("*** WARNING, volatile buf %p ext %p %d dofs %d m_new %p", - m, m->__m_extbuf, m->__m_extlen, ofs, m_new); - p = m_new->__m_extbuf; /* new pointer */ - l = m_new->__m_extlen; /* new len */ - if (l <= m->__m_extlen) { - panic("extlen too large"); - } - - *m_new = *m; // copy - m_new->m_flags &= ~M_STACK; - m_new->__m_extbuf = p; // point to new buffer - _pkt_copy(m->__m_extbuf, p, m->__m_extlen); - m_new->m_data = p + ofs; - m = m_new; - } -#endif /* USERSPACE */ - if (q->head == NULL) - q->head = m; - else - q->tail->m_nextpkt = m; - q->count++; - q->tail = m; - m->m_nextpkt = NULL; -} -#endif /* NEW_AQM */ - -#endif 
/* _IP_DN_PRIVATE_H */ diff --git a/freebsd/sys/netpfil/ipfw/ip_fw2.c b/freebsd/sys/netpfil/ipfw/ip_fw2.c deleted file mode 100644 index 8e51bd8c..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw2.c +++ /dev/null @@ -1,3024 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * The FreeBSD IP packet firewall, main file - */ - -#include <rtems/bsd/local/opt_ipfw.h> -#include <rtems/bsd/local/opt_ipdivert.h> -#include <rtems/bsd/local/opt_inet.h> -#ifndef INET -#error "IPFIREWALL requires INET" -#endif /* INET */ -#include <rtems/bsd/local/opt_inet6.h> -#include <rtems/bsd/local/opt_ipsec.h> - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/condvar.h> -#include <sys/counter.h> -#include <sys/eventhandler.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/jail.h> -#include <sys/module.h> -#include <sys/priv.h> -#include <sys/proc.h> -#include <sys/rwlock.h> -#include <sys/rmlock.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/sysctl.h> -#include <sys/syslog.h> -#include <sys/ucred.h> -#include <net/ethernet.h> /* for ETHERTYPE_IP */ -#include <net/if.h> -#include <net/if_var.h> -#include <net/route.h> -#include <net/pfil.h> -#include <net/vnet.h> - -#include <netpfil/pf/pf_mtag.h> - -#include <netinet/in.h> -#include <netinet/in_var.h> -#include <netinet/in_pcb.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/ip_icmp.h> -#include <netinet/ip_fw.h> -#include <netinet/ip_carp.h> -#include <netinet/pim.h> -#include <netinet/tcp_var.h> -#include <netinet/udp.h> -#include <netinet/udp_var.h> -#include <netinet/sctp.h> - -#include <netinet/ip6.h> -#include <netinet/icmp6.h> -#include <netinet/in_fib.h> -#ifdef INET6 -#include <netinet6/in6_fib.h> -#include <netinet6/in6_pcb.h> -#include <netinet6/scope6_var.h> -#include <netinet6/ip6_var.h> -#endif - -#include <netpfil/ipfw/ip_fw_private.h> - -#include <machine/in_cksum.h> /* XXX for in_cksum */ - -#ifdef MAC -#include <security/mac/mac_framework.h> -#endif - -/* - * static variables followed by global ones. - * All ipfw global variables are here. 
- */ - -static VNET_DEFINE(int, fw_deny_unknown_exthdrs); -#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs) - -static VNET_DEFINE(int, fw_permit_single_frag6) = 1; -#define V_fw_permit_single_frag6 VNET(fw_permit_single_frag6) - -#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT -static int default_to_accept = 1; -#else -static int default_to_accept; -#endif - -VNET_DEFINE(int, autoinc_step); -VNET_DEFINE(int, fw_one_pass) = 1; - -VNET_DEFINE(unsigned int, fw_tables_max); -VNET_DEFINE(unsigned int, fw_tables_sets) = 0; /* Don't use set-aware tables */ -/* Use 128 tables by default */ -static unsigned int default_fw_tables = IPFW_TABLES_DEFAULT; - -#ifndef LINEAR_SKIPTO -static int jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, - int tablearg, int jump_backwards); -#define JUMP(ch, f, num, targ, back) jump_fast(ch, f, num, targ, back) -#else -static int jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, - int tablearg, int jump_backwards); -#define JUMP(ch, f, num, targ, back) jump_linear(ch, f, num, targ, back) -#endif - -/* - * Each rule belongs to one of 32 different sets (0..31). - * The variable set_disable contains one bit per set. - * If the bit is set, all rules in the corresponding set - * are disabled. Set RESVD_SET(31) is reserved for the default rule - * and rules that are not deleted by the flush command, - * and CANNOT be disabled. - * Rules in set RESVD_SET can only be deleted individually. - */ -VNET_DEFINE(u_int32_t, set_disable); -#define V_set_disable VNET(set_disable) - -VNET_DEFINE(int, fw_verbose); -/* counter for ipfw_log(NULL...) 
*/ -VNET_DEFINE(u_int64_t, norule_counter); -VNET_DEFINE(int, verbose_limit); - -/* layer3_chain contains the list of rules for layer 3 */ -VNET_DEFINE(struct ip_fw_chain, layer3_chain); - -/* ipfw_vnet_ready controls when we are open for business */ -VNET_DEFINE(int, ipfw_vnet_ready) = 0; - -VNET_DEFINE(int, ipfw_nat_ready) = 0; - -ipfw_nat_t *ipfw_nat_ptr = NULL; -struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int); -ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; -ipfw_nat_cfg_t *ipfw_nat_del_ptr; -ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; -ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; - -#ifdef SYSCTL_NODE -uint32_t dummy_def = IPFW_DEFAULT_RULE; -static int sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS); -static int sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS); - -SYSBEGIN(f3) - -SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, - CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, - "Only do a single pass through ipfw when using dummynet(4)"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, - "Rule number auto-increment step"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, - CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0, - "Log matches to ipfw rules"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(verbose_limit), 0, - "Set upper limit of matches of ipfw rules logged"); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, - &dummy_def, 0, - "The default/max possible rule number."); -SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_max, - CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_table_num, "IU", - "Maximum number of concurrently used tables"); -SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, tables_sets, - CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, - 0, 0, sysctl_ipfw_tables_sets, "IU", - "Use per-set namespace for tables"); 
-SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, - &default_to_accept, 0, - "Make the default rule accept all packets."); -TUNABLE_INT("net.inet.ip.fw.tables_max", (int *)&default_fw_tables); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, - CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(layer3_chain.n_rules), 0, - "Number of static rules"); - -#ifdef INET6 -SYSCTL_DECL(_net_inet6_ip6); -SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); -SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, - CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE, - &VNET_NAME(fw_deny_unknown_exthdrs), 0, - "Deny packets with unknown IPv6 Extension Headers"); -SYSCTL_INT(_net_inet6_ip6_fw, OID_AUTO, permit_single_frag6, - CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE, - &VNET_NAME(fw_permit_single_frag6), 0, - "Permit single packet IPv6 fragments"); -#endif /* INET6 */ - -SYSEND - -#endif /* SYSCTL_NODE */ - - -/* - * Some macros used in the various matching options. - * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T - * Other macros just cast void * into the appropriate type - */ -#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) -#define TCP(p) ((struct tcphdr *)(p)) -#define SCTP(p) ((struct sctphdr *)(p)) -#define UDP(p) ((struct udphdr *)(p)) -#define ICMP(p) ((struct icmphdr *)(p)) -#define ICMP6(p) ((struct icmp6_hdr *)(p)) - -static __inline int -icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) -{ - int type = icmp->icmp_type; - - return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<<type)) ); -} - -#define TT ( (1 << ICMP_ECHO) | (1 << ICMP_ROUTERSOLICIT) | \ - (1 << ICMP_TSTAMP) | (1 << ICMP_IREQ) | (1 << ICMP_MASKREQ) ) - -static int -is_icmp_query(struct icmphdr *icmp) -{ - int type = icmp->icmp_type; - - return (type <= ICMP_MAXTYPE && (TT & (1<<type)) ); -} -#undef TT - -/* - * The following checks use two arrays of 8 or 16 bits to store the - * bits that we want set or clear, respectively. 
They are in the - * low and high half of cmd->arg1 or cmd->d[0]. - * - * We scan options and store the bits we find set. We succeed if - * - * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear - * - * The code is sometimes optimized not to store additional variables. - */ - -static int -flags_match(ipfw_insn *cmd, u_int8_t bits) -{ - u_char want_clear; - bits = ~bits; - - if ( ((cmd->arg1 & 0xff) & bits) != 0) - return 0; /* some bits we want set were clear */ - want_clear = (cmd->arg1 >> 8) & 0xff; - if ( (want_clear & bits) != want_clear) - return 0; /* some bits we want clear were set */ - return 1; -} - -static int -ipopts_match(struct ip *ip, ipfw_insn *cmd) -{ - int optlen, bits = 0; - u_char *cp = (u_char *)(ip + 1); - int x = (ip->ip_hl << 2) - sizeof (struct ip); - - for (; x > 0; x -= optlen, cp += optlen) { - int opt = cp[IPOPT_OPTVAL]; - - if (opt == IPOPT_EOL) - break; - if (opt == IPOPT_NOP) - optlen = 1; - else { - optlen = cp[IPOPT_OLEN]; - if (optlen <= 0 || optlen > x) - return 0; /* invalid or truncated */ - } - switch (opt) { - - default: - break; - - case IPOPT_LSRR: - bits |= IP_FW_IPOPT_LSRR; - break; - - case IPOPT_SSRR: - bits |= IP_FW_IPOPT_SSRR; - break; - - case IPOPT_RR: - bits |= IP_FW_IPOPT_RR; - break; - - case IPOPT_TS: - bits |= IP_FW_IPOPT_TS; - break; - } - } - return (flags_match(cmd, bits)); -} - -static int -tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) -{ - int optlen, bits = 0; - u_char *cp = (u_char *)(tcp + 1); - int x = (tcp->th_off << 2) - sizeof(struct tcphdr); - - for (; x > 0; x -= optlen, cp += optlen) { - int opt = cp[0]; - if (opt == TCPOPT_EOL) - break; - if (opt == TCPOPT_NOP) - optlen = 1; - else { - optlen = cp[1]; - if (optlen <= 0) - break; - } - - switch (opt) { - - default: - break; - - case TCPOPT_MAXSEG: - bits |= IP_FW_TCPOPT_MSS; - break; - - case TCPOPT_WINDOW: - bits |= IP_FW_TCPOPT_WINDOW; - break; - - case TCPOPT_SACK_PERMITTED: - case TCPOPT_SACK: - bits |= IP_FW_TCPOPT_SACK; - 
break; - - case TCPOPT_TIMESTAMP: - bits |= IP_FW_TCPOPT_TS; - break; - - } - } - return (flags_match(cmd, bits)); -} - -static int -iface_match(struct ifnet *ifp, ipfw_insn_if *cmd, struct ip_fw_chain *chain, - uint32_t *tablearg) -{ - - if (ifp == NULL) /* no iface with this packet, match fails */ - return (0); - - /* Check by name or by IP address */ - if (cmd->name[0] != '\0') { /* match by name */ - if (cmd->name[0] == '\1') /* use tablearg to match */ - return ipfw_lookup_table(chain, cmd->p.kidx, 0, - &ifp->if_index, tablearg); - /* Check name */ - if (cmd->p.glob) { - if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) - return(1); - } else { - if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) - return(1); - } - } else { -#if !defined(USERSPACE) && defined(__FreeBSD__) /* and OSX too ? */ - struct ifaddr *ia; - - if_addr_rlock(ifp); - TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { - if (ia->ifa_addr->sa_family != AF_INET) - continue; - if (cmd->p.ip.s_addr == ((struct sockaddr_in *) - (ia->ifa_addr))->sin_addr.s_addr) { - if_addr_runlock(ifp); - return(1); /* match */ - } - } - if_addr_runlock(ifp); -#endif /* __FreeBSD__ */ - } - return(0); /* no match, fail ... */ -} - -/* - * The verify_path function checks if a route to the src exists and - * if it is reachable via ifp (when provided). - * - * The 'verrevpath' option checks that the interface that an IP packet - * arrives on is the same interface that traffic destined for the - * packet's source address would be routed out of. - * The 'versrcreach' option just checks that the source address is - * reachable via any route (except default) in the routing table. - * These two are a measure to block forged packets. This is also - * commonly known as "anti-spoofing" or Unicast Reverse Path - * Forwarding (Unicast RFP) in Cisco-ese. 
The name of the knobs - * is purposely reminiscent of the Cisco IOS command, - * - * ip verify unicast reverse-path - * ip verify unicast source reachable-via any - * - * which implements the same functionality. But note that the syntax - * is misleading, and the check may be performed on all IP packets - * whether unicast, multicast, or broadcast. - */ -static int -verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) -{ -#if defined(USERSPACE) || !defined(__FreeBSD__) - return 0; -#else - struct nhop4_basic nh4; - - if (fib4_lookup_nh_basic(fib, src, NHR_IFAIF, 0, &nh4) != 0) - return (0); - - /* - * If ifp is provided, check for equality with rtentry. - * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, - * in order to pass packets injected back by if_simloop(): - * routing entry (via lo0) for our own address - * may exist, so we need to handle routing assymetry. - */ - if (ifp != NULL && ifp != nh4.nh_ifp) - return (0); - - /* if no ifp provided, check if rtentry is not default route */ - if (ifp == NULL && (nh4.nh_flags & NHF_DEFAULT) != 0) - return (0); - - /* or if this is a blackhole/reject route */ - if (ifp == NULL && (nh4.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0) - return (0); - - /* found valid route */ - return 1; -#endif /* __FreeBSD__ */ -} - -#ifdef INET6 -/* - * ipv6 specific rules here... 
- */ -static __inline int -icmp6type_match (int type, ipfw_insn_u32 *cmd) -{ - return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) ); -} - -static int -flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) -{ - int i; - for (i=0; i <= cmd->o.arg1; ++i ) - if (curr_flow == cmd->d[i] ) - return 1; - return 0; -} - -/* support for IP6_*_ME opcodes */ -static const struct in6_addr lla_mask = {{{ - 0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -}}}; - -static int -ipfw_localip6(struct in6_addr *in6) -{ - struct rm_priotracker in6_ifa_tracker; - struct in6_ifaddr *ia; - - if (IN6_IS_ADDR_MULTICAST(in6)) - return (0); - - if (!IN6_IS_ADDR_LINKLOCAL(in6)) - return (in6_localip(in6)); - - IN6_IFADDR_RLOCK(&in6_ifa_tracker); - TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { - if (!IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) - continue; - if (IN6_ARE_MASKED_ADDR_EQUAL(&ia->ia_addr.sin6_addr, - in6, &lla_mask)) { - IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); - return (1); - } - } - IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); - return (0); -} - -static int -verify_path6(struct in6_addr *src, struct ifnet *ifp, u_int fib) -{ - struct nhop6_basic nh6; - - if (IN6_IS_SCOPE_LINKLOCAL(src)) - return (1); - - if (fib6_lookup_nh_basic(fib, src, 0, NHR_IFAIF, 0, &nh6) != 0) - return (0); - - /* If ifp is provided, check for equality with route table. 
*/ - if (ifp != NULL && ifp != nh6.nh_ifp) - return (0); - - /* if no ifp provided, check if rtentry is not default route */ - if (ifp == NULL && (nh6.nh_flags & NHF_DEFAULT) != 0) - return (0); - - /* or if this is a blackhole/reject route */ - if (ifp == NULL && (nh6.nh_flags & (NHF_REJECT|NHF_BLACKHOLE)) != 0) - return (0); - - /* found valid route */ - return 1; -} - -static int -is_icmp6_query(int icmp6_type) -{ - if ((icmp6_type <= ICMP6_MAXTYPE) && - (icmp6_type == ICMP6_ECHO_REQUEST || - icmp6_type == ICMP6_MEMBERSHIP_QUERY || - icmp6_type == ICMP6_WRUREQUEST || - icmp6_type == ICMP6_FQDN_QUERY || - icmp6_type == ICMP6_NI_QUERY)) - return (1); - - return (0); -} - -static void -send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) -{ - struct mbuf *m; - - m = args->m; - if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) { - struct tcphdr *tcp; - tcp = (struct tcphdr *)((char *)ip6 + hlen); - - if ((tcp->th_flags & TH_RST) == 0) { - struct mbuf *m0; - m0 = ipfw_send_pkt(args->m, &(args->f_id), - ntohl(tcp->th_seq), ntohl(tcp->th_ack), - tcp->th_flags | TH_RST); - if (m0 != NULL) - ip6_output(m0, NULL, NULL, 0, NULL, NULL, - NULL); - } - FREE_PKT(m); - } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */ -#if 0 - /* - * Unlike above, the mbufs need to line up with the ip6 hdr, - * as the contents are read. We need to m_adj() the - * needed amount. - * The mbuf will however be thrown away so we can adjust it. - * Remember we did an m_pullup on it already so we - * can make some assumptions about contiguousness. - */ - if (args->L3offset) - m_adj(m, args->L3offset); -#endif - icmp6_error(m, ICMP6_DST_UNREACH, code, 0); - } else - FREE_PKT(m); - - args->m = NULL; -} - -#endif /* INET6 */ - - -/* - * sends a reject message, consuming the mbuf passed as an argument. 
- */ -static void -send_reject(struct ip_fw_args *args, int code, int iplen, struct ip *ip) -{ - -#if 0 - /* XXX When ip is not guaranteed to be at mtod() we will - * need to account for this */ - * The mbuf will however be thrown away so we can adjust it. - * Remember we did an m_pullup on it already so we - * can make some assumptions about contiguousness. - */ - if (args->L3offset) - m_adj(m, args->L3offset); -#endif - if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ - icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); - } else if (args->f_id.proto == IPPROTO_TCP) { - struct tcphdr *const tcp = - L3HDR(struct tcphdr, mtod(args->m, struct ip *)); - if ( (tcp->th_flags & TH_RST) == 0) { - struct mbuf *m; - m = ipfw_send_pkt(args->m, &(args->f_id), - ntohl(tcp->th_seq), ntohl(tcp->th_ack), - tcp->th_flags | TH_RST); - if (m != NULL) - ip_output(m, NULL, NULL, 0, NULL, NULL); - } - FREE_PKT(args->m); - } else - FREE_PKT(args->m); - args->m = NULL; -} - -/* - * Support for uid/gid/jail lookup. These tests are expensive - * (because we may need to look into the list of active sockets) - * so we cache the results. ugid_lookupp is 0 if we have not - * yet done a lookup, 1 if we succeeded, and -1 if we tried - * and failed. The function always returns the match value. - * We could actually spare the variable and use *uc, setting - * it to '(void *)check_uidgid if we have no info, NULL if - * we tried and failed, or any other value if successful. 
- */ -static int -check_uidgid(ipfw_insn_u32 *insn, struct ip_fw_args *args, int *ugid_lookupp, - struct ucred **uc) -{ -#if defined(USERSPACE) - return 0; // not supported in userspace -#else -#ifndef __FreeBSD__ - /* XXX */ - return cred_check(insn, proto, oif, - dst_ip, dst_port, src_ip, src_port, - (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb); -#else /* FreeBSD */ - struct in_addr src_ip, dst_ip; - struct inpcbinfo *pi; - struct ipfw_flow_id *id; - struct inpcb *pcb, *inp; - struct ifnet *oif; - int lookupflags; - int match; - - id = &args->f_id; - inp = args->inp; - oif = args->oif; - - /* - * Check to see if the UDP or TCP stack supplied us with - * the PCB. If so, rather then holding a lock and looking - * up the PCB, we can use the one that was supplied. - */ - if (inp && *ugid_lookupp == 0) { - INP_LOCK_ASSERT(inp); - if (inp->inp_socket != NULL) { - *uc = crhold(inp->inp_cred); - *ugid_lookupp = 1; - } else - *ugid_lookupp = -1; - } - /* - * If we have already been here and the packet has no - * PCB entry associated with it, then we can safely - * assume that this is a no match. 
- */ - if (*ugid_lookupp == -1) - return (0); - if (id->proto == IPPROTO_TCP) { - lookupflags = 0; - pi = &V_tcbinfo; - } else if (id->proto == IPPROTO_UDP) { - lookupflags = INPLOOKUP_WILDCARD; - pi = &V_udbinfo; - } else - return 0; - lookupflags |= INPLOOKUP_RLOCKPCB; - match = 0; - if (*ugid_lookupp == 0) { - if (id->addr_type == 6) { -#ifdef INET6 - if (oif == NULL) - pcb = in6_pcblookup_mbuf(pi, - &id->src_ip6, htons(id->src_port), - &id->dst_ip6, htons(id->dst_port), - lookupflags, oif, args->m); - else - pcb = in6_pcblookup_mbuf(pi, - &id->dst_ip6, htons(id->dst_port), - &id->src_ip6, htons(id->src_port), - lookupflags, oif, args->m); -#else - *ugid_lookupp = -1; - return (0); -#endif - } else { - src_ip.s_addr = htonl(id->src_ip); - dst_ip.s_addr = htonl(id->dst_ip); - if (oif == NULL) - pcb = in_pcblookup_mbuf(pi, - src_ip, htons(id->src_port), - dst_ip, htons(id->dst_port), - lookupflags, oif, args->m); - else - pcb = in_pcblookup_mbuf(pi, - dst_ip, htons(id->dst_port), - src_ip, htons(id->src_port), - lookupflags, oif, args->m); - } - if (pcb != NULL) { - INP_RLOCK_ASSERT(pcb); - *uc = crhold(pcb->inp_cred); - *ugid_lookupp = 1; - INP_RUNLOCK(pcb); - } - if (*ugid_lookupp == 0) { - /* - * We tried and failed, set the variable to -1 - * so we will not try again on this packet. 
- */ - *ugid_lookupp = -1; - return (0); - } - } - if (insn->o.opcode == O_UID) -#ifndef __rtems__ - match = ((*uc)->cr_uid == (uid_t)insn->d[0]); -#else /* __rtems__ */ - match = (BSD_DEFAULT_UID == (uid_t)insn->d[0]); -#endif /* __rtems__ */ - else if (insn->o.opcode == O_GID) - match = groupmember((gid_t)insn->d[0], *uc); - else if (insn->o.opcode == O_JAIL) -#ifndef __rtems__ - match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]); -#else /* __rtems__ */ - match = (BSD_DEFAULT_PRISON->pr_id == (int)insn->d[0]); -#endif /* __rtems__ */ - return (match); -#endif /* __FreeBSD__ */ -#endif /* not supported in userspace */ -} - -/* - * Helper function to set args with info on the rule after the matching - * one. slot is precise, whereas we guess rule_id as they are - * assigned sequentially. - */ -static inline void -set_match(struct ip_fw_args *args, int slot, - struct ip_fw_chain *chain) -{ - args->rule.chain_id = chain->id; - args->rule.slot = slot + 1; /* we use 0 as a marker */ - args->rule.rule_id = 1 + chain->map[slot]->id; - args->rule.rulenum = chain->map[slot]->rulenum; -} - -#ifndef LINEAR_SKIPTO -/* - * Helper function to enable cached rule lookups using - * cached_id and cached_pos fields in ipfw rule. - */ -static int -jump_fast(struct ip_fw_chain *chain, struct ip_fw *f, int num, - int tablearg, int jump_backwards) -{ - int f_pos; - - /* If possible use cached f_pos (in f->cached_pos), - * whose version is written in f->cached_id - * (horrible hacks to avoid changing the ABI). 
- */ - if (num != IP_FW_TARG && f->cached_id == chain->id) - f_pos = f->cached_pos; - else { - int i = IP_FW_ARG_TABLEARG(chain, num, skipto); - /* make sure we do not jump backward */ - if (jump_backwards == 0 && i <= f->rulenum) - i = f->rulenum + 1; - if (chain->idxmap != NULL) - f_pos = chain->idxmap[i]; - else - f_pos = ipfw_find_rule(chain, i, 0); - /* update the cache */ - if (num != IP_FW_TARG) { - f->cached_id = chain->id; - f->cached_pos = f_pos; - } - } - - return (f_pos); -} -#else -/* - * Helper function to enable real fast rule lookups. - */ -static int -jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num, - int tablearg, int jump_backwards) -{ - int f_pos; - - num = IP_FW_ARG_TABLEARG(chain, num, skipto); - /* make sure we do not jump backward */ - if (jump_backwards == 0 && num <= f->rulenum) - num = f->rulenum + 1; - f_pos = chain->idxmap[num]; - - return (f_pos); -} -#endif - -#define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f) -/* - * The main check routine for the firewall. - * - * All arguments are in args so we can modify them and return them - * back to the caller. - * - * Parameters: - * - * args->m (in/out) The packet; we set to NULL when/if we nuke it. - * Starts with the IP header. - * args->eh (in) Mac header if present, NULL for layer3 packet. - * args->L3offset Number of bytes bypassed if we came from L2. - * e.g. often sizeof(eh) ** NOTYET ** - * args->oif Outgoing interface, NULL if packet is incoming. - * The incoming interface is in the mbuf. (in) - * args->divert_rule (in/out) - * Skip up to the first rule past this rule number; - * upon return, non-zero port number for divert or tee. - * - * args->rule Pointer to the last matching rule (in/out) - * args->next_hop Socket we are forwarding to (out). - * args->next_hop6 IPv6 next hop we are forwarding to (out). 
- * args->f_id Addresses grabbed from the packet (out) - * args->rule.info a cookie depending on rule action - * - * Return value: - * - * IP_FW_PASS the packet must be accepted - * IP_FW_DENY the packet must be dropped - * IP_FW_DIVERT divert packet, port in m_tag - * IP_FW_TEE tee packet, port in m_tag - * IP_FW_DUMMYNET to dummynet, pipe in args->cookie - * IP_FW_NETGRAPH into netgraph, cookie args->cookie - * args->rule contains the matching rule, - * args->rule.info has additional information. - * - */ -int -ipfw_chk(struct ip_fw_args *args) -{ - - /* - * Local variables holding state while processing a packet: - * - * IMPORTANT NOTE: to speed up the processing of rules, there - * are some assumption on the values of the variables, which - * are documented here. Should you change them, please check - * the implementation of the various instructions to make sure - * that they still work. - * - * args->eh The MAC header. It is non-null for a layer2 - * packet, it is NULL for a layer-3 packet. - * **notyet** - * args->L3offset Offset in the packet to the L3 (IP or equiv.) header. - * - * m | args->m Pointer to the mbuf, as received from the caller. - * It may change if ipfw_chk() does an m_pullup, or if it - * consumes the packet because it calls send_reject(). - * XXX This has to change, so that ipfw_chk() never modifies - * or consumes the buffer. - * ip is the beginning of the ip(4 or 6) header. - * Calculated by adding the L3offset to the start of data. - * (Until we start using L3offset, the packet is - * supposed to start with the ip header). - */ - struct mbuf *m = args->m; - struct ip *ip = mtod(m, struct ip *); - - /* - * For rules which contain uid/gid or jail constraints, cache - * a copy of the users credentials after the pcb lookup has been - * executed. This will speed up the processing of rules with - * these types of constraints, as well as decrease contention - * on pcb related locks. 
- */ -#ifndef __FreeBSD__ - struct bsd_ucred ucred_cache; -#else - struct ucred *ucred_cache = NULL; -#endif - int ucred_lookup = 0; - - /* - * oif | args->oif If NULL, ipfw_chk has been called on the - * inbound path (ether_input, ip_input). - * If non-NULL, ipfw_chk has been called on the outbound path - * (ether_output, ip_output). - */ - struct ifnet *oif = args->oif; - - int f_pos = 0; /* index of current rule in the array */ - int retval = 0; - - /* - * hlen The length of the IP header. - */ - u_int hlen = 0; /* hlen >0 means we have an IP pkt */ - - /* - * offset The offset of a fragment. offset != 0 means that - * we have a fragment at this offset of an IPv4 packet. - * offset == 0 means that (if this is an IPv4 packet) - * this is the first or only fragment. - * For IPv6 offset|ip6f_mf == 0 means there is no Fragment Header - * or there is a single packet fragment (fragment header added - * without needed). We will treat a single packet fragment as if - * there was no fragment header (or log/block depending on the - * V_fw_permit_single_frag6 sysctl setting). - */ - u_short offset = 0; - u_short ip6f_mf = 0; - - /* - * Local copies of addresses. They are only valid if we have - * an IP packet. - * - * proto The protocol. Set to 0 for non-ip packets, - * or to the protocol read from the packet otherwise. - * proto != 0 means that we have an IPv4 packet. - * - * src_port, dst_port port numbers, in HOST format. Only - * valid for TCP and UDP packets. - * - * src_ip, dst_ip ip addresses, in NETWORK format. - * Only valid for IPv4 packets. 
- */ - uint8_t proto; - uint16_t src_port = 0, dst_port = 0; /* NOTE: host format */ - struct in_addr src_ip, dst_ip; /* NOTE: network format */ - uint16_t iplen=0; - int pktlen; - uint16_t etype = 0; /* Host order stored ether type */ - - /* - * dyn_dir = MATCH_UNKNOWN when rules unchecked, - * MATCH_NONE when checked and not matched (q = NULL), - * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) - */ - int dyn_dir = MATCH_UNKNOWN; - uint16_t dyn_name = 0; - ipfw_dyn_rule *q = NULL; - struct ip_fw_chain *chain = &V_layer3_chain; - - /* - * We store in ulp a pointer to the upper layer protocol header. - * In the ipv4 case this is easy to determine from the header, - * but for ipv6 we might have some additional headers in the middle. - * ulp is NULL if not found. - */ - void *ulp = NULL; /* upper layer protocol pointer. */ - - /* XXX ipv6 variables */ - int is_ipv6 = 0; - uint8_t icmp6_type = 0; - uint16_t ext_hd = 0; /* bits vector for extension header filtering */ - /* end of ipv6 variables */ - - int is_ipv4 = 0; - - int done = 0; /* flag to exit the outer loop */ - - if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) - return (IP_FW_PASS); /* accept */ - - dst_ip.s_addr = 0; /* make sure it is initialized */ - src_ip.s_addr = 0; /* make sure it is initialized */ - pktlen = m->m_pkthdr.len; - args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */ - proto = args->f_id.proto = 0; /* mark f_id invalid */ - /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */ - -/* - * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, - * then it sets p to point at the offset "len" in the mbuf. WARNING: the - * pointer might become stale after other pullups (but we never use it - * this way). 
- */ -#define PULLUP_TO(_len, p, T) PULLUP_LEN(_len, p, sizeof(T)) -#define PULLUP_LEN(_len, p, T) \ -do { \ - int x = (_len) + T; \ - if ((m)->m_len < x) { \ - args->m = m = m_pullup(m, x); \ - if (m == NULL) \ - goto pullup_failed; \ - } \ - p = (mtod(m, char *) + (_len)); \ -} while (0) - - /* - * if we have an ether header, - */ - if (args->eh) - etype = ntohs(args->eh->ether_type); - - /* Identify IP packets and fill up variables. */ - if (pktlen >= sizeof(struct ip6_hdr) && - (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) { - struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; - is_ipv6 = 1; - args->f_id.addr_type = 6; - hlen = sizeof(struct ip6_hdr); - proto = ip6->ip6_nxt; - - /* Search extension headers to find upper layer protocols */ - while (ulp == NULL && offset == 0) { - switch (proto) { - case IPPROTO_ICMPV6: - PULLUP_TO(hlen, ulp, struct icmp6_hdr); - icmp6_type = ICMP6(ulp)->icmp6_type; - break; - - case IPPROTO_TCP: - PULLUP_TO(hlen, ulp, struct tcphdr); - dst_port = TCP(ulp)->th_dport; - src_port = TCP(ulp)->th_sport; - /* save flags for dynamic rules */ - args->f_id._flags = TCP(ulp)->th_flags; - break; - - case IPPROTO_SCTP: - PULLUP_TO(hlen, ulp, struct sctphdr); - src_port = SCTP(ulp)->src_port; - dst_port = SCTP(ulp)->dest_port; - break; - - case IPPROTO_UDP: - PULLUP_TO(hlen, ulp, struct udphdr); - dst_port = UDP(ulp)->uh_dport; - src_port = UDP(ulp)->uh_sport; - break; - - case IPPROTO_HOPOPTS: /* RFC 2460 */ - PULLUP_TO(hlen, ulp, struct ip6_hbh); - ext_hd |= EXT_HOPOPTS; - hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; - proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; - ulp = NULL; - break; - - case IPPROTO_ROUTING: /* RFC 2460 */ - PULLUP_TO(hlen, ulp, struct ip6_rthdr); - switch (((struct ip6_rthdr *)ulp)->ip6r_type) { - case 0: - ext_hd |= EXT_RTHDR0; - break; - case 2: - ext_hd |= EXT_RTHDR2; - break; - default: - if (V_fw_verbose) - printf("IPFW2: IPV6 - Unknown " - "Routing Header type(%d)\n", - ((struct ip6_rthdr *) - 
ulp)->ip6r_type); - if (V_fw_deny_unknown_exthdrs) - return (IP_FW_DENY); - break; - } - ext_hd |= EXT_ROUTING; - hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3; - proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt; - ulp = NULL; - break; - - case IPPROTO_FRAGMENT: /* RFC 2460 */ - PULLUP_TO(hlen, ulp, struct ip6_frag); - ext_hd |= EXT_FRAGMENT; - hlen += sizeof (struct ip6_frag); - proto = ((struct ip6_frag *)ulp)->ip6f_nxt; - offset = ((struct ip6_frag *)ulp)->ip6f_offlg & - IP6F_OFF_MASK; - ip6f_mf = ((struct ip6_frag *)ulp)->ip6f_offlg & - IP6F_MORE_FRAG; - if (V_fw_permit_single_frag6 == 0 && - offset == 0 && ip6f_mf == 0) { - if (V_fw_verbose) - printf("IPFW2: IPV6 - Invalid " - "Fragment Header\n"); - if (V_fw_deny_unknown_exthdrs) - return (IP_FW_DENY); - break; - } - args->f_id.extra = - ntohl(((struct ip6_frag *)ulp)->ip6f_ident); - ulp = NULL; - break; - - case IPPROTO_DSTOPTS: /* RFC 2460 */ - PULLUP_TO(hlen, ulp, struct ip6_hbh); - ext_hd |= EXT_DSTOPTS; - hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; - proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; - ulp = NULL; - break; - - case IPPROTO_AH: /* RFC 2402 */ - PULLUP_TO(hlen, ulp, struct ip6_ext); - ext_hd |= EXT_AH; - hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2; - proto = ((struct ip6_ext *)ulp)->ip6e_nxt; - ulp = NULL; - break; - - case IPPROTO_ESP: /* RFC 2406 */ - PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */ - /* Anything past Seq# is variable length and - * data past this ext. header is encrypted. */ - ext_hd |= EXT_ESP; - break; - - case IPPROTO_NONE: /* RFC 2460 */ - /* - * Packet ends here, and IPv6 header has - * already been pulled up. If ip6e_len!=0 - * then octets must be ignored. - */ - ulp = ip; /* non-NULL to get out of loop. */ - break; - - case IPPROTO_OSPFIGP: - /* XXX OSPF header check? */ - PULLUP_TO(hlen, ulp, struct ip6_ext); - break; - - case IPPROTO_PIM: - /* XXX PIM header check? 
*/ - PULLUP_TO(hlen, ulp, struct pim); - break; - - case IPPROTO_CARP: - PULLUP_TO(hlen, ulp, struct carp_header); - if (((struct carp_header *)ulp)->carp_version != - CARP_VERSION) - return (IP_FW_DENY); - if (((struct carp_header *)ulp)->carp_type != - CARP_ADVERTISEMENT) - return (IP_FW_DENY); - break; - - case IPPROTO_IPV6: /* RFC 2893 */ - PULLUP_TO(hlen, ulp, struct ip6_hdr); - break; - - case IPPROTO_IPV4: /* RFC 2893 */ - PULLUP_TO(hlen, ulp, struct ip); - break; - - default: - if (V_fw_verbose) - printf("IPFW2: IPV6 - Unknown " - "Extension Header(%d), ext_hd=%x\n", - proto, ext_hd); - if (V_fw_deny_unknown_exthdrs) - return (IP_FW_DENY); - PULLUP_TO(hlen, ulp, struct ip6_ext); - break; - } /*switch */ - } - ip = mtod(m, struct ip *); - ip6 = (struct ip6_hdr *)ip; - args->f_id.src_ip6 = ip6->ip6_src; - args->f_id.dst_ip6 = ip6->ip6_dst; - args->f_id.src_ip = 0; - args->f_id.dst_ip = 0; - args->f_id.flow_id6 = ntohl(ip6->ip6_flow); - } else if (pktlen >= sizeof(struct ip) && - (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) { - is_ipv4 = 1; - hlen = ip->ip_hl << 2; - args->f_id.addr_type = 4; - - /* - * Collect parameters into local variables for faster matching. - */ - proto = ip->ip_p; - src_ip = ip->ip_src; - dst_ip = ip->ip_dst; - offset = ntohs(ip->ip_off) & IP_OFFMASK; - iplen = ntohs(ip->ip_len); - pktlen = iplen < pktlen ? 
iplen : pktlen; - - if (offset == 0) { - switch (proto) { - case IPPROTO_TCP: - PULLUP_TO(hlen, ulp, struct tcphdr); - dst_port = TCP(ulp)->th_dport; - src_port = TCP(ulp)->th_sport; - /* save flags for dynamic rules */ - args->f_id._flags = TCP(ulp)->th_flags; - break; - - case IPPROTO_SCTP: - PULLUP_TO(hlen, ulp, struct sctphdr); - src_port = SCTP(ulp)->src_port; - dst_port = SCTP(ulp)->dest_port; - break; - - case IPPROTO_UDP: - PULLUP_TO(hlen, ulp, struct udphdr); - dst_port = UDP(ulp)->uh_dport; - src_port = UDP(ulp)->uh_sport; - break; - - case IPPROTO_ICMP: - PULLUP_TO(hlen, ulp, struct icmphdr); - //args->f_id.flags = ICMP(ulp)->icmp_type; - break; - - default: - break; - } - } - - ip = mtod(m, struct ip *); - args->f_id.src_ip = ntohl(src_ip.s_addr); - args->f_id.dst_ip = ntohl(dst_ip.s_addr); - } -#undef PULLUP_TO - if (proto) { /* we may have port numbers, store them */ - args->f_id.proto = proto; - args->f_id.src_port = src_port = ntohs(src_port); - args->f_id.dst_port = dst_port = ntohs(dst_port); - } - - IPFW_PF_RLOCK(chain); - if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ - IPFW_PF_RUNLOCK(chain); - return (IP_FW_PASS); /* accept */ - } - if (args->rule.slot) { - /* - * Packet has already been tagged as a result of a previous - * match on rule args->rule aka args->rule_id (PIPE, QUEUE, - * REASS, NETGRAPH, DIVERT/TEE...) - * Validate the slot and continue from the next one - * if still present, otherwise do a lookup. - */ - f_pos = (args->rule.chain_id == chain->id) ? - args->rule.slot : - ipfw_find_rule(chain, args->rule.rulenum, - args->rule.rule_id); - } else { - f_pos = 0; - } - - /* - * Now scan the rules, and parse microinstructions for each rule. - * We have two nested loops and an inner switch. Sometimes we - * need to break out of one or both loops, or re-enter one of - * the loops with updated variables. Loop variables are: - * - * f_pos (outer loop) points to the current rule. - * On output it points to the matching rule. 
- * done (outer loop) is used as a flag to break the loop. - * l (inner loop) residual length of current rule. - * cmd points to the current microinstruction. - * - * We break the inner loop by setting l=0 and possibly - * cmdlen=0 if we don't want to advance cmd. - * We break the outer loop by setting done=1 - * We can restart the inner loop by setting l>0 and f_pos, f, cmd - * as needed. - */ - for (; f_pos < chain->n_rules; f_pos++) { - ipfw_insn *cmd; - uint32_t tablearg = 0; - int l, cmdlen, skip_or; /* skip rest of OR block */ - struct ip_fw *f; - - f = chain->map[f_pos]; - if (V_set_disable & (1 << f->set) ) - continue; - - skip_or = 0; - for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; - l -= cmdlen, cmd += cmdlen) { - int match; - - /* - * check_body is a jump target used when we find a - * CHECK_STATE, and need to jump to the body of - * the target rule. - */ - -/* check_body: */ - cmdlen = F_LEN(cmd); - /* - * An OR block (insn_1 || .. || insn_n) has the - * F_OR bit set in all but the last instruction. - * The first match will set "skip_or", and cause - * the following instructions to be skipped until - * past the one with the F_OR bit clear. - */ - if (skip_or) { /* skip this instruction */ - if ((cmd->len & F_OR) == 0) - skip_or = 0; /* next one is good */ - continue; - } - match = 0; /* set to 1 if we succeed */ - - switch (cmd->opcode) { - /* - * The first set of opcodes compares the packet's - * fields with some pattern, setting 'match' if a - * match is found. At the end of the loop there is - * logic to deal with F_NOT and F_OR flags associated - * with the opcode. - */ - case O_NOP: - match = 1; - break; - - case O_FORWARD_MAC: - printf("ipfw: opcode %d unimplemented\n", - cmd->opcode); - break; - - case O_GID: - case O_UID: - case O_JAIL: - /* - * We only check offset == 0 && proto != 0, - * as this ensures that we have a - * packet with the ports info. 
- */ - if (offset != 0) - break; - if (proto == IPPROTO_TCP || - proto == IPPROTO_UDP) - match = check_uidgid( - (ipfw_insn_u32 *)cmd, - args, &ucred_lookup, -#ifdef __FreeBSD__ - &ucred_cache); -#else - (void *)&ucred_cache); -#endif - break; - - case O_RECV: - match = iface_match(m->m_pkthdr.rcvif, - (ipfw_insn_if *)cmd, chain, &tablearg); - break; - - case O_XMIT: - match = iface_match(oif, (ipfw_insn_if *)cmd, - chain, &tablearg); - break; - - case O_VIA: - match = iface_match(oif ? oif : - m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd, - chain, &tablearg); - break; - - case O_MACADDR2: - if (args->eh != NULL) { /* have MAC header */ - u_int32_t *want = (u_int32_t *) - ((ipfw_insn_mac *)cmd)->addr; - u_int32_t *mask = (u_int32_t *) - ((ipfw_insn_mac *)cmd)->mask; - u_int32_t *hdr = (u_int32_t *)args->eh; - - match = - ( want[0] == (hdr[0] & mask[0]) && - want[1] == (hdr[1] & mask[1]) && - want[2] == (hdr[2] & mask[2]) ); - } - break; - - case O_MAC_TYPE: - if (args->eh != NULL) { - u_int16_t *p = - ((ipfw_insn_u16 *)cmd)->ports; - int i; - - for (i = cmdlen - 1; !match && i>0; - i--, p += 2) - match = (etype >= p[0] && - etype <= p[1]); - } - break; - - case O_FRAG: - match = (offset != 0); - break; - - case O_IN: /* "out" is "not in" */ - match = (oif == NULL); - break; - - case O_LAYER2: - match = (args->eh != NULL); - break; - - case O_DIVERTED: - { - /* For diverted packets, args->rule.info - * contains the divert port (in host format) - * reason and direction. - */ - uint32_t i = args->rule.info; - match = (i&IPFW_IS_MASK) == IPFW_IS_DIVERT && - cmd->arg1 & ((i & IPFW_INFO_IN) ? 1 : 2); - } - break; - - case O_PROTO: - /* - * We do not allow an arg of 0 so the - * check of "proto" only suffices. 
- */ - match = (proto == cmd->arg1); - break; - - case O_IP_SRC: - match = is_ipv4 && - (((ipfw_insn_ip *)cmd)->addr.s_addr == - src_ip.s_addr); - break; - - case O_IP_DST_LOOKUP: - { - void *pkey; - uint32_t vidx, key; - uint16_t keylen; - - if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { - /* Determine lookup key type */ - vidx = ((ipfw_insn_u32 *)cmd)->d[1]; - if (vidx != 4 /* uid */ && - vidx != 5 /* jail */ && - is_ipv6 == 0 && is_ipv4 == 0) - break; - /* Determine key length */ - if (vidx == 0 /* dst-ip */ || - vidx == 1 /* src-ip */) - keylen = is_ipv6 ? - sizeof(struct in6_addr): - sizeof(in_addr_t); - else { - keylen = sizeof(key); - pkey = &key; - } - if (vidx == 0 /* dst-ip */) - pkey = is_ipv4 ? (void *)&dst_ip: - (void *)&args->f_id.dst_ip6; - else if (vidx == 1 /* src-ip */) - pkey = is_ipv4 ? (void *)&src_ip: - (void *)&args->f_id.src_ip6; - else if (vidx == 6 /* dscp */) { - if (is_ipv4) - key = ip->ip_tos >> 2; - else { - key = args->f_id.flow_id6; - key = (key & 0x0f) << 2 | - (key & 0xf000) >> 14; - } - key &= 0x3f; - } else if (vidx == 2 /* dst-port */ || - vidx == 3 /* src-port */) { - /* Skip fragments */ - if (offset != 0) - break; - /* Skip proto without ports */ - if (proto != IPPROTO_TCP && - proto != IPPROTO_UDP && - proto != IPPROTO_SCTP) - break; - if (vidx == 2 /* dst-port */) - key = dst_port; - else - key = src_port; - } -#ifndef USERSPACE - else if (vidx == 4 /* uid */ || - vidx == 5 /* jail */) { - check_uidgid( - (ipfw_insn_u32 *)cmd, - args, &ucred_lookup, -#ifdef __FreeBSD__ - &ucred_cache); - if (vidx == 4 /* uid */) -#ifndef __rtems__ - key = ucred_cache->cr_uid; -#else /* __rtems__ */ - key = BSD_DEFAULT_UID; -#endif /* __rtems__ */ - else if (vidx == 5 /* jail */) -#ifndef __rtems__ - key = ucred_cache->cr_prison->pr_id; -#else /* __rtems__ */ - key = BSD_DEFAULT_PRISON->pr_id; -#endif /* __rtems__ */ -#else /* !__FreeBSD__ */ - (void *)&ucred_cache); - if (vidx == 4 /* uid */) - key = ucred_cache.uid; - else if (vidx == 5 /* 
jail */) - key = ucred_cache.xid; -#endif /* !__FreeBSD__ */ - } -#endif /* !USERSPACE */ - else - break; - match = ipfw_lookup_table(chain, - cmd->arg1, keylen, pkey, &vidx); - if (!match) - break; - tablearg = vidx; - break; - } - /* cmdlen =< F_INSN_SIZE(ipfw_insn_u32) */ - /* FALLTHROUGH */ - } - case O_IP_SRC_LOOKUP: - { - void *pkey; - uint32_t vidx; - uint16_t keylen; - - if (is_ipv4) { - keylen = sizeof(in_addr_t); - if (cmd->opcode == O_IP_DST_LOOKUP) - pkey = &dst_ip; - else - pkey = &src_ip; - } else if (is_ipv6) { - keylen = sizeof(struct in6_addr); - if (cmd->opcode == O_IP_DST_LOOKUP) - pkey = &args->f_id.dst_ip6; - else - pkey = &args->f_id.src_ip6; - } else - break; - match = ipfw_lookup_table(chain, cmd->arg1, - keylen, pkey, &vidx); - if (!match) - break; - if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) { - match = ((ipfw_insn_u32 *)cmd)->d[0] == - TARG_VAL(chain, vidx, tag); - if (!match) - break; - } - tablearg = vidx; - break; - } - - case O_IP_FLOW_LOOKUP: - { - uint32_t v = 0; - match = ipfw_lookup_table(chain, - cmd->arg1, 0, &args->f_id, &v); - if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) - match = ((ipfw_insn_u32 *)cmd)->d[0] == - TARG_VAL(chain, v, tag); - if (match) - tablearg = v; - } - break; - case O_IP_SRC_MASK: - case O_IP_DST_MASK: - if (is_ipv4) { - uint32_t a = - (cmd->opcode == O_IP_DST_MASK) ? - dst_ip.s_addr : src_ip.s_addr; - uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; - int i = cmdlen-1; - - for (; !match && i>0; i-= 2, p+= 2) - match = (p[0] == (a & p[1])); - } - break; - - case O_IP_SRC_ME: - if (is_ipv4) { - struct ifnet *tif; - - INADDR_TO_IFP(src_ip, tif); - match = (tif != NULL); - break; - } -#ifdef INET6 - /* FALLTHROUGH */ - case O_IP6_SRC_ME: - match= is_ipv6 && ipfw_localip6(&args->f_id.src_ip6); -#endif - break; - - case O_IP_DST_SET: - case O_IP_SRC_SET: - if (is_ipv4) { - u_int32_t *d = (u_int32_t *)(cmd+1); - u_int32_t addr = - cmd->opcode == O_IP_DST_SET ? 
- args->f_id.dst_ip : - args->f_id.src_ip; - - if (addr < d[0]) - break; - addr -= d[0]; /* subtract base */ - match = (addr < cmd->arg1) && - ( d[ 1 + (addr>>5)] & - (1<<(addr & 0x1f)) ); - } - break; - - case O_IP_DST: - match = is_ipv4 && - (((ipfw_insn_ip *)cmd)->addr.s_addr == - dst_ip.s_addr); - break; - - case O_IP_DST_ME: - if (is_ipv4) { - struct ifnet *tif; - - INADDR_TO_IFP(dst_ip, tif); - match = (tif != NULL); - break; - } -#ifdef INET6 - /* FALLTHROUGH */ - case O_IP6_DST_ME: - match= is_ipv6 && ipfw_localip6(&args->f_id.dst_ip6); -#endif - break; - - - case O_IP_SRCPORT: - case O_IP_DSTPORT: - /* - * offset == 0 && proto != 0 is enough - * to guarantee that we have a - * packet with port info. - */ - if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) - && offset == 0) { - u_int16_t x = - (cmd->opcode == O_IP_SRCPORT) ? - src_port : dst_port ; - u_int16_t *p = - ((ipfw_insn_u16 *)cmd)->ports; - int i; - - for (i = cmdlen - 1; !match && i>0; - i--, p += 2) - match = (x>=p[0] && x<=p[1]); - } - break; - - case O_ICMPTYPE: - match = (offset == 0 && proto==IPPROTO_ICMP && - icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); - break; - -#ifdef INET6 - case O_ICMP6TYPE: - match = is_ipv6 && offset == 0 && - proto==IPPROTO_ICMPV6 && - icmp6type_match( - ICMP6(ulp)->icmp6_type, - (ipfw_insn_u32 *)cmd); - break; -#endif /* INET6 */ - - case O_IPOPT: - match = (is_ipv4 && - ipopts_match(ip, cmd) ); - break; - - case O_IPVER: - match = (is_ipv4 && - cmd->arg1 == ip->ip_v); - break; - - case O_IPID: - case O_IPLEN: - case O_IPTTL: - if (is_ipv4) { /* only for IP packets */ - uint16_t x; - uint16_t *p; - int i; - - if (cmd->opcode == O_IPLEN) - x = iplen; - else if (cmd->opcode == O_IPTTL) - x = ip->ip_ttl; - else /* must be IPID */ - x = ntohs(ip->ip_id); - if (cmdlen == 1) { - match = (cmd->arg1 == x); - break; - } - /* otherwise we have ranges */ - p = ((ipfw_insn_u16 *)cmd)->ports; - i = cmdlen - 1; - for (; !match && i>0; i--, p += 2) - match = (x >= p[0] && x <= 
p[1]); - } - break; - - case O_IPPRECEDENCE: - match = (is_ipv4 && - (cmd->arg1 == (ip->ip_tos & 0xe0)) ); - break; - - case O_IPTOS: - match = (is_ipv4 && - flags_match(cmd, ip->ip_tos)); - break; - - case O_DSCP: - { - uint32_t *p; - uint16_t x; - - p = ((ipfw_insn_u32 *)cmd)->d; - - if (is_ipv4) - x = ip->ip_tos >> 2; - else if (is_ipv6) { - uint8_t *v; - v = &((struct ip6_hdr *)ip)->ip6_vfc; - x = (*v & 0x0F) << 2; - v++; - x |= *v >> 6; - } else - break; - - /* DSCP bitmask is stored as low_u32 high_u32 */ - if (x >= 32) - match = *(p + 1) & (1 << (x - 32)); - else - match = *p & (1 << x); - } - break; - - case O_TCPDATALEN: - if (proto == IPPROTO_TCP && offset == 0) { - struct tcphdr *tcp; - uint16_t x; - uint16_t *p; - int i; - - tcp = TCP(ulp); - x = iplen - - ((ip->ip_hl + tcp->th_off) << 2); - if (cmdlen == 1) { - match = (cmd->arg1 == x); - break; - } - /* otherwise we have ranges */ - p = ((ipfw_insn_u16 *)cmd)->ports; - i = cmdlen - 1; - for (; !match && i>0; i--, p += 2) - match = (x >= p[0] && x <= p[1]); - } - break; - - case O_TCPFLAGS: - match = (proto == IPPROTO_TCP && offset == 0 && - flags_match(cmd, TCP(ulp)->th_flags)); - break; - - case O_TCPOPTS: - if (proto == IPPROTO_TCP && offset == 0 && ulp){ - PULLUP_LEN(hlen, ulp, - (TCP(ulp)->th_off << 2)); - match = tcpopts_match(TCP(ulp), cmd); - } - break; - - case O_TCPSEQ: - match = (proto == IPPROTO_TCP && offset == 0 && - ((ipfw_insn_u32 *)cmd)->d[0] == - TCP(ulp)->th_seq); - break; - - case O_TCPACK: - match = (proto == IPPROTO_TCP && offset == 0 && - ((ipfw_insn_u32 *)cmd)->d[0] == - TCP(ulp)->th_ack); - break; - - case O_TCPWIN: - if (proto == IPPROTO_TCP && offset == 0) { - uint16_t x; - uint16_t *p; - int i; - - x = ntohs(TCP(ulp)->th_win); - if (cmdlen == 1) { - match = (cmd->arg1 == x); - break; - } - /* Otherwise we have ranges. 
*/ - p = ((ipfw_insn_u16 *)cmd)->ports; - i = cmdlen - 1; - for (; !match && i > 0; i--, p += 2) - match = (x >= p[0] && x <= p[1]); - } - break; - - case O_ESTAB: - /* reject packets which have SYN only */ - /* XXX should i also check for TH_ACK ? */ - match = (proto == IPPROTO_TCP && offset == 0 && - (TCP(ulp)->th_flags & - (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); - break; - - case O_ALTQ: { - struct pf_mtag *at; - struct m_tag *mtag; - ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; - - /* - * ALTQ uses mbuf tags from another - * packet filtering system - pf(4). - * We allocate a tag in its format - * and fill it in, pretending to be pf(4). - */ - match = 1; - at = pf_find_mtag(m); - if (at != NULL && at->qid != 0) - break; - mtag = m_tag_get(PACKET_TAG_PF, - sizeof(struct pf_mtag), M_NOWAIT | M_ZERO); - if (mtag == NULL) { - /* - * Let the packet fall back to the - * default ALTQ. - */ - break; - } - m_tag_prepend(m, mtag); - at = (struct pf_mtag *)(mtag + 1); - at->qid = altq->qid; - at->hdr = ip; - break; - } - - case O_LOG: - ipfw_log(chain, f, hlen, args, m, - oif, offset | ip6f_mf, tablearg, ip); - match = 1; - break; - - case O_PROB: - match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); - break; - - case O_VERREVPATH: - /* Outgoing packets automatically pass/match */ - match = ((oif != NULL) || - (m->m_pkthdr.rcvif == NULL) || - ( -#ifdef INET6 - is_ipv6 ? - verify_path6(&(args->f_id.src_ip6), - m->m_pkthdr.rcvif, args->f_id.fib) : -#endif - verify_path(src_ip, m->m_pkthdr.rcvif, - args->f_id.fib))); - break; - - case O_VERSRCREACH: - /* Outgoing packets automatically pass/match */ - match = (hlen > 0 && ((oif != NULL) || -#ifdef INET6 - is_ipv6 ? 
- verify_path6(&(args->f_id.src_ip6), - NULL, args->f_id.fib) : -#endif - verify_path(src_ip, NULL, args->f_id.fib))); - break; - - case O_ANTISPOOF: - /* Outgoing packets automatically pass/match */ - if (oif == NULL && hlen > 0 && - ( (is_ipv4 && in_localaddr(src_ip)) -#ifdef INET6 - || (is_ipv6 && - in6_localaddr(&(args->f_id.src_ip6))) -#endif - )) - match = -#ifdef INET6 - is_ipv6 ? verify_path6( - &(args->f_id.src_ip6), - m->m_pkthdr.rcvif, - args->f_id.fib) : -#endif - verify_path(src_ip, - m->m_pkthdr.rcvif, - args->f_id.fib); - else - match = 1; - break; - - case O_IPSEC: -#ifdef IPSEC - match = (m_tag_find(m, - PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); -#endif - /* otherwise no match */ - break; - -#ifdef INET6 - case O_IP6_SRC: - match = is_ipv6 && - IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6, - &((ipfw_insn_ip6 *)cmd)->addr6); - break; - - case O_IP6_DST: - match = is_ipv6 && - IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6, - &((ipfw_insn_ip6 *)cmd)->addr6); - break; - case O_IP6_SRC_MASK: - case O_IP6_DST_MASK: - if (is_ipv6) { - int i = cmdlen - 1; - struct in6_addr p; - struct in6_addr *d = - &((ipfw_insn_ip6 *)cmd)->addr6; - - for (; !match && i > 0; d += 2, - i -= F_INSN_SIZE(struct in6_addr) - * 2) { - p = (cmd->opcode == - O_IP6_SRC_MASK) ? - args->f_id.src_ip6: - args->f_id.dst_ip6; - APPLY_MASK(&p, &d[1]); - match = - IN6_ARE_ADDR_EQUAL(&d[0], - &p); - } - } - break; - - case O_FLOW6ID: - match = is_ipv6 && - flow6id_match(args->f_id.flow_id6, - (ipfw_insn_u32 *) cmd); - break; - - case O_EXT_HDR: - match = is_ipv6 && - (ext_hd & ((ipfw_insn *) cmd)->arg1); - break; - - case O_IP6: - match = is_ipv6; - break; -#endif - - case O_IP4: - match = is_ipv4; - break; - - case O_TAG: { - struct m_tag *mtag; - uint32_t tag = TARG(cmd->arg1, tag); - - /* Packet is already tagged with this tag? */ - mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); - - /* We have `untag' action when F_NOT flag is - * present. 
And we must remove this mtag from - * mbuf and reset `match' to zero (`match' will - * be inversed later). - * Otherwise we should allocate new mtag and - * push it into mbuf. - */ - if (cmd->len & F_NOT) { /* `untag' action */ - if (mtag != NULL) - m_tag_delete(m, mtag); - match = 0; - } else { - if (mtag == NULL) { - mtag = m_tag_alloc( MTAG_IPFW, - tag, 0, M_NOWAIT); - if (mtag != NULL) - m_tag_prepend(m, mtag); - } - match = 1; - } - break; - } - - case O_FIB: /* try match the specified fib */ - if (args->f_id.fib == cmd->arg1) - match = 1; - break; - - case O_SOCKARG: { -#ifndef USERSPACE /* not supported in userspace */ - struct inpcb *inp = args->inp; - struct inpcbinfo *pi; - - if (is_ipv6) /* XXX can we remove this ? */ - break; - - if (proto == IPPROTO_TCP) - pi = &V_tcbinfo; - else if (proto == IPPROTO_UDP) - pi = &V_udbinfo; - else - break; - - /* - * XXXRW: so_user_cookie should almost - * certainly be inp_user_cookie? - */ - - /* For incoming packet, lookup up the - inpcb using the src/dest ip/port tuple */ - if (inp == NULL) { - inp = in_pcblookup(pi, - src_ip, htons(src_port), - dst_ip, htons(dst_port), - INPLOOKUP_RLOCKPCB, NULL); - if (inp != NULL) { - tablearg = - inp->inp_socket->so_user_cookie; - if (tablearg) - match = 1; - INP_RUNLOCK(inp); - } - } else { - if (inp->inp_socket) { - tablearg = - inp->inp_socket->so_user_cookie; - if (tablearg) - match = 1; - } - } -#endif /* !USERSPACE */ - break; - } - - case O_TAGGED: { - struct m_tag *mtag; - uint32_t tag = TARG(cmd->arg1, tag); - - if (cmdlen == 1) { - match = m_tag_locate(m, MTAG_IPFW, - tag, NULL) != NULL; - break; - } - - /* we have ranges */ - for (mtag = m_tag_first(m); - mtag != NULL && !match; - mtag = m_tag_next(m, mtag)) { - uint16_t *p; - int i; - - if (mtag->m_tag_cookie != MTAG_IPFW) - continue; - - p = ((ipfw_insn_u16 *)cmd)->ports; - i = cmdlen - 1; - for(; !match && i > 0; i--, p += 2) - match = - mtag->m_tag_id >= p[0] && - mtag->m_tag_id <= p[1]; - } - break; - } - - /* - 
* The second set of opcodes represents 'actions', - * i.e. the terminal part of a rule once the packet - * matches all previous patterns. - * Typically there is only one action for each rule, - * and the opcode is stored at the end of the rule - * (but there are exceptions -- see below). - * - * In general, here we set retval and terminate the - * outer loop (would be a 'break 3' in some language, - * but we need to set l=0, done=1) - * - * Exceptions: - * O_COUNT and O_SKIPTO actions: - * instead of terminating, we jump to the next rule - * (setting l=0), or to the SKIPTO target (setting - * f/f_len, cmd and l as needed), respectively. - * - * O_TAG, O_LOG and O_ALTQ action parameters: - * perform some action and set match = 1; - * - * O_LIMIT and O_KEEP_STATE: these opcodes are - * not real 'actions', and are stored right - * before the 'action' part of the rule. - * These opcodes try to install an entry in the - * state tables; if successful, we continue with - * the next opcode (match=1; break;), otherwise - * the packet must be dropped (set retval, - * break loops with l=0, done=1) - * - * O_PROBE_STATE and O_CHECK_STATE: these opcodes - * cause a lookup of the state table, and a jump - * to the 'action' part of the parent rule - * if an entry is found, or - * (CHECK_STATE only) a jump to the next rule if - * the entry is not found. - * The result of the lookup is cached so that - * further instances of these opcodes become NOPs. - * The jump to the next rule is done by setting - * l=0, cmdlen=0. - */ - case O_LIMIT: - case O_KEEP_STATE: - if (ipfw_install_state(chain, f, - (ipfw_insn_limit *)cmd, args, tablearg)) { - /* error or limit violation */ - retval = IP_FW_DENY; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - } - match = 1; - break; - - case O_PROBE_STATE: - case O_CHECK_STATE: - /* - * dynamic rules are checked at the first - * keep-state or check-state occurrence, - * with the result being stored in dyn_dir - * and dyn_name. 
- * The compiler introduces a PROBE_STATE - * instruction for us when we have a - * KEEP_STATE (because PROBE_STATE needs - * to be run first). - * - * (dyn_dir == MATCH_UNKNOWN) means this is - * first lookup for such f_id. Do lookup. - * - * (dyn_dir != MATCH_UNKNOWN && - * dyn_name != 0 && dyn_name != cmd->arg1) - * means previous lookup didn't find dynamic - * rule for specific state name and current - * lookup will search rule with another state - * name. Redo lookup. - * - * (dyn_dir != MATCH_UNKNOWN && dyn_name == 0) - * means previous lookup was for `any' name - * and it didn't find rule. No need to do - * lookup again. - */ - if ((dyn_dir == MATCH_UNKNOWN || - (dyn_name != 0 && - dyn_name != cmd->arg1)) && - (q = ipfw_lookup_dyn_rule(&args->f_id, - &dyn_dir, proto == IPPROTO_TCP ? - TCP(ulp): NULL, - (dyn_name = cmd->arg1))) != NULL) { - /* - * Found dynamic entry, update stats - * and jump to the 'action' part of - * the parent rule by setting - * f, cmd, l and clearing cmdlen. - */ - IPFW_INC_DYN_COUNTER(q, pktlen); - /* XXX we would like to have f_pos - * readily accessible in the dynamic - * rule, instead of having to - * lookup q->rule. - */ - f = q->rule; - f_pos = ipfw_find_rule(chain, - f->rulenum, f->id); - cmd = ACTION_PTR(f); - l = f->cmd_len - f->act_ofs; - ipfw_dyn_unlock(q); - cmdlen = 0; - match = 1; - break; - } - /* - * Dynamic entry not found. If CHECK_STATE, - * skip to next rule, if PROBE_STATE just - * ignore and continue with next opcode. 
- */ - if (cmd->opcode == O_CHECK_STATE) - l = 0; /* exit inner loop */ - match = 1; - break; - - case O_ACCEPT: - retval = 0; /* accept */ - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - - case O_PIPE: - case O_QUEUE: - set_match(args, f_pos, chain); - args->rule.info = TARG(cmd->arg1, pipe); - if (cmd->opcode == O_PIPE) - args->rule.info |= IPFW_IS_PIPE; - if (V_fw_one_pass) - args->rule.info |= IPFW_ONEPASS; - retval = IP_FW_DUMMYNET; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - - case O_DIVERT: - case O_TEE: - if (args->eh) /* not on layer 2 */ - break; - /* otherwise this is terminal */ - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - retval = (cmd->opcode == O_DIVERT) ? - IP_FW_DIVERT : IP_FW_TEE; - set_match(args, f_pos, chain); - args->rule.info = TARG(cmd->arg1, divert); - break; - - case O_COUNT: - IPFW_INC_RULE_COUNTER(f, pktlen); - l = 0; /* exit inner loop */ - break; - - case O_SKIPTO: - IPFW_INC_RULE_COUNTER(f, pktlen); - f_pos = JUMP(chain, f, cmd->arg1, tablearg, 0); - /* - * Skip disabled rules, and re-enter - * the inner loop with the correct - * f_pos, f, l and cmd. - * Also clear cmdlen and skip_or - */ - for (; f_pos < chain->n_rules - 1 && - (V_set_disable & - (1 << chain->map[f_pos]->set)); - f_pos++) - ; - /* Re-enter the inner loop at the skipto rule. */ - f = chain->map[f_pos]; - l = f->cmd_len; - cmd = f->cmd; - match = 1; - cmdlen = 0; - skip_or = 0; - continue; - break; /* not reached */ - - case O_CALLRETURN: { - /* - * Implementation of `subroutine' call/return, - * in the stack carried in an mbuf tag. This - * is different from `skipto' in that any call - * address is possible (`skipto' must prevent - * backward jumps to avoid endless loops). - * We have `return' action when F_NOT flag is - * present. The `m_tag_id' field is used as - * stack pointer. 
- */ - struct m_tag *mtag; - uint16_t jmpto, *stack; - -#define IS_CALL ((cmd->len & F_NOT) == 0) -#define IS_RETURN ((cmd->len & F_NOT) != 0) - /* - * Hand-rolled version of m_tag_locate() with - * wildcard `type'. - * If not already tagged, allocate new tag. - */ - mtag = m_tag_first(m); - while (mtag != NULL) { - if (mtag->m_tag_cookie == - MTAG_IPFW_CALL) - break; - mtag = m_tag_next(m, mtag); - } - if (mtag == NULL && IS_CALL) { - mtag = m_tag_alloc(MTAG_IPFW_CALL, 0, - IPFW_CALLSTACK_SIZE * - sizeof(uint16_t), M_NOWAIT); - if (mtag != NULL) - m_tag_prepend(m, mtag); - } - - /* - * On error both `call' and `return' just - * continue with next rule. - */ - if (IS_RETURN && (mtag == NULL || - mtag->m_tag_id == 0)) { - l = 0; /* exit inner loop */ - break; - } - if (IS_CALL && (mtag == NULL || - mtag->m_tag_id >= IPFW_CALLSTACK_SIZE)) { - printf("ipfw: call stack error, " - "go to next rule\n"); - l = 0; /* exit inner loop */ - break; - } - - IPFW_INC_RULE_COUNTER(f, pktlen); - stack = (uint16_t *)(mtag + 1); - - /* - * The `call' action may use cached f_pos - * (in f->next_rule), whose version is written - * in f->next_rule. - * The `return' action, however, doesn't have - * fixed jump address in cmd->arg1 and can't use - * cache. - */ - if (IS_CALL) { - stack[mtag->m_tag_id] = f->rulenum; - mtag->m_tag_id++; - f_pos = JUMP(chain, f, cmd->arg1, - tablearg, 1); - } else { /* `return' action */ - mtag->m_tag_id--; - jmpto = stack[mtag->m_tag_id] + 1; - f_pos = ipfw_find_rule(chain, jmpto, 0); - } - - /* - * Skip disabled rules, and re-enter - * the inner loop with the correct - * f_pos, f, l and cmd. - * Also clear cmdlen and skip_or - */ - for (; f_pos < chain->n_rules - 1 && - (V_set_disable & - (1 << chain->map[f_pos]->set)); f_pos++) - ; - /* Re-enter the inner loop at the dest rule. 
*/ - f = chain->map[f_pos]; - l = f->cmd_len; - cmd = f->cmd; - cmdlen = 0; - skip_or = 0; - continue; - break; /* NOTREACHED */ - } -#undef IS_CALL -#undef IS_RETURN - - case O_REJECT: - /* - * Drop the packet and send a reject notice - * if the packet is not ICMP (or is an ICMP - * query), and it is not multicast/broadcast. - */ - if (hlen > 0 && is_ipv4 && offset == 0 && - (proto != IPPROTO_ICMP || - is_icmp_query(ICMP(ulp))) && - !(m->m_flags & (M_BCAST|M_MCAST)) && - !IN_MULTICAST(ntohl(dst_ip.s_addr))) { - send_reject(args, cmd->arg1, iplen, ip); - m = args->m; - } - /* FALLTHROUGH */ -#ifdef INET6 - case O_UNREACH6: - if (hlen > 0 && is_ipv6 && - ((offset & IP6F_OFF_MASK) == 0) && - (proto != IPPROTO_ICMPV6 || - (is_icmp6_query(icmp6_type) == 1)) && - !(m->m_flags & (M_BCAST|M_MCAST)) && - !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) { - send_reject6( - args, cmd->arg1, hlen, - (struct ip6_hdr *)ip); - m = args->m; - } - /* FALLTHROUGH */ -#endif - case O_DENY: - retval = IP_FW_DENY; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - - case O_FORWARD_IP: - if (args->eh) /* not valid on layer2 pkts */ - break; - if (q == NULL || q->rule != f || - dyn_dir == MATCH_FORWARD) { - struct sockaddr_in *sa; - - sa = &(((ipfw_insn_sa *)cmd)->sa); - if (sa->sin_addr.s_addr == INADDR_ANY) { -#ifdef INET6 - /* - * We use O_FORWARD_IP opcode for - * fwd rule with tablearg, but tables - * now support IPv6 addresses. And - * when we are inspecting IPv6 packet, - * we can use nh6 field from - * table_value as next_hop6 address. - */ - if (is_ipv6) { - struct sockaddr_in6 *sa6; - - sa6 = args->next_hop6 = - &args->hopstore6; - sa6->sin6_family = AF_INET6; - sa6->sin6_len = sizeof(*sa6); - sa6->sin6_addr = TARG_VAL( - chain, tablearg, nh6); - /* - * Set sin6_scope_id only for - * link-local unicast addresses. 
- */ - if (IN6_IS_ADDR_LINKLOCAL( - &sa6->sin6_addr)) - sa6->sin6_scope_id = - TARG_VAL(chain, - tablearg, - zoneid); - } else -#endif - { - sa = args->next_hop = - &args->hopstore; - sa->sin_family = AF_INET; - sa->sin_len = sizeof(*sa); - sa->sin_addr.s_addr = htonl( - TARG_VAL(chain, tablearg, - nh4)); - } - } else { - args->next_hop = sa; - } - } - retval = IP_FW_PASS; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - -#ifdef INET6 - case O_FORWARD_IP6: - if (args->eh) /* not valid on layer2 pkts */ - break; - if (q == NULL || q->rule != f || - dyn_dir == MATCH_FORWARD) { - struct sockaddr_in6 *sin6; - - sin6 = &(((ipfw_insn_sa6 *)cmd)->sa); - args->next_hop6 = sin6; - } - retval = IP_FW_PASS; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; -#endif - - case O_NETGRAPH: - case O_NGTEE: - set_match(args, f_pos, chain); - args->rule.info = TARG(cmd->arg1, netgraph); - if (V_fw_one_pass) - args->rule.info |= IPFW_ONEPASS; - retval = (cmd->opcode == O_NETGRAPH) ? 
- IP_FW_NETGRAPH : IP_FW_NGTEE; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - - case O_SETFIB: { - uint32_t fib; - - IPFW_INC_RULE_COUNTER(f, pktlen); - fib = TARG(cmd->arg1, fib) & 0x7FFF; - if (fib >= rt_numfibs) - fib = 0; - M_SETFIB(m, fib); - args->f_id.fib = fib; - l = 0; /* exit inner loop */ - break; - } - - case O_SETDSCP: { - uint16_t code; - - code = TARG(cmd->arg1, dscp) & 0x3F; - l = 0; /* exit inner loop */ - if (is_ipv4) { - uint16_t old; - - old = *(uint16_t *)ip; - ip->ip_tos = (code << 2) | - (ip->ip_tos & 0x03); - ip->ip_sum = cksum_adjust(ip->ip_sum, - old, *(uint16_t *)ip); - } else if (is_ipv6) { - uint8_t *v; - - v = &((struct ip6_hdr *)ip)->ip6_vfc; - *v = (*v & 0xF0) | (code >> 2); - v++; - *v = (*v & 0x3F) | ((code & 0x03) << 6); - } else - break; - - IPFW_INC_RULE_COUNTER(f, pktlen); - break; - } - - case O_NAT: - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - if (!IPFW_NAT_LOADED) { - retval = IP_FW_DENY; - break; - } - - struct cfg_nat *t; - int nat_id; - - set_match(args, f_pos, chain); - /* Check if this is 'global' nat rule */ - if (cmd->arg1 == IP_FW_NAT44_GLOBAL) { - retval = ipfw_nat_ptr(args, NULL, m); - break; - } - t = ((ipfw_insn_nat *)cmd)->nat; - if (t == NULL) { - nat_id = TARG(cmd->arg1, nat); - t = (*lookup_nat_ptr)(&chain->nat, nat_id); - - if (t == NULL) { - retval = IP_FW_DENY; - break; - } - if (cmd->arg1 != IP_FW_TARG) - ((ipfw_insn_nat *)cmd)->nat = t; - } - retval = ipfw_nat_ptr(args, t, m); - break; - - case O_REASS: { - int ip_off; - - IPFW_INC_RULE_COUNTER(f, pktlen); - l = 0; /* in any case exit inner loop */ - ip_off = ntohs(ip->ip_off); - - /* if not fragmented, go to next rule */ - if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) - break; - - args->m = m = ip_reass(m); - - /* - * do IP header checksum fixup. 
- */ - if (m == NULL) { /* fragment got swallowed */ - retval = IP_FW_DENY; - } else { /* good, packet complete */ - int hlen; - - ip = mtod(m, struct ip *); - hlen = ip->ip_hl << 2; - ip->ip_sum = 0; - if (hlen == sizeof(struct ip)) - ip->ip_sum = in_cksum_hdr(ip); - else - ip->ip_sum = in_cksum(m, hlen); - retval = IP_FW_REASS; - set_match(args, f_pos, chain); - } - done = 1; /* exit outer loop */ - break; - } - case O_EXTERNAL_ACTION: - l = 0; /* in any case exit inner loop */ - retval = ipfw_run_eaction(chain, args, - cmd, &done); - /* - * If both @retval and @done are zero, - * consider this as rule matching and - * update counters. - */ - if (retval == 0 && done == 0) { - IPFW_INC_RULE_COUNTER(f, pktlen); - /* - * Reset the result of the last - * dynamic state lookup. - * External action can change - * @args content, and it may be - * used for new state lookup later. - */ - dyn_dir = MATCH_UNKNOWN; - } - break; - - default: - panic("-- unknown opcode %d\n", cmd->opcode); - } /* end of switch() on opcodes */ - /* - * if we get here with l=0, then match is irrelevant. - */ - - if (cmd->len & F_NOT) - match = !match; - - if (match) { - if (cmd->len & F_OR) - skip_or = 1; - } else { - if (!(cmd->len & F_OR)) /* not an OR block, */ - break; /* try next rule */ - } - - } /* end of inner loop, scan opcodes */ -#undef PULLUP_LEN - - if (done) - break; - -/* next_rule:; */ /* try next rule */ - - } /* end of outer for, scan rules */ - - if (done) { - struct ip_fw *rule = chain->map[f_pos]; - /* Update statistics */ - IPFW_INC_RULE_COUNTER(rule, pktlen); - } else { - retval = IP_FW_DENY; - printf("ipfw: ouch!, skip past end of rules, denying packet\n"); - } - IPFW_PF_RUNLOCK(chain); -#ifdef __FreeBSD__ - if (ucred_cache != NULL) - crfree(ucred_cache); -#endif - return (retval); - -pullup_failed: - if (V_fw_verbose) - printf("ipfw: pullup failed\n"); - return (IP_FW_DENY); -} - -/* - * Set maximum number of tables that can be used in given VNET ipfw instance. 
- */ -#ifdef SYSCTL_NODE -static int -sysctl_ipfw_table_num(SYSCTL_HANDLER_ARGS) -{ - int error; - unsigned int ntables; - - ntables = V_fw_tables_max; - - error = sysctl_handle_int(oidp, &ntables, 0, req); - /* Read operation or some error */ - if ((error != 0) || (req->newptr == NULL)) - return (error); - - return (ipfw_resize_tables(&V_layer3_chain, ntables)); -} - -/* - * Switches table namespace between global and per-set. - */ -static int -sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS) -{ - int error; - unsigned int sets; - - sets = V_fw_tables_sets; - - error = sysctl_handle_int(oidp, &sets, 0, req); - /* Read operation or some error */ - if ((error != 0) || (req->newptr == NULL)) - return (error); - - return (ipfw_switch_tables_namespace(&V_layer3_chain, sets)); -} -#endif - -/* - * Module and VNET glue - */ - -/* - * Stuff that must be initialised only on boot or module load - */ -static int -ipfw_init(void) -{ - int error = 0; - - /* - * Only print out this stuff the first time around, - * when called from the sysinit code. - */ - printf("ipfw2 " -#ifdef INET6 - "(+ipv6) " -#endif - "initialized, divert %s, nat %s, " - "default to %s, logging ", -#ifdef IPDIVERT - "enabled", -#else - "loadable", -#endif -#ifdef IPFIREWALL_NAT - "enabled", -#else - "loadable", -#endif - default_to_accept ? "accept" : "deny"); - - /* - * Note: V_xxx variables can be accessed here but the vnet specific - * initializer may not have been called yet for the VIMAGE case. - * Tuneables will have been processed. We will print out values for - * the default vnet. 
- * XXX This should all be rationalized AFTER 8.0 - */ - if (V_fw_verbose == 0) - printf("disabled\n"); - else if (V_verbose_limit == 0) - printf("unlimited\n"); - else - printf("limited to %d packets/entry by default\n", - V_verbose_limit); - - /* Check user-supplied table count for validness */ - if (default_fw_tables > IPFW_TABLES_MAX) - default_fw_tables = IPFW_TABLES_MAX; - - ipfw_init_sopt_handler(); - ipfw_init_obj_rewriter(); - ipfw_iface_init(); - return (error); -} - -#ifndef __rtems__ -/* - * Called for the removal of the last instance only on module unload. - */ -static void -ipfw_destroy(void) -{ - - ipfw_iface_destroy(); - ipfw_destroy_sopt_handler(); - ipfw_destroy_obj_rewriter(); - printf("IP firewall unloaded\n"); -} -#endif /* __rtems__ */ - -/* - * Stuff that must be initialized for every instance - * (including the first of course). - */ -static int -vnet_ipfw_init(const void *unused) -{ - int error, first; - struct ip_fw *rule = NULL; - struct ip_fw_chain *chain; - - chain = &V_layer3_chain; - - first = IS_DEFAULT_VNET(curvnet) ? 
1 : 0; - - /* First set up some values that are compile time options */ - V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ - V_fw_deny_unknown_exthdrs = 1; -#ifdef IPFIREWALL_VERBOSE - V_fw_verbose = 1; -#endif -#ifdef IPFIREWALL_VERBOSE_LIMIT - V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; -#endif -#ifdef IPFIREWALL_NAT - LIST_INIT(&chain->nat); -#endif - - /* Init shared services hash table */ - ipfw_init_srv(chain); - - ipfw_init_counters(); - /* insert the default rule and create the initial map */ - chain->n_rules = 1; - chain->map = malloc(sizeof(struct ip_fw *), M_IPFW, M_WAITOK | M_ZERO); - rule = ipfw_alloc_rule(chain, sizeof(struct ip_fw)); - - /* Set initial number of tables */ - V_fw_tables_max = default_fw_tables; - error = ipfw_init_tables(chain, first); - if (error) { - printf("ipfw2: setting up tables failed\n"); - free(chain->map, M_IPFW); - free(rule, M_IPFW); - return (ENOSPC); - } - - /* fill and insert the default rule */ - rule->act_ofs = 0; - rule->rulenum = IPFW_DEFAULT_RULE; - rule->cmd_len = 1; - rule->set = RESVD_SET; - rule->cmd[0].len = 1; - rule->cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY; - chain->default_rule = chain->map[0] = rule; - chain->id = rule->id = 1; - /* Pre-calculate rules length for legacy dump format */ - chain->static_len = sizeof(struct ip_fw_rule0); - - IPFW_LOCK_INIT(chain); - ipfw_dyn_init(chain); - ipfw_eaction_init(chain, first); -#ifdef LINEAR_SKIPTO - ipfw_init_skipto_cache(chain); -#endif - ipfw_bpf_init(first); - - /* First set up some values that are compile time options */ - V_ipfw_vnet_ready = 1; /* Open for business */ - - /* - * Hook the sockopt handler and pfil hooks for ipv4 and ipv6. - * Even if the latter two fail we still keep the module alive - * because the sockopt and layer2 paths are still useful. - * ipfw[6]_hook return 0 on success, ENOENT on failure, - * so we can ignore the exact return value and just set a flag. 
- * - * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so - * changes in the underlying (per-vnet) variables trigger - * immediate hook()/unhook() calls. - * In layer2 we have the same behaviour, except that V_ether_ipfw - * is checked on each packet because there are no pfil hooks. - */ - V_ip_fw_ctl_ptr = ipfw_ctl3; - error = ipfw_attach_hooks(1); - return (error); -} - -#ifndef __rtems__ -/* - * Called for the removal of each instance. - */ -static int -vnet_ipfw_uninit(const void *unused) -{ - struct ip_fw *reap; - struct ip_fw_chain *chain = &V_layer3_chain; - int i, last; - - V_ipfw_vnet_ready = 0; /* tell new callers to go away */ - /* - * disconnect from ipv4, ipv6, layer2 and sockopt. - * Then grab, release and grab again the WLOCK so we make - * sure the update is propagated and nobody will be in. - */ - (void)ipfw_attach_hooks(0 /* detach */); - V_ip_fw_ctl_ptr = NULL; - - last = IS_DEFAULT_VNET(curvnet) ? 1 : 0; - - IPFW_UH_WLOCK(chain); - IPFW_UH_WUNLOCK(chain); - - ipfw_dyn_uninit(0); /* run the callout_drain */ - - IPFW_UH_WLOCK(chain); - - reap = NULL; - IPFW_WLOCK(chain); - for (i = 0; i < chain->n_rules; i++) - ipfw_reap_add(chain, &reap, chain->map[i]); - free(chain->map, M_IPFW); -#ifdef LINEAR_SKIPTO - ipfw_destroy_skipto_cache(chain); -#endif - IPFW_WUNLOCK(chain); - IPFW_UH_WUNLOCK(chain); - ipfw_destroy_tables(chain, last); - ipfw_eaction_uninit(chain, last); - if (reap != NULL) - ipfw_reap_rules(reap); - vnet_ipfw_iface_destroy(chain); - ipfw_destroy_srv(chain); - IPFW_LOCK_DESTROY(chain); - ipfw_dyn_uninit(1); /* free the remaining parts */ - ipfw_destroy_counters(); - ipfw_bpf_uninit(last); - return (0); -} -#endif /* __rtems__ */ - -/* - * Module event handler. - * In general we have the choice of handling most of these events by the - * event handler or by the (VNET_)SYS(UN)INIT handlers. 
I have chosen to - * use the SYSINIT handlers as they are more capable of expressing the - * flow of control during module and vnet operations, so this is just - * a skeleton. Note there is no SYSINIT equivalent of the module - * SHUTDOWN handler, but we don't have anything to do in that case anyhow. - */ -static int -ipfw_modevent(module_t mod, int type, void *unused) -{ - int err = 0; - - switch (type) { - case MOD_LOAD: - /* Called once at module load or - * system boot if compiled in. */ - break; - case MOD_QUIESCE: - /* Called before unload. May veto unloading. */ - break; - case MOD_UNLOAD: - /* Called during unload. */ - break; - case MOD_SHUTDOWN: - /* Called during system shutdown. */ - break; - default: - err = EOPNOTSUPP; - break; - } - return err; -} - -static moduledata_t ipfwmod = { - "ipfw", - ipfw_modevent, - 0 -}; - -/* Define startup order. */ -#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL -#define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */ -#define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */ -#define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */ - -DECLARE_MODULE(ipfw, ipfwmod, IPFW_SI_SUB_FIREWALL, IPFW_MODEVENT_ORDER); -FEATURE(ipfw_ctl3, "ipfw new sockopt calls"); -MODULE_VERSION(ipfw, 3); -/* should declare some dependencies here */ - -/* - * Starting up. Done in order after ipfwmod() has been called. - * VNET_SYSINIT is also called for each existing vnet and each new vnet. - */ -SYSINIT(ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, - ipfw_init, NULL); -VNET_SYSINIT(vnet_ipfw_init, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER, - vnet_ipfw_init, NULL); - -/* - * Closing up shop. These are done in REVERSE ORDER, but still - * after ipfwmod() has been called. Not called on reboot. - * VNET_SYSUNINIT is also called for each exiting vnet as it exits. - * or when the module is unloaded. 
- */ -SYSUNINIT(ipfw_destroy, IPFW_SI_SUB_FIREWALL, IPFW_MODULE_ORDER, - ipfw_destroy, NULL); -VNET_SYSUNINIT(vnet_ipfw_uninit, IPFW_SI_SUB_FIREWALL, IPFW_VNET_ORDER, - vnet_ipfw_uninit, NULL); -/* end of file */ diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_bpf.c b/freebsd/sys/netpfil/ipfw/ip_fw_bpf.c deleted file mode 100644 index a204fddd..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_bpf.c +++ /dev/null @@ -1,211 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2016 Yandex LLC - * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/mbuf.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/rmlock.h> -#include <sys/socket.h> -#include <net/ethernet.h> -#include <net/if.h> -#include <net/if_pflog.h> -#include <net/if_var.h> -#include <net/if_clone.h> -#include <net/if_types.h> -#include <net/vnet.h> -#include <net/bpf.h> - -#include <netinet/in.h> -#include <netinet/ip_fw.h> -#include <netinet/ip_var.h> -#include <netpfil/ipfw/ip_fw_private.h> - -static VNET_DEFINE(struct ifnet *, log_if); -static VNET_DEFINE(struct ifnet *, pflog_if); -static VNET_DEFINE(struct if_clone *, ipfw_cloner); -static VNET_DEFINE(struct if_clone *, ipfwlog_cloner); -#define V_ipfw_cloner VNET(ipfw_cloner) -#define V_ipfwlog_cloner VNET(ipfwlog_cloner) -#define V_log_if VNET(log_if) -#define V_pflog_if VNET(pflog_if) - -static struct rmlock log_if_lock; -#define LOGIF_LOCK_INIT(x) rm_init(&log_if_lock, "ipfw log_if lock") -#define LOGIF_LOCK_DESTROY(x) rm_destroy(&log_if_lock) -#define LOGIF_RLOCK_TRACKER struct rm_priotracker _log_tracker -#define LOGIF_RLOCK(x) rm_rlock(&log_if_lock, &_log_tracker) -#define LOGIF_RUNLOCK(x) rm_runlock(&log_if_lock, &_log_tracker) -#define LOGIF_WLOCK(x) rm_wlock(&log_if_lock) -#define LOGIF_WUNLOCK(x) rm_wunlock(&log_if_lock) - -static const char ipfwname[] = "ipfw"; -static const char ipfwlogname[] = "ipfwlog"; - -static int -ipfw_bpf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) -{ - - return (EINVAL); -} - -static int -ipfw_bpf_output(struct ifnet *ifp, struct mbuf *m, - const struct sockaddr *dst, struct route *ro) -{ - - if (m != NULL) - FREE_PKT(m); - return (0); -} - -static void -ipfw_clone_destroy(struct ifnet *ifp) -{ - - LOGIF_WLOCK(); - if (ifp->if_hdrlen == ETHER_HDR_LEN) - V_log_if = NULL; - else - V_pflog_if = NULL; - LOGIF_WUNLOCK(); - - bpfdetach(ifp); - if_detach(ifp); - if_free(ifp); -} - -static int -ipfw_clone_create(struct 
if_clone *ifc, int unit, caddr_t params) -{ - struct ifnet *ifp; - - ifp = if_alloc(IFT_PFLOG); - if (ifp == NULL) - return (ENOSPC); - if_initname(ifp, ipfwname, unit); - ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_mtu = 65536; - ifp->if_ioctl = ipfw_bpf_ioctl; - ifp->if_output = ipfw_bpf_output; - ifp->if_hdrlen = ETHER_HDR_LEN; - if_attach(ifp); - bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); - LOGIF_WLOCK(); - if (V_log_if != NULL) { - LOGIF_WUNLOCK(); - bpfdetach(ifp); - if_detach(ifp); - if_free(ifp); - return (EEXIST); - } - V_log_if = ifp; - LOGIF_WUNLOCK(); - return (0); -} - -static int -ipfwlog_clone_create(struct if_clone *ifc, int unit, caddr_t params) -{ - struct ifnet *ifp; - - ifp = if_alloc(IFT_PFLOG); - if (ifp == NULL) - return (ENOSPC); - if_initname(ifp, ipfwlogname, unit); - ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_mtu = 65536; - ifp->if_ioctl = ipfw_bpf_ioctl; - ifp->if_output = ipfw_bpf_output; - ifp->if_hdrlen = PFLOG_HDRLEN; - if_attach(ifp); - bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN); - LOGIF_WLOCK(); - if (V_pflog_if != NULL) { - LOGIF_WUNLOCK(); - bpfdetach(ifp); - if_detach(ifp); - if_free(ifp); - return (EEXIST); - } - V_pflog_if = ifp; - LOGIF_WUNLOCK(); - return (0); -} - -void -ipfw_bpf_mtap2(void *data, u_int dlen, struct mbuf *m) -{ - LOGIF_RLOCK_TRACKER; - - LOGIF_RLOCK(); - if (dlen == ETHER_HDR_LEN) { - if (V_log_if == NULL) { - LOGIF_RUNLOCK(); - return; - } - BPF_MTAP2(V_log_if, data, dlen, m); - } else if (dlen == PFLOG_HDRLEN) { - if (V_pflog_if == NULL) { - LOGIF_RUNLOCK(); - return; - } - BPF_MTAP2(V_pflog_if, data, dlen, m); - } - LOGIF_RUNLOCK(); -} - -void -ipfw_bpf_init(int first) -{ - - if (first) { - LOGIF_LOCK_INIT(); - V_log_if = NULL; - V_pflog_if = NULL; - } - V_ipfw_cloner = if_clone_simple(ipfwname, ipfw_clone_create, - ipfw_clone_destroy, 0); - V_ipfwlog_cloner = if_clone_simple(ipfwlogname, ipfwlog_clone_create, - ipfw_clone_destroy, 0); -} - -void -ipfw_bpf_uninit(int 
last) -{ - - if_clone_detach(V_ipfw_cloner); - if_clone_detach(V_ipfwlog_cloner); - if (last) - LOGIF_LOCK_DESTROY(); -} - diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c b/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c deleted file mode 100644 index b5a90f53..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_dynamic.c +++ /dev/null @@ -1,1822 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#define DEB(x) -#define DDB(x) x - -/* - * Dynamic rule support for ipfw - */ - -#include <rtems/bsd/local/opt_ipfw.h> -#include <rtems/bsd/local/opt_inet.h> -#ifndef INET -#error IPFIREWALL requires INET. -#endif /* INET */ -#include <rtems/bsd/local/opt_inet6.h> - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/kernel.h> -#include <sys/ktr.h> -#include <sys/lock.h> -#include <sys/rmlock.h> -#include <sys/socket.h> -#include <sys/sysctl.h> -#include <sys/syslog.h> -#include <net/ethernet.h> /* for ETHERTYPE_IP */ -#include <net/if.h> -#include <net/if_var.h> -#include <net/pfil.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> /* ip_defttl */ -#include <netinet/ip_fw.h> -#include <netinet/tcp_var.h> -#include <netinet/udp.h> - -#include <netinet/ip6.h> /* IN6_ARE_ADDR_EQUAL */ -#ifdef INET6 -#include <netinet6/in6_var.h> -#include <netinet6/ip6_var.h> -#endif - -#include <netpfil/ipfw/ip_fw_private.h> - -#include <machine/in_cksum.h> /* XXX for in_cksum */ - -#ifdef MAC -#include <security/mac/mac_framework.h> -#endif - -/* - * Description of dynamic rules. - * - * Dynamic rules are stored in lists accessed through a hash table - * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can - * be modified through the sysctl variable dyn_buckets which is - * updated when the table becomes empty. - * - * XXX currently there is only one list, ipfw_dyn. - * - * When a packet is received, its address fields are first masked - * with the mask defined for the rule, then hashed, then matched - * against the entries in the corresponding list. 
- * Dynamic rules can be used for different purposes: - * + stateful rules; - * + enforcing limits on the number of sessions; - * + in-kernel NAT (not implemented yet) - * - * The lifetime of dynamic rules is regulated by dyn_*_lifetime, - * measured in seconds and depending on the flags. - * - * The total number of dynamic rules is equal to UMA zone items count. - * The max number of dynamic rules is dyn_max. When we reach - * the maximum number of rules we do not create anymore. This is - * done to avoid consuming too much memory, but also too much - * time when searching on each packet (ideally, we should try instead - * to put a limit on the length of the list on each bucket...). - * - * Each dynamic rule holds a pointer to the parent ipfw rule so - * we know what action to perform. Dynamic rules are removed when - * the parent rule is deleted. This can be changed by dyn_keep_states - * sysctl. - * - * There are some limitations with dynamic rules -- we do not - * obey the 'randomized match', and we do not do multiple - * passes through the firewall. XXX check the latter!!! 
- */ - -struct ipfw_dyn_bucket { - struct mtx mtx; /* Bucket protecting lock */ - ipfw_dyn_rule *head; /* Pointer to first rule */ -}; - -/* - * Static variables followed by global ones - */ -static VNET_DEFINE(struct ipfw_dyn_bucket *, ipfw_dyn_v); -static VNET_DEFINE(u_int32_t, dyn_buckets_max); -static VNET_DEFINE(u_int32_t, curr_dyn_buckets); -static VNET_DEFINE(struct callout, ipfw_timeout); -#define V_ipfw_dyn_v VNET(ipfw_dyn_v) -#define V_dyn_buckets_max VNET(dyn_buckets_max) -#define V_curr_dyn_buckets VNET(curr_dyn_buckets) -#define V_ipfw_timeout VNET(ipfw_timeout) - -static VNET_DEFINE(uma_zone_t, ipfw_dyn_rule_zone); -#define V_ipfw_dyn_rule_zone VNET(ipfw_dyn_rule_zone) - -#define IPFW_BUCK_LOCK_INIT(b) \ - mtx_init(&(b)->mtx, "IPFW dynamic bucket", NULL, MTX_DEF) -#define IPFW_BUCK_LOCK_DESTROY(b) \ - mtx_destroy(&(b)->mtx) -#define IPFW_BUCK_LOCK(i) mtx_lock(&V_ipfw_dyn_v[(i)].mtx) -#define IPFW_BUCK_UNLOCK(i) mtx_unlock(&V_ipfw_dyn_v[(i)].mtx) -#define IPFW_BUCK_ASSERT(i) mtx_assert(&V_ipfw_dyn_v[(i)].mtx, MA_OWNED) - - -static VNET_DEFINE(int, dyn_keep_states); -#define V_dyn_keep_states VNET(dyn_keep_states) - -/* - * Timeouts for various events in handing dynamic rules. - */ -static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); -static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); -static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); -static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); -static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); -static VNET_DEFINE(u_int32_t, dyn_short_lifetime); - -#define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) -#define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) -#define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) -#define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) -#define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) -#define V_dyn_short_lifetime VNET(dyn_short_lifetime) - -/* - * Keepalives are sent if dyn_keepalive is set. 
They are sent every - * dyn_keepalive_period seconds, in the last dyn_keepalive_interval - * seconds of lifetime of a rule. - * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower - * than dyn_keepalive_period. - */ - -static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); -static VNET_DEFINE(u_int32_t, dyn_keepalive_period); -static VNET_DEFINE(u_int32_t, dyn_keepalive); -static VNET_DEFINE(time_t, dyn_keepalive_last); - -#define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) -#define V_dyn_keepalive_period VNET(dyn_keepalive_period) -#define V_dyn_keepalive VNET(dyn_keepalive) -#define V_dyn_keepalive_last VNET(dyn_keepalive_last) - -static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ - -#define DYN_COUNT uma_zone_get_cur(V_ipfw_dyn_rule_zone) -#define V_dyn_max VNET(dyn_max) - -/* for userspace, we emulate the uma_zone_counter with ipfw_dyn_count */ -static int ipfw_dyn_count; /* number of objects */ - -#ifdef USERSPACE /* emulation of UMA object counters for userspace */ -#define uma_zone_get_cur(x) ipfw_dyn_count -#endif /* USERSPACE */ - -static int last_log; /* Log ratelimiting */ - -static void ipfw_dyn_tick(void *vnetx); -static void check_dyn_rules(struct ip_fw_chain *, ipfw_range_tlv *, int, int); -#ifdef SYSCTL_NODE - -static int sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS); -static int sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS); - -SYSBEGIN(f2) - -SYSCTL_DECL(_net_inet_ip_fw); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_buckets_max), 0, - "Max number of dyn. buckets"); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, - CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, - "Current Number of dyn. buckets"); -SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_count, - CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RD, 0, 0, sysctl_ipfw_dyn_count, "IU", - "Number of dyn. 
rules"); -SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, dyn_max, - CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 0, 0, sysctl_ipfw_dyn_max, "IU", - "Max number of dyn. rules"); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, - "Lifetime of dyn. rules for acks"); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, - "Lifetime of dyn. rules for syn"); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, - "Lifetime of dyn. rules for fin"); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, - "Lifetime of dyn. rules for rst"); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, - "Lifetime of dyn. rules for UDP"); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, - "Lifetime of dyn. rules for other situations"); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, - "Enable keepalives for dyn. rules"); -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, dyn_keep_states, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(dyn_keep_states), 0, - "Do not flush dynamic states on rule deletion"); - -SYSEND - -#endif /* SYSCTL_NODE */ - - -#ifdef INET6 -static __inline int -hash_packet6(struct ipfw_flow_id *id) -{ - u_int32_t i; - i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ - (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ - (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ - (id->src_ip6.__u6_addr.__u6_addr32[3]); - return ntohl(i); -} -#endif - -/* - * IMPORTANT: the hash function for dynamic rules must be commutative - * in source and destination (ip,port), because rules are bidirectional - * and we want to find both in the same bucket. 
- */ -static __inline int -hash_packet(struct ipfw_flow_id *id, int buckets) -{ - u_int32_t i; - -#ifdef INET6 - if (IS_IP6_FLOW_ID(id)) - i = hash_packet6(id); - else -#endif /* INET6 */ - i = (id->dst_ip) ^ (id->src_ip); - i ^= (id->dst_port) ^ (id->src_port); - return (i & (buckets - 1)); -} - -#if 0 -#define DYN_DEBUG(fmt, ...) do { \ - printf("%s: " fmt "\n", __func__, __VA_ARGS__); \ -} while (0) -#else -#define DYN_DEBUG(fmt, ...) -#endif - -static char *default_state_name = "default"; -struct dyn_state_obj { - struct named_object no; - char name[64]; -}; - -#define DYN_STATE_OBJ(ch, cmd) \ - ((struct dyn_state_obj *)SRV_OBJECT(ch, (cmd)->arg1)) -/* - * Classifier callback. - * Return 0 if opcode contains object that should be referenced - * or rewritten. - */ -static int -dyn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) -{ - - DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); - /* Don't rewrite "check-state any" */ - if (cmd->arg1 == 0 && - cmd->opcode == O_CHECK_STATE) - return (1); - - *puidx = cmd->arg1; - *ptype = 0; - return (0); -} - -static void -dyn_update(ipfw_insn *cmd, uint16_t idx) -{ - - cmd->arg1 = idx; - DYN_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); -} - -static int -dyn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, - struct named_object **pno) -{ - ipfw_obj_ntlv *ntlv; - const char *name; - - DYN_DEBUG("uidx %d", ti->uidx); - if (ti->uidx != 0) { - if (ti->tlvs == NULL) - return (EINVAL); - /* Search ntlv in the buffer provided by user */ - ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, - IPFW_TLV_STATE_NAME); - if (ntlv == NULL) - return (EINVAL); - name = ntlv->name; - } else - name = default_state_name; - /* - * Search named object with corresponding name. - * Since states objects are global - ignore the set value - * and use zero instead. - */ - *pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0, - IPFW_TLV_STATE_NAME, name); - /* - * We always return success here. 
- * The caller will check *pno and mark object as unresolved, - * then it will automatically create "default" object. - */ - return (0); -} - -static struct named_object * -dyn_findbykidx(struct ip_fw_chain *ch, uint16_t idx) -{ - - DYN_DEBUG("kidx %d", idx); - return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx)); -} - -static int -dyn_create(struct ip_fw_chain *ch, struct tid_info *ti, - uint16_t *pkidx) -{ - struct namedobj_instance *ni; - struct dyn_state_obj *obj; - struct named_object *no; - ipfw_obj_ntlv *ntlv; - char *name; - - DYN_DEBUG("uidx %d", ti->uidx); - if (ti->uidx != 0) { - if (ti->tlvs == NULL) - return (EINVAL); - ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, - IPFW_TLV_STATE_NAME); - if (ntlv == NULL) - return (EINVAL); - name = ntlv->name; - } else - name = default_state_name; - - ni = CHAIN_TO_SRV(ch); - obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO); - obj->no.name = obj->name; - obj->no.etlv = IPFW_TLV_STATE_NAME; - strlcpy(obj->name, name, sizeof(obj->name)); - - IPFW_UH_WLOCK(ch); - no = ipfw_objhash_lookup_name_type(ni, 0, - IPFW_TLV_STATE_NAME, name); - if (no != NULL) { - /* - * Object is already created. - * Just return its kidx and bump refcount. 
- */ - *pkidx = no->kidx; - no->refcnt++; - IPFW_UH_WUNLOCK(ch); - free(obj, M_IPFW); - DYN_DEBUG("\tfound kidx %d", *pkidx); - return (0); - } - if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) { - DYN_DEBUG("\talloc_idx failed for %s", name); - IPFW_UH_WUNLOCK(ch); - free(obj, M_IPFW); - return (ENOSPC); - } - ipfw_objhash_add(ni, &obj->no); - IPFW_WLOCK(ch); - SRV_OBJECT(ch, obj->no.kidx) = obj; - IPFW_WUNLOCK(ch); - obj->no.refcnt++; - *pkidx = obj->no.kidx; - IPFW_UH_WUNLOCK(ch); - DYN_DEBUG("\tcreated kidx %d", *pkidx); - return (0); -} - -static void -dyn_destroy(struct ip_fw_chain *ch, struct named_object *no) -{ - struct dyn_state_obj *obj; - - IPFW_UH_WLOCK_ASSERT(ch); - - KASSERT(no->refcnt == 1, - ("Destroying object '%s' (type %u, idx %u) with refcnt %u", - no->name, no->etlv, no->kidx, no->refcnt)); - - DYN_DEBUG("kidx %d", no->kidx); - IPFW_WLOCK(ch); - obj = SRV_OBJECT(ch, no->kidx); - SRV_OBJECT(ch, no->kidx) = NULL; - IPFW_WUNLOCK(ch); - ipfw_objhash_del(CHAIN_TO_SRV(ch), no); - ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), no->kidx); - - free(obj, M_IPFW); -} - -static struct opcode_obj_rewrite dyn_opcodes[] = { - { - O_KEEP_STATE, IPFW_TLV_STATE_NAME, - dyn_classify, dyn_update, - dyn_findbyname, dyn_findbykidx, - dyn_create, dyn_destroy - }, - { - O_CHECK_STATE, IPFW_TLV_STATE_NAME, - dyn_classify, dyn_update, - dyn_findbyname, dyn_findbykidx, - dyn_create, dyn_destroy - }, - { - O_PROBE_STATE, IPFW_TLV_STATE_NAME, - dyn_classify, dyn_update, - dyn_findbyname, dyn_findbykidx, - dyn_create, dyn_destroy - }, - { - O_LIMIT, IPFW_TLV_STATE_NAME, - dyn_classify, dyn_update, - dyn_findbyname, dyn_findbykidx, - dyn_create, dyn_destroy - }, -}; -/** - * Print customizable flow id description via log(9) facility. 
- */ -static void -print_dyn_rule_flags(struct ipfw_flow_id *id, int dyn_type, int log_flags, - char *prefix, char *postfix) -{ - struct in_addr da; -#ifdef INET6 - char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; -#else - char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; -#endif - -#ifdef INET6 - if (IS_IP6_FLOW_ID(id)) { - ip6_sprintf(src, &id->src_ip6); - ip6_sprintf(dst, &id->dst_ip6); - } else -#endif - { - da.s_addr = htonl(id->src_ip); - inet_ntop(AF_INET, &da, src, sizeof(src)); - da.s_addr = htonl(id->dst_ip); - inet_ntop(AF_INET, &da, dst, sizeof(dst)); - } - log(log_flags, "ipfw: %s type %d %s %d -> %s %d, %d %s\n", - prefix, dyn_type, src, id->src_port, dst, - id->dst_port, DYN_COUNT, postfix); -} - -#define print_dyn_rule(id, dtype, prefix, postfix) \ - print_dyn_rule_flags(id, dtype, LOG_DEBUG, prefix, postfix) - -#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) -#define TIME_LE(a,b) ((int)((a)-(b)) < 0) - -static void -dyn_update_proto_state(ipfw_dyn_rule *q, const struct ipfw_flow_id *id, - const struct tcphdr *tcp, int dir) -{ - uint32_t ack; - u_char flags; - - if (id->proto == IPPROTO_TCP) { - flags = id->_flags & (TH_FIN | TH_SYN | TH_RST); -#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) -#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) -#define TCP_FLAGS (TH_FLAGS | (TH_FLAGS << 8)) -#define ACK_FWD 0x10000 /* fwd ack seen */ -#define ACK_REV 0x20000 /* rev ack seen */ - - q->state |= (dir == MATCH_FORWARD) ? 
flags : (flags << 8); - switch (q->state & TCP_FLAGS) { - case TH_SYN: /* opening */ - q->expire = time_uptime + V_dyn_syn_lifetime; - break; - - case BOTH_SYN: /* move to established */ - case BOTH_SYN | TH_FIN: /* one side tries to close */ - case BOTH_SYN | (TH_FIN << 8): -#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) - if (tcp == NULL) - break; - - ack = ntohl(tcp->th_ack); - if (dir == MATCH_FORWARD) { - if (q->ack_fwd == 0 || - _SEQ_GE(ack, q->ack_fwd)) { - q->ack_fwd = ack; - q->state |= ACK_FWD; - } - } else { - if (q->ack_rev == 0 || - _SEQ_GE(ack, q->ack_rev)) { - q->ack_rev = ack; - q->state |= ACK_REV; - } - } - if ((q->state & (ACK_FWD | ACK_REV)) == - (ACK_FWD | ACK_REV)) { - q->expire = time_uptime + V_dyn_ack_lifetime; - q->state &= ~(ACK_FWD | ACK_REV); - } - break; - - case BOTH_SYN | BOTH_FIN: /* both sides closed */ - if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) - V_dyn_fin_lifetime = - V_dyn_keepalive_period - 1; - q->expire = time_uptime + V_dyn_fin_lifetime; - break; - - default: -#if 0 - /* - * reset or some invalid combination, but can also - * occur if we use keep-state the wrong way. - */ - if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) - printf("invalid state: 0x%x\n", q->state); -#endif - if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) - V_dyn_rst_lifetime = - V_dyn_keepalive_period - 1; - q->expire = time_uptime + V_dyn_rst_lifetime; - break; - } - } else if (id->proto == IPPROTO_UDP) { - q->expire = time_uptime + V_dyn_udp_lifetime; - } else { - /* other protocols */ - q->expire = time_uptime + V_dyn_short_lifetime; - } -} - -/* - * Lookup a dynamic rule, locked version. - */ -static ipfw_dyn_rule * -lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int i, int *match_direction, - struct tcphdr *tcp, uint16_t kidx) -{ - /* - * Stateful ipfw extensions. - * Lookup into dynamic session queue. 
- */ - ipfw_dyn_rule *prev, *q = NULL; - int dir; - - IPFW_BUCK_ASSERT(i); - - dir = MATCH_NONE; - for (prev = NULL, q = V_ipfw_dyn_v[i].head; q; prev = q, q = q->next) { - if (q->dyn_type == O_LIMIT_PARENT) - continue; - - if (pkt->proto != q->id.proto) - continue; - - if (kidx != 0 && kidx != q->kidx) - continue; - - if (IS_IP6_FLOW_ID(pkt)) { - if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.src_ip6) && - IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.dst_ip6) && - pkt->src_port == q->id.src_port && - pkt->dst_port == q->id.dst_port) { - dir = MATCH_FORWARD; - break; - } - if (IN6_ARE_ADDR_EQUAL(&pkt->src_ip6, &q->id.dst_ip6) && - IN6_ARE_ADDR_EQUAL(&pkt->dst_ip6, &q->id.src_ip6) && - pkt->src_port == q->id.dst_port && - pkt->dst_port == q->id.src_port) { - dir = MATCH_REVERSE; - break; - } - } else { - if (pkt->src_ip == q->id.src_ip && - pkt->dst_ip == q->id.dst_ip && - pkt->src_port == q->id.src_port && - pkt->dst_port == q->id.dst_port) { - dir = MATCH_FORWARD; - break; - } - if (pkt->src_ip == q->id.dst_ip && - pkt->dst_ip == q->id.src_ip && - pkt->src_port == q->id.dst_port && - pkt->dst_port == q->id.src_port) { - dir = MATCH_REVERSE; - break; - } - } - } - if (q == NULL) - goto done; /* q = NULL, not found */ - - if (prev != NULL) { /* found and not in front */ - prev->next = q->next; - q->next = V_ipfw_dyn_v[i].head; - V_ipfw_dyn_v[i].head = q; - } - - /* update state according to flags */ - dyn_update_proto_state(q, pkt, tcp, dir); -done: - if (match_direction != NULL) - *match_direction = dir; - return (q); -} - -ipfw_dyn_rule * -ipfw_lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, - struct tcphdr *tcp, uint16_t kidx) -{ - ipfw_dyn_rule *q; - int i; - - i = hash_packet(pkt, V_curr_dyn_buckets); - - IPFW_BUCK_LOCK(i); - q = lookup_dyn_rule_locked(pkt, i, match_direction, tcp, kidx); - if (q == NULL) - IPFW_BUCK_UNLOCK(i); - /* NB: return table locked when q is not NULL */ - return q; -} - -/* - * Unlock bucket mtx - * @p - pointer to dynamic 
rule - */ -void -ipfw_dyn_unlock(ipfw_dyn_rule *q) -{ - - IPFW_BUCK_UNLOCK(q->bucket); -} - -static int -resize_dynamic_table(struct ip_fw_chain *chain, int nbuckets) -{ - int i, k, nbuckets_old; - ipfw_dyn_rule *q; - struct ipfw_dyn_bucket *dyn_v, *dyn_v_old; - - /* Check if given number is power of 2 and less than 64k */ - if ((nbuckets > 65536) || (!powerof2(nbuckets))) - return 1; - - CTR3(KTR_NET, "%s: resize dynamic hash: %d -> %d", __func__, - V_curr_dyn_buckets, nbuckets); - - /* Allocate and initialize new hash */ - dyn_v = malloc(nbuckets * sizeof(*dyn_v), M_IPFW, - M_WAITOK | M_ZERO); - - for (i = 0 ; i < nbuckets; i++) - IPFW_BUCK_LOCK_INIT(&dyn_v[i]); - - /* - * Call upper half lock, as get_map() do to ease - * read-only access to dynamic rules hash from sysctl - */ - IPFW_UH_WLOCK(chain); - - /* - * Acquire chain write lock to permit hash access - * for main traffic path without additional locks - */ - IPFW_WLOCK(chain); - - /* Save old values */ - nbuckets_old = V_curr_dyn_buckets; - dyn_v_old = V_ipfw_dyn_v; - - /* Skip relinking if array is not set up */ - if (V_ipfw_dyn_v == NULL) - V_curr_dyn_buckets = 0; - - /* Re-link all dynamic states */ - for (i = 0 ; i < V_curr_dyn_buckets ; i++) { - while (V_ipfw_dyn_v[i].head != NULL) { - /* Remove from current chain */ - q = V_ipfw_dyn_v[i].head; - V_ipfw_dyn_v[i].head = q->next; - - /* Get new hash value */ - k = hash_packet(&q->id, nbuckets); - q->bucket = k; - /* Add to the new head */ - q->next = dyn_v[k].head; - dyn_v[k].head = q; - } - } - - /* Update current pointers/buckets values */ - V_curr_dyn_buckets = nbuckets; - V_ipfw_dyn_v = dyn_v; - - IPFW_WUNLOCK(chain); - - IPFW_UH_WUNLOCK(chain); - - /* Start periodic callout on initial creation */ - if (dyn_v_old == NULL) { - callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, curvnet, 0); - return (0); - } - - /* Destroy all mutexes */ - for (i = 0 ; i < nbuckets_old ; i++) - IPFW_BUCK_LOCK_DESTROY(&dyn_v_old[i]); - - /* Free old hash */ - 
free(dyn_v_old, M_IPFW); - - return 0; -} - -/** - * Install state of type 'type' for a dynamic session. - * The hash table contains two type of rules: - * - regular rules (O_KEEP_STATE) - * - rules for sessions with limited number of sess per user - * (O_LIMIT). When they are created, the parent is - * increased by 1, and decreased on delete. In this case, - * the third parameter is the parent rule and not the chain. - * - "parent" rules for the above (O_LIMIT_PARENT). - */ -static ipfw_dyn_rule * -add_dyn_rule(struct ipfw_flow_id *id, int i, uint8_t dyn_type, - struct ip_fw *rule, uint16_t kidx) -{ - ipfw_dyn_rule *r; - - IPFW_BUCK_ASSERT(i); - - r = uma_zalloc(V_ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); - if (r == NULL) { - if (last_log != time_uptime) { - last_log = time_uptime; - log(LOG_DEBUG, - "ipfw: Cannot allocate dynamic state, " - "consider increasing net.inet.ip.fw.dyn_max\n"); - } - return NULL; - } - ipfw_dyn_count++; - - /* - * refcount on parent is already incremented, so - * it is safe to use parent unlocked. - */ - if (dyn_type == O_LIMIT) { - ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; - if ( parent->dyn_type != O_LIMIT_PARENT) - panic("invalid parent"); - r->parent = parent; - rule = parent->rule; - } - - r->id = *id; - r->expire = time_uptime + V_dyn_syn_lifetime; - r->rule = rule; - r->dyn_type = dyn_type; - IPFW_ZERO_DYN_COUNTER(r); - r->count = 0; - r->kidx = kidx; - r->bucket = i; - r->next = V_ipfw_dyn_v[i].head; - V_ipfw_dyn_v[i].head = r; - DEB(print_dyn_rule(id, dyn_type, "add dyn entry", "total");) - return r; -} - -/** - * lookup dynamic parent rule using pkt and rule as search keys. - * If the lookup fails, then install one. 
- */ -static ipfw_dyn_rule * -lookup_dyn_parent(struct ipfw_flow_id *pkt, int *pindex, struct ip_fw *rule, - uint16_t kidx) -{ - ipfw_dyn_rule *q; - int i, is_v6; - - is_v6 = IS_IP6_FLOW_ID(pkt); - i = hash_packet( pkt, V_curr_dyn_buckets ); - *pindex = i; - IPFW_BUCK_LOCK(i); - for (q = V_ipfw_dyn_v[i].head ; q != NULL ; q=q->next) - if (q->dyn_type == O_LIMIT_PARENT && - kidx == q->kidx && - rule == q->rule && - pkt->proto == q->id.proto && - pkt->src_port == q->id.src_port && - pkt->dst_port == q->id.dst_port && - ( - (is_v6 && - IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), - &(q->id.src_ip6)) && - IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), - &(q->id.dst_ip6))) || - (!is_v6 && - pkt->src_ip == q->id.src_ip && - pkt->dst_ip == q->id.dst_ip) - ) - ) { - q->expire = time_uptime + V_dyn_short_lifetime; - DEB(print_dyn_rule(pkt, q->dyn_type, - "lookup_dyn_parent found", "");) - return q; - } - - /* Add virtual limiting rule */ - return add_dyn_rule(pkt, i, O_LIMIT_PARENT, rule, kidx); -} - -/** - * Install dynamic state for rule type cmd->o.opcode - * - * Returns 1 (failure) if state is not installed because of errors or because - * session limitations are enforced. - */ -int -ipfw_install_state(struct ip_fw_chain *chain, struct ip_fw *rule, - ipfw_insn_limit *cmd, struct ip_fw_args *args, uint32_t tablearg) -{ - ipfw_dyn_rule *q; - int i; - - DEB(print_dyn_rule(&args->f_id, cmd->o.opcode, "install_state", - (cmd->o.arg1 == 0 ? "": DYN_STATE_OBJ(chain, &cmd->o)->name));) - - i = hash_packet(&args->f_id, V_curr_dyn_buckets); - - IPFW_BUCK_LOCK(i); - - q = lookup_dyn_rule_locked(&args->f_id, i, NULL, NULL, cmd->o.arg1); - if (q != NULL) { /* should never occur */ - DEB( - if (last_log != time_uptime) { - last_log = time_uptime; - printf("ipfw: %s: entry already present, done\n", - __func__); - }) - IPFW_BUCK_UNLOCK(i); - return (0); - } - - /* - * State limiting is done via uma(9) zone limiting. 
- * Save pointer to newly-installed rule and reject - * packet if add_dyn_rule() returned NULL. - * Note q is currently set to NULL. - */ - - switch (cmd->o.opcode) { - case O_KEEP_STATE: /* bidir rule */ - q = add_dyn_rule(&args->f_id, i, O_KEEP_STATE, rule, - cmd->o.arg1); - break; - - case O_LIMIT: { /* limit number of sessions */ - struct ipfw_flow_id id; - ipfw_dyn_rule *parent; - uint32_t conn_limit; - uint16_t limit_mask = cmd->limit_mask; - int pindex; - - conn_limit = IP_FW_ARG_TABLEARG(chain, cmd->conn_limit, limit); - - DEB( - if (cmd->conn_limit == IP_FW_TARG) - printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " - "(tablearg)\n", __func__, conn_limit); - else - printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", - __func__, conn_limit); - ) - - id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; - id.proto = args->f_id.proto; - id.addr_type = args->f_id.addr_type; - id.fib = M_GETFIB(args->m); - - if (IS_IP6_FLOW_ID (&(args->f_id))) { - bzero(&id.src_ip6, sizeof(id.src_ip6)); - bzero(&id.dst_ip6, sizeof(id.dst_ip6)); - - if (limit_mask & DYN_SRC_ADDR) - id.src_ip6 = args->f_id.src_ip6; - if (limit_mask & DYN_DST_ADDR) - id.dst_ip6 = args->f_id.dst_ip6; - } else { - if (limit_mask & DYN_SRC_ADDR) - id.src_ip = args->f_id.src_ip; - if (limit_mask & DYN_DST_ADDR) - id.dst_ip = args->f_id.dst_ip; - } - if (limit_mask & DYN_SRC_PORT) - id.src_port = args->f_id.src_port; - if (limit_mask & DYN_DST_PORT) - id.dst_port = args->f_id.dst_port; - - /* - * We have to release lock for previous bucket to - * avoid possible deadlock - */ - IPFW_BUCK_UNLOCK(i); - - parent = lookup_dyn_parent(&id, &pindex, rule, cmd->o.arg1); - if (parent == NULL) { - printf("ipfw: %s: add parent failed\n", __func__); - IPFW_BUCK_UNLOCK(pindex); - return (1); - } - - if (parent->count >= conn_limit) { - if (V_fw_verbose && last_log != time_uptime) { - last_log = time_uptime; - char sbuf[24]; - last_log = time_uptime; - snprintf(sbuf, sizeof(sbuf), - "%d drop session", - 
parent->rule->rulenum); - print_dyn_rule_flags(&args->f_id, - cmd->o.opcode, - LOG_SECURITY | LOG_DEBUG, - sbuf, "too many entries"); - } - IPFW_BUCK_UNLOCK(pindex); - return (1); - } - /* Increment counter on parent */ - parent->count++; - IPFW_BUCK_UNLOCK(pindex); - - IPFW_BUCK_LOCK(i); - q = add_dyn_rule(&args->f_id, i, O_LIMIT, - (struct ip_fw *)parent, cmd->o.arg1); - if (q == NULL) { - /* Decrement index and notify caller */ - IPFW_BUCK_UNLOCK(i); - IPFW_BUCK_LOCK(pindex); - parent->count--; - IPFW_BUCK_UNLOCK(pindex); - return (1); - } - break; - } - default: - printf("ipfw: %s: unknown dynamic rule type %u\n", - __func__, cmd->o.opcode); - } - - if (q == NULL) { - IPFW_BUCK_UNLOCK(i); - return (1); /* Notify caller about failure */ - } - - dyn_update_proto_state(q, &args->f_id, NULL, MATCH_FORWARD); - IPFW_BUCK_UNLOCK(i); - return (0); -} - -/* - * Generate a TCP packet, containing either a RST or a keepalive. - * When flags & TH_RST, we are sending a RST packet, because of a - * "reset" action matched the packet. - * Otherwise we are sending a keepalive, and flags & TH_ - * The 'replyto' mbuf is the mbuf being replied to, if any, and is required - * so that MAC can label the reply appropriately. 
- */ -struct mbuf * -ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, - u_int32_t ack, int flags) -{ - struct mbuf *m = NULL; /* stupid compiler */ - int len, dir; - struct ip *h = NULL; /* stupid compiler */ -#ifdef INET6 - struct ip6_hdr *h6 = NULL; -#endif - struct tcphdr *th = NULL; - - MGETHDR(m, M_NOWAIT, MT_DATA); - if (m == NULL) - return (NULL); - - M_SETFIB(m, id->fib); -#ifdef MAC - if (replyto != NULL) - mac_netinet_firewall_reply(replyto, m); - else - mac_netinet_firewall_send(m); -#else - (void)replyto; /* don't warn about unused arg */ -#endif - - switch (id->addr_type) { - case 4: - len = sizeof(struct ip) + sizeof(struct tcphdr); - break; -#ifdef INET6 - case 6: - len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); - break; -#endif - default: - /* XXX: log me?!? */ - FREE_PKT(m); - return (NULL); - } - dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); - - m->m_data += max_linkhdr; - m->m_flags |= M_SKIP_FIREWALL; - m->m_pkthdr.len = m->m_len = len; - m->m_pkthdr.rcvif = NULL; - bzero(m->m_data, len); - - switch (id->addr_type) { - case 4: - h = mtod(m, struct ip *); - - /* prepare for checksum */ - h->ip_p = IPPROTO_TCP; - h->ip_len = htons(sizeof(struct tcphdr)); - if (dir) { - h->ip_src.s_addr = htonl(id->src_ip); - h->ip_dst.s_addr = htonl(id->dst_ip); - } else { - h->ip_src.s_addr = htonl(id->dst_ip); - h->ip_dst.s_addr = htonl(id->src_ip); - } - - th = (struct tcphdr *)(h + 1); - break; -#ifdef INET6 - case 6: - h6 = mtod(m, struct ip6_hdr *); - - /* prepare for checksum */ - h6->ip6_nxt = IPPROTO_TCP; - h6->ip6_plen = htons(sizeof(struct tcphdr)); - if (dir) { - h6->ip6_src = id->src_ip6; - h6->ip6_dst = id->dst_ip6; - } else { - h6->ip6_src = id->dst_ip6; - h6->ip6_dst = id->src_ip6; - } - - th = (struct tcphdr *)(h6 + 1); - break; -#endif - } - - if (dir) { - th->th_sport = htons(id->src_port); - th->th_dport = htons(id->dst_port); - } else { - th->th_sport = htons(id->dst_port); - th->th_dport = 
htons(id->src_port); - } - th->th_off = sizeof(struct tcphdr) >> 2; - - if (flags & TH_RST) { - if (flags & TH_ACK) { - th->th_seq = htonl(ack); - th->th_flags = TH_RST; - } else { - if (flags & TH_SYN) - seq++; - th->th_ack = htonl(seq); - th->th_flags = TH_RST | TH_ACK; - } - } else { - /* - * Keepalive - use caller provided sequence numbers - */ - th->th_seq = htonl(seq); - th->th_ack = htonl(ack); - th->th_flags = TH_ACK; - } - - switch (id->addr_type) { - case 4: - th->th_sum = in_cksum(m, len); - - /* finish the ip header */ - h->ip_v = 4; - h->ip_hl = sizeof(*h) >> 2; - h->ip_tos = IPTOS_LOWDELAY; - h->ip_off = htons(0); - h->ip_len = htons(len); - h->ip_ttl = V_ip_defttl; - h->ip_sum = 0; - break; -#ifdef INET6 - case 6: - th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), - sizeof(struct tcphdr)); - - /* finish the ip6 header */ - h6->ip6_vfc |= IPV6_VERSION; - h6->ip6_hlim = IPV6_DEFHLIM; - break; -#endif - } - - return (m); -} - -/* - * Queue keepalive packets for given dynamic rule - */ -static struct mbuf ** -ipfw_dyn_send_ka(struct mbuf **mtailp, ipfw_dyn_rule *q) -{ - struct mbuf *m_rev, *m_fwd; - - m_rev = (q->state & ACK_REV) ? NULL : - ipfw_send_pkt(NULL, &(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN); - m_fwd = (q->state & ACK_FWD) ? NULL : - ipfw_send_pkt(NULL, &(q->id), q->ack_fwd - 1, q->ack_rev, 0); - - if (m_rev != NULL) { - *mtailp = m_rev; - mtailp = &(*mtailp)->m_nextpkt; - } - if (m_fwd != NULL) { - *mtailp = m_fwd; - mtailp = &(*mtailp)->m_nextpkt; - } - - return (mtailp); -} - -/* - * This procedure is used to perform various maintenance - * on dynamic hash list. Currently it is called every second. 
- */ -static void -ipfw_dyn_tick(void * vnetx) -{ - struct ip_fw_chain *chain; - int check_ka = 0; -#ifdef VIMAGE - struct vnet *vp = vnetx; -#endif - - CURVNET_SET(vp); - - chain = &V_layer3_chain; - - /* Run keepalive checks every keepalive_period iff ka is enabled */ - if ((V_dyn_keepalive_last + V_dyn_keepalive_period <= time_uptime) && - (V_dyn_keepalive != 0)) { - V_dyn_keepalive_last = time_uptime; - check_ka = 1; - } - - check_dyn_rules(chain, NULL, check_ka, 1); - - callout_reset_on(&V_ipfw_timeout, hz, ipfw_dyn_tick, vnetx, 0); - - CURVNET_RESTORE(); -} - - -/* - * Walk through all dynamic states doing generic maintenance: - * 1) free expired states - * 2) free all states based on deleted rule / set - * 3) send keepalives for states if needed - * - * @chain - pointer to current ipfw rules chain - * @rule - delete all states originated by given rule if != NULL - * @set - delete all states originated by any rule in set @set if != RESVD_SET - * @check_ka - perform checking/sending keepalives - * @timer - indicate call from timer routine. - * - * Timer routine must call this function unlocked to permit - * sending keepalives/resizing table. - * - * Others has to call function with IPFW_UH_WLOCK held. - * Additionally, function assume that dynamic rule/set is - * ALREADY deleted so no new states can be generated by - * 'deleted' rules. 
- * - * Write lock is needed to ensure that unused parent rules - * are not freed by other instance (see stage 2, 3) - */ -static void -check_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt, - int check_ka, int timer) -{ - struct mbuf *m0, *m, *mnext, **mtailp; - struct ip *h; - int i, dyn_count, new_buckets = 0, max_buckets; - int expired = 0, expired_limits = 0, parents = 0, total = 0; - ipfw_dyn_rule *q, *q_prev, *q_next; - ipfw_dyn_rule *exp_head, **exptailp; - ipfw_dyn_rule *exp_lhead, **expltailp; - - KASSERT(V_ipfw_dyn_v != NULL, ("%s: dynamic table not allocated", - __func__)); - - /* Avoid possible LOR */ - KASSERT(!check_ka || timer, ("%s: keepalive check with lock held", - __func__)); - - /* - * Do not perform any checks if we currently have no dynamic states - */ - if (DYN_COUNT == 0) - return; - - /* Expired states */ - exp_head = NULL; - exptailp = &exp_head; - - /* Expired limit states */ - exp_lhead = NULL; - expltailp = &exp_lhead; - - /* - * We make a chain of packets to go out here -- not deferring - * until after we drop the IPFW dynamic rule lock would result - * in a lock order reversal with the normal packet input -> ipfw - * call stack. 
- */ - m0 = NULL; - mtailp = &m0; - - /* Protect from hash resizing */ - if (timer != 0) - IPFW_UH_WLOCK(chain); - else - IPFW_UH_WLOCK_ASSERT(chain); - -#define NEXT_RULE() { q_prev = q; q = q->next ; continue; } - - /* Stage 1: perform requested deletion */ - for (i = 0 ; i < V_curr_dyn_buckets ; i++) { - IPFW_BUCK_LOCK(i); - for (q = V_ipfw_dyn_v[i].head, q_prev = q; q ; ) { - /* account every rule */ - total++; - - /* Skip parent rules at all */ - if (q->dyn_type == O_LIMIT_PARENT) { - parents++; - NEXT_RULE(); - } - - /* - * Remove rules which are: - * 1) expired - * 2) matches deletion range - */ - if ((TIME_LEQ(q->expire, time_uptime)) || - (rt != NULL && ipfw_match_range(q->rule, rt))) { - if (TIME_LE(time_uptime, q->expire) && - q->dyn_type == O_KEEP_STATE && - V_dyn_keep_states != 0) { - /* - * Do not delete state if - * it is not expired and - * dyn_keep_states is ON. - * However we need to re-link it - * to any other stable rule - */ - q->rule = chain->default_rule; - NEXT_RULE(); - } - - /* Unlink q from current list */ - q_next = q->next; - if (q == V_ipfw_dyn_v[i].head) - V_ipfw_dyn_v[i].head = q_next; - else - q_prev->next = q_next; - - q->next = NULL; - - /* queue q to expire list */ - if (q->dyn_type != O_LIMIT) { - *exptailp = q; - exptailp = &(*exptailp)->next; - DEB(print_dyn_rule(&q->id, q->dyn_type, - "unlink entry", "left"); - ) - } else { - /* Separate list for limit rules */ - *expltailp = q; - expltailp = &(*expltailp)->next; - expired_limits++; - DEB(print_dyn_rule(&q->id, q->dyn_type, - "unlink limit entry", "left"); - ) - } - - q = q_next; - expired++; - continue; - } - - /* - * Check if we need to send keepalive: - * we need to ensure if is time to do KA, - * this is established TCP session, and - * expire time is within keepalive interval - */ - if ((check_ka != 0) && (q->id.proto == IPPROTO_TCP) && - ((q->state & BOTH_SYN) == BOTH_SYN) && - (TIME_LEQ(q->expire, time_uptime + - V_dyn_keepalive_interval))) - mtailp = 
ipfw_dyn_send_ka(mtailp, q); - - NEXT_RULE(); - } - IPFW_BUCK_UNLOCK(i); - } - - /* Stage 2: decrement counters from O_LIMIT parents */ - if (expired_limits != 0) { - /* - * XXX: Note that deleting set with more than one - * heavily-used LIMIT rules can result in overwhelming - * locking due to lack of per-hash value sorting - * - * We should probably think about: - * 1) pre-allocating hash of size, say, - * MAX(16, V_curr_dyn_buckets / 1024) - * 2) checking if expired_limits is large enough - * 3) If yes, init hash (or its part), re-link - * current list and start decrementing procedure in - * each bucket separately - */ - - /* - * Small optimization: do not unlock bucket until - * we see the next item resides in different bucket - */ - if (exp_lhead != NULL) { - i = exp_lhead->parent->bucket; - IPFW_BUCK_LOCK(i); - } - for (q = exp_lhead; q != NULL; q = q->next) { - if (i != q->parent->bucket) { - IPFW_BUCK_UNLOCK(i); - i = q->parent->bucket; - IPFW_BUCK_LOCK(i); - } - - /* Decrease parent refcount */ - q->parent->count--; - } - if (exp_lhead != NULL) - IPFW_BUCK_UNLOCK(i); - } - - /* - * We protectet ourselves from unused parent deletion - * (from the timer function) by holding UH write lock. 
- */ - - /* Stage 3: remove unused parent rules */ - if ((parents != 0) && (expired != 0)) { - for (i = 0 ; i < V_curr_dyn_buckets ; i++) { - IPFW_BUCK_LOCK(i); - for (q = V_ipfw_dyn_v[i].head, q_prev = q ; q ; ) { - if (q->dyn_type != O_LIMIT_PARENT) - NEXT_RULE(); - - if (q->count != 0) - NEXT_RULE(); - - /* Parent rule without consumers */ - - /* Unlink q from current list */ - q_next = q->next; - if (q == V_ipfw_dyn_v[i].head) - V_ipfw_dyn_v[i].head = q_next; - else - q_prev->next = q_next; - - q->next = NULL; - - /* Add to expired list */ - *exptailp = q; - exptailp = &(*exptailp)->next; - - DEB(print_dyn_rule(&q->id, q->dyn_type, - "unlink parent entry", "left"); - ) - - expired++; - - q = q_next; - } - IPFW_BUCK_UNLOCK(i); - } - } - -#undef NEXT_RULE - - if (timer != 0) { - /* - * Check if we need to resize hash: - * if current number of states exceeds number of buckes in hash, - * grow hash size to the minimum power of 2 which is bigger than - * current states count. Limit hash size by 64k. - */ - max_buckets = (V_dyn_buckets_max > 65536) ? 
- 65536 : V_dyn_buckets_max; - - dyn_count = DYN_COUNT; - - if ((dyn_count > V_curr_dyn_buckets * 2) && - (dyn_count < max_buckets)) { - new_buckets = V_curr_dyn_buckets; - while (new_buckets < dyn_count) { - new_buckets *= 2; - - if (new_buckets >= max_buckets) - break; - } - } - - IPFW_UH_WUNLOCK(chain); - } - - /* Finally delete old states ad limits if any */ - for (q = exp_head; q != NULL; q = q_next) { - q_next = q->next; - uma_zfree(V_ipfw_dyn_rule_zone, q); - ipfw_dyn_count--; - } - - for (q = exp_lhead; q != NULL; q = q_next) { - q_next = q->next; - uma_zfree(V_ipfw_dyn_rule_zone, q); - ipfw_dyn_count--; - } - - /* - * The rest code MUST be called from timer routine only - * without holding any locks - */ - if (timer == 0) - return; - - /* Send keepalive packets if any */ - for (m = m0; m != NULL; m = mnext) { - mnext = m->m_nextpkt; - m->m_nextpkt = NULL; - h = mtod(m, struct ip *); - if (h->ip_v == 4) - ip_output(m, NULL, NULL, 0, NULL, NULL); -#ifdef INET6 - else - ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); -#endif - } - - /* Run table resize without holding any locks */ - if (new_buckets != 0) - resize_dynamic_table(chain, new_buckets); -} - -/* - * Deletes all dynamic rules originated by given rule or all rules in - * given set. Specify RESVD_SET to indicate set should not be used. - * @chain - pointer to current ipfw rules chain - * @rr - delete all states originated by rules in matched range. - * - * Function has to be called with IPFW_UH_WLOCK held. - * Additionally, function assume that dynamic rule/set is - * ALREADY deleted so no new states can be generated by - * 'deleted' rules. - */ -void -ipfw_expire_dyn_rules(struct ip_fw_chain *chain, ipfw_range_tlv *rt) -{ - - check_dyn_rules(chain, rt, 0, 0); -} - -/* - * Check if rule contains at least one dynamic opcode. - * - * Returns 1 if such opcode is found, 0 otherwise. 
- */ -int -ipfw_is_dyn_rule(struct ip_fw *rule) -{ - int cmdlen, l; - ipfw_insn *cmd; - - l = rule->cmd_len; - cmd = rule->cmd; - cmdlen = 0; - for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - - switch (cmd->opcode) { - case O_LIMIT: - case O_KEEP_STATE: - case O_PROBE_STATE: - case O_CHECK_STATE: - return (1); - } - } - - return (0); -} - -void -ipfw_dyn_init(struct ip_fw_chain *chain) -{ - - V_ipfw_dyn_v = NULL; - V_dyn_buckets_max = 256; /* must be power of 2 */ - V_curr_dyn_buckets = 256; /* must be power of 2 */ - - V_dyn_ack_lifetime = 300; - V_dyn_syn_lifetime = 20; - V_dyn_fin_lifetime = 1; - V_dyn_rst_lifetime = 1; - V_dyn_udp_lifetime = 10; - V_dyn_short_lifetime = 5; - - V_dyn_keepalive_interval = 20; - V_dyn_keepalive_period = 5; - V_dyn_keepalive = 1; /* do send keepalives */ - V_dyn_keepalive_last = time_uptime; - - V_dyn_max = 16384; /* max # of dynamic rules */ - - V_ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", - sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - - /* Enforce limit on dynamic rules */ - uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max); - - callout_init(&V_ipfw_timeout, 1); - - /* - * This can potentially be done on first dynamic rule - * being added to chain. 
- */ - resize_dynamic_table(chain, V_curr_dyn_buckets); - IPFW_ADD_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); -} - -void -ipfw_dyn_uninit(int pass) -{ - int i; - - if (pass == 0) { - callout_drain(&V_ipfw_timeout); - return; - } - IPFW_DEL_OBJ_REWRITER(IS_DEFAULT_VNET(curvnet), dyn_opcodes); - - if (V_ipfw_dyn_v != NULL) { - /* - * Skip deleting all dynamic states - - * uma_zdestroy() does this more efficiently; - */ - - /* Destroy all mutexes */ - for (i = 0 ; i < V_curr_dyn_buckets ; i++) - IPFW_BUCK_LOCK_DESTROY(&V_ipfw_dyn_v[i]); - free(V_ipfw_dyn_v, M_IPFW); - V_ipfw_dyn_v = NULL; - } - - uma_zdestroy(V_ipfw_dyn_rule_zone); -} - -#ifdef SYSCTL_NODE -/* - * Get/set maximum number of dynamic states in given VNET instance. - */ -static int -sysctl_ipfw_dyn_max(SYSCTL_HANDLER_ARGS) -{ - int error; - unsigned int nstates; - - nstates = V_dyn_max; - - error = sysctl_handle_int(oidp, &nstates, 0, req); - /* Read operation or some error */ - if ((error != 0) || (req->newptr == NULL)) - return (error); - - V_dyn_max = nstates; - uma_zone_set_max(V_ipfw_dyn_rule_zone, V_dyn_max); - - return (0); -} - -/* - * Get current number of dynamic states in given VNET instance. - */ -static int -sysctl_ipfw_dyn_count(SYSCTL_HANDLER_ARGS) -{ - int error; - unsigned int nstates; - - nstates = DYN_COUNT; - - error = sysctl_handle_int(oidp, &nstates, 0, req); - - return (error); -} -#endif - -/* - * Returns size of dynamic states in legacy format - */ -int -ipfw_dyn_len(void) -{ - - return (V_ipfw_dyn_v == NULL) ? 0 : - (DYN_COUNT * sizeof(ipfw_dyn_rule)); -} - -/* - * Returns number of dynamic states. - * Used by dump format v1 (current). - */ -int -ipfw_dyn_get_count(void) -{ - - return (V_ipfw_dyn_v == NULL) ? 
0 : DYN_COUNT; -} - -static void -export_dyn_rule(ipfw_dyn_rule *src, ipfw_dyn_rule *dst) -{ - - memcpy(dst, src, sizeof(*src)); - memcpy(&(dst->rule), &(src->rule->rulenum), sizeof(src->rule->rulenum)); - /* - * store set number into high word of - * dst->rule pointer. - */ - memcpy((char *)&dst->rule + sizeof(src->rule->rulenum), - &(src->rule->set), sizeof(src->rule->set)); - /* - * store a non-null value in "next". - * The userland code will interpret a - * NULL here as a marker - * for the last dynamic rule. - */ - memcpy(&dst->next, &dst, sizeof(dst)); - dst->expire = - TIME_LEQ(dst->expire, time_uptime) ? 0 : dst->expire - time_uptime; -} - -/* - * Fills int buffer given by @sd with dynamic states. - * Used by dump format v1 (current). - * - * Returns 0 on success. - */ -int -ipfw_dump_states(struct ip_fw_chain *chain, struct sockopt_data *sd) -{ - ipfw_dyn_rule *p; - ipfw_obj_dyntlv *dst, *last; - ipfw_obj_ctlv *ctlv; - int i; - size_t sz; - - if (V_ipfw_dyn_v == NULL) - return (0); - - IPFW_UH_RLOCK_ASSERT(chain); - - ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); - if (ctlv == NULL) - return (ENOMEM); - sz = sizeof(ipfw_obj_dyntlv); - ctlv->head.type = IPFW_TLV_DYNSTATE_LIST; - ctlv->objsize = sz; - last = NULL; - - for (i = 0 ; i < V_curr_dyn_buckets; i++) { - IPFW_BUCK_LOCK(i); - for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) { - dst = (ipfw_obj_dyntlv *)ipfw_get_sopt_space(sd, sz); - if (dst == NULL) { - IPFW_BUCK_UNLOCK(i); - return (ENOMEM); - } - - export_dyn_rule(p, &dst->state); - dst->head.length = sz; - dst->head.type = IPFW_TLV_DYN_ENT; - last = dst; - } - IPFW_BUCK_UNLOCK(i); - } - - if (last != NULL) /* mark last dynamic rule */ - last->head.flags = IPFW_DF_LAST; - - return (0); -} - -/* - * Fill given buffer with dynamic states (legacy format). - * IPFW_UH_RLOCK has to be held while calling. 
- */ -void -ipfw_get_dynamic(struct ip_fw_chain *chain, char **pbp, const char *ep) -{ - ipfw_dyn_rule *p, *last = NULL; - char *bp; - int i; - - if (V_ipfw_dyn_v == NULL) - return; - bp = *pbp; - - IPFW_UH_RLOCK_ASSERT(chain); - - for (i = 0 ; i < V_curr_dyn_buckets; i++) { - IPFW_BUCK_LOCK(i); - for (p = V_ipfw_dyn_v[i].head ; p != NULL; p = p->next) { - if (bp + sizeof *p <= ep) { - ipfw_dyn_rule *dst = - (ipfw_dyn_rule *)bp; - - export_dyn_rule(p, dst); - last = dst; - bp += sizeof(ipfw_dyn_rule); - } - } - IPFW_BUCK_UNLOCK(i); - } - - if (last != NULL) /* mark last dynamic rule */ - bzero(&last->next, sizeof(last)); - *pbp = bp; -} -/* end of file */ diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c b/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c deleted file mode 100644 index 25f1b358..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_eaction.c +++ /dev/null @@ -1,388 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2016-2017 Yandex LLC - * Copyright (c) 2016-2017 Andrey V. Elsukov <ae@FreeBSD.org> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/hash.h> -#include <sys/lock.h> -#include <sys/rwlock.h> -#include <sys/rmlock.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/queue.h> -#include <net/pfil.h> - -#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ -#include <netinet/in.h> -#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ -#include <netinet/ip_fw.h> - -#include <netpfil/ipfw/ip_fw_private.h> - -#include <rtems/bsd/local/opt_ipfw.h> - -/* - * External actions support for ipfw. - * - * This code provides KPI for implementing loadable modules, that - * can provide handlers for external action opcodes in the ipfw's - * rules. - * Module should implement opcode handler with type ipfw_eaction_t. - * This handler will be called by ipfw_chk() function when - * O_EXTERNAL_ACTION opcode is matched. The handler must return - * value used as return value in ipfw_chk(), i.e. IP_FW_PASS, - * IP_FW_DENY (see ip_fw_private.h). - * Also the last argument must be set by handler. If it is zero, - * the search continues to the next rule. If it has non zero value, - * the search terminates. - * - * The module that implements external action should register its - * handler and name with ipfw_add_eaction() function. - * This function will return eaction_id, that can be used by module. 
- * - * It is possible to pass some additional information to external - * action handler using O_EXTERNAL_INSTANCE and O_EXTERNAL_DATA opcodes. - * Such opcodes should be next after the O_EXTERNAL_ACTION opcode. - * For the O_EXTERNAL_INSTANCE opcode the cmd->arg1 contains index of named - * object related to an instance of external action. - * For the O_EXTERNAL_DATA opcode the cmd contains the data that can be used - * by external action handler without needing to create named instance. - * - * In case when eaction module uses named instances, it should register - * opcode rewriting routines for O_EXTERNAL_INSTANCE opcode. The - * classifier callback can look back into O_EXTERNAL_ACTION opcode (it - * must be in the (ipfw_insn *)(cmd - 1)). By arg1 from O_EXTERNAL_ACTION - * it can deteremine eaction_id and compare it with its own. - * The macro IPFW_TLV_EACTION_NAME(eaction_id) can be used to deteremine - * the type of named_object related to external action instance. - * - * On module unload handler should be deregistered with ipfw_del_eaction() - * function using known eaction_id. - */ - -struct eaction_obj { - struct named_object no; - ipfw_eaction_t *handler; - char name[64]; -}; - -#define EACTION_OBJ(ch, cmd) \ - ((struct eaction_obj *)SRV_OBJECT((ch), (cmd)->arg1)) - -#if 0 -#define EACTION_DEBUG(fmt, ...) do { \ - printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \ -} while (0) -#else -#define EACTION_DEBUG(fmt, ...) -#endif - -const char *default_eaction_typename = "drop"; -static int -default_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args, - ipfw_insn *cmd, int *done) -{ - - *done = 1; /* terminate the search */ - return (IP_FW_DENY); -} - -/* - * Opcode rewriting callbacks. 
- */ -static int -eaction_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) -{ - - EACTION_DEBUG("opcode %d, arg1 %d", cmd->opcode, cmd->arg1); - *puidx = cmd->arg1; - *ptype = 0; - return (0); -} - -static void -eaction_update(ipfw_insn *cmd, uint16_t idx) -{ - - cmd->arg1 = idx; - EACTION_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1); -} - -static int -eaction_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, - struct named_object **pno) -{ - ipfw_obj_ntlv *ntlv; - - if (ti->tlvs == NULL) - return (EINVAL); - - /* Search ntlv in the buffer provided by user */ - ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, - IPFW_TLV_EACTION); - if (ntlv == NULL) - return (EINVAL); - EACTION_DEBUG("name %s, uidx %u, type %u", ntlv->name, - ti->uidx, ti->type); - /* - * Search named object with corresponding name. - * Since eaction objects are global - ignore the set value - * and use zero instead. - */ - *pno = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), - 0, IPFW_TLV_EACTION, ntlv->name); - if (*pno == NULL) - return (ESRCH); - return (0); -} - -static struct named_object * -eaction_findbykidx(struct ip_fw_chain *ch, uint16_t idx) -{ - - EACTION_DEBUG("kidx %u", idx); - return (ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx)); -} - -static struct opcode_obj_rewrite eaction_opcodes[] = { - { - .opcode = O_EXTERNAL_ACTION, - .etlv = IPFW_TLV_EACTION, - .classifier = eaction_classify, - .update = eaction_update, - .find_byname = eaction_findbyname, - .find_bykidx = eaction_findbykidx, - }, -}; - -static int -create_eaction_obj(struct ip_fw_chain *ch, ipfw_eaction_t handler, - const char *name, uint16_t *eaction_id) -{ - struct namedobj_instance *ni; - struct eaction_obj *obj; - - IPFW_UH_UNLOCK_ASSERT(ch); - - ni = CHAIN_TO_SRV(ch); - obj = malloc(sizeof(*obj), M_IPFW, M_WAITOK | M_ZERO); - obj->no.name = obj->name; - obj->no.etlv = IPFW_TLV_EACTION; - obj->handler = handler; - strlcpy(obj->name, name, sizeof(obj->name)); - - 
IPFW_UH_WLOCK(ch); - if (ipfw_objhash_lookup_name_type(ni, 0, IPFW_TLV_EACTION, - name) != NULL) { - /* - * Object is already created. - * We don't allow eactions with the same name. - */ - IPFW_UH_WUNLOCK(ch); - free(obj, M_IPFW); - EACTION_DEBUG("External action with typename " - "'%s' already exists", name); - return (EEXIST); - } - if (ipfw_objhash_alloc_idx(ni, &obj->no.kidx) != 0) { - IPFW_UH_WUNLOCK(ch); - free(obj, M_IPFW); - EACTION_DEBUG("alloc_idx failed"); - return (ENOSPC); - } - ipfw_objhash_add(ni, &obj->no); - IPFW_WLOCK(ch); - SRV_OBJECT(ch, obj->no.kidx) = obj; - IPFW_WUNLOCK(ch); - obj->no.refcnt++; - IPFW_UH_WUNLOCK(ch); - - if (eaction_id != NULL) - *eaction_id = obj->no.kidx; - return (0); -} - -static void -destroy_eaction_obj(struct ip_fw_chain *ch, struct named_object *no) -{ - struct namedobj_instance *ni; - struct eaction_obj *obj; - - IPFW_UH_WLOCK_ASSERT(ch); - - ni = CHAIN_TO_SRV(ch); - IPFW_WLOCK(ch); - obj = SRV_OBJECT(ch, no->kidx); - SRV_OBJECT(ch, no->kidx) = NULL; - IPFW_WUNLOCK(ch); - ipfw_objhash_del(ni, no); - ipfw_objhash_free_idx(ni, no->kidx); - free(obj, M_IPFW); -} - -/* - * Resets all eaction opcodes to default handlers. - */ -static void -reset_eaction_obj(struct ip_fw_chain *ch, uint16_t eaction_id) -{ - struct named_object *no; - struct ip_fw *rule; - ipfw_insn *cmd; - int i; - - IPFW_UH_WLOCK_ASSERT(ch); - - no = ipfw_objhash_lookup_name_type(CHAIN_TO_SRV(ch), 0, - IPFW_TLV_EACTION, default_eaction_typename); - if (no == NULL) - panic("Default external action handler is not found"); - if (eaction_id == no->kidx) - panic("Wrong eaction_id"); - EACTION_DEBUG("replace id %u with %u", eaction_id, no->kidx); - IPFW_WLOCK(ch); - for (i = 0; i < ch->n_rules; i++) { - rule = ch->map[i]; - cmd = ACTION_PTR(rule); - if (cmd->opcode != O_EXTERNAL_ACTION) - continue; - if (cmd->arg1 != eaction_id) - continue; - cmd->arg1 = no->kidx; /* Set to default id */ - /* - * XXX: we only bump refcount on default_eaction. 
- * Refcount on the original object will be just - * ignored on destroy. But on default_eaction it - * will be decremented on rule deletion. - */ - no->refcnt++; - /* - * Since named_object related to this instance will be - * also destroyed, truncate the chain of opcodes to - * remove the rest of cmd chain just after O_EXTERNAL_ACTION - * opcode. - */ - if (rule->act_ofs < rule->cmd_len - 1) { - EACTION_DEBUG("truncate rule %d: len %u -> %u", - rule->rulenum, rule->cmd_len, rule->act_ofs + 1); - rule->cmd_len = rule->act_ofs + 1; - } - } - IPFW_WUNLOCK(ch); -} - -/* - * Initialize external actions framework. - * Create object with default eaction handler "drop". - */ -int -ipfw_eaction_init(struct ip_fw_chain *ch, int first) -{ - int error; - - error = create_eaction_obj(ch, default_eaction, - default_eaction_typename, NULL); - if (error != 0) - return (error); - IPFW_ADD_OBJ_REWRITER(first, eaction_opcodes); - EACTION_DEBUG("External actions support initialized"); - return (0); -} - -void -ipfw_eaction_uninit(struct ip_fw_chain *ch, int last) -{ - struct namedobj_instance *ni; - struct named_object *no; - - ni = CHAIN_TO_SRV(ch); - - IPFW_UH_WLOCK(ch); - no = ipfw_objhash_lookup_name_type(ni, 0, IPFW_TLV_EACTION, - default_eaction_typename); - if (no != NULL) - destroy_eaction_obj(ch, no); - IPFW_UH_WUNLOCK(ch); - IPFW_DEL_OBJ_REWRITER(last, eaction_opcodes); - EACTION_DEBUG("External actions support uninitialized"); -} - -/* - * Registers external action handler to the global array. - * On success it returns eaction id, otherwise - zero. - */ -uint16_t -ipfw_add_eaction(struct ip_fw_chain *ch, ipfw_eaction_t handler, - const char *name) -{ - uint16_t eaction_id; - - eaction_id = 0; - if (ipfw_check_object_name_generic(name) == 0) { - create_eaction_obj(ch, handler, name, &eaction_id); - EACTION_DEBUG("Registered external action '%s' with id %u", - name, eaction_id); - } - return (eaction_id); -} - -/* - * Deregisters external action handler with id eaction_id. 
- */ -int -ipfw_del_eaction(struct ip_fw_chain *ch, uint16_t eaction_id) -{ - struct named_object *no; - - IPFW_UH_WLOCK(ch); - no = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), eaction_id); - if (no == NULL || no->etlv != IPFW_TLV_EACTION) { - IPFW_UH_WUNLOCK(ch); - return (EINVAL); - } - if (no->refcnt > 1) - reset_eaction_obj(ch, eaction_id); - EACTION_DEBUG("External action '%s' with id %u unregistered", - no->name, eaction_id); - destroy_eaction_obj(ch, no); - IPFW_UH_WUNLOCK(ch); - return (0); -} - -int -ipfw_run_eaction(struct ip_fw_chain *ch, struct ip_fw_args *args, - ipfw_insn *cmd, int *done) -{ - - return (EACTION_OBJ(ch, cmd)->handler(ch, args, cmd, done)); -} diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_iface.c b/freebsd/sys/netpfil/ipfw/ip_fw_iface.c deleted file mode 100644 index d7f8eb26..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_iface.c +++ /dev/null @@ -1,542 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2014 Yandex LLC. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * Kernel interface tracking API. - * - */ - -#include <rtems/bsd/local/opt_ipfw.h> -#include <rtems/bsd/local/opt_inet.h> -#ifndef INET -#error IPFIREWALL requires INET. -#endif /* INET */ -#include <rtems/bsd/local/opt_inet6.h> - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/rwlock.h> -#include <sys/rmlock.h> -#include <sys/socket.h> -#include <sys/queue.h> -#include <sys/eventhandler.h> -#include <net/if.h> -#include <net/if_var.h> -#include <net/pfil.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ -#include <netinet/ip_fw.h> - -#include <netpfil/ipfw/ip_fw_private.h> - -#define CHAIN_TO_II(ch) ((struct namedobj_instance *)ch->ifcfg) - -#define DEFAULT_IFACES 128 - -static void handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif, - uint16_t ifindex); -static void handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif, - uint16_t ifindex); -static int list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd); - -static struct ipfw_sopt_handler scodes[] = { - { IP_FW_XIFLIST, 0, HDIR_GET, list_ifaces }, -}; - -/* - * FreeBSD Kernel interface. 
- */ -static void ipfw_kifhandler(void *arg, struct ifnet *ifp); -static int ipfw_kiflookup(char *name); -static void iface_khandler_register(void); -static void iface_khandler_deregister(void); - -static eventhandler_tag ipfw_ifdetach_event, ipfw_ifattach_event; -static int num_vnets = 0; -static struct mtx vnet_mtx; - -/* - * Checks if kernel interface is contained in our tracked - * interface list and calls attach/detach handler. - */ -static void -ipfw_kifhandler(void *arg, struct ifnet *ifp) -{ - struct ip_fw_chain *ch; - struct ipfw_iface *iif; - struct namedobj_instance *ii; - uintptr_t htype; - - if (V_ipfw_vnet_ready == 0) - return; - - ch = &V_layer3_chain; - htype = (uintptr_t)arg; - - IPFW_UH_WLOCK(ch); - ii = CHAIN_TO_II(ch); - if (ii == NULL) { - IPFW_UH_WUNLOCK(ch); - return; - } - iif = (struct ipfw_iface*)ipfw_objhash_lookup_name(ii, 0, - if_name(ifp)); - if (iif != NULL) { - if (htype == 1) - handle_ifattach(ch, iif, ifp->if_index); - else - handle_ifdetach(ch, iif, ifp->if_index); - } - IPFW_UH_WUNLOCK(ch); -} - -/* - * Reference current VNET as iface tracking API user. - * Registers interface tracking handlers for first VNET. - */ -static void -iface_khandler_register() -{ - int create; - - create = 0; - - mtx_lock(&vnet_mtx); - if (num_vnets == 0) - create = 1; - num_vnets++; - mtx_unlock(&vnet_mtx); - - if (create == 0) - return; - - printf("IPFW: starting up interface tracker\n"); - - ipfw_ifdetach_event = EVENTHANDLER_REGISTER( - ifnet_departure_event, ipfw_kifhandler, NULL, - EVENTHANDLER_PRI_ANY); - ipfw_ifattach_event = EVENTHANDLER_REGISTER( - ifnet_arrival_event, ipfw_kifhandler, (void*)((uintptr_t)1), - EVENTHANDLER_PRI_ANY); -} - -/* - * - * Detach interface event handlers on last VNET instance - * detach. 
- */ -static void -iface_khandler_deregister() -{ - int destroy; - - destroy = 0; - mtx_lock(&vnet_mtx); - if (num_vnets == 1) - destroy = 1; - num_vnets--; - mtx_unlock(&vnet_mtx); - - if (destroy == 0) - return; - - EVENTHANDLER_DEREGISTER(ifnet_arrival_event, - ipfw_ifattach_event); - EVENTHANDLER_DEREGISTER(ifnet_departure_event, - ipfw_ifdetach_event); -} - -/* - * Retrieves ifindex for given @name. - * - * Returns ifindex or 0. - */ -static int -ipfw_kiflookup(char *name) -{ - struct ifnet *ifp; - int ifindex; - - ifindex = 0; - - if ((ifp = ifunit_ref(name)) != NULL) { - ifindex = ifp->if_index; - if_rele(ifp); - } - - return (ifindex); -} - -/* - * Global ipfw startup hook. - * Since we perform lazy initialization, do nothing except - * mutex init. - */ -int -ipfw_iface_init() -{ - - mtx_init(&vnet_mtx, "IPFW ifhandler mtx", NULL, MTX_DEF); - IPFW_ADD_SOPT_HANDLER(1, scodes); - return (0); -} - -/* - * Global ipfw destroy hook. - * Unregister khandlers iff init has been done. - */ -void -ipfw_iface_destroy() -{ - - IPFW_DEL_SOPT_HANDLER(1, scodes); - mtx_destroy(&vnet_mtx); -} - -/* - * Perform actual init on internal request. - * Inits both namehash and global khandler. - */ -static void -vnet_ipfw_iface_init(struct ip_fw_chain *ch) -{ - struct namedobj_instance *ii; - - ii = ipfw_objhash_create(DEFAULT_IFACES); - IPFW_UH_WLOCK(ch); - if (ch->ifcfg == NULL) { - ch->ifcfg = ii; - ii = NULL; - } - IPFW_UH_WUNLOCK(ch); - - if (ii != NULL) { - /* Already initialized. Free namehash. */ - ipfw_objhash_destroy(ii); - } else { - /* We're the first ones. Init kernel hooks. */ - iface_khandler_register(); - } -} - -static int -destroy_iface(struct namedobj_instance *ii, struct named_object *no, - void *arg) -{ - - /* Assume all consumers have been already detached */ - free(no, M_IPFW); - return (0); -} - -/* - * Per-VNET ipfw detach hook. 
- * - */ -void -vnet_ipfw_iface_destroy(struct ip_fw_chain *ch) -{ - struct namedobj_instance *ii; - - IPFW_UH_WLOCK(ch); - ii = CHAIN_TO_II(ch); - ch->ifcfg = NULL; - IPFW_UH_WUNLOCK(ch); - - if (ii != NULL) { - ipfw_objhash_foreach(ii, destroy_iface, ch); - ipfw_objhash_destroy(ii); - iface_khandler_deregister(); - } -} - -/* - * Notify the subsystem that we are interested in tracking - * interface @name. This function has to be called without - * holding any locks to permit allocating the necessary states - * for proper interface tracking. - * - * Returns 0 on success. - */ -int -ipfw_iface_ref(struct ip_fw_chain *ch, char *name, - struct ipfw_ifc *ic) -{ - struct namedobj_instance *ii; - struct ipfw_iface *iif, *tmp; - - if (strlen(name) >= sizeof(iif->ifname)) - return (EINVAL); - - IPFW_UH_WLOCK(ch); - - ii = CHAIN_TO_II(ch); - if (ii == NULL) { - - /* - * First request to subsystem. - * Let's perform init. - */ - IPFW_UH_WUNLOCK(ch); - vnet_ipfw_iface_init(ch); - IPFW_UH_WLOCK(ch); - ii = CHAIN_TO_II(ch); - } - - iif = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name); - - if (iif != NULL) { - iif->no.refcnt++; - ic->iface = iif; - IPFW_UH_WUNLOCK(ch); - return (0); - } - - IPFW_UH_WUNLOCK(ch); - - /* Not found. Let's create one */ - iif = malloc(sizeof(struct ipfw_iface), M_IPFW, M_WAITOK | M_ZERO); - TAILQ_INIT(&iif->consumers); - iif->no.name = iif->ifname; - strlcpy(iif->ifname, name, sizeof(iif->ifname)); - - /* - * Ref & link to the list. - * - * We assume ifnet_arrival_event / ifnet_departure_event - * are not holding any locks. - */ - iif->no.refcnt = 1; - IPFW_UH_WLOCK(ch); - - tmp = (struct ipfw_iface *)ipfw_objhash_lookup_name(ii, 0, name); - if (tmp != NULL) { - /* Interface has been created since unlock. 
Ref and return */ - tmp->no.refcnt++; - ic->iface = tmp; - IPFW_UH_WUNLOCK(ch); - free(iif, M_IPFW); - return (0); - } - - iif->ifindex = ipfw_kiflookup(name); - if (iif->ifindex != 0) - iif->resolved = 1; - - ipfw_objhash_add(ii, &iif->no); - ic->iface = iif; - - IPFW_UH_WUNLOCK(ch); - - return (0); -} - -/* - * Adds @ic to the list of iif interface consumers. - * Must be called with holding both UH+WLOCK. - * Callback may be immediately called (if interface exists). - */ -void -ipfw_iface_add_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic) -{ - struct ipfw_iface *iif; - - IPFW_UH_WLOCK_ASSERT(ch); - IPFW_WLOCK_ASSERT(ch); - - iif = ic->iface; - - TAILQ_INSERT_TAIL(&iif->consumers, ic, next); - if (iif->resolved != 0) - ic->cb(ch, ic->cbdata, iif->ifindex); -} - -/* - * Unlinks interface tracker object @ic from interface. - * Must be called while holding UH lock. - */ -void -ipfw_iface_del_notify(struct ip_fw_chain *ch, struct ipfw_ifc *ic) -{ - struct ipfw_iface *iif; - - IPFW_UH_WLOCK_ASSERT(ch); - - iif = ic->iface; - TAILQ_REMOVE(&iif->consumers, ic, next); -} - -/* - * Unreference interface specified by @ic. - * Must be called while holding UH lock. - */ -void -ipfw_iface_unref(struct ip_fw_chain *ch, struct ipfw_ifc *ic) -{ - struct ipfw_iface *iif; - - IPFW_UH_WLOCK_ASSERT(ch); - - iif = ic->iface; - ic->iface = NULL; - - iif->no.refcnt--; - /* TODO: check for references & delete */ -} - -/* - * Interface arrival handler. - */ -static void -handle_ifattach(struct ip_fw_chain *ch, struct ipfw_iface *iif, - uint16_t ifindex) -{ - struct ipfw_ifc *ic; - - IPFW_UH_WLOCK_ASSERT(ch); - - iif->gencnt++; - iif->resolved = 1; - iif->ifindex = ifindex; - - IPFW_WLOCK(ch); - TAILQ_FOREACH(ic, &iif->consumers, next) - ic->cb(ch, ic->cbdata, iif->ifindex); - IPFW_WUNLOCK(ch); -} - -/* - * Interface departure handler. 
- */ -static void -handle_ifdetach(struct ip_fw_chain *ch, struct ipfw_iface *iif, - uint16_t ifindex) -{ - struct ipfw_ifc *ic; - - IPFW_UH_WLOCK_ASSERT(ch); - - IPFW_WLOCK(ch); - TAILQ_FOREACH(ic, &iif->consumers, next) - ic->cb(ch, ic->cbdata, 0); - IPFW_WUNLOCK(ch); - - iif->gencnt++; - iif->resolved = 0; - iif->ifindex = 0; -} - -struct dump_iface_args { - struct ip_fw_chain *ch; - struct sockopt_data *sd; -}; - -static int -export_iface_internal(struct namedobj_instance *ii, struct named_object *no, - void *arg) -{ - ipfw_iface_info *i; - struct dump_iface_args *da; - struct ipfw_iface *iif; - - da = (struct dump_iface_args *)arg; - - i = (ipfw_iface_info *)ipfw_get_sopt_space(da->sd, sizeof(*i)); - KASSERT(i != NULL, ("previously checked buffer is not enough")); - - iif = (struct ipfw_iface *)no; - - strlcpy(i->ifname, iif->ifname, sizeof(i->ifname)); - if (iif->resolved) - i->flags |= IPFW_IFFLAG_RESOLVED; - i->ifindex = iif->ifindex; - i->refcnt = iif->no.refcnt; - i->gencnt = iif->gencnt; - return (0); -} - -/* - * Lists all interface currently tracked by ipfw. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size - * Reply: [ ipfw_obj_lheader ipfw_iface_info x N ] - * - * Returns 0 on success - */ -static int -list_ifaces(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct namedobj_instance *ii; - struct _ipfw_obj_lheader *olh; - struct dump_iface_args da; - uint32_t count, size; - - olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); - if (olh == NULL) - return (EINVAL); - if (sd->valsize < olh->size) - return (EINVAL); - - IPFW_UH_RLOCK(ch); - ii = CHAIN_TO_II(ch); - if (ii != NULL) - count = ipfw_objhash_count(ii); - else - count = 0; - size = count * sizeof(ipfw_iface_info) + sizeof(ipfw_obj_lheader); - - /* Fill in header regadless of buffer size */ - olh->count = count; - olh->objsize = sizeof(ipfw_iface_info); - - if (size > olh->size) { - olh->size = size; - IPFW_UH_RUNLOCK(ch); - return (ENOMEM); - } - olh->size = size; - - da.ch = ch; - da.sd = sd; - - if (ii != NULL) - ipfw_objhash_foreach(ii, export_iface_internal, &da); - IPFW_UH_RUNLOCK(ch); - - return (0); -} - diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_log.c b/freebsd/sys/netpfil/ipfw/ip_fw_log.c deleted file mode 100644 index 6d251cda..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_log.c +++ /dev/null @@ -1,419 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * Logging support for ipfw - */ - -#include <rtems/bsd/local/opt_ipfw.h> -#include <rtems/bsd/local/opt_inet.h> -#ifndef INET -#error IPFIREWALL requires INET. 
-#endif /* INET */ -#include <rtems/bsd/local/opt_inet6.h> - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/mbuf.h> -#include <sys/socket.h> -#include <sys/sysctl.h> -#include <sys/syslog.h> -#include <net/ethernet.h> /* for ETHERTYPE_IP */ -#include <net/if.h> -#include <net/if_var.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/ip_icmp.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> -#include <netinet/tcp_var.h> -#include <netinet/udp.h> - -#include <netinet/ip6.h> -#include <netinet/icmp6.h> -#ifdef INET6 -#include <netinet6/in6_var.h> /* ip6_sprintf() */ -#endif - -#include <netpfil/ipfw/ip_fw_private.h> - -#ifdef MAC -#include <security/mac/mac_framework.h> -#endif - -/* - * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T - * Other macros just cast void * into the appropriate type - */ -#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) -#define TCP(p) ((struct tcphdr *)(p)) -#define SCTP(p) ((struct sctphdr *)(p)) -#define UDP(p) ((struct udphdr *)(p)) -#define ICMP(p) ((struct icmphdr *)(p)) -#define ICMP6(p) ((struct icmp6_hdr *)(p)) - -#ifdef __APPLE__ -#undef snprintf -#define snprintf sprintf -#define SNPARGS(buf, len) buf + len -#define SNP(buf) buf -#else /* !__APPLE__ */ -#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 -#define SNP(buf) buf, sizeof(buf) -#endif /* !__APPLE__ */ - -#define TARG(k, f) IP_FW_ARG_TABLEARG(chain, k, f) -/* - * We enter here when we have a rule with O_LOG. - * XXX this function alone takes about 2Kbytes of code! 
- */ -void -ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen, - struct ip_fw_args *args, struct mbuf *m, struct ifnet *oif, - u_short offset, uint32_t tablearg, struct ip *ip) -{ - char *action; - int limit_reached = 0; - char action2[92], proto[128], fragment[32]; - - if (V_fw_verbose == 0) { - if (args->eh) /* layer2, use orig hdr */ - ipfw_bpf_mtap2(args->eh, ETHER_HDR_LEN, m); - else { - /* Add fake header. Later we will store - * more info in the header. - */ - if (ip->ip_v == 4) - ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00", - ETHER_HDR_LEN, m); - else if (ip->ip_v == 6) - ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd", - ETHER_HDR_LEN, m); - else - /* Obviously bogus EtherType. */ - ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff", - ETHER_HDR_LEN, m); - } - return; - } - /* the old 'log' function */ - fragment[0] = '\0'; - proto[0] = '\0'; - - if (f == NULL) { /* bogus pkt */ - if (V_verbose_limit != 0 && V_norule_counter >= V_verbose_limit) - return; - V_norule_counter++; - if (V_norule_counter == V_verbose_limit) - limit_reached = V_verbose_limit; - action = "Refuse"; - } else { /* O_LOG is the first action, find the real one */ - ipfw_insn *cmd = ACTION_PTR(f); - ipfw_insn_log *l = (ipfw_insn_log *)cmd; - - if (l->max_log != 0 && l->log_left == 0) - return; - l->log_left--; - if (l->log_left == 0) - limit_reached = l->max_log; - cmd += F_LEN(cmd); /* point to first action */ - if (cmd->opcode == O_ALTQ) { - ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; - - snprintf(SNPARGS(action2, 0), "Altq %d", - altq->qid); - cmd += F_LEN(cmd); - } - if (cmd->opcode == O_PROB || cmd->opcode == O_TAG || - cmd->opcode == O_SETDSCP) - cmd += F_LEN(cmd); - - action = action2; - switch (cmd->opcode) { - case O_DENY: - action = "Deny"; - break; - - case O_REJECT: - if (cmd->arg1==ICMP_REJECT_RST) - action = "Reset"; - else if (cmd->arg1==ICMP_UNREACH_HOST) - action = "Reject"; - else - snprintf(SNPARGS(action2, 0), "Unreach %d", - cmd->arg1); - break; - - case O_UNREACH6: - if 
(cmd->arg1==ICMP6_UNREACH_RST) - action = "Reset"; - else - snprintf(SNPARGS(action2, 0), "Unreach %d", - cmd->arg1); - break; - - case O_ACCEPT: - action = "Accept"; - break; - case O_COUNT: - action = "Count"; - break; - case O_DIVERT: - snprintf(SNPARGS(action2, 0), "Divert %d", - TARG(cmd->arg1, divert)); - break; - case O_TEE: - snprintf(SNPARGS(action2, 0), "Tee %d", - TARG(cmd->arg1, divert)); - break; - case O_SETFIB: - snprintf(SNPARGS(action2, 0), "SetFib %d", - TARG(cmd->arg1, fib) & 0x7FFF); - break; - case O_SKIPTO: - snprintf(SNPARGS(action2, 0), "SkipTo %d", - TARG(cmd->arg1, skipto)); - break; - case O_PIPE: - snprintf(SNPARGS(action2, 0), "Pipe %d", - TARG(cmd->arg1, pipe)); - break; - case O_QUEUE: - snprintf(SNPARGS(action2, 0), "Queue %d", - TARG(cmd->arg1, pipe)); - break; - case O_FORWARD_IP: { - char buf[INET_ADDRSTRLEN]; - ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; - int len; - struct in_addr dummyaddr; - if (sa->sa.sin_addr.s_addr == INADDR_ANY) - dummyaddr.s_addr = htonl(tablearg); - else - dummyaddr.s_addr = sa->sa.sin_addr.s_addr; - - len = snprintf(SNPARGS(action2, 0), "Forward to %s", - inet_ntoa_r(dummyaddr, buf)); - - if (sa->sa.sin_port) - snprintf(SNPARGS(action2, len), ":%d", - sa->sa.sin_port); - } - break; -#ifdef INET6 - case O_FORWARD_IP6: { - char buf[INET6_ADDRSTRLEN]; - ipfw_insn_sa6 *sa = (ipfw_insn_sa6 *)cmd; - int len; - - len = snprintf(SNPARGS(action2, 0), "Forward to [%s]", - ip6_sprintf(buf, &sa->sa.sin6_addr)); - - if (sa->sa.sin6_port) - snprintf(SNPARGS(action2, len), ":%u", - sa->sa.sin6_port); - } - break; -#endif - case O_NETGRAPH: - snprintf(SNPARGS(action2, 0), "Netgraph %d", - cmd->arg1); - break; - case O_NGTEE: - snprintf(SNPARGS(action2, 0), "Ngtee %d", - cmd->arg1); - break; - case O_NAT: - action = "Nat"; - break; - case O_REASS: - action = "Reass"; - break; - case O_CALLRETURN: - if (cmd->len & F_NOT) - action = "Return"; - else - snprintf(SNPARGS(action2, 0), "Call %d", - cmd->arg1); - break; - case 
O_EXTERNAL_ACTION: - snprintf(SNPARGS(action2, 0), "Eaction %s", - ((struct named_object *)SRV_OBJECT(chain, - cmd->arg1))->name); - break; - default: - action = "UNKNOWN"; - break; - } - } - - if (hlen == 0) { /* non-ip */ - snprintf(SNPARGS(proto, 0), "MAC"); - - } else { - int len; -#ifdef INET6 - char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; -#else - char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; -#endif - struct icmphdr *icmp; - struct tcphdr *tcp; - struct udphdr *udp; -#ifdef INET6 - struct ip6_hdr *ip6 = NULL; - struct icmp6_hdr *icmp6; - u_short ip6f_mf; -#endif - src[0] = '\0'; - dst[0] = '\0'; -#ifdef INET6 - ip6f_mf = offset & IP6F_MORE_FRAG; - offset &= IP6F_OFF_MASK; - - if (IS_IP6_FLOW_ID(&(args->f_id))) { - char ip6buf[INET6_ADDRSTRLEN]; - snprintf(src, sizeof(src), "[%s]", - ip6_sprintf(ip6buf, &args->f_id.src_ip6)); - snprintf(dst, sizeof(dst), "[%s]", - ip6_sprintf(ip6buf, &args->f_id.dst_ip6)); - - ip6 = (struct ip6_hdr *)ip; - tcp = (struct tcphdr *)(((char *)ip) + hlen); - udp = (struct udphdr *)(((char *)ip) + hlen); - } else -#endif - { - tcp = L3HDR(struct tcphdr, ip); - udp = L3HDR(struct udphdr, ip); - - inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src)); - inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst)); - } - - switch (args->f_id.proto) { - case IPPROTO_TCP: - len = snprintf(SNPARGS(proto, 0), "TCP %s", src); - if (offset == 0) - snprintf(SNPARGS(proto, len), ":%d %s:%d", - ntohs(tcp->th_sport), - dst, - ntohs(tcp->th_dport)); - else - snprintf(SNPARGS(proto, len), " %s", dst); - break; - - case IPPROTO_UDP: - len = snprintf(SNPARGS(proto, 0), "UDP %s", src); - if (offset == 0) - snprintf(SNPARGS(proto, len), ":%d %s:%d", - ntohs(udp->uh_sport), - dst, - ntohs(udp->uh_dport)); - else - snprintf(SNPARGS(proto, len), " %s", dst); - break; - - case IPPROTO_ICMP: - icmp = L3HDR(struct icmphdr, ip); - if (offset == 0) - len = snprintf(SNPARGS(proto, 0), - "ICMP:%u.%u ", - icmp->icmp_type, icmp->icmp_code); - else - len = 
snprintf(SNPARGS(proto, 0), "ICMP "); - len += snprintf(SNPARGS(proto, len), "%s", src); - snprintf(SNPARGS(proto, len), " %s", dst); - break; -#ifdef INET6 - case IPPROTO_ICMPV6: - icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen); - if (offset == 0) - len = snprintf(SNPARGS(proto, 0), - "ICMPv6:%u.%u ", - icmp6->icmp6_type, icmp6->icmp6_code); - else - len = snprintf(SNPARGS(proto, 0), "ICMPv6 "); - len += snprintf(SNPARGS(proto, len), "%s", src); - snprintf(SNPARGS(proto, len), " %s", dst); - break; -#endif - default: - len = snprintf(SNPARGS(proto, 0), "P:%d %s", - args->f_id.proto, src); - snprintf(SNPARGS(proto, len), " %s", dst); - break; - } - -#ifdef INET6 - if (IS_IP6_FLOW_ID(&(args->f_id))) { - if (offset || ip6f_mf) - snprintf(SNPARGS(fragment, 0), - " (frag %08x:%d@%d%s)", - args->f_id.extra, - ntohs(ip6->ip6_plen) - hlen, - ntohs(offset) << 3, ip6f_mf ? "+" : ""); - } else -#endif - { - int ipoff, iplen; - ipoff = ntohs(ip->ip_off); - iplen = ntohs(ip->ip_len); - if (ipoff & (IP_MF | IP_OFFMASK)) - snprintf(SNPARGS(fragment, 0), - " (frag %d:%d@%d%s)", - ntohs(ip->ip_id), iplen - (ip->ip_hl << 2), - offset << 3, - (ipoff & IP_MF) ? "+" : ""); - } - } -#ifdef __FreeBSD__ - if (oif || m->m_pkthdr.rcvif) - log(LOG_SECURITY | LOG_INFO, - "ipfw: %d %s %s %s via %s%s\n", - f ? f->rulenum : -1, - action, proto, oif ? "out" : "in", - oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, - fragment); - else -#endif - log(LOG_SECURITY | LOG_INFO, - "ipfw: %d %s %s [no if info]%s\n", - f ? f->rulenum : -1, - action, proto, fragment); - if (limit_reached) - log(LOG_SECURITY | LOG_NOTICE, - "ipfw: limit %d reached on entry %d\n", - limit_reached, f ? 
f->rulenum : -1); -} -/* end of file */ diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_nat.c b/freebsd/sys/netpfil/ipfw/ip_fw_nat.c deleted file mode 100644 index 77c55797..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_nat.c +++ /dev/null @@ -1,1243 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2008 Paolo Pisati - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/eventhandler.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/module.h> -#include <sys/rwlock.h> -#include <sys/rmlock.h> - -#include <netinet/libalias/alias.h> -#include <netinet/libalias/alias_local.h> - -#include <net/if.h> -#include <net/if_var.h> -#include <net/pfil.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> -#include <netinet/tcp.h> -#include <netinet/udp.h> - -#include <netpfil/ipfw/ip_fw_private.h> - -#include <machine/in_cksum.h> /* XXX for in_cksum */ - -struct cfg_spool { - LIST_ENTRY(cfg_spool) _next; /* chain of spool instances */ - struct in_addr addr; - uint16_t port; -}; - -/* Nat redirect configuration. */ -struct cfg_redir { - LIST_ENTRY(cfg_redir) _next; /* chain of redir instances */ - uint16_t mode; /* type of redirect mode */ - uint16_t proto; /* protocol: tcp/udp */ - struct in_addr laddr; /* local ip address */ - struct in_addr paddr; /* public ip address */ - struct in_addr raddr; /* remote ip address */ - uint16_t lport; /* local port */ - uint16_t pport; /* public port */ - uint16_t rport; /* remote port */ - uint16_t pport_cnt; /* number of public ports */ - uint16_t rport_cnt; /* number of remote ports */ - struct alias_link **alink; - u_int16_t spool_cnt; /* num of entry in spool chain */ - /* chain of spool instances */ - LIST_HEAD(spool_chain, cfg_spool) spool_chain; -}; - -/* Nat configuration data struct. 
*/ -struct cfg_nat { - /* chain of nat instances */ - LIST_ENTRY(cfg_nat) _next; - int id; /* nat id */ - struct in_addr ip; /* nat ip address */ - struct libalias *lib; /* libalias instance */ - int mode; /* aliasing mode */ - int redir_cnt; /* number of entry in spool chain */ - /* chain of redir instances */ - LIST_HEAD(redir_chain, cfg_redir) redir_chain; - char if_name[IF_NAMESIZE]; /* interface name */ -}; - -static eventhandler_tag ifaddr_event_tag; - -static void -ifaddr_change(void *arg __unused, struct ifnet *ifp) -{ - struct cfg_nat *ptr; - struct ifaddr *ifa; - struct ip_fw_chain *chain; - - KASSERT(curvnet == ifp->if_vnet, - ("curvnet(%p) differs from iface vnet(%p)", curvnet, ifp->if_vnet)); - - if (V_ipfw_vnet_ready == 0 || V_ipfw_nat_ready == 0) - return; - - chain = &V_layer3_chain; - IPFW_UH_WLOCK(chain); - /* Check every nat entry... */ - LIST_FOREACH(ptr, &chain->nat, _next) { - /* ...using nic 'ifp->if_xname' as dynamic alias address. */ - if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0) - continue; - if_addr_rlock(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr == NULL) - continue; - if (ifa->ifa_addr->sa_family != AF_INET) - continue; - IPFW_WLOCK(chain); - ptr->ip = ((struct sockaddr_in *) - (ifa->ifa_addr))->sin_addr; - LibAliasSetAddress(ptr->lib, ptr->ip); - IPFW_WUNLOCK(chain); - } - if_addr_runlock(ifp); - } - IPFW_UH_WUNLOCK(chain); -} - -/* - * delete the pointers for nat entry ix, or all of them if ix < 0 - */ -static void -flush_nat_ptrs(struct ip_fw_chain *chain, const int ix) -{ - int i; - ipfw_insn_nat *cmd; - - IPFW_WLOCK_ASSERT(chain); - for (i = 0; i < chain->n_rules; i++) { - cmd = (ipfw_insn_nat *)ACTION_PTR(chain->map[i]); - /* XXX skip log and the like ? 
*/ - if (cmd->o.opcode == O_NAT && cmd->nat != NULL && - (ix < 0 || cmd->nat->id == ix)) - cmd->nat = NULL; - } -} - -static void -del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head) -{ - struct cfg_redir *r, *tmp_r; - struct cfg_spool *s, *tmp_s; - int i, num; - - LIST_FOREACH_SAFE(r, head, _next, tmp_r) { - num = 1; /* Number of alias_link to delete. */ - switch (r->mode) { - case NAT44_REDIR_PORT: - num = r->pport_cnt; - /* FALLTHROUGH */ - case NAT44_REDIR_ADDR: - case NAT44_REDIR_PROTO: - /* Delete all libalias redirect entry. */ - for (i = 0; i < num; i++) - LibAliasRedirectDelete(n->lib, r->alink[i]); - /* Del spool cfg if any. */ - LIST_FOREACH_SAFE(s, &r->spool_chain, _next, tmp_s) { - LIST_REMOVE(s, _next); - free(s, M_IPFW); - } - free(r->alink, M_IPFW); - LIST_REMOVE(r, _next); - free(r, M_IPFW); - break; - default: - printf("unknown redirect mode: %u\n", r->mode); - /* XXX - panic?!?!? */ - break; - } - } -} - -static int -add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) -{ - struct cfg_redir *r; - struct cfg_spool *s; - struct nat44_cfg_redir *ser_r; - struct nat44_cfg_spool *ser_s; - - int cnt, off, i; - - for (cnt = 0, off = 0; cnt < ptr->redir_cnt; cnt++) { - ser_r = (struct nat44_cfg_redir *)&buf[off]; - r = malloc(sizeof(*r), M_IPFW, M_WAITOK | M_ZERO); - r->mode = ser_r->mode; - r->laddr = ser_r->laddr; - r->paddr = ser_r->paddr; - r->raddr = ser_r->raddr; - r->lport = ser_r->lport; - r->pport = ser_r->pport; - r->rport = ser_r->rport; - r->pport_cnt = ser_r->pport_cnt; - r->rport_cnt = ser_r->rport_cnt; - r->proto = ser_r->proto; - r->spool_cnt = ser_r->spool_cnt; - //memcpy(r, ser_r, SOF_REDIR); - LIST_INIT(&r->spool_chain); - off += sizeof(struct nat44_cfg_redir); - r->alink = malloc(sizeof(struct alias_link *) * r->pport_cnt, - M_IPFW, M_WAITOK | M_ZERO); - switch (r->mode) { - case NAT44_REDIR_ADDR: - r->alink[0] = LibAliasRedirectAddr(ptr->lib, r->laddr, - r->paddr); - break; - case NAT44_REDIR_PORT: - for (i = 0 ; i < 
r->pport_cnt; i++) { - /* If remotePort is all ports, set it to 0. */ - u_short remotePortCopy = r->rport + i; - if (r->rport_cnt == 1 && r->rport == 0) - remotePortCopy = 0; - r->alink[i] = LibAliasRedirectPort(ptr->lib, - r->laddr, htons(r->lport + i), r->raddr, - htons(remotePortCopy), r->paddr, - htons(r->pport + i), r->proto); - if (r->alink[i] == NULL) { - r->alink[0] = NULL; - break; - } - } - break; - case NAT44_REDIR_PROTO: - r->alink[0] = LibAliasRedirectProto(ptr->lib ,r->laddr, - r->raddr, r->paddr, r->proto); - break; - default: - printf("unknown redirect mode: %u\n", r->mode); - break; - } - if (r->alink[0] == NULL) { - printf("LibAliasRedirect* returned NULL\n"); - free(r->alink, M_IPFW); - free(r, M_IPFW); - return (EINVAL); - } - /* LSNAT handling. */ - for (i = 0; i < r->spool_cnt; i++) { - ser_s = (struct nat44_cfg_spool *)&buf[off]; - s = malloc(sizeof(*s), M_IPFW, M_WAITOK | M_ZERO); - s->addr = ser_s->addr; - s->port = ser_s->port; - LibAliasAddServer(ptr->lib, r->alink[0], - s->addr, htons(s->port)); - off += sizeof(struct nat44_cfg_spool); - /* Hook spool entry. */ - LIST_INSERT_HEAD(&r->spool_chain, s, _next); - } - /* And finally hook this redir entry. */ - LIST_INSERT_HEAD(&ptr->redir_chain, r, _next); - } - - return (0); -} - -static void -free_nat_instance(struct cfg_nat *ptr) -{ - - del_redir_spool_cfg(ptr, &ptr->redir_chain); - LibAliasUninit(ptr->lib); - free(ptr, M_IPFW); -} - - -/* - * ipfw_nat - perform mbuf header translation. - * - * Note V_layer3_chain has to be locked while calling ipfw_nat() in - * 'global' operation mode (t == NULL). 
- * - */ -static int -ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) -{ - struct mbuf *mcl; - struct ip *ip; - /* XXX - libalias duct tape */ - int ldt, retval, found; - struct ip_fw_chain *chain; - char *c; - - ldt = 0; - retval = 0; - mcl = m_megapullup(m, m->m_pkthdr.len); - if (mcl == NULL) { - args->m = NULL; - return (IP_FW_DENY); - } - ip = mtod(mcl, struct ip *); - - /* - * XXX - Libalias checksum offload 'duct tape': - * - * locally generated packets have only pseudo-header checksum - * calculated and libalias will break it[1], so mark them for - * later fix. Moreover there are cases when libalias modifies - * tcp packet data[2], mark them for later fix too. - * - * [1] libalias was never meant to run in kernel, so it does - * not have any knowledge about checksum offloading, and - * expects a packet with a full internet checksum. - * Unfortunately, packets generated locally will have just the - * pseudo header calculated, and when libalias tries to adjust - * the checksum it will actually compute a wrong value. - * - * [2] when libalias modifies tcp's data content, full TCP - * checksum has to be recomputed: the problem is that - * libalias does not have any idea about checksum offloading. - * To work around this, we do not do checksumming in LibAlias, - * but only mark the packets in th_x2 field. If we receive a - * marked packet, we calculate correct checksum for it - * aware of offloading. Why such a terrible hack instead of - * recalculating checksum for each packet? - * Because the previous checksum was not checked! - * Recalculating checksums for EVERY packet will hide ALL - * transmission errors. Yes, marked packets still suffer from - * this problem. But, sigh, natd(8) has this problem, too. 
- * - * TODO: -make libalias mbuf aware (so - * it can handle delayed checksum and tso) - */ - - if (mcl->m_pkthdr.rcvif == NULL && - mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) - ldt = 1; - - c = mtod(mcl, char *); - - /* Check if this is 'global' instance */ - if (t == NULL) { - if (args->oif == NULL) { - /* Wrong direction, skip processing */ - args->m = mcl; - return (IP_FW_NAT); - } - - found = 0; - chain = &V_layer3_chain; - IPFW_RLOCK_ASSERT(chain); - /* Check every nat entry... */ - LIST_FOREACH(t, &chain->nat, _next) { - if ((t->mode & PKT_ALIAS_SKIP_GLOBAL) != 0) - continue; - retval = LibAliasOutTry(t->lib, c, - mcl->m_len + M_TRAILINGSPACE(mcl), 0); - if (retval == PKT_ALIAS_OK) { - /* Nat instance recognises state */ - found = 1; - break; - } - } - if (found != 1) { - /* No instance found, return ignore */ - args->m = mcl; - return (IP_FW_NAT); - } - } else { - if (args->oif == NULL) - retval = LibAliasIn(t->lib, c, - mcl->m_len + M_TRAILINGSPACE(mcl)); - else - retval = LibAliasOut(t->lib, c, - mcl->m_len + M_TRAILINGSPACE(mcl)); - } - - /* - * We drop packet when: - * 1. libalias returns PKT_ALIAS_ERROR; - * 2. For incoming packets: - * a) for unresolved fragments; - * b) libalias returns PKT_ALIAS_IGNORED and - * PKT_ALIAS_DENY_INCOMING flag is set. - */ - if (retval == PKT_ALIAS_ERROR || - (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT || - (retval == PKT_ALIAS_IGNORED && - (t->mode & PKT_ALIAS_DENY_INCOMING) != 0)))) { - /* XXX - should i add some logging? 
*/ - m_free(mcl); - args->m = NULL; - return (IP_FW_DENY); - } - - if (retval == PKT_ALIAS_RESPOND) - mcl->m_flags |= M_SKIP_FIREWALL; - mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len); - - /* - * XXX - libalias checksum offload - * 'duct tape' (see above) - */ - - if ((ip->ip_off & htons(IP_OFFMASK)) == 0 && - ip->ip_p == IPPROTO_TCP) { - struct tcphdr *th; - - th = (struct tcphdr *)(ip + 1); - if (th->th_x2) - ldt = 1; - } - - if (ldt) { - struct tcphdr *th; - struct udphdr *uh; - uint16_t ip_len, cksum; - - ip_len = ntohs(ip->ip_len); - cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(ip->ip_p + ip_len - (ip->ip_hl << 2))); - - switch (ip->ip_p) { - case IPPROTO_TCP: - th = (struct tcphdr *)(ip + 1); - /* - * Maybe it was set in - * libalias... - */ - th->th_x2 = 0; - th->th_sum = cksum; - mcl->m_pkthdr.csum_data = - offsetof(struct tcphdr, th_sum); - break; - case IPPROTO_UDP: - uh = (struct udphdr *)(ip + 1); - uh->uh_sum = cksum; - mcl->m_pkthdr.csum_data = - offsetof(struct udphdr, uh_sum); - break; - } - /* No hw checksum offloading: do it ourselves */ - if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) { - in_delayed_cksum(mcl); - mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } - } - args->m = mcl; - return (IP_FW_NAT); -} - -static struct cfg_nat * -lookup_nat(struct nat_list *l, int nat_id) -{ - struct cfg_nat *res; - - LIST_FOREACH(res, l, _next) { - if (res->id == nat_id) - break; - } - return res; -} - -static struct cfg_nat * -lookup_nat_name(struct nat_list *l, char *name) -{ - struct cfg_nat *res; - int id; - char *errptr; - - id = strtol(name, &errptr, 10); - if (id == 0 || *errptr != '\0') - return (NULL); - - LIST_FOREACH(res, l, _next) { - if (res->id == id) - break; - } - return (res); -} - -/* IP_FW3 configuration routines */ - -static void -nat44_config(struct ip_fw_chain *chain, struct nat44_cfg_nat *ucfg) -{ - struct cfg_nat *ptr, *tcfg; - int gencnt; - - /* - * Find/create nat rule. 
- */ - IPFW_UH_WLOCK(chain); - gencnt = chain->gencnt; - ptr = lookup_nat_name(&chain->nat, ucfg->name); - if (ptr == NULL) { - IPFW_UH_WUNLOCK(chain); - /* New rule: allocate and init new instance. */ - ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_WAITOK | M_ZERO); - ptr->lib = LibAliasInit(NULL); - LIST_INIT(&ptr->redir_chain); - } else { - /* Entry already present: temporarily unhook it. */ - IPFW_WLOCK(chain); - LIST_REMOVE(ptr, _next); - flush_nat_ptrs(chain, ptr->id); - IPFW_WUNLOCK(chain); - IPFW_UH_WUNLOCK(chain); - } - - /* - * Basic nat (re)configuration. - */ - ptr->id = strtol(ucfg->name, NULL, 10); - /* - * XXX - what if this rule doesn't nat any ip and just - * redirect? - * do we set aliasaddress to 0.0.0.0? - */ - ptr->ip = ucfg->ip; - ptr->redir_cnt = ucfg->redir_cnt; - ptr->mode = ucfg->mode; - strlcpy(ptr->if_name, ucfg->if_name, sizeof(ptr->if_name)); - LibAliasSetMode(ptr->lib, ptr->mode, ~0); - LibAliasSetAddress(ptr->lib, ptr->ip); - - /* - * Redir and LSNAT configuration. - */ - /* Delete old cfgs. */ - del_redir_spool_cfg(ptr, &ptr->redir_chain); - /* Add new entries. */ - add_redir_spool_cfg((char *)(ucfg + 1), ptr); - IPFW_UH_WLOCK(chain); - - /* Extra check to avoid race with another ipfw_nat_cfg() */ - tcfg = NULL; - if (gencnt != chain->gencnt) - tcfg = lookup_nat_name(&chain->nat, ucfg->name); - IPFW_WLOCK(chain); - if (tcfg != NULL) - LIST_REMOVE(tcfg, _next); - LIST_INSERT_HEAD(&chain->nat, ptr, _next); - IPFW_WUNLOCK(chain); - chain->gencnt++; - - IPFW_UH_WUNLOCK(chain); - - if (tcfg != NULL) - free_nat_instance(ptr); -} - -/* - * Creates/configure nat44 instance - * Data layout (v0)(current): - * Request: [ ipfw_obj_header nat44_cfg_nat .. 
] - * - * Returns 0 on success - */ -static int -nat44_cfg(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_header *oh; - struct nat44_cfg_nat *ucfg; - int id; - size_t read; - char *errptr; - - /* Check minimum header size */ - if (sd->valsize < (sizeof(*oh) + sizeof(*ucfg))) - return (EINVAL); - - oh = (ipfw_obj_header *)sd->kbuf; - - /* Basic length checks for TLVs */ - if (oh->ntlv.head.length != sizeof(oh->ntlv)) - return (EINVAL); - - ucfg = (struct nat44_cfg_nat *)(oh + 1); - - /* Check if name is properly terminated and looks like number */ - if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name)) - return (EINVAL); - id = strtol(ucfg->name, &errptr, 10); - if (id == 0 || *errptr != '\0') - return (EINVAL); - - read = sizeof(*oh) + sizeof(*ucfg); - /* Check number of redirs */ - if (sd->valsize < read + ucfg->redir_cnt*sizeof(struct nat44_cfg_redir)) - return (EINVAL); - - nat44_config(chain, ucfg); - return (0); -} - -/* - * Destroys given nat instances. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * - * Returns 0 on success - */ -static int -nat44_destroy(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_header *oh; - struct cfg_nat *ptr; - ipfw_obj_ntlv *ntlv; - - /* Check minimum header size */ - if (sd->valsize < sizeof(*oh)) - return (EINVAL); - - oh = (ipfw_obj_header *)sd->kbuf; - - /* Basic length checks for TLVs */ - if (oh->ntlv.head.length != sizeof(oh->ntlv)) - return (EINVAL); - - ntlv = &oh->ntlv; - /* Check if name is properly terminated */ - if (strnlen(ntlv->name, sizeof(ntlv->name)) == sizeof(ntlv->name)) - return (EINVAL); - - IPFW_UH_WLOCK(chain); - ptr = lookup_nat_name(&chain->nat, ntlv->name); - if (ptr == NULL) { - IPFW_UH_WUNLOCK(chain); - return (ESRCH); - } - IPFW_WLOCK(chain); - LIST_REMOVE(ptr, _next); - flush_nat_ptrs(chain, ptr->id); - IPFW_WUNLOCK(chain); - IPFW_UH_WUNLOCK(chain); - - free_nat_instance(ptr); - - return (0); -} - -static void -export_nat_cfg(struct cfg_nat *ptr, struct nat44_cfg_nat *ucfg) -{ - - snprintf(ucfg->name, sizeof(ucfg->name), "%d", ptr->id); - ucfg->ip = ptr->ip; - ucfg->redir_cnt = ptr->redir_cnt; - ucfg->mode = ptr->mode; - strlcpy(ucfg->if_name, ptr->if_name, sizeof(ucfg->if_name)); -} - -/* - * Gets config for given nat instance - * Data layout (v0)(current): - * Request: [ ipfw_obj_header nat44_cfg_nat .. 
] - * - * Returns 0 on success - */ -static int -nat44_get_cfg(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_header *oh; - struct nat44_cfg_nat *ucfg; - struct cfg_nat *ptr; - struct cfg_redir *r; - struct cfg_spool *s; - struct nat44_cfg_redir *ser_r; - struct nat44_cfg_spool *ser_s; - size_t sz; - - sz = sizeof(*oh) + sizeof(*ucfg); - /* Check minimum header size */ - if (sd->valsize < sz) - return (EINVAL); - - oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); - - /* Basic length checks for TLVs */ - if (oh->ntlv.head.length != sizeof(oh->ntlv)) - return (EINVAL); - - ucfg = (struct nat44_cfg_nat *)(oh + 1); - - /* Check if name is properly terminated */ - if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name)) - return (EINVAL); - - IPFW_UH_RLOCK(chain); - ptr = lookup_nat_name(&chain->nat, ucfg->name); - if (ptr == NULL) { - IPFW_UH_RUNLOCK(chain); - return (ESRCH); - } - - export_nat_cfg(ptr, ucfg); - - /* Estimate memory amount */ - sz = sizeof(ipfw_obj_header) + sizeof(struct nat44_cfg_nat); - LIST_FOREACH(r, &ptr->redir_chain, _next) { - sz += sizeof(struct nat44_cfg_redir); - LIST_FOREACH(s, &r->spool_chain, _next) - sz += sizeof(struct nat44_cfg_spool); - } - - ucfg->size = sz; - if (sd->valsize < sz) { - - /* - * Submitted buffer size is not enough. - * WE've already filled in @ucfg structure with - * relevant info including size, so we - * can return. Buffer will be flushed automatically. 
- */ - IPFW_UH_RUNLOCK(chain); - return (ENOMEM); - } - - /* Size OK, let's copy data */ - LIST_FOREACH(r, &ptr->redir_chain, _next) { - ser_r = (struct nat44_cfg_redir *)ipfw_get_sopt_space(sd, - sizeof(*ser_r)); - ser_r->mode = r->mode; - ser_r->laddr = r->laddr; - ser_r->paddr = r->paddr; - ser_r->raddr = r->raddr; - ser_r->lport = r->lport; - ser_r->pport = r->pport; - ser_r->rport = r->rport; - ser_r->pport_cnt = r->pport_cnt; - ser_r->rport_cnt = r->rport_cnt; - ser_r->proto = r->proto; - ser_r->spool_cnt = r->spool_cnt; - - LIST_FOREACH(s, &r->spool_chain, _next) { - ser_s = (struct nat44_cfg_spool *)ipfw_get_sopt_space( - sd, sizeof(*ser_s)); - - ser_s->addr = s->addr; - ser_s->port = s->port; - } - } - - IPFW_UH_RUNLOCK(chain); - - return (0); -} - -/* - * Lists all nat44 instances currently available in kernel. - * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ] - * Reply: [ ipfw_obj_lheader nat44_cfg_nat x N ] - * - * Returns 0 on success - */ -static int -nat44_list_nat(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_lheader *olh; - struct nat44_cfg_nat *ucfg; - struct cfg_nat *ptr; - int nat_count; - - /* Check minimum header size */ - if (sd->valsize < sizeof(ipfw_obj_lheader)) - return (EINVAL); - - olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); - IPFW_UH_RLOCK(chain); - nat_count = 0; - LIST_FOREACH(ptr, &chain->nat, _next) - nat_count++; - - olh->count = nat_count; - olh->objsize = sizeof(struct nat44_cfg_nat); - olh->size = sizeof(*olh) + olh->count * olh->objsize; - - if (sd->valsize < olh->size) { - IPFW_UH_RUNLOCK(chain); - return (ENOMEM); - } - - LIST_FOREACH(ptr, &chain->nat, _next) { - ucfg = (struct nat44_cfg_nat *)ipfw_get_sopt_space(sd, - sizeof(*ucfg)); - export_nat_cfg(ptr, ucfg); - } - - IPFW_UH_RUNLOCK(chain); - - return (0); -} - -/* - * Gets log for given nat instance - * Data layout (v0)(current): - * Request: [ ipfw_obj_header nat44_cfg_nat ] - * Reply: [ 
ipfw_obj_header nat44_cfg_nat LOGBUFFER ] - * - * Returns 0 on success - */ -static int -nat44_get_log(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_header *oh; - struct nat44_cfg_nat *ucfg; - struct cfg_nat *ptr; - void *pbuf; - size_t sz; - - sz = sizeof(*oh) + sizeof(*ucfg); - /* Check minimum header size */ - if (sd->valsize < sz) - return (EINVAL); - - oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); - - /* Basic length checks for TLVs */ - if (oh->ntlv.head.length != sizeof(oh->ntlv)) - return (EINVAL); - - ucfg = (struct nat44_cfg_nat *)(oh + 1); - - /* Check if name is properly terminated */ - if (strnlen(ucfg->name, sizeof(ucfg->name)) == sizeof(ucfg->name)) - return (EINVAL); - - IPFW_UH_RLOCK(chain); - ptr = lookup_nat_name(&chain->nat, ucfg->name); - if (ptr == NULL) { - IPFW_UH_RUNLOCK(chain); - return (ESRCH); - } - - if (ptr->lib->logDesc == NULL) { - IPFW_UH_RUNLOCK(chain); - return (ENOENT); - } - - export_nat_cfg(ptr, ucfg); - - /* Estimate memory amount */ - ucfg->size = sizeof(struct nat44_cfg_nat) + LIBALIAS_BUF_SIZE; - if (sd->valsize < sz + sizeof(*oh)) { - - /* - * Submitted buffer size is not enough. - * WE've already filled in @ucfg structure with - * relevant info including size, so we - * can return. Buffer will be flushed automatically. 
- */ - IPFW_UH_RUNLOCK(chain); - return (ENOMEM); - } - - pbuf = (void *)ipfw_get_sopt_space(sd, LIBALIAS_BUF_SIZE); - memcpy(pbuf, ptr->lib->logDesc, LIBALIAS_BUF_SIZE); - - IPFW_UH_RUNLOCK(chain); - - return (0); -} - -static struct ipfw_sopt_handler scodes[] = { - { IP_FW_NAT44_XCONFIG, 0, HDIR_SET, nat44_cfg }, - { IP_FW_NAT44_DESTROY, 0, HDIR_SET, nat44_destroy }, - { IP_FW_NAT44_XGETCONFIG, 0, HDIR_GET, nat44_get_cfg }, - { IP_FW_NAT44_LIST_NAT, 0, HDIR_GET, nat44_list_nat }, - { IP_FW_NAT44_XGETLOG, 0, HDIR_GET, nat44_get_log }, -}; - - -/* - * Legacy configuration routines - */ - -struct cfg_spool_legacy { - LIST_ENTRY(cfg_spool_legacy) _next; - struct in_addr addr; - u_short port; -}; - -struct cfg_redir_legacy { - LIST_ENTRY(cfg_redir) _next; - u_int16_t mode; - struct in_addr laddr; - struct in_addr paddr; - struct in_addr raddr; - u_short lport; - u_short pport; - u_short rport; - u_short pport_cnt; - u_short rport_cnt; - int proto; - struct alias_link **alink; - u_int16_t spool_cnt; - LIST_HEAD(, cfg_spool_legacy) spool_chain; -}; - -struct cfg_nat_legacy { - LIST_ENTRY(cfg_nat_legacy) _next; - int id; - struct in_addr ip; - char if_name[IF_NAMESIZE]; - int mode; - struct libalias *lib; - int redir_cnt; - LIST_HEAD(, cfg_redir_legacy) redir_chain; -}; - -static int -ipfw_nat_cfg(struct sockopt *sopt) -{ - struct cfg_nat_legacy *cfg; - struct nat44_cfg_nat *ucfg; - struct cfg_redir_legacy *rdir; - struct nat44_cfg_redir *urdir; - char *buf; - size_t len, len2; - int error, i; - - len = sopt->sopt_valsize; - len2 = len + 128; - - /* - * Allocate 2x buffer to store converted structures. - * new redir_cfg has shrunk, so we're sure that - * new buffer size is enough. 
- */ - buf = malloc(roundup2(len, 8) + len2, M_TEMP, M_WAITOK | M_ZERO); - error = sooptcopyin(sopt, buf, len, sizeof(struct cfg_nat_legacy)); - if (error != 0) - goto out; - - cfg = (struct cfg_nat_legacy *)buf; - if (cfg->id < 0) { - error = EINVAL; - goto out; - } - - ucfg = (struct nat44_cfg_nat *)&buf[roundup2(len, 8)]; - snprintf(ucfg->name, sizeof(ucfg->name), "%d", cfg->id); - strlcpy(ucfg->if_name, cfg->if_name, sizeof(ucfg->if_name)); - ucfg->ip = cfg->ip; - ucfg->mode = cfg->mode; - ucfg->redir_cnt = cfg->redir_cnt; - - if (len < sizeof(*cfg) + cfg->redir_cnt * sizeof(*rdir)) { - error = EINVAL; - goto out; - } - - urdir = (struct nat44_cfg_redir *)(ucfg + 1); - rdir = (struct cfg_redir_legacy *)(cfg + 1); - for (i = 0; i < cfg->redir_cnt; i++) { - urdir->mode = rdir->mode; - urdir->laddr = rdir->laddr; - urdir->paddr = rdir->paddr; - urdir->raddr = rdir->raddr; - urdir->lport = rdir->lport; - urdir->pport = rdir->pport; - urdir->rport = rdir->rport; - urdir->pport_cnt = rdir->pport_cnt; - urdir->rport_cnt = rdir->rport_cnt; - urdir->proto = rdir->proto; - urdir->spool_cnt = rdir->spool_cnt; - - urdir++; - rdir++; - } - - nat44_config(&V_layer3_chain, ucfg); - -out: - free(buf, M_TEMP); - return (error); -} - -static int -ipfw_nat_del(struct sockopt *sopt) -{ - struct cfg_nat *ptr; - struct ip_fw_chain *chain = &V_layer3_chain; - int i; - - sooptcopyin(sopt, &i, sizeof i, sizeof i); - /* XXX validate i */ - IPFW_UH_WLOCK(chain); - ptr = lookup_nat(&chain->nat, i); - if (ptr == NULL) { - IPFW_UH_WUNLOCK(chain); - return (EINVAL); - } - IPFW_WLOCK(chain); - LIST_REMOVE(ptr, _next); - flush_nat_ptrs(chain, i); - IPFW_WUNLOCK(chain); - IPFW_UH_WUNLOCK(chain); - free_nat_instance(ptr); - return (0); -} - -static int -ipfw_nat_get_cfg(struct sockopt *sopt) -{ - struct ip_fw_chain *chain = &V_layer3_chain; - struct cfg_nat *n; - struct cfg_nat_legacy *ucfg; - struct cfg_redir *r; - struct cfg_spool *s; - struct cfg_redir_legacy *ser_r; - struct cfg_spool_legacy 
*ser_s; - char *data; - int gencnt, nat_cnt, len, error; - - nat_cnt = 0; - len = sizeof(nat_cnt); - - IPFW_UH_RLOCK(chain); -retry: - gencnt = chain->gencnt; - /* Estimate memory amount */ - LIST_FOREACH(n, &chain->nat, _next) { - nat_cnt++; - len += sizeof(struct cfg_nat_legacy); - LIST_FOREACH(r, &n->redir_chain, _next) { - len += sizeof(struct cfg_redir_legacy); - LIST_FOREACH(s, &r->spool_chain, _next) - len += sizeof(struct cfg_spool_legacy); - } - } - IPFW_UH_RUNLOCK(chain); - - data = malloc(len, M_TEMP, M_WAITOK | M_ZERO); - bcopy(&nat_cnt, data, sizeof(nat_cnt)); - - nat_cnt = 0; - len = sizeof(nat_cnt); - - IPFW_UH_RLOCK(chain); - if (gencnt != chain->gencnt) { - free(data, M_TEMP); - goto retry; - } - /* Serialize all the data. */ - LIST_FOREACH(n, &chain->nat, _next) { - ucfg = (struct cfg_nat_legacy *)&data[len]; - ucfg->id = n->id; - ucfg->ip = n->ip; - ucfg->redir_cnt = n->redir_cnt; - ucfg->mode = n->mode; - strlcpy(ucfg->if_name, n->if_name, sizeof(ucfg->if_name)); - len += sizeof(struct cfg_nat_legacy); - LIST_FOREACH(r, &n->redir_chain, _next) { - ser_r = (struct cfg_redir_legacy *)&data[len]; - ser_r->mode = r->mode; - ser_r->laddr = r->laddr; - ser_r->paddr = r->paddr; - ser_r->raddr = r->raddr; - ser_r->lport = r->lport; - ser_r->pport = r->pport; - ser_r->rport = r->rport; - ser_r->pport_cnt = r->pport_cnt; - ser_r->rport_cnt = r->rport_cnt; - ser_r->proto = r->proto; - ser_r->spool_cnt = r->spool_cnt; - len += sizeof(struct cfg_redir_legacy); - LIST_FOREACH(s, &r->spool_chain, _next) { - ser_s = (struct cfg_spool_legacy *)&data[len]; - ser_s->addr = s->addr; - ser_s->port = s->port; - len += sizeof(struct cfg_spool_legacy); - } - } - } - IPFW_UH_RUNLOCK(chain); - - error = sooptcopyout(sopt, data, len); - free(data, M_TEMP); - - return (error); -} - -static int -ipfw_nat_get_log(struct sockopt *sopt) -{ - uint8_t *data; - struct cfg_nat *ptr; - int i, size; - struct ip_fw_chain *chain; - IPFW_RLOCK_TRACKER; - - chain = &V_layer3_chain; - - 
IPFW_RLOCK(chain); - /* one pass to count, one to copy the data */ - i = 0; - LIST_FOREACH(ptr, &chain->nat, _next) { - if (ptr->lib->logDesc == NULL) - continue; - i++; - } - size = i * (LIBALIAS_BUF_SIZE + sizeof(int)); - data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO); - if (data == NULL) { - IPFW_RUNLOCK(chain); - return (ENOSPC); - } - i = 0; - LIST_FOREACH(ptr, &chain->nat, _next) { - if (ptr->lib->logDesc == NULL) - continue; - bcopy(&ptr->id, &data[i], sizeof(int)); - i += sizeof(int); - bcopy(ptr->lib->logDesc, &data[i], LIBALIAS_BUF_SIZE); - i += LIBALIAS_BUF_SIZE; - } - IPFW_RUNLOCK(chain); - sooptcopyout(sopt, data, size); - free(data, M_IPFW); - return(0); -} - -static int -vnet_ipfw_nat_init(const void *arg __unused) -{ - - V_ipfw_nat_ready = 1; - return (0); -} - -static int -vnet_ipfw_nat_uninit(const void *arg __unused) -{ - struct cfg_nat *ptr, *ptr_temp; - struct ip_fw_chain *chain; - - chain = &V_layer3_chain; - IPFW_WLOCK(chain); - V_ipfw_nat_ready = 0; - LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { - LIST_REMOVE(ptr, _next); - free_nat_instance(ptr); - } - flush_nat_ptrs(chain, -1 /* flush all */); - IPFW_WUNLOCK(chain); - return (0); -} - -static void -ipfw_nat_init(void) -{ - - /* init ipfw hooks */ - ipfw_nat_ptr = ipfw_nat; - lookup_nat_ptr = lookup_nat; - ipfw_nat_cfg_ptr = ipfw_nat_cfg; - ipfw_nat_del_ptr = ipfw_nat_del; - ipfw_nat_get_cfg_ptr = ipfw_nat_get_cfg; - ipfw_nat_get_log_ptr = ipfw_nat_get_log; - IPFW_ADD_SOPT_HANDLER(1, scodes); - - ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_change, - NULL, EVENTHANDLER_PRI_ANY); -} - -static void -ipfw_nat_destroy(void) -{ - - EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag); - /* deregister ipfw_nat */ - IPFW_DEL_SOPT_HANDLER(1, scodes); - ipfw_nat_ptr = NULL; - lookup_nat_ptr = NULL; - ipfw_nat_cfg_ptr = NULL; - ipfw_nat_del_ptr = NULL; - ipfw_nat_get_cfg_ptr = NULL; - ipfw_nat_get_log_ptr = NULL; -} - -static int -ipfw_nat_modevent(module_t mod, int 
type, void *unused) -{ - int err = 0; - - switch (type) { - case MOD_LOAD: - break; - - case MOD_UNLOAD: - break; - - default: - return EOPNOTSUPP; - break; - } - return err; -} - -static moduledata_t ipfw_nat_mod = { - "ipfw_nat", - ipfw_nat_modevent, - 0 -}; - -/* Define startup order. */ -#define IPFW_NAT_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL -#define IPFW_NAT_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */ -#define IPFW_NAT_MODULE_ORDER (IPFW_NAT_MODEVENT_ORDER + 1) -#define IPFW_NAT_VNET_ORDER (IPFW_NAT_MODEVENT_ORDER + 2) - -DECLARE_MODULE(ipfw_nat, ipfw_nat_mod, IPFW_NAT_SI_SUB_FIREWALL, SI_ORDER_ANY); -MODULE_DEPEND(ipfw_nat, libalias, 1, 1, 1); -MODULE_DEPEND(ipfw_nat, ipfw, 3, 3, 3); -MODULE_VERSION(ipfw_nat, 1); - -SYSINIT(ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER, - ipfw_nat_init, NULL); -VNET_SYSINIT(vnet_ipfw_nat_init, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_VNET_ORDER, - vnet_ipfw_nat_init, NULL); - -SYSUNINIT(ipfw_nat_destroy, IPFW_NAT_SI_SUB_FIREWALL, IPFW_NAT_MODULE_ORDER, - ipfw_nat_destroy, NULL); -VNET_SYSUNINIT(vnet_ipfw_nat_uninit, IPFW_NAT_SI_SUB_FIREWALL, - IPFW_NAT_VNET_ORDER, vnet_ipfw_nat_uninit, NULL); - -/* end of file */ diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c b/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c deleted file mode 100644 index d6d5ccbb..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_pfil.c +++ /dev/null @@ -1,582 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <rtems/bsd/local/opt_ipfw.h> -#include <rtems/bsd/local/opt_inet.h> -#include <rtems/bsd/local/opt_inet6.h> -#ifndef INET -#error IPFIREWALL requires INET. 
-#endif /* INET */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/sysctl.h> - -#include <net/if.h> -#include <net/route.h> -#include <net/ethernet.h> -#include <net/pfil.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> -#ifdef INET6 -#include <netinet/ip6.h> -#include <netinet6/ip6_var.h> -#include <netinet6/scope6_var.h> -#endif - -#include <netgraph/ng_ipfw.h> - -#include <netpfil/ipfw/ip_fw_private.h> - -#include <machine/in_cksum.h> - -static VNET_DEFINE(int, fw_enable) = 1; -#define V_fw_enable VNET(fw_enable) - -#ifdef INET6 -static VNET_DEFINE(int, fw6_enable) = 1; -#define V_fw6_enable VNET(fw6_enable) -#endif - -static VNET_DEFINE(int, fwlink_enable) = 0; -#define V_fwlink_enable VNET(fwlink_enable) - -int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); - -/* Forward declarations. 
*/ -static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int); -int ipfw_check_packet(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *); -int ipfw_check_frame(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *); - -#ifdef SYSCTL_NODE - -SYSBEGIN(f1) - -SYSCTL_DECL(_net_inet_ip_fw); -SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, - CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, - &VNET_NAME(fw_enable), 0, ipfw_chg_hook, "I", "Enable ipfw"); -#ifdef INET6 -SYSCTL_DECL(_net_inet6_ip6_fw); -SYSCTL_PROC(_net_inet6_ip6_fw, OID_AUTO, enable, - CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, - &VNET_NAME(fw6_enable), 0, ipfw_chg_hook, "I", "Enable ipfw+6"); -#endif /* INET6 */ - -SYSCTL_DECL(_net_link_ether); -SYSCTL_PROC(_net_link_ether, OID_AUTO, ipfw, - CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, - &VNET_NAME(fwlink_enable), 0, ipfw_chg_hook, "I", - "Pass ether pkts through firewall"); - -SYSEND - -#endif /* SYSCTL_NODE */ - -/* - * The pfilter hook to pass packets to ipfw_chk and then to - * dummynet, divert, netgraph or other modules. - * The packet may be consumed. - */ -int -ipfw_check_packet(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, - struct inpcb *inp) -{ - struct ip_fw_args args; - struct m_tag *tag; - int ipfw; - int ret; - - /* convert dir to IPFW values */ - dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT; - bzero(&args, sizeof(args)); - -again: - /* - * extract and remove the tag if present. If we are left - * with onepass, optimize the outgoing path. - */ - tag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL); - if (tag != NULL) { - args.rule = *((struct ipfw_rule_ref *)(tag+1)); - m_tag_delete(*m0, tag); - if (args.rule.info & IPFW_ONEPASS) - return (0); - } - - args.m = *m0; - args.oif = dir == DIR_OUT ? 
ifp : NULL; - args.inp = inp; - - ipfw = ipfw_chk(&args); - *m0 = args.m; - - KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", - __func__)); - - /* breaking out of the switch means drop */ - ret = 0; /* default return value for pass */ - switch (ipfw) { - case IP_FW_PASS: - /* next_hop may be set by ipfw_chk */ - if (args.next_hop == NULL && args.next_hop6 == NULL) - break; /* pass */ -#if (!defined(INET6) && !defined(INET)) - ret = EACCES; -#else - { - struct m_tag *fwd_tag; - size_t len; - - KASSERT(args.next_hop == NULL || args.next_hop6 == NULL, - ("%s: both next_hop=%p and next_hop6=%p not NULL", __func__, - args.next_hop, args.next_hop6)); -#ifdef INET6 - if (args.next_hop6 != NULL) - len = sizeof(struct sockaddr_in6); -#endif -#ifdef INET - if (args.next_hop != NULL) - len = sizeof(struct sockaddr_in); -#endif - - /* Incoming packets should not be tagged so we do not - * m_tag_find. Outgoing packets may be tagged, so we - * reuse the tag if present. - */ - fwd_tag = (dir == DIR_IN) ? NULL : - m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL); - if (fwd_tag != NULL) { - m_tag_unlink(*m0, fwd_tag); - } else { - fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, len, - M_NOWAIT); - if (fwd_tag == NULL) { - ret = EACCES; - break; /* i.e. drop */ - } - } -#ifdef INET6 - if (args.next_hop6 != NULL) { - struct sockaddr_in6 *sa6; - - sa6 = (struct sockaddr_in6 *)(fwd_tag + 1); - bcopy(args.next_hop6, sa6, len); - /* - * If nh6 address is link-local we should convert - * it to kernel internal form before doing any - * comparisons. 
- */ - if (sa6_embedscope(sa6, V_ip6_use_defzone) != 0) { - ret = EACCES; - break; - } - if (in6_localip(&sa6->sin6_addr)) - (*m0)->m_flags |= M_FASTFWD_OURS; - (*m0)->m_flags |= M_IP6_NEXTHOP; - } -#endif -#ifdef INET - if (args.next_hop != NULL) { - bcopy(args.next_hop, (fwd_tag+1), len); - if (in_localip(args.next_hop->sin_addr)) - (*m0)->m_flags |= M_FASTFWD_OURS; - (*m0)->m_flags |= M_IP_NEXTHOP; - } -#endif - m_tag_prepend(*m0, fwd_tag); - } -#endif /* INET || INET6 */ - break; - - case IP_FW_DENY: - ret = EACCES; - break; /* i.e. drop */ - - case IP_FW_DUMMYNET: - ret = EACCES; - if (ip_dn_io_ptr == NULL) - break; /* i.e. drop */ - if (mtod(*m0, struct ip *)->ip_v == 4) - ret = ip_dn_io_ptr(m0, dir, &args); - else if (mtod(*m0, struct ip *)->ip_v == 6) - ret = ip_dn_io_ptr(m0, dir | PROTO_IPV6, &args); - else - break; /* drop it */ - /* - * XXX should read the return value. - * dummynet normally eats the packet and sets *m0=NULL - * unless the packet can be sent immediately. In this - * case args is updated and we should re-run the - * check without clearing args. - */ - if (*m0 != NULL) - goto again; - break; - - case IP_FW_TEE: - case IP_FW_DIVERT: - if (ip_divert_ptr == NULL) { - ret = EACCES; - break; /* i.e. drop */ - } - ret = ipfw_divert(m0, dir, &args.rule, - (ipfw == IP_FW_TEE) ? 1 : 0); - /* continue processing for the original packet (tee). */ - if (*m0) - goto again; - break; - - case IP_FW_NGTEE: - case IP_FW_NETGRAPH: - if (ng_ipfw_input_p == NULL) { - ret = EACCES; - break; /* i.e. drop */ - } - ret = ng_ipfw_input_p(m0, dir, &args, - (ipfw == IP_FW_NGTEE) ? 
1 : 0); - if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */ - goto again; /* continue with packet */ - break; - - case IP_FW_NAT: - /* honor one-pass in case of successful nat */ - if (V_fw_one_pass) - break; /* ret is already 0 */ - goto again; - - case IP_FW_REASS: - goto again; /* continue with packet */ - - default: - KASSERT(0, ("%s: unknown retval", __func__)); - } - - if (ret != 0) { - if (*m0) - FREE_PKT(*m0); - *m0 = NULL; - } - - return ret; -} - -/* - * ipfw processing for ethernet packets (in and out). - */ -int -ipfw_check_frame(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, - struct inpcb *inp) -{ - struct ether_header *eh; - struct ether_header save_eh; - struct mbuf *m; - int i, ret; - struct ip_fw_args args; - struct m_tag *mtag; - - /* fetch start point from rule, if any. remove the tag if present. */ - mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL); - if (mtag == NULL) { - args.rule.slot = 0; - } else { - args.rule = *((struct ipfw_rule_ref *)(mtag+1)); - m_tag_delete(*m0, mtag); - if (args.rule.info & IPFW_ONEPASS) - return (0); - } - - /* I need some amt of data to be contiguous */ - m = *m0; - i = min(m->m_pkthdr.len, max_protohdr); - if (m->m_len < i) { - m = m_pullup(m, i); - if (m == NULL) { - *m0 = m; - return (0); - } - } - eh = mtod(m, struct ether_header *); - save_eh = *eh; /* save copy for restore below */ - m_adj(m, ETHER_HDR_LEN); /* strip ethernet header */ - - args.m = m; /* the packet we are looking at */ - args.oif = dir == PFIL_OUT ? ifp: NULL; /* destination, if any */ - args.next_hop = NULL; /* we do not support forward yet */ - args.next_hop6 = NULL; /* we do not support forward yet */ - args.eh = &save_eh; /* MAC header for bridged/MAC packets */ - args.inp = NULL; /* used by ipfw uid/gid/jail rules */ - i = ipfw_chk(&args); - m = args.m; - if (m != NULL) { - /* - * Restore Ethernet header, as needed, in case the - * mbuf chain was replaced by ipfw. 
- */ - M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); - if (m == NULL) { - *m0 = NULL; - return (0); - } - if (eh != mtod(m, struct ether_header *)) - bcopy(&save_eh, mtod(m, struct ether_header *), - ETHER_HDR_LEN); - } - *m0 = m; - - ret = 0; - /* Check result of ipfw_chk() */ - switch (i) { - case IP_FW_PASS: - break; - - case IP_FW_DENY: - ret = EACCES; - break; /* i.e. drop */ - - case IP_FW_DUMMYNET: - ret = EACCES; - - if (ip_dn_io_ptr == NULL) - break; /* i.e. drop */ - - *m0 = NULL; - dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT; - ip_dn_io_ptr(&m, dir | PROTO_LAYER2, &args); - return 0; - - default: - KASSERT(0, ("%s: unknown retval", __func__)); - } - - if (ret != 0) { - if (*m0) - FREE_PKT(*m0); - *m0 = NULL; - } - - return ret; -} - -/* do the divert, return 1 on error 0 on success */ -static int -ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule, - int tee) -{ - /* - * ipfw_chk() has already tagged the packet with the divert tag. - * If tee is set, copy packet and return original. - * If not tee, consume packet and send it to divert socket. - */ - struct mbuf *clone; - struct ip *ip = mtod(*m0, struct ip *); - struct m_tag *tag; - - /* Cloning needed for tee? */ - if (tee == 0) { - clone = *m0; /* use the original mbuf */ - *m0 = NULL; - } else { - clone = m_dup(*m0, M_NOWAIT); - /* If we cannot duplicate the mbuf, we sacrifice the divert - * chain and continue with the tee-ed packet. - */ - if (clone == NULL) - return 1; - } - - /* - * Divert listeners can normally handle non-fragmented packets, - * but we can only reass in the non-tee case. - * This means that listeners on a tee rule may get fragments, - * and have to live with that. - * Note that we now have the 'reass' ipfw option so if we care - * we can do it before a 'tee'. - */ - if (!tee) switch (ip->ip_v) { - case IPVERSION: - if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) { - int hlen; - struct mbuf *reass; - - reass = ip_reass(clone); /* Reassemble packet. 
*/ - if (reass == NULL) - return 0; /* not an error */ - /* if reass = NULL then it was consumed by ip_reass */ - /* - * IP header checksum fixup after reassembly and leave header - * in network byte order. - */ - ip = mtod(reass, struct ip *); - hlen = ip->ip_hl << 2; - ip->ip_sum = 0; - if (hlen == sizeof(struct ip)) - ip->ip_sum = in_cksum_hdr(ip); - else - ip->ip_sum = in_cksum(reass, hlen); - clone = reass; - } - break; -#ifdef INET6 - case IPV6_VERSION >> 4: - { - struct ip6_hdr *const ip6 = mtod(clone, struct ip6_hdr *); - - if (ip6->ip6_nxt == IPPROTO_FRAGMENT) { - int nxt, off; - - off = sizeof(struct ip6_hdr); - nxt = frag6_input(&clone, &off, 0); - if (nxt == IPPROTO_DONE) - return (0); - } - break; - } -#endif - } - - /* attach a tag to the packet with the reinject info */ - tag = m_tag_alloc(MTAG_IPFW_RULE, 0, - sizeof(struct ipfw_rule_ref), M_NOWAIT); - if (tag == NULL) { - FREE_PKT(clone); - return 1; - } - *((struct ipfw_rule_ref *)(tag+1)) = *rule; - m_tag_prepend(clone, tag); - - /* Do the dirty job... */ - ip_divert_ptr(clone, incoming); - return 0; -} - -/* - * attach or detach hooks for a given protocol family - */ -static int -ipfw_hook(int onoff, int pf) -{ - struct pfil_head *pfh; - pfil_func_t hook_func; - - pfh = pfil_head_get(PFIL_TYPE_AF, pf); - if (pfh == NULL) - return ENOENT; - - hook_func = (pf == AF_LINK) ? ipfw_check_frame : ipfw_check_packet; - - (void) (onoff ? 
pfil_add_hook : pfil_remove_hook) - (hook_func, NULL, PFIL_IN | PFIL_OUT | PFIL_WAITOK, pfh); - - return 0; -} - -int -ipfw_attach_hooks(int arg) -{ - int error = 0; - - if (arg == 0) /* detach */ - ipfw_hook(0, AF_INET); - else if (V_fw_enable && ipfw_hook(1, AF_INET) != 0) { - error = ENOENT; /* see ip_fw_pfil.c::ipfw_hook() */ - printf("ipfw_hook() error\n"); - } -#ifdef INET6 - if (arg == 0) /* detach */ - ipfw_hook(0, AF_INET6); - else if (V_fw6_enable && ipfw_hook(1, AF_INET6) != 0) { - error = ENOENT; - printf("ipfw6_hook() error\n"); - } -#endif - if (arg == 0) /* detach */ - ipfw_hook(0, AF_LINK); - else if (V_fwlink_enable && ipfw_hook(1, AF_LINK) != 0) { - error = ENOENT; - printf("ipfw_link_hook() error\n"); - } - return error; -} - -int -ipfw_chg_hook(SYSCTL_HANDLER_ARGS) -{ - int newval; - int error; - int af; - - if (arg1 == &V_fw_enable) - af = AF_INET; -#ifdef INET6 - else if (arg1 == &V_fw6_enable) - af = AF_INET6; -#endif - else if (arg1 == &V_fwlink_enable) - af = AF_LINK; - else - return (EINVAL); - - newval = *(int *)arg1; - /* Handle sysctl change */ - error = sysctl_handle_int(oidp, &newval, 0, req); - - if (error) - return (error); - - /* Formalize new value */ - newval = (newval) ? 1 : 0; - - if (*(int *)arg1 == newval) - return (0); - - error = ipfw_hook(newval, af); - if (error) - return (error); - *(int *)arg1 = newval; - - return (0); -} -/* end of file */ diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c b/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c deleted file mode 100644 index 1acba1c1..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_sockopt.c +++ /dev/null @@ -1,4630 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa - * Copyright (c) 2014 Yandex LLC - * Copyright (c) 2014 Alexander V. 
Chernikov - * - * Supported by: Valeria Paoli - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * Control socket and rule management routines for ipfw. - * Control is currently implemented via IP_FW3 setsockopt() code. - */ - -#include <rtems/bsd/local/opt_ipfw.h> -#include <rtems/bsd/local/opt_inet.h> -#ifndef INET -#error IPFIREWALL requires INET. 
-#endif /* INET */ -#include <rtems/bsd/local/opt_inet6.h> - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> /* struct m_tag used by nested headers */ -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/priv.h> -#include <sys/proc.h> -#include <sys/rwlock.h> -#include <sys/rmlock.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/sysctl.h> -#include <sys/syslog.h> -#include <sys/fnv_hash.h> -#include <net/if.h> -#include <net/pfil.h> -#include <net/route.h> -#include <net/vnet.h> -#include <vm/vm.h> -#include <vm/vm_extern.h> - -#include <netinet/in.h> -#include <netinet/ip_var.h> /* hooks */ -#include <netinet/ip_fw.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/ip_fw_table.h> - -#ifdef MAC -#include <security/mac/mac_framework.h> -#endif - -static int ipfw_ctl(struct sockopt *sopt); -static int check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, - struct rule_check_info *ci); -static int check_ipfw_rule1(struct ip_fw_rule *rule, int size, - struct rule_check_info *ci); -static int check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, - struct rule_check_info *ci); -static int rewrite_rule_uidx(struct ip_fw_chain *chain, - struct rule_check_info *ci); - -#define NAMEDOBJ_HASH_SIZE 32 - -struct namedobj_instance { - struct namedobjects_head *names; - struct namedobjects_head *values; - uint32_t nn_size; /* names hash size */ - uint32_t nv_size; /* number hash size */ - u_long *idx_mask; /* used items bitmask */ - uint32_t max_blocks; /* number of "long" blocks in bitmask */ - uint32_t count; /* number of items */ - uint16_t free_off[IPFW_MAX_SETS]; /* first possible free offset */ - objhash_hash_f *hash_f; - objhash_cmp_f *cmp_f; -}; -#define BLOCK_ITEMS (8 * sizeof(u_long)) /* Number of items for ffsl() */ - -static uint32_t objhash_hash_name(struct namedobj_instance *ni, - const void *key, uint32_t kopt); -static uint32_t objhash_hash_idx(struct namedobj_instance 
*ni, uint32_t val); -static int objhash_cmp_name(struct named_object *no, const void *name, - uint32_t set); - -MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); - -static int dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd); -static int add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd); -static int del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd); -static int clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd); -static int move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd); -static int manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd); -static int dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd); -static int dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd); - -/* ctl3 handler data */ -struct mtx ctl3_lock; -#define CTL3_LOCK_INIT() mtx_init(&ctl3_lock, "ctl3_lock", NULL, MTX_DEF) -#define CTL3_LOCK_DESTROY() mtx_destroy(&ctl3_lock) -#define CTL3_LOCK() mtx_lock(&ctl3_lock) -#define CTL3_UNLOCK() mtx_unlock(&ctl3_lock) - -static struct ipfw_sopt_handler *ctl3_handlers; -static size_t ctl3_hsize; -static uint64_t ctl3_refct, ctl3_gencnt; -#define CTL3_SMALLBUF 4096 /* small page-size write buffer */ -#define CTL3_LARGEBUF 16 * 1024 * 1024 /* handle large rulesets */ - -static int ipfw_flush_sopt_data(struct sockopt_data *sd); - -static struct ipfw_sopt_handler scodes[] = { - { IP_FW_XGET, 0, HDIR_GET, dump_config }, - { IP_FW_XADD, 0, HDIR_BOTH, add_rules }, - { IP_FW_XDEL, 0, HDIR_BOTH, del_rules }, - { IP_FW_XZERO, 0, HDIR_SET, clear_rules }, - { IP_FW_XRESETLOG, 0, HDIR_SET, clear_rules }, - { IP_FW_XMOVE, 0, HDIR_SET, move_rules }, - { IP_FW_SET_SWAP, 0, HDIR_SET, manage_sets }, - { IP_FW_SET_MOVE, 0, HDIR_SET, manage_sets }, - { IP_FW_SET_ENABLE, 0, 
HDIR_SET, manage_sets }, - { IP_FW_DUMP_SOPTCODES, 0, HDIR_GET, dump_soptcodes }, - { IP_FW_DUMP_SRVOBJECTS,0, HDIR_GET, dump_srvobjects }, -}; - -static int -set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule); -static struct opcode_obj_rewrite *find_op_rw(ipfw_insn *cmd, - uint16_t *puidx, uint8_t *ptype); -static int mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule, - uint32_t *bmask); -static int ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, - struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti); -static int ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, - struct tid_info *ti, struct obj_idx *pidx, int *unresolved); -static void unref_rule_objects(struct ip_fw_chain *chain, struct ip_fw *rule); -static void unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, - struct obj_idx *oib, struct obj_idx *end); -static int export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx, - struct sockopt_data *sd); - -/* - * Opcode object rewriter variables - */ -struct opcode_obj_rewrite *ctl3_rewriters; -static size_t ctl3_rsize; - -/* - * static variables followed by global ones - */ - -static VNET_DEFINE(uma_zone_t, ipfw_cntr_zone); -#define V_ipfw_cntr_zone VNET(ipfw_cntr_zone) - -void -ipfw_init_counters() -{ - - V_ipfw_cntr_zone = uma_zcreate("IPFW counters", - IPFW_RULE_CNTR_SIZE, NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, UMA_ZONE_PCPU); -} - -void -ipfw_destroy_counters() -{ - - uma_zdestroy(V_ipfw_cntr_zone); -} - -struct ip_fw * -ipfw_alloc_rule(struct ip_fw_chain *chain, size_t rulesize) -{ - struct ip_fw *rule; - - rule = malloc(rulesize, M_IPFW, M_WAITOK | M_ZERO); - rule->cntr = uma_zalloc(V_ipfw_cntr_zone, M_WAITOK | M_ZERO); - - return (rule); -} - -static void -free_rule(struct ip_fw *rule) -{ - - uma_zfree(V_ipfw_cntr_zone, rule->cntr); - free(rule, M_IPFW); -} - - -/* - * Find the smallest rule >= key, id. 
- * We could use bsearch but it is so simple that we code it directly - */ -int -ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id) -{ - int i, lo, hi; - struct ip_fw *r; - - for (lo = 0, hi = chain->n_rules - 1; lo < hi;) { - i = (lo + hi) / 2; - r = chain->map[i]; - if (r->rulenum < key) - lo = i + 1; /* continue from the next one */ - else if (r->rulenum > key) - hi = i; /* this might be good */ - else if (r->id < id) - lo = i + 1; /* continue from the next one */ - else /* r->id >= id */ - hi = i; /* this might be good */ - } - return hi; -} - -/* - * Builds skipto cache on rule set @map. - */ -static void -update_skipto_cache(struct ip_fw_chain *chain, struct ip_fw **map) -{ - int *smap, rulenum; - int i, mi; - - IPFW_UH_WLOCK_ASSERT(chain); - - mi = 0; - rulenum = map[mi]->rulenum; - smap = chain->idxmap_back; - - if (smap == NULL) - return; - - for (i = 0; i < 65536; i++) { - smap[i] = mi; - /* Use the same rule index until i < rulenum */ - if (i != rulenum || i == 65535) - continue; - /* Find next rule with num > i */ - rulenum = map[++mi]->rulenum; - while (rulenum == i) - rulenum = map[++mi]->rulenum; - } -} - -/* - * Swaps prepared (backup) index with current one. - */ -static void -swap_skipto_cache(struct ip_fw_chain *chain) -{ - int *map; - - IPFW_UH_WLOCK_ASSERT(chain); - IPFW_WLOCK_ASSERT(chain); - - map = chain->idxmap; - chain->idxmap = chain->idxmap_back; - chain->idxmap_back = map; -} - -/* - * Allocate and initialize skipto cache. 
- */ -void -ipfw_init_skipto_cache(struct ip_fw_chain *chain) -{ - int *idxmap, *idxmap_back; - - idxmap = malloc(65536 * sizeof(uint32_t *), M_IPFW, - M_WAITOK | M_ZERO); - idxmap_back = malloc(65536 * sizeof(uint32_t *), M_IPFW, - M_WAITOK | M_ZERO); - - /* - * Note we may be called at any time after initialization, - * for example, on first skipto rule, so we need to - * provide valid chain->idxmap on return - */ - - IPFW_UH_WLOCK(chain); - if (chain->idxmap != NULL) { - IPFW_UH_WUNLOCK(chain); - free(idxmap, M_IPFW); - free(idxmap_back, M_IPFW); - return; - } - - /* Set backup pointer first to permit building cache */ - chain->idxmap_back = idxmap_back; - update_skipto_cache(chain, chain->map); - IPFW_WLOCK(chain); - /* It is now safe to set chain->idxmap ptr */ - chain->idxmap = idxmap; - swap_skipto_cache(chain); - IPFW_WUNLOCK(chain); - IPFW_UH_WUNLOCK(chain); -} - -/* - * Destroys skipto cache. - */ -void -ipfw_destroy_skipto_cache(struct ip_fw_chain *chain) -{ - - if (chain->idxmap != NULL) - free(chain->idxmap, M_IPFW); - if (chain->idxmap != NULL) - free(chain->idxmap_back, M_IPFW); -} - - -/* - * allocate a new map, returns the chain locked. extra is the number - * of entries to add or delete. - */ -static struct ip_fw ** -get_map(struct ip_fw_chain *chain, int extra, int locked) -{ - - for (;;) { - struct ip_fw **map; - int i, mflags; - - mflags = M_ZERO | ((locked != 0) ? M_NOWAIT : M_WAITOK); - - i = chain->n_rules + extra; - map = malloc(i * sizeof(struct ip_fw *), M_IPFW, mflags); - if (map == NULL) { - printf("%s: cannot allocate map\n", __FUNCTION__); - return NULL; - } - if (!locked) - IPFW_UH_WLOCK(chain); - if (i >= chain->n_rules + extra) /* good */ - return map; - /* otherwise we lost the race, free and retry */ - if (!locked) - IPFW_UH_WUNLOCK(chain); - free(map, M_IPFW); - } -} - -/* - * swap the maps. 
It is supposed to be called with IPFW_UH_WLOCK - */ -static struct ip_fw ** -swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len) -{ - struct ip_fw **old_map; - - IPFW_WLOCK(chain); - chain->id++; - chain->n_rules = new_len; - old_map = chain->map; - chain->map = new_map; - swap_skipto_cache(chain); - IPFW_WUNLOCK(chain); - return old_map; -} - - -static void -export_cntr1_base(struct ip_fw *krule, struct ip_fw_bcounter *cntr) -{ - struct timeval boottime; - - cntr->size = sizeof(*cntr); - - if (krule->cntr != NULL) { - cntr->pcnt = counter_u64_fetch(krule->cntr); - cntr->bcnt = counter_u64_fetch(krule->cntr + 1); - cntr->timestamp = krule->timestamp; - } - if (cntr->timestamp > 0) { - getboottime(&boottime); - cntr->timestamp += boottime.tv_sec; - } -} - -static void -export_cntr0_base(struct ip_fw *krule, struct ip_fw_bcounter0 *cntr) -{ - struct timeval boottime; - - if (krule->cntr != NULL) { - cntr->pcnt = counter_u64_fetch(krule->cntr); - cntr->bcnt = counter_u64_fetch(krule->cntr + 1); - cntr->timestamp = krule->timestamp; - } - if (cntr->timestamp > 0) { - getboottime(&boottime); - cntr->timestamp += boottime.tv_sec; - } -} - -/* - * Copies rule @urule from v1 userland format (current). - * to kernel @krule. - * Assume @krule is zeroed. - */ -static void -import_rule1(struct rule_check_info *ci) -{ - struct ip_fw_rule *urule; - struct ip_fw *krule; - - urule = (struct ip_fw_rule *)ci->urule; - krule = (struct ip_fw *)ci->krule; - - /* copy header */ - krule->act_ofs = urule->act_ofs; - krule->cmd_len = urule->cmd_len; - krule->rulenum = urule->rulenum; - krule->set = urule->set; - krule->flags = urule->flags; - - /* Save rulenum offset */ - ci->urule_numoff = offsetof(struct ip_fw_rule, rulenum); - - /* Copy opcodes */ - memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t)); -} - -/* - * Export rule into v1 format (Current). 
- * Layout: - * [ ipfw_obj_tlv(IPFW_TLV_RULE_ENT) - * [ ip_fw_rule ] OR - * [ ip_fw_bcounter ip_fw_rule] (depends on rcntrs). - * ] - * Assume @data is zeroed. - */ -static void -export_rule1(struct ip_fw *krule, caddr_t data, int len, int rcntrs) -{ - struct ip_fw_bcounter *cntr; - struct ip_fw_rule *urule; - ipfw_obj_tlv *tlv; - - /* Fill in TLV header */ - tlv = (ipfw_obj_tlv *)data; - tlv->type = IPFW_TLV_RULE_ENT; - tlv->length = len; - - if (rcntrs != 0) { - /* Copy counters */ - cntr = (struct ip_fw_bcounter *)(tlv + 1); - urule = (struct ip_fw_rule *)(cntr + 1); - export_cntr1_base(krule, cntr); - } else - urule = (struct ip_fw_rule *)(tlv + 1); - - /* copy header */ - urule->act_ofs = krule->act_ofs; - urule->cmd_len = krule->cmd_len; - urule->rulenum = krule->rulenum; - urule->set = krule->set; - urule->flags = krule->flags; - urule->id = krule->id; - - /* Copy opcodes */ - memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t)); -} - - -/* - * Copies rule @urule from FreeBSD8 userland format (v0) - * to kernel @krule. - * Assume @krule is zeroed. - */ -static void -import_rule0(struct rule_check_info *ci) -{ - struct ip_fw_rule0 *urule; - struct ip_fw *krule; - int cmdlen, l; - ipfw_insn *cmd; - ipfw_insn_limit *lcmd; - ipfw_insn_if *cmdif; - - urule = (struct ip_fw_rule0 *)ci->urule; - krule = (struct ip_fw *)ci->krule; - - /* copy header */ - krule->act_ofs = urule->act_ofs; - krule->cmd_len = urule->cmd_len; - krule->rulenum = urule->rulenum; - krule->set = urule->set; - if ((urule->_pad & 1) != 0) - krule->flags |= IPFW_RULE_NOOPT; - - /* Save rulenum offset */ - ci->urule_numoff = offsetof(struct ip_fw_rule0, rulenum); - - /* Copy opcodes */ - memcpy(krule->cmd, urule->cmd, krule->cmd_len * sizeof(uint32_t)); - - /* - * Alter opcodes: - * 1) convert tablearg value from 65535 to 0 - * 2) Add high bit to O_SETFIB/O_SETDSCP values (to make room - * for targ). 
- * 3) convert table number in iface opcodes to u16 - * 4) convert old `nat global` into new 65535 - */ - l = krule->cmd_len; - cmd = krule->cmd; - cmdlen = 0; - - for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - - switch (cmd->opcode) { - /* Opcodes supporting tablearg */ - case O_TAG: - case O_TAGGED: - case O_PIPE: - case O_QUEUE: - case O_DIVERT: - case O_TEE: - case O_SKIPTO: - case O_CALLRETURN: - case O_NETGRAPH: - case O_NGTEE: - case O_NAT: - if (cmd->arg1 == IP_FW_TABLEARG) - cmd->arg1 = IP_FW_TARG; - else if (cmd->arg1 == 0) - cmd->arg1 = IP_FW_NAT44_GLOBAL; - break; - case O_SETFIB: - case O_SETDSCP: - if (cmd->arg1 == IP_FW_TABLEARG) - cmd->arg1 = IP_FW_TARG; - else - cmd->arg1 |= 0x8000; - break; - case O_LIMIT: - lcmd = (ipfw_insn_limit *)cmd; - if (lcmd->conn_limit == IP_FW_TABLEARG) - lcmd->conn_limit = IP_FW_TARG; - break; - /* Interface tables */ - case O_XMIT: - case O_RECV: - case O_VIA: - /* Interface table, possibly */ - cmdif = (ipfw_insn_if *)cmd; - if (cmdif->name[0] != '\1') - break; - - cmdif->p.kidx = (uint16_t)cmdif->p.glob; - break; - } - } -} - -/* - * Copies rule @krule from kernel to FreeBSD8 userland format (v0) - */ -static void -export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len) -{ - int cmdlen, l; - ipfw_insn *cmd; - ipfw_insn_limit *lcmd; - ipfw_insn_if *cmdif; - - /* copy header */ - memset(urule, 0, len); - urule->act_ofs = krule->act_ofs; - urule->cmd_len = krule->cmd_len; - urule->rulenum = krule->rulenum; - urule->set = krule->set; - if ((krule->flags & IPFW_RULE_NOOPT) != 0) - urule->_pad |= 1; - - /* Copy opcodes */ - memcpy(urule->cmd, krule->cmd, krule->cmd_len * sizeof(uint32_t)); - - /* Export counters */ - export_cntr0_base(krule, (struct ip_fw_bcounter0 *)&urule->pcnt); - - /* - * Alter opcodes: - * 1) convert tablearg value from 0 to 65535 - * 2) Remove highest bit from O_SETFIB/O_SETDSCP values. 
- * 3) convert table number in iface opcodes to int - */ - l = urule->cmd_len; - cmd = urule->cmd; - cmdlen = 0; - - for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - - switch (cmd->opcode) { - /* Opcodes supporting tablearg */ - case O_TAG: - case O_TAGGED: - case O_PIPE: - case O_QUEUE: - case O_DIVERT: - case O_TEE: - case O_SKIPTO: - case O_CALLRETURN: - case O_NETGRAPH: - case O_NGTEE: - case O_NAT: - if (cmd->arg1 == IP_FW_TARG) - cmd->arg1 = IP_FW_TABLEARG; - else if (cmd->arg1 == IP_FW_NAT44_GLOBAL) - cmd->arg1 = 0; - break; - case O_SETFIB: - case O_SETDSCP: - if (cmd->arg1 == IP_FW_TARG) - cmd->arg1 = IP_FW_TABLEARG; - else - cmd->arg1 &= ~0x8000; - break; - case O_LIMIT: - lcmd = (ipfw_insn_limit *)cmd; - if (lcmd->conn_limit == IP_FW_TARG) - lcmd->conn_limit = IP_FW_TABLEARG; - break; - /* Interface tables */ - case O_XMIT: - case O_RECV: - case O_VIA: - /* Interface table, possibly */ - cmdif = (ipfw_insn_if *)cmd; - if (cmdif->name[0] != '\1') - break; - - cmdif->p.glob = cmdif->p.kidx; - break; - } - } -} - -/* - * Add new rule(s) to the list possibly creating rule number for each. - * Update the rule_number in the input struct so the caller knows it as well. - * Must be called without IPFW_UH held - */ -static int -commit_rules(struct ip_fw_chain *chain, struct rule_check_info *rci, int count) -{ - int error, i, insert_before, tcount; - uint16_t rulenum, *pnum; - struct rule_check_info *ci; - struct ip_fw *krule; - struct ip_fw **map; /* the new array of pointers */ - - /* Check if we need to do table/obj index remap */ - tcount = 0; - for (ci = rci, i = 0; i < count; ci++, i++) { - if (ci->object_opcodes == 0) - continue; - - /* - * Rule has some object opcodes. - * We need to find (and create non-existing) - * kernel objects, and reference existing ones. - */ - error = rewrite_rule_uidx(chain, ci); - if (error != 0) { - - /* - * rewrite failed, state for current rule - * has been reverted. Check if we need to - * revert more. 
- */ - if (tcount > 0) { - - /* - * We have some more table rules - * we need to rollback. - */ - - IPFW_UH_WLOCK(chain); - while (ci != rci) { - ci--; - if (ci->object_opcodes == 0) - continue; - unref_rule_objects(chain,ci->krule); - - } - IPFW_UH_WUNLOCK(chain); - - } - - return (error); - } - - tcount++; - } - - /* get_map returns with IPFW_UH_WLOCK if successful */ - map = get_map(chain, count, 0 /* not locked */); - if (map == NULL) { - if (tcount > 0) { - /* Unbind tables */ - IPFW_UH_WLOCK(chain); - for (ci = rci, i = 0; i < count; ci++, i++) { - if (ci->object_opcodes == 0) - continue; - - unref_rule_objects(chain, ci->krule); - } - IPFW_UH_WUNLOCK(chain); - } - - return (ENOSPC); - } - - if (V_autoinc_step < 1) - V_autoinc_step = 1; - else if (V_autoinc_step > 1000) - V_autoinc_step = 1000; - - /* FIXME: Handle count > 1 */ - ci = rci; - krule = ci->krule; - rulenum = krule->rulenum; - - /* find the insertion point, we will insert before */ - insert_before = rulenum ? rulenum + 1 : IPFW_DEFAULT_RULE; - i = ipfw_find_rule(chain, insert_before, 0); - /* duplicate first part */ - if (i > 0) - bcopy(chain->map, map, i * sizeof(struct ip_fw *)); - map[i] = krule; - /* duplicate remaining part, we always have the default rule */ - bcopy(chain->map + i, map + i + 1, - sizeof(struct ip_fw *) *(chain->n_rules - i)); - if (rulenum == 0) { - /* Compute rule number and write it back */ - rulenum = i > 0 ? 
map[i-1]->rulenum : 0; - if (rulenum < IPFW_DEFAULT_RULE - V_autoinc_step) - rulenum += V_autoinc_step; - krule->rulenum = rulenum; - /* Save number to userland rule */ - pnum = (uint16_t *)((caddr_t)ci->urule + ci->urule_numoff); - *pnum = rulenum; - } - - krule->id = chain->id + 1; - update_skipto_cache(chain, map); - map = swap_map(chain, map, chain->n_rules + 1); - chain->static_len += RULEUSIZE0(krule); - IPFW_UH_WUNLOCK(chain); - if (map) - free(map, M_IPFW); - return (0); -} - -/* - * Adds @rule to the list of rules to reap - */ -void -ipfw_reap_add(struct ip_fw_chain *chain, struct ip_fw **head, - struct ip_fw *rule) -{ - - IPFW_UH_WLOCK_ASSERT(chain); - - /* Unlink rule from everywhere */ - unref_rule_objects(chain, rule); - - *((struct ip_fw **)rule) = *head; - *head = rule; -} - -/* - * Reclaim storage associated with a list of rules. This is - * typically the list created using remove_rule. - * A NULL pointer on input is handled correctly. - */ -void -ipfw_reap_rules(struct ip_fw *head) -{ - struct ip_fw *rule; - - while ((rule = head) != NULL) { - head = *((struct ip_fw **)head); - free_rule(rule); - } -} - -/* - * Rules to keep are - * (default || reserved || !match_set || !match_number) - * where - * default ::= (rule->rulenum == IPFW_DEFAULT_RULE) - * // the default rule is always protected - * - * reserved ::= (cmd == 0 && n == 0 && rule->set == RESVD_SET) - * // RESVD_SET is protected only if cmd == 0 and n == 0 ("ipfw flush") - * - * match_set ::= (cmd == 0 || rule->set == set) - * // set number is ignored for cmd == 0 - * - * match_number ::= (cmd == 1 || n == 0 || n == rule->rulenum) - * // number is ignored for cmd == 1 or n == 0 - * - */ -int -ipfw_match_range(struct ip_fw *rule, ipfw_range_tlv *rt) -{ - - /* Don't match default rule for modification queries */ - if (rule->rulenum == IPFW_DEFAULT_RULE && - (rt->flags & IPFW_RCFLAG_DEFAULT) == 0) - return (0); - - /* Don't match rules in reserved set for flush requests */ - if ((rt->flags & 
IPFW_RCFLAG_ALL) != 0 && rule->set == RESVD_SET) - return (0); - - /* If we're filtering by set, don't match other sets */ - if ((rt->flags & IPFW_RCFLAG_SET) != 0 && rule->set != rt->set) - return (0); - - if ((rt->flags & IPFW_RCFLAG_RANGE) != 0 && - (rule->rulenum < rt->start_rule || rule->rulenum > rt->end_rule)) - return (0); - - return (1); -} - -struct manage_sets_args { - uint16_t set; - uint8_t new_set; -}; - -static int -swap_sets_cb(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - struct manage_sets_args *args; - - args = (struct manage_sets_args *)arg; - if (no->set == (uint8_t)args->set) - no->set = args->new_set; - else if (no->set == args->new_set) - no->set = (uint8_t)args->set; - return (0); -} - -static int -move_sets_cb(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - struct manage_sets_args *args; - - args = (struct manage_sets_args *)arg; - if (no->set == (uint8_t)args->set) - no->set = args->new_set; - return (0); -} - -static int -test_sets_cb(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - struct manage_sets_args *args; - - args = (struct manage_sets_args *)arg; - if (no->set != (uint8_t)args->set) - return (0); - if (ipfw_objhash_lookup_name_type(ni, args->new_set, - no->etlv, no->name) != NULL) - return (EEXIST); - return (0); -} - -/* - * Generic function to handler moving and swapping sets. - */ -int -ipfw_obj_manage_sets(struct namedobj_instance *ni, uint16_t type, - uint16_t set, uint8_t new_set, enum ipfw_sets_cmd cmd) -{ - struct manage_sets_args args; - struct named_object *no; - - args.set = set; - args.new_set = new_set; - switch (cmd) { - case SWAP_ALL: - return (ipfw_objhash_foreach_type(ni, swap_sets_cb, - &args, type)); - case TEST_ALL: - return (ipfw_objhash_foreach_type(ni, test_sets_cb, - &args, type)); - case MOVE_ALL: - return (ipfw_objhash_foreach_type(ni, move_sets_cb, - &args, type)); - case COUNT_ONE: - /* - * @set used to pass kidx. 
- * When @new_set is zero - reset object counter, - * otherwise increment it. - */ - no = ipfw_objhash_lookup_kidx(ni, set); - if (new_set != 0) - no->ocnt++; - else - no->ocnt = 0; - return (0); - case TEST_ONE: - /* @set used to pass kidx */ - no = ipfw_objhash_lookup_kidx(ni, set); - /* - * First check number of references: - * when it differs, this mean other rules are holding - * reference to given object, so it is not possible to - * change its set. Note that refcnt may account references - * to some going-to-be-added rules. Since we don't know - * their numbers (and even if they will be added) it is - * perfectly OK to return error here. - */ - if (no->ocnt != no->refcnt) - return (EBUSY); - if (ipfw_objhash_lookup_name_type(ni, new_set, type, - no->name) != NULL) - return (EEXIST); - return (0); - case MOVE_ONE: - /* @set used to pass kidx */ - no = ipfw_objhash_lookup_kidx(ni, set); - no->set = new_set; - return (0); - } - return (EINVAL); -} - -/* - * Delete rules matching range @rt. - * Saves number of deleted rules in @ndel. - * - * Returns 0 on success. - */ -static int -delete_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int *ndel) -{ - struct ip_fw *reap, *rule, **map; - int end, start; - int i, n, ndyn, ofs; - - reap = NULL; - IPFW_UH_WLOCK(chain); /* arbitrate writers */ - - /* - * Stage 1: Determine range to inspect. - * Range is half-inclusive, e.g [start, end). - */ - start = 0; - end = chain->n_rules - 1; - - if ((rt->flags & IPFW_RCFLAG_RANGE) != 0) { - start = ipfw_find_rule(chain, rt->start_rule, 0); - - end = ipfw_find_rule(chain, rt->end_rule, 0); - if (rt->end_rule != IPFW_DEFAULT_RULE) - while (chain->map[end]->rulenum == rt->end_rule) - end++; - } - - /* Allocate new map of the same size */ - map = get_map(chain, 0, 1 /* locked */); - if (map == NULL) { - IPFW_UH_WUNLOCK(chain); - return (ENOMEM); - } - - n = 0; - ndyn = 0; - ofs = start; - /* 1. 
bcopy the initial part of the map */ - if (start > 0) - bcopy(chain->map, map, start * sizeof(struct ip_fw *)); - /* 2. copy active rules between start and end */ - for (i = start; i < end; i++) { - rule = chain->map[i]; - if (ipfw_match_range(rule, rt) == 0) { - map[ofs++] = rule; - continue; - } - - n++; - if (ipfw_is_dyn_rule(rule) != 0) - ndyn++; - } - /* 3. copy the final part of the map */ - bcopy(chain->map + end, map + ofs, - (chain->n_rules - end) * sizeof(struct ip_fw *)); - /* 4. recalculate skipto cache */ - update_skipto_cache(chain, map); - /* 5. swap the maps (under UH_WLOCK + WHLOCK) */ - map = swap_map(chain, map, chain->n_rules - n); - /* 6. Remove all dynamic states originated by deleted rules */ - if (ndyn > 0) - ipfw_expire_dyn_rules(chain, rt); - /* 7. now remove the rules deleted from the old map */ - for (i = start; i < end; i++) { - rule = map[i]; - if (ipfw_match_range(rule, rt) == 0) - continue; - chain->static_len -= RULEUSIZE0(rule); - ipfw_reap_add(chain, &reap, rule); - } - IPFW_UH_WUNLOCK(chain); - - ipfw_reap_rules(reap); - if (map != NULL) - free(map, M_IPFW); - *ndel = n; - return (0); -} - -static int -move_objects(struct ip_fw_chain *ch, ipfw_range_tlv *rt) -{ - struct opcode_obj_rewrite *rw; - struct ip_fw *rule; - ipfw_insn *cmd; - int cmdlen, i, l, c; - uint16_t kidx; - - IPFW_UH_WLOCK_ASSERT(ch); - - /* Stage 1: count number of references by given rules */ - for (c = 0, i = 0; i < ch->n_rules - 1; i++) { - rule = ch->map[i]; - if (ipfw_match_range(rule, rt) == 0) - continue; - if (rule->set == rt->new_set) /* nothing to do */ - continue; - /* Search opcodes with named objects */ - for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd; - l > 0; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - rw = find_op_rw(cmd, &kidx, NULL); - if (rw == NULL || rw->manage_sets == NULL) - continue; - /* - * When manage_sets() returns non-zero value to - * COUNT_ONE command, consider this as an object - * doesn't support sets (e.g. 
disabled with sysctl). - * So, skip checks for this object. - */ - if (rw->manage_sets(ch, kidx, 1, COUNT_ONE) != 0) - continue; - c++; - } - } - if (c == 0) /* No objects found */ - return (0); - /* Stage 2: verify "ownership" */ - for (c = 0, i = 0; (i < ch->n_rules - 1) && c == 0; i++) { - rule = ch->map[i]; - if (ipfw_match_range(rule, rt) == 0) - continue; - if (rule->set == rt->new_set) /* nothing to do */ - continue; - /* Search opcodes with named objects */ - for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd; - l > 0 && c == 0; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - rw = find_op_rw(cmd, &kidx, NULL); - if (rw == NULL || rw->manage_sets == NULL) - continue; - /* Test for ownership and conflicting names */ - c = rw->manage_sets(ch, kidx, - (uint8_t)rt->new_set, TEST_ONE); - } - } - /* Stage 3: change set and cleanup */ - for (i = 0; i < ch->n_rules - 1; i++) { - rule = ch->map[i]; - if (ipfw_match_range(rule, rt) == 0) - continue; - if (rule->set == rt->new_set) /* nothing to do */ - continue; - /* Search opcodes with named objects */ - for (l = rule->cmd_len, cmdlen = 0, cmd = rule->cmd; - l > 0; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - rw = find_op_rw(cmd, &kidx, NULL); - if (rw == NULL || rw->manage_sets == NULL) - continue; - /* cleanup object counter */ - rw->manage_sets(ch, kidx, - 0 /* reset counter */, COUNT_ONE); - if (c != 0) - continue; - /* change set */ - rw->manage_sets(ch, kidx, - (uint8_t)rt->new_set, MOVE_ONE); - } - } - return (c); -}/* - * Changes set of given rule rannge @rt - * with each other. - * - * Returns 0 on success. - */ -static int -move_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt) -{ - struct ip_fw *rule; - int i; - - IPFW_UH_WLOCK(chain); - - /* - * Move rules with matching paramenerts to a new set. - * This one is much more complex. We have to ensure - * that all referenced tables (if any) are referenced - * by given rule subset only. 
Otherwise, we can't move - * them to new set and have to return error. - */ - if ((i = move_objects(chain, rt)) != 0) { - IPFW_UH_WUNLOCK(chain); - return (i); - } - - /* XXX: We have to do swap holding WLOCK */ - for (i = 0; i < chain->n_rules; i++) { - rule = chain->map[i]; - if (ipfw_match_range(rule, rt) == 0) - continue; - rule->set = rt->new_set; - } - - IPFW_UH_WUNLOCK(chain); - - return (0); -} - -/* - * Clear counters for a specific rule. - * Normally run under IPFW_UH_RLOCK, but these are idempotent ops - * so we only care that rules do not disappear. - */ -static void -clear_counters(struct ip_fw *rule, int log_only) -{ - ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); - - if (log_only == 0) - IPFW_ZERO_RULE_COUNTER(rule); - if (l->o.opcode == O_LOG) - l->log_left = l->max_log; -} - -/* - * Flushes rules counters and/or log values on matching range. - * - * Returns number of items cleared. - */ -static int -clear_range(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int log_only) -{ - struct ip_fw *rule; - int num; - int i; - - num = 0; - rt->flags |= IPFW_RCFLAG_DEFAULT; - - IPFW_UH_WLOCK(chain); /* arbitrate writers */ - for (i = 0; i < chain->n_rules; i++) { - rule = chain->map[i]; - if (ipfw_match_range(rule, rt) == 0) - continue; - clear_counters(rule, log_only); - num++; - } - IPFW_UH_WUNLOCK(chain); - - return (num); -} - -static int -check_range_tlv(ipfw_range_tlv *rt) -{ - - if (rt->head.length != sizeof(*rt)) - return (1); - if (rt->start_rule > rt->end_rule) - return (1); - if (rt->set >= IPFW_MAX_SETS || rt->new_set >= IPFW_MAX_SETS) - return (1); - - if ((rt->flags & IPFW_RCFLAG_USER) != rt->flags) - return (1); - - return (0); -} - -/* - * Delete rules matching specified parameters - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_range_tlv ] - * Reply: [ ipfw_obj_header ipfw_range_tlv ] - * - * Saves number of deleted rules in ipfw_range_tlv->new_set. - * - * Returns 0 on success. 
- */ -static int -del_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_range_header *rh; - int error, ndel; - - if (sd->valsize != sizeof(*rh)) - return (EINVAL); - - rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); - - if (check_range_tlv(&rh->range) != 0) - return (EINVAL); - - ndel = 0; - if ((error = delete_range(chain, &rh->range, &ndel)) != 0) - return (error); - - /* Save number of rules deleted */ - rh->range.new_set = ndel; - return (0); -} - -/* - * Move rules/sets matching specified parameters - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_range_tlv ] - * - * Returns 0 on success. - */ -static int -move_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_range_header *rh; - - if (sd->valsize != sizeof(*rh)) - return (EINVAL); - - rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); - - if (check_range_tlv(&rh->range) != 0) - return (EINVAL); - - return (move_range(chain, &rh->range)); -} - -/* - * Clear rule accounting data matching specified parameters - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_range_tlv ] - * Reply: [ ipfw_obj_header ipfw_range_tlv ] - * - * Saves number of cleared rules in ipfw_range_tlv->new_set. - * - * Returns 0 on success. - */ -static int -clear_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_range_header *rh; - int log_only, num; - char *msg; - - if (sd->valsize != sizeof(*rh)) - return (EINVAL); - - rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); - - if (check_range_tlv(&rh->range) != 0) - return (EINVAL); - - log_only = (op3->opcode == IP_FW_XRESETLOG); - - num = clear_range(chain, &rh->range, log_only); - - if (rh->range.flags & IPFW_RCFLAG_ALL) - msg = log_only ? "All logging counts reset" : - "Accounting cleared"; - else - msg = log_only ? 
"logging count reset" : "cleared"; - - if (V_fw_verbose) { - int lev = LOG_SECURITY | LOG_NOTICE; - log(lev, "ipfw: %s.\n", msg); - } - - /* Save number of rules cleared */ - rh->range.new_set = num; - return (0); -} - -static void -enable_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt) -{ - uint32_t v_set; - - IPFW_UH_WLOCK_ASSERT(chain); - - /* Change enabled/disabled sets mask */ - v_set = (V_set_disable | rt->set) & ~rt->new_set; - v_set &= ~(1 << RESVD_SET); /* set RESVD_SET always enabled */ - IPFW_WLOCK(chain); - V_set_disable = v_set; - IPFW_WUNLOCK(chain); -} - -static int -swap_sets(struct ip_fw_chain *chain, ipfw_range_tlv *rt, int mv) -{ - struct opcode_obj_rewrite *rw; - struct ip_fw *rule; - int i; - - IPFW_UH_WLOCK_ASSERT(chain); - - if (rt->set == rt->new_set) /* nothing to do */ - return (0); - - if (mv != 0) { - /* - * Berfore moving the rules we need to check that - * there aren't any conflicting named objects. - */ - for (rw = ctl3_rewriters; - rw < ctl3_rewriters + ctl3_rsize; rw++) { - if (rw->manage_sets == NULL) - continue; - i = rw->manage_sets(chain, (uint8_t)rt->set, - (uint8_t)rt->new_set, TEST_ALL); - if (i != 0) - return (EEXIST); - } - } - /* Swap or move two sets */ - for (i = 0; i < chain->n_rules - 1; i++) { - rule = chain->map[i]; - if (rule->set == (uint8_t)rt->set) - rule->set = (uint8_t)rt->new_set; - else if (rule->set == (uint8_t)rt->new_set && mv == 0) - rule->set = (uint8_t)rt->set; - } - for (rw = ctl3_rewriters; rw < ctl3_rewriters + ctl3_rsize; rw++) { - if (rw->manage_sets == NULL) - continue; - rw->manage_sets(chain, (uint8_t)rt->set, - (uint8_t)rt->new_set, mv != 0 ? MOVE_ALL: SWAP_ALL); - } - return (0); -} - -/* - * Swaps or moves set - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_range_tlv ] - * - * Returns 0 on success. 
- */ -static int -manage_sets(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_range_header *rh; - int ret; - - if (sd->valsize != sizeof(*rh)) - return (EINVAL); - - rh = (ipfw_range_header *)ipfw_get_sopt_space(sd, sd->valsize); - - if (rh->range.head.length != sizeof(ipfw_range_tlv)) - return (1); - /* enable_sets() expects bitmasks. */ - if (op3->opcode != IP_FW_SET_ENABLE && - (rh->range.set >= IPFW_MAX_SETS || - rh->range.new_set >= IPFW_MAX_SETS)) - return (EINVAL); - - ret = 0; - IPFW_UH_WLOCK(chain); - switch (op3->opcode) { - case IP_FW_SET_SWAP: - case IP_FW_SET_MOVE: - ret = swap_sets(chain, &rh->range, - op3->opcode == IP_FW_SET_MOVE); - break; - case IP_FW_SET_ENABLE: - enable_sets(chain, &rh->range); - break; - } - IPFW_UH_WUNLOCK(chain); - - return (ret); -} - -/** - * Remove all rules with given number, or do set manipulation. - * Assumes chain != NULL && *chain != NULL. - * - * The argument is an uint32_t. The low 16 bit are the rule or set number; - * the next 8 bits are the new set; the top 8 bits indicate the command: - * - * 0 delete rules numbered "rulenum" - * 1 delete rules in set "rulenum" - * 2 move rules "rulenum" to set "new_set" - * 3 move rules from set "rulenum" to set "new_set" - * 4 swap sets "rulenum" and "new_set" - * 5 delete rules "rulenum" and set "new_set" - */ -static int -del_entry(struct ip_fw_chain *chain, uint32_t arg) -{ - uint32_t num; /* rule number or old_set */ - uint8_t cmd, new_set; - int do_del, ndel; - int error = 0; - ipfw_range_tlv rt; - - num = arg & 0xffff; - cmd = (arg >> 24) & 0xff; - new_set = (arg >> 16) & 0xff; - - if (cmd > 5 || new_set > RESVD_SET) - return EINVAL; - if (cmd == 0 || cmd == 2 || cmd == 5) { - if (num >= IPFW_DEFAULT_RULE) - return EINVAL; - } else { - if (num > RESVD_SET) /* old_set */ - return EINVAL; - } - - /* Convert old requests into new representation */ - memset(&rt, 0, sizeof(rt)); - rt.start_rule = num; - rt.end_rule = num; - rt.set = num; 
- rt.new_set = new_set; - do_del = 0; - - switch (cmd) { - case 0: /* delete rules numbered "rulenum" */ - if (num == 0) - rt.flags |= IPFW_RCFLAG_ALL; - else - rt.flags |= IPFW_RCFLAG_RANGE; - do_del = 1; - break; - case 1: /* delete rules in set "rulenum" */ - rt.flags |= IPFW_RCFLAG_SET; - do_del = 1; - break; - case 5: /* delete rules "rulenum" and set "new_set" */ - rt.flags |= IPFW_RCFLAG_RANGE | IPFW_RCFLAG_SET; - rt.set = new_set; - rt.new_set = 0; - do_del = 1; - break; - case 2: /* move rules "rulenum" to set "new_set" */ - rt.flags |= IPFW_RCFLAG_RANGE; - break; - case 3: /* move rules from set "rulenum" to set "new_set" */ - IPFW_UH_WLOCK(chain); - error = swap_sets(chain, &rt, 1); - IPFW_UH_WUNLOCK(chain); - return (error); - case 4: /* swap sets "rulenum" and "new_set" */ - IPFW_UH_WLOCK(chain); - error = swap_sets(chain, &rt, 0); - IPFW_UH_WUNLOCK(chain); - return (error); - default: - return (ENOTSUP); - } - - if (do_del != 0) { - if ((error = delete_range(chain, &rt, &ndel)) != 0) - return (error); - - if (ndel == 0 && (cmd != 1 && num != 0)) - return (EINVAL); - - return (0); - } - - return (move_range(chain, &rt)); -} - -/** - * Reset some or all counters on firewall rules. - * The argument `arg' is an u_int32_t. The low 16 bit are the rule number, - * the next 8 bits are the set number, the top 8 bits are the command: - * 0 work with rules from all set's; - * 1 work with rules only from specified set. - * Specified rule number is zero if we want to clear all entries. - * log_only is 1 if we only want to reset logs, zero otherwise. 
- */ -static int -zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) -{ - struct ip_fw *rule; - char *msg; - int i; - - uint16_t rulenum = arg & 0xffff; - uint8_t set = (arg >> 16) & 0xff; - uint8_t cmd = (arg >> 24) & 0xff; - - if (cmd > 1) - return (EINVAL); - if (cmd == 1 && set > RESVD_SET) - return (EINVAL); - - IPFW_UH_RLOCK(chain); - if (rulenum == 0) { - V_norule_counter = 0; - for (i = 0; i < chain->n_rules; i++) { - rule = chain->map[i]; - /* Skip rules not in our set. */ - if (cmd == 1 && rule->set != set) - continue; - clear_counters(rule, log_only); - } - msg = log_only ? "All logging counts reset" : - "Accounting cleared"; - } else { - int cleared = 0; - for (i = 0; i < chain->n_rules; i++) { - rule = chain->map[i]; - if (rule->rulenum == rulenum) { - if (cmd == 0 || rule->set == set) - clear_counters(rule, log_only); - cleared = 1; - } - if (rule->rulenum > rulenum) - break; - } - if (!cleared) { /* we did not find any matching rules */ - IPFW_UH_RUNLOCK(chain); - return (EINVAL); - } - msg = log_only ? 
"logging count reset" : "cleared"; - } - IPFW_UH_RUNLOCK(chain); - - if (V_fw_verbose) { - int lev = LOG_SECURITY | LOG_NOTICE; - - if (rulenum) - log(lev, "ipfw: Entry %d %s.\n", rulenum, msg); - else - log(lev, "ipfw: %s.\n", msg); - } - return (0); -} - - -/* - * Check rule head in FreeBSD11 format - * - */ -static int -check_ipfw_rule1(struct ip_fw_rule *rule, int size, - struct rule_check_info *ci) -{ - int l; - - if (size < sizeof(*rule)) { - printf("ipfw: rule too short\n"); - return (EINVAL); - } - - /* Check for valid cmd_len */ - l = roundup2(RULESIZE(rule), sizeof(uint64_t)); - if (l != size) { - printf("ipfw: size mismatch (have %d want %d)\n", size, l); - return (EINVAL); - } - if (rule->act_ofs >= rule->cmd_len) { - printf("ipfw: bogus action offset (%u > %u)\n", - rule->act_ofs, rule->cmd_len - 1); - return (EINVAL); - } - - if (rule->rulenum > IPFW_DEFAULT_RULE - 1) - return (EINVAL); - - return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); -} - -/* - * Check rule head in FreeBSD8 format - * - */ -static int -check_ipfw_rule0(struct ip_fw_rule0 *rule, int size, - struct rule_check_info *ci) -{ - int l; - - if (size < sizeof(*rule)) { - printf("ipfw: rule too short\n"); - return (EINVAL); - } - - /* Check for valid cmd_len */ - l = sizeof(*rule) + rule->cmd_len * 4 - 4; - if (l != size) { - printf("ipfw: size mismatch (have %d want %d)\n", size, l); - return (EINVAL); - } - if (rule->act_ofs >= rule->cmd_len) { - printf("ipfw: bogus action offset (%u > %u)\n", - rule->act_ofs, rule->cmd_len - 1); - return (EINVAL); - } - - if (rule->rulenum > IPFW_DEFAULT_RULE - 1) - return (EINVAL); - - return (check_ipfw_rule_body(rule->cmd, rule->cmd_len, ci)); -} - -static int -check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci) -{ - int cmdlen, l; - int have_action; - - have_action = 0; - - /* - * Now go for the individual checks. Very simple ones, basically only - * instruction sizes. 
- */ - for (l = cmd_len; l > 0 ; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - if (cmdlen > l) { - printf("ipfw: opcode %d size truncated\n", - cmd->opcode); - return EINVAL; - } - switch (cmd->opcode) { - case O_PROBE_STATE: - case O_KEEP_STATE: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; - ci->object_opcodes++; - break; - case O_PROTO: - case O_IP_SRC_ME: - case O_IP_DST_ME: - case O_LAYER2: - case O_IN: - case O_FRAG: - case O_DIVERTED: - case O_IPOPT: - case O_IPTOS: - case O_IPPRECEDENCE: - case O_IPVER: - case O_SOCKARG: - case O_TCPFLAGS: - case O_TCPOPTS: - case O_ESTAB: - case O_VERREVPATH: - case O_VERSRCREACH: - case O_ANTISPOOF: - case O_IPSEC: -#ifdef INET6 - case O_IP6_SRC_ME: - case O_IP6_DST_ME: - case O_EXT_HDR: - case O_IP6: -#endif - case O_IP4: - case O_TAG: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; - break; - - case O_EXTERNAL_ACTION: - if (cmd->arg1 == 0 || - cmdlen != F_INSN_SIZE(ipfw_insn)) { - printf("ipfw: invalid external " - "action opcode\n"); - return (EINVAL); - } - ci->object_opcodes++; - /* - * Do we have O_EXTERNAL_INSTANCE or O_EXTERNAL_DATA - * opcode? 
- */ - if (l != cmdlen) { - l -= cmdlen; - cmd += cmdlen; - cmdlen = F_LEN(cmd); - if (cmd->opcode == O_EXTERNAL_DATA) - goto check_action; - if (cmd->opcode != O_EXTERNAL_INSTANCE) { - printf("ipfw: invalid opcode " - "next to external action %u\n", - cmd->opcode); - return (EINVAL); - } - if (cmd->arg1 == 0 || - cmdlen != F_INSN_SIZE(ipfw_insn)) { - printf("ipfw: invalid external " - "action instance opcode\n"); - return (EINVAL); - } - ci->object_opcodes++; - } - goto check_action; - - case O_FIB: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; - if (cmd->arg1 >= rt_numfibs) { - printf("ipfw: invalid fib number %d\n", - cmd->arg1); - return EINVAL; - } - break; - - case O_SETFIB: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; - if ((cmd->arg1 != IP_FW_TARG) && - ((cmd->arg1 & 0x7FFF) >= rt_numfibs)) { - printf("ipfw: invalid fib number %d\n", - cmd->arg1 & 0x7FFF); - return EINVAL; - } - goto check_action; - - case O_UID: - case O_GID: - case O_JAIL: - case O_IP_SRC: - case O_IP_DST: - case O_TCPSEQ: - case O_TCPACK: - case O_PROB: - case O_ICMPTYPE: - if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) - goto bad_size; - break; - - case O_LIMIT: - if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) - goto bad_size; - ci->object_opcodes++; - break; - - case O_LOG: - if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) - goto bad_size; - - ((ipfw_insn_log *)cmd)->log_left = - ((ipfw_insn_log *)cmd)->max_log; - - break; - - case O_IP_SRC_MASK: - case O_IP_DST_MASK: - /* only odd command lengths */ - if ((cmdlen & 1) == 0) - goto bad_size; - break; - - case O_IP_SRC_SET: - case O_IP_DST_SET: - if (cmd->arg1 == 0 || cmd->arg1 > 256) { - printf("ipfw: invalid set size %d\n", - cmd->arg1); - return EINVAL; - } - if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + - (cmd->arg1+31)/32 ) - goto bad_size; - break; - - case O_IP_SRC_LOOKUP: - if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) - goto bad_size; - case O_IP_DST_LOOKUP: - if (cmd->arg1 >= V_fw_tables_max) { - printf("ipfw: invalid table 
number %d\n", - cmd->arg1); - return (EINVAL); - } - if (cmdlen != F_INSN_SIZE(ipfw_insn) && - cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 && - cmdlen != F_INSN_SIZE(ipfw_insn_u32)) - goto bad_size; - ci->object_opcodes++; - break; - case O_IP_FLOW_LOOKUP: - if (cmd->arg1 >= V_fw_tables_max) { - printf("ipfw: invalid table number %d\n", - cmd->arg1); - return (EINVAL); - } - if (cmdlen != F_INSN_SIZE(ipfw_insn) && - cmdlen != F_INSN_SIZE(ipfw_insn_u32)) - goto bad_size; - ci->object_opcodes++; - break; - case O_MACADDR2: - if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) - goto bad_size; - break; - - case O_NOP: - case O_IPID: - case O_IPTTL: - case O_IPLEN: - case O_TCPDATALEN: - case O_TCPWIN: - case O_TAGGED: - if (cmdlen < 1 || cmdlen > 31) - goto bad_size; - break; - - case O_DSCP: - if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1) - goto bad_size; - break; - - case O_MAC_TYPE: - case O_IP_SRCPORT: - case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ - if (cmdlen < 2 || cmdlen > 31) - goto bad_size; - break; - - case O_RECV: - case O_XMIT: - case O_VIA: - if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) - goto bad_size; - ci->object_opcodes++; - break; - - case O_ALTQ: - if (cmdlen != F_INSN_SIZE(ipfw_insn_altq)) - goto bad_size; - break; - - case O_PIPE: - case O_QUEUE: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; - goto check_action; - - case O_FORWARD_IP: - if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) - goto bad_size; - goto check_action; -#ifdef INET6 - case O_FORWARD_IP6: - if (cmdlen != F_INSN_SIZE(ipfw_insn_sa6)) - goto bad_size; - goto check_action; -#endif /* INET6 */ - - case O_DIVERT: - case O_TEE: - if (ip_divert_ptr == NULL) - return EINVAL; - else - goto check_size; - case O_NETGRAPH: - case O_NGTEE: - if (ng_ipfw_input_p == NULL) - return EINVAL; - else - goto check_size; - case O_NAT: - if (!IPFW_NAT_LOADED) - return EINVAL; - if (cmdlen != F_INSN_SIZE(ipfw_insn_nat)) - goto bad_size; - goto check_action; - case O_CHECK_STATE: - 
ci->object_opcodes++; - /* FALLTHROUGH */ - case O_FORWARD_MAC: /* XXX not implemented yet */ - case O_COUNT: - case O_ACCEPT: - case O_DENY: - case O_REJECT: - case O_SETDSCP: -#ifdef INET6 - case O_UNREACH6: -#endif - case O_SKIPTO: - case O_REASS: - case O_CALLRETURN: -check_size: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; -check_action: - if (have_action) { - printf("ipfw: opcode %d, multiple actions" - " not allowed\n", - cmd->opcode); - return (EINVAL); - } - have_action = 1; - if (l != cmdlen) { - printf("ipfw: opcode %d, action must be" - " last opcode\n", - cmd->opcode); - return (EINVAL); - } - break; -#ifdef INET6 - case O_IP6_SRC: - case O_IP6_DST: - if (cmdlen != F_INSN_SIZE(struct in6_addr) + - F_INSN_SIZE(ipfw_insn)) - goto bad_size; - break; - - case O_FLOW6ID: - if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + - ((ipfw_insn_u32 *)cmd)->o.arg1) - goto bad_size; - break; - - case O_IP6_SRC_MASK: - case O_IP6_DST_MASK: - if ( !(cmdlen & 1) || cmdlen > 127) - goto bad_size; - break; - case O_ICMP6TYPE: - if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) ) - goto bad_size; - break; -#endif - - default: - switch (cmd->opcode) { -#ifndef INET6 - case O_IP6_SRC_ME: - case O_IP6_DST_ME: - case O_EXT_HDR: - case O_IP6: - case O_UNREACH6: - case O_IP6_SRC: - case O_IP6_DST: - case O_FLOW6ID: - case O_IP6_SRC_MASK: - case O_IP6_DST_MASK: - case O_ICMP6TYPE: - printf("ipfw: no IPv6 support in kernel\n"); - return (EPROTONOSUPPORT); -#endif - default: - printf("ipfw: opcode %d, unknown opcode\n", - cmd->opcode); - return (EINVAL); - } - } - } - if (have_action == 0) { - printf("ipfw: missing action\n"); - return (EINVAL); - } - return 0; - -bad_size: - printf("ipfw: opcode %d size %d wrong\n", - cmd->opcode, cmdlen); - return (EINVAL); -} - - -/* - * Translation of requests for compatibility with FreeBSD 7.2/8. 
 * a static variable tells us if we have an old client from userland,
 * and if necessary we translate requests and responses between the
 * two formats.
 */
static int is7 = 0;

/*
 * Rule header in the legacy layout; ipfw_getrules() casts its output
 * buffer to this type when is7 is set.
 */
struct ip_fw7 {
	struct ip_fw7	*next;		/* linked list of rules */
	struct ip_fw7	*next_rule;	/* ptr to next [skipto] rule */
	/* 'next_rule' is used to pass up 'set_disable' status */

	uint16_t	act_ofs;	/* offset of action in 32-bit units */
	uint16_t	cmd_len;	/* # of 32-bit words in cmd */
	uint16_t	rulenum;	/* rule number */
	uint8_t		set;		/* rule set (0..31) */
	/* Disabled: #define RESVD_SET 31 (set for default and persistent rules) */
	uint8_t		_pad;		/* padding */
	/* Disabled: uint32_t id (rule id, only in v.8) */
	/* These fields are present in all rules. */
	uint64_t	pcnt;		/* Packet counter */
	uint64_t	bcnt;		/* Byte counter */
	uint32_t	timestamp;	/* tv_sec of last match */

	ipfw_insn	cmd[1];		/* storage for commands */
};

/* Converters between the current rule layout and the legacy ones. */
static int convert_rule_to_7(struct ip_fw_rule0 *rule);
static int convert_rule_to_8(struct ip_fw_rule0 *rule);

/*
 * Size in bytes of a legacy rule: the fixed header plus cmd_len 32-bit
 * words.  NOTE(review): the trailing "- 4" presumably compensates for the
 * one-element cmd[] already counted by sizeof -- confirm that
 * sizeof(ipfw_insn) is 4.
 */
#ifndef RULESIZE7
#define RULESIZE7(rule)	(sizeof(struct ip_fw7) + \
	((struct ip_fw7 *)(rule))->cmd_len * 4 - 4)
#endif


/*
 * Copy the static and dynamic rules to the supplied buffer
 * and return the amount of space actually used.
- * Must be run under IPFW_UH_RLOCK - */ -static size_t -ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) -{ - char *bp = buf; - char *ep = bp + space; - struct ip_fw *rule; - struct ip_fw_rule0 *dst; - struct timeval boottime; - int error, i, l, warnflag; - time_t boot_seconds; - - warnflag = 0; - - getboottime(&boottime); - boot_seconds = boottime.tv_sec; - for (i = 0; i < chain->n_rules; i++) { - rule = chain->map[i]; - - if (is7) { - /* Convert rule to FreeBSd 7.2 format */ - l = RULESIZE7(rule); - if (bp + l + sizeof(uint32_t) <= ep) { - bcopy(rule, bp, l + sizeof(uint32_t)); - error = set_legacy_obj_kidx(chain, - (struct ip_fw_rule0 *)bp); - if (error != 0) - return (0); - error = convert_rule_to_7((struct ip_fw_rule0 *) bp); - if (error) - return 0; /*XXX correct? */ - /* - * XXX HACK. Store the disable mask in the "next" - * pointer in a wild attempt to keep the ABI the same. - * Why do we do this on EVERY rule? - */ - bcopy(&V_set_disable, - &(((struct ip_fw7 *)bp)->next_rule), - sizeof(V_set_disable)); - if (((struct ip_fw7 *)bp)->timestamp) - ((struct ip_fw7 *)bp)->timestamp += boot_seconds; - bp += l; - } - continue; /* go to next rule */ - } - - l = RULEUSIZE0(rule); - if (bp + l > ep) { /* should not happen */ - printf("overflow dumping static rules\n"); - break; - } - dst = (struct ip_fw_rule0 *)bp; - export_rule0(rule, dst, l); - error = set_legacy_obj_kidx(chain, dst); - - /* - * XXX HACK. Store the disable mask in the "next" - * pointer in a wild attempt to keep the ABI the same. - * Why do we do this on EVERY rule? - * - * XXX: "ipfw set show" (ab)uses IP_FW_GET to read disabled mask - * so we need to fail _after_ saving at least one mask. - */ - bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable)); - if (dst->timestamp) - dst->timestamp += boot_seconds; - bp += l; - - if (error != 0) { - if (error == 2) { - /* Non-fatal table rewrite error. */ - warnflag = 1; - continue; - } - printf("Stop on rule %d. 
Fail to convert table\n", - rule->rulenum); - break; - } - } - if (warnflag != 0) - printf("ipfw: process %s is using legacy interfaces," - " consider rebuilding\n", ""); - ipfw_get_dynamic(chain, &bp, ep); /* protected by the dynamic lock */ - return (bp - (char *)buf); -} - - -struct dump_args { - uint32_t b; /* start rule */ - uint32_t e; /* end rule */ - uint32_t rcount; /* number of rules */ - uint32_t rsize; /* rules size */ - uint32_t tcount; /* number of tables */ - int rcounters; /* counters */ -}; - -void -ipfw_export_obj_ntlv(struct named_object *no, ipfw_obj_ntlv *ntlv) -{ - - ntlv->head.type = no->etlv; - ntlv->head.length = sizeof(*ntlv); - ntlv->idx = no->kidx; - strlcpy(ntlv->name, no->name, sizeof(ntlv->name)); -} - -/* - * Export named object info in instance @ni, identified by @kidx - * to ipfw_obj_ntlv. TLV is allocated from @sd space. - * - * Returns 0 on success. - */ -static int -export_objhash_ntlv(struct namedobj_instance *ni, uint16_t kidx, - struct sockopt_data *sd) -{ - struct named_object *no; - ipfw_obj_ntlv *ntlv; - - no = ipfw_objhash_lookup_kidx(ni, kidx); - KASSERT(no != NULL, ("invalid object kernel index passed")); - - ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); - if (ntlv == NULL) - return (ENOMEM); - - ipfw_export_obj_ntlv(no, ntlv); - return (0); -} - -/* - * Dumps static rules with table TLVs in buffer @sd. - * - * Returns 0 on success. 
- */ -static int -dump_static_rules(struct ip_fw_chain *chain, struct dump_args *da, - uint32_t *bmask, struct sockopt_data *sd) -{ - int error; - int i, l; - uint32_t tcount; - ipfw_obj_ctlv *ctlv; - struct ip_fw *krule; - struct namedobj_instance *ni; - caddr_t dst; - - /* Dump table names first (if any) */ - if (da->tcount > 0) { - /* Header first */ - ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); - if (ctlv == NULL) - return (ENOMEM); - ctlv->head.type = IPFW_TLV_TBLNAME_LIST; - ctlv->head.length = da->tcount * sizeof(ipfw_obj_ntlv) + - sizeof(*ctlv); - ctlv->count = da->tcount; - ctlv->objsize = sizeof(ipfw_obj_ntlv); - } - - i = 0; - tcount = da->tcount; - ni = ipfw_get_table_objhash(chain); - while (tcount > 0) { - if ((bmask[i / 32] & (1 << (i % 32))) == 0) { - i++; - continue; - } - - /* Jump to shared named object bitmask */ - if (i >= IPFW_TABLES_MAX) { - ni = CHAIN_TO_SRV(chain); - i -= IPFW_TABLES_MAX; - bmask += IPFW_TABLES_MAX / 32; - } - - if ((error = export_objhash_ntlv(ni, i, sd)) != 0) - return (error); - - i++; - tcount--; - } - - /* Dump rules */ - ctlv = (ipfw_obj_ctlv *)ipfw_get_sopt_space(sd, sizeof(*ctlv)); - if (ctlv == NULL) - return (ENOMEM); - ctlv->head.type = IPFW_TLV_RULE_LIST; - ctlv->head.length = da->rsize + sizeof(*ctlv); - ctlv->count = da->rcount; - - for (i = da->b; i < da->e; i++) { - krule = chain->map[i]; - - l = RULEUSIZE1(krule) + sizeof(ipfw_obj_tlv); - if (da->rcounters != 0) - l += sizeof(struct ip_fw_bcounter); - dst = (caddr_t)ipfw_get_sopt_space(sd, l); - if (dst == NULL) - return (ENOMEM); - - export_rule1(krule, dst, l, da->rcounters); - } - - return (0); -} - -/* - * Marks every object index used in @rule with bit in @bmask. - * Used to generate bitmask of referenced tables/objects for given ruleset - * or its part. - * - * Returns number of newly-referenced objects. 
- */ -static int -mark_object_kidx(struct ip_fw_chain *ch, struct ip_fw *rule, - uint32_t *bmask) -{ - struct opcode_obj_rewrite *rw; - ipfw_insn *cmd; - int bidx, cmdlen, l, count; - uint16_t kidx; - uint8_t subtype; - - l = rule->cmd_len; - cmd = rule->cmd; - cmdlen = 0; - count = 0; - for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - - rw = find_op_rw(cmd, &kidx, &subtype); - if (rw == NULL) - continue; - - bidx = kidx / 32; - /* - * Maintain separate bitmasks for table and - * non-table objects. - */ - if (rw->etlv != IPFW_TLV_TBL_NAME) - bidx += IPFW_TABLES_MAX / 32; - - if ((bmask[bidx] & (1 << (kidx % 32))) == 0) - count++; - - bmask[bidx] |= 1 << (kidx % 32); - } - - return (count); -} - -/* - * Dumps requested objects data - * Data layout (version 0)(current): - * Request: [ ipfw_cfg_lheader ] + IPFW_CFG_GET_* flags - * size = ipfw_cfg_lheader.size - * Reply: [ ipfw_cfg_lheader - * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) - * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) - * ipfw_obj_tlv(IPFW_TLV_RULE_ENT) [ ip_fw_bcounter (optional) ip_fw_rule ] - * ] (optional) - * [ ipfw_obj_ctlv(IPFW_TLV_STATE_LIST) ipfw_obj_dyntlv x N ] (optional) - * ] - * * NOTE IPFW_TLV_STATE_LIST has the single valid field: objsize. - * The rest (size, count) are set to zero and needs to be ignored. - * - * Returns 0 on success. - */ -static int -dump_config(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_cfg_lheader *hdr; - struct ip_fw *rule; - size_t sz, rnum; - uint32_t hdr_flags; - int error, i; - struct dump_args da; - uint32_t *bmask; - - hdr = (ipfw_cfg_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); - if (hdr == NULL) - return (EINVAL); - - error = 0; - bmask = NULL; - /* Allocate needed state. 
Note we allocate 2xspace mask, for table&srv */ - if (hdr->flags & IPFW_CFG_GET_STATIC) - bmask = malloc(IPFW_TABLES_MAX / 4, M_TEMP, M_WAITOK | M_ZERO); - - IPFW_UH_RLOCK(chain); - - /* - * STAGE 1: Determine size/count for objects in range. - * Prepare used tables bitmask. - */ - sz = sizeof(ipfw_cfg_lheader); - memset(&da, 0, sizeof(da)); - - da.b = 0; - da.e = chain->n_rules; - - if (hdr->end_rule != 0) { - /* Handle custom range */ - if ((rnum = hdr->start_rule) > IPFW_DEFAULT_RULE) - rnum = IPFW_DEFAULT_RULE; - da.b = ipfw_find_rule(chain, rnum, 0); - rnum = hdr->end_rule; - rnum = (rnum < IPFW_DEFAULT_RULE) ? rnum+1 : IPFW_DEFAULT_RULE; - da.e = ipfw_find_rule(chain, rnum, 0) + 1; - } - - if (hdr->flags & IPFW_CFG_GET_STATIC) { - for (i = da.b; i < da.e; i++) { - rule = chain->map[i]; - da.rsize += RULEUSIZE1(rule) + sizeof(ipfw_obj_tlv); - da.rcount++; - /* Update bitmask of used objects for given range */ - da.tcount += mark_object_kidx(chain, rule, bmask); - } - /* Add counters if requested */ - if (hdr->flags & IPFW_CFG_GET_COUNTERS) { - da.rsize += sizeof(struct ip_fw_bcounter) * da.rcount; - da.rcounters = 1; - } - - if (da.tcount > 0) - sz += da.tcount * sizeof(ipfw_obj_ntlv) + - sizeof(ipfw_obj_ctlv); - sz += da.rsize + sizeof(ipfw_obj_ctlv); - } - - if (hdr->flags & IPFW_CFG_GET_STATES) - sz += ipfw_dyn_get_count() * sizeof(ipfw_obj_dyntlv) + - sizeof(ipfw_obj_ctlv); - - - /* - * Fill header anyway. 
- * Note we have to save header fields to stable storage - * buffer inside @sd can be flushed after dumping rules - */ - hdr->size = sz; - hdr->set_mask = ~V_set_disable; - hdr_flags = hdr->flags; - hdr = NULL; - - if (sd->valsize < sz) { - error = ENOMEM; - goto cleanup; - } - - /* STAGE2: Store actual data */ - if (hdr_flags & IPFW_CFG_GET_STATIC) { - error = dump_static_rules(chain, &da, bmask, sd); - if (error != 0) - goto cleanup; - } - - if (hdr_flags & IPFW_CFG_GET_STATES) - error = ipfw_dump_states(chain, sd); - -cleanup: - IPFW_UH_RUNLOCK(chain); - - if (bmask != NULL) - free(bmask, M_TEMP); - - return (error); -} - -int -ipfw_check_object_name_generic(const char *name) -{ - int nsize; - - nsize = sizeof(((ipfw_obj_ntlv *)0)->name); - if (strnlen(name, nsize) == nsize) - return (EINVAL); - if (name[0] == '\0') - return (EINVAL); - return (0); -} - -/* - * Creates non-existent objects referenced by rule. - * - * Return 0 on success. - */ -int -create_objects_compat(struct ip_fw_chain *ch, ipfw_insn *cmd, - struct obj_idx *oib, struct obj_idx *pidx, struct tid_info *ti) -{ - struct opcode_obj_rewrite *rw; - struct obj_idx *p; - uint16_t kidx; - int error; - - /* - * Compatibility stuff: do actual creation for non-existing, - * but referenced objects. - */ - for (p = oib; p < pidx; p++) { - if (p->kidx != 0) - continue; - - ti->uidx = p->uidx; - ti->type = p->type; - ti->atype = 0; - - rw = find_op_rw(cmd + p->off, NULL, NULL); - KASSERT(rw != NULL, ("Unable to find handler for op %d", - (cmd + p->off)->opcode)); - - if (rw->create_object == NULL) - error = EOPNOTSUPP; - else - error = rw->create_object(ch, ti, &kidx); - if (error == 0) { - p->kidx = kidx; - continue; - } - - /* - * Error happened. We have to rollback everything. - * Drop all already acquired references. - */ - IPFW_UH_WLOCK(ch); - unref_oib_objects(ch, cmd, oib, pidx); - IPFW_UH_WUNLOCK(ch); - - return (error); - } - - return (0); -} - -/* - * Compatibility function for old ipfw(8) binaries. 
- * Rewrites table/nat kernel indices with userland ones. - * Convert tables matching '/^\d+$/' to their atoi() value. - * Use number 65535 for other tables. - * - * Returns 0 on success. - */ -static int -set_legacy_obj_kidx(struct ip_fw_chain *ch, struct ip_fw_rule0 *rule) -{ - struct opcode_obj_rewrite *rw; - struct named_object *no; - ipfw_insn *cmd; - char *end; - long val; - int cmdlen, error, l; - uint16_t kidx, uidx; - uint8_t subtype; - - error = 0; - - l = rule->cmd_len; - cmd = rule->cmd; - cmdlen = 0; - for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - - /* Check if is index in given opcode */ - rw = find_op_rw(cmd, &kidx, &subtype); - if (rw == NULL) - continue; - - /* Try to find referenced kernel object */ - no = rw->find_bykidx(ch, kidx); - if (no == NULL) - continue; - - val = strtol(no->name, &end, 10); - if (*end == '\0' && val < 65535) { - uidx = val; - } else { - - /* - * We are called via legacy opcode. - * Save error and show table as fake number - * not to make ipfw(8) hang. - */ - uidx = 65535; - error = 2; - } - - rw->update(cmd, uidx); - } - - return (error); -} - - -/* - * Unreferences all already-referenced objects in given @cmd rule, - * using information in @oib. - * - * Used to rollback partially converted rule on error. - */ -static void -unref_oib_objects(struct ip_fw_chain *ch, ipfw_insn *cmd, struct obj_idx *oib, - struct obj_idx *end) -{ - struct opcode_obj_rewrite *rw; - struct named_object *no; - struct obj_idx *p; - - IPFW_UH_WLOCK_ASSERT(ch); - - for (p = oib; p < end; p++) { - if (p->kidx == 0) - continue; - - rw = find_op_rw(cmd + p->off, NULL, NULL); - KASSERT(rw != NULL, ("Unable to find handler for op %d", - (cmd + p->off)->opcode)); - - /* Find & unref by existing idx */ - no = rw->find_bykidx(ch, p->kidx); - KASSERT(no != NULL, ("Ref'd object %d disappeared", p->kidx)); - no->refcnt--; - } -} - -/* - * Remove references from every object used in @rule. - * Used at rule removal code. 
- */ -static void -unref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule) -{ - struct opcode_obj_rewrite *rw; - struct named_object *no; - ipfw_insn *cmd; - int cmdlen, l; - uint16_t kidx; - uint8_t subtype; - - IPFW_UH_WLOCK_ASSERT(ch); - - l = rule->cmd_len; - cmd = rule->cmd; - cmdlen = 0; - for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - - rw = find_op_rw(cmd, &kidx, &subtype); - if (rw == NULL) - continue; - no = rw->find_bykidx(ch, kidx); - - KASSERT(no != NULL, ("object id %d not found", kidx)); - KASSERT(no->subtype == subtype, - ("wrong type %d (%d) for object id %d", - no->subtype, subtype, kidx)); - KASSERT(no->refcnt > 0, ("refcount for object %d is %d", - kidx, no->refcnt)); - - if (no->refcnt == 1 && rw->destroy_object != NULL) - rw->destroy_object(ch, no); - else - no->refcnt--; - } -} - - -/* - * Find and reference object (if any) stored in instruction @cmd. - * - * Saves object info in @pidx, sets - * - @unresolved to 1 if object should exists but not found - * - * Returns non-zero value in case of error. - */ -static int -ref_opcode_object(struct ip_fw_chain *ch, ipfw_insn *cmd, struct tid_info *ti, - struct obj_idx *pidx, int *unresolved) -{ - struct named_object *no; - struct opcode_obj_rewrite *rw; - int error; - - /* Check if this opcode is candidate for rewrite */ - rw = find_op_rw(cmd, &ti->uidx, &ti->type); - if (rw == NULL) - return (0); - - /* Need to rewrite. Save necessary fields */ - pidx->uidx = ti->uidx; - pidx->type = ti->type; - - /* Try to find referenced kernel object */ - error = rw->find_byname(ch, ti, &no); - if (error != 0) - return (error); - if (no == NULL) { - /* - * Report about unresolved object for automaic - * creation. - */ - *unresolved = 1; - return (0); - } - - /* - * Object is already exist. - * Its subtype should match with expected value. - */ - if (ti->type != no->subtype) - return (EINVAL); - - /* Bump refcount and update kidx. 
*/ - no->refcnt++; - rw->update(cmd, no->kidx); - return (0); -} - -/* - * Finds and bumps refcount for objects referenced by given @rule. - * Auto-creates non-existing tables. - * Fills in @oib array with userland/kernel indexes. - * - * Returns 0 on success. - */ -static int -ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule, - struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti) -{ - struct obj_idx *pidx; - ipfw_insn *cmd; - int cmdlen, error, l, unresolved; - - pidx = oib; - l = rule->cmd_len; - cmd = rule->cmd; - cmdlen = 0; - error = 0; - - IPFW_UH_WLOCK(ch); - - /* Increase refcount on each existing referenced table. */ - for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - unresolved = 0; - - error = ref_opcode_object(ch, cmd, ti, pidx, &unresolved); - if (error != 0) - break; - /* - * Compatibility stuff for old clients: - * prepare to automaitcally create non-existing objects. - */ - if (unresolved != 0) { - pidx->off = rule->cmd_len - l; - pidx++; - } - } - - if (error != 0) { - /* Unref everything we have already done */ - unref_oib_objects(ch, rule->cmd, oib, pidx); - IPFW_UH_WUNLOCK(ch); - return (error); - } - IPFW_UH_WUNLOCK(ch); - - /* Perform auto-creation for non-existing objects */ - if (pidx != oib) - error = create_objects_compat(ch, rule->cmd, oib, pidx, ti); - - /* Calculate real number of dynamic objects */ - ci->object_opcodes = (uint16_t)(pidx - oib); - - return (error); -} - -/* - * Checks is opcode is referencing table of appropriate type. - * Adds reference count for found table if true. - * Rewrites user-supplied opcode values with kernel ones. - * - * Returns 0 on success and appropriate error code otherwise. - */ -static int -rewrite_rule_uidx(struct ip_fw_chain *chain, struct rule_check_info *ci) -{ - int error; - ipfw_insn *cmd; - uint8_t type; - struct obj_idx *p, *pidx_first, *pidx_last; - struct tid_info ti; - - /* - * Prepare an array for storing opcode indices. 
- * Use stack allocation by default. - */ - if (ci->object_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) { - /* Stack */ - pidx_first = ci->obuf; - } else - pidx_first = malloc( - ci->object_opcodes * sizeof(struct obj_idx), - M_IPFW, M_WAITOK | M_ZERO); - - error = 0; - type = 0; - memset(&ti, 0, sizeof(ti)); - - /* Use set rule is assigned to. */ - ti.set = ci->krule->set; - if (ci->ctlv != NULL) { - ti.tlvs = (void *)(ci->ctlv + 1); - ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv); - } - - /* Reference all used tables and other objects */ - error = ref_rule_objects(chain, ci->krule, ci, pidx_first, &ti); - if (error != 0) - goto free; - /* - * Note that ref_rule_objects() might have updated ci->object_opcodes - * to reflect actual number of object opcodes. - */ - - /* Perform rewrite of remaining opcodes */ - p = pidx_first; - pidx_last = pidx_first + ci->object_opcodes; - for (p = pidx_first; p < pidx_last; p++) { - cmd = ci->krule->cmd + p->off; - update_opcode_kidx(cmd, p->kidx); - } - -free: - if (pidx_first != ci->obuf) - free(pidx_first, M_IPFW); - - return (error); -} - -/* - * Adds one or more rules to ipfw @chain. - * Data layout (version 0)(current): - * Request: - * [ - * ip_fw3_opheader - * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional *1) - * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] (*2) (*3) - * ] - * Reply: - * [ - * ip_fw3_opheader - * [ ipfw_obj_ctlv(IPFW_TLV_TBL_LIST) ipfw_obj_ntlv x N ] (optional) - * [ ipfw_obj_ctlv(IPFW_TLV_RULE_LIST) ip_fw x N ] - * ] - * - * Rules in reply are modified to store their actual ruleset number. - * - * (*1) TLVs inside IPFW_TLV_TBL_LIST needs to be sorted ascending - * according to their idx field and there has to be no duplicates. - * (*2) Numbered rules inside IPFW_TLV_RULE_LIST needs to be sorted ascending. - * (*3) Each ip_fw structure needs to be aligned to u64 boundary. - * - * Returns 0 on success. 
- */ -static int -add_rules(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_ctlv *ctlv, *rtlv, *tstate; - ipfw_obj_ntlv *ntlv; - int clen, error, idx; - uint32_t count, read; - struct ip_fw_rule *r; - struct rule_check_info rci, *ci, *cbuf; - int i, rsize; - - op3 = (ip_fw3_opheader *)ipfw_get_sopt_space(sd, sd->valsize); - ctlv = (ipfw_obj_ctlv *)(op3 + 1); - - read = sizeof(ip_fw3_opheader); - rtlv = NULL; - tstate = NULL; - cbuf = NULL; - memset(&rci, 0, sizeof(struct rule_check_info)); - - if (read + sizeof(*ctlv) > sd->valsize) - return (EINVAL); - - if (ctlv->head.type == IPFW_TLV_TBLNAME_LIST) { - clen = ctlv->head.length; - /* Check size and alignment */ - if (clen > sd->valsize || clen < sizeof(*ctlv)) - return (EINVAL); - if ((clen % sizeof(uint64_t)) != 0) - return (EINVAL); - - /* - * Some table names or other named objects. - * Check for validness. - */ - count = (ctlv->head.length - sizeof(*ctlv)) / sizeof(*ntlv); - if (ctlv->count != count || ctlv->objsize != sizeof(*ntlv)) - return (EINVAL); - - /* - * Check each TLV. - * Ensure TLVs are sorted ascending and - * there are no duplicates. 
- */ - idx = -1; - ntlv = (ipfw_obj_ntlv *)(ctlv + 1); - while (count > 0) { - if (ntlv->head.length != sizeof(ipfw_obj_ntlv)) - return (EINVAL); - - error = ipfw_check_object_name_generic(ntlv->name); - if (error != 0) - return (error); - - if (ntlv->idx <= idx) - return (EINVAL); - - idx = ntlv->idx; - count--; - ntlv++; - } - - tstate = ctlv; - read += ctlv->head.length; - ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); - } - - if (read + sizeof(*ctlv) > sd->valsize) - return (EINVAL); - - if (ctlv->head.type == IPFW_TLV_RULE_LIST) { - clen = ctlv->head.length; - if (clen + read > sd->valsize || clen < sizeof(*ctlv)) - return (EINVAL); - if ((clen % sizeof(uint64_t)) != 0) - return (EINVAL); - - /* - * TODO: Permit adding multiple rules at once - */ - if (ctlv->count != 1) - return (ENOTSUP); - - clen -= sizeof(*ctlv); - - if (ctlv->count > clen / sizeof(struct ip_fw_rule)) - return (EINVAL); - - /* Allocate state for each rule or use stack */ - if (ctlv->count == 1) { - memset(&rci, 0, sizeof(struct rule_check_info)); - cbuf = &rci; - } else - cbuf = malloc(ctlv->count * sizeof(*ci), M_TEMP, - M_WAITOK | M_ZERO); - ci = cbuf; - - /* - * Check each rule for validness. 
- * Ensure numbered rules are sorted ascending - * and properly aligned - */ - idx = 0; - r = (struct ip_fw_rule *)(ctlv + 1); - count = 0; - error = 0; - while (clen > 0) { - rsize = roundup2(RULESIZE(r), sizeof(uint64_t)); - if (rsize > clen || ctlv->count <= count) { - error = EINVAL; - break; - } - - ci->ctlv = tstate; - error = check_ipfw_rule1(r, rsize, ci); - if (error != 0) - break; - - /* Check sorting */ - if (r->rulenum != 0 && r->rulenum < idx) { - printf("rulenum %d idx %d\n", r->rulenum, idx); - error = EINVAL; - break; - } - idx = r->rulenum; - - ci->urule = (caddr_t)r; - - rsize = roundup2(rsize, sizeof(uint64_t)); - clen -= rsize; - r = (struct ip_fw_rule *)((caddr_t)r + rsize); - count++; - ci++; - } - - if (ctlv->count != count || error != 0) { - if (cbuf != &rci) - free(cbuf, M_TEMP); - return (EINVAL); - } - - rtlv = ctlv; - read += ctlv->head.length; - ctlv = (ipfw_obj_ctlv *)((caddr_t)ctlv + ctlv->head.length); - } - - if (read != sd->valsize || rtlv == NULL || rtlv->count == 0) { - if (cbuf != NULL && cbuf != &rci) - free(cbuf, M_TEMP); - return (EINVAL); - } - - /* - * Passed rules seems to be valid. - * Allocate storage and try to add them to chain. - */ - for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) { - clen = RULEKSIZE1((struct ip_fw_rule *)ci->urule); - ci->krule = ipfw_alloc_rule(chain, clen); - import_rule1(ci); - } - - if ((error = commit_rules(chain, cbuf, rtlv->count)) != 0) { - /* Free allocate krules */ - for (i = 0, ci = cbuf; i < rtlv->count; i++, ci++) - free_rule(ci->krule); - } - - if (cbuf != NULL && cbuf != &rci) - free(cbuf, M_TEMP); - - return (error); -} - -/* - * Lists all sopts currently registered. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size - * Reply: [ ipfw_obj_lheader ipfw_sopt_info x N ] - * - * Returns 0 on success - */ -static int -dump_soptcodes(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct _ipfw_obj_lheader *olh; - ipfw_sopt_info *i; - struct ipfw_sopt_handler *sh; - uint32_t count, n, size; - - olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); - if (olh == NULL) - return (EINVAL); - if (sd->valsize < olh->size) - return (EINVAL); - - CTL3_LOCK(); - count = ctl3_hsize; - size = count * sizeof(ipfw_sopt_info) + sizeof(ipfw_obj_lheader); - - /* Fill in header regadless of buffer size */ - olh->count = count; - olh->objsize = sizeof(ipfw_sopt_info); - - if (size > olh->size) { - olh->size = size; - CTL3_UNLOCK(); - return (ENOMEM); - } - olh->size = size; - - for (n = 1; n <= count; n++) { - i = (ipfw_sopt_info *)ipfw_get_sopt_space(sd, sizeof(*i)); - KASSERT(i != NULL, ("previously checked buffer is not enough")); - sh = &ctl3_handlers[n]; - i->opcode = sh->opcode; - i->version = sh->version; - i->refcnt = sh->refcnt; - } - CTL3_UNLOCK(); - - return (0); -} - -/* - * Compares two opcodes. - * Used both in qsort() and bsearch(). - * - * Returns 0 if match is found. 
- */ -static int -compare_opcodes(const void *_a, const void *_b) -{ - const struct opcode_obj_rewrite *a, *b; - - a = (const struct opcode_obj_rewrite *)_a; - b = (const struct opcode_obj_rewrite *)_b; - - if (a->opcode < b->opcode) - return (-1); - else if (a->opcode > b->opcode) - return (1); - - return (0); -} - -/* - * XXX: Rewrite bsearch() - */ -static int -find_op_rw_range(uint16_t op, struct opcode_obj_rewrite **plo, - struct opcode_obj_rewrite **phi) -{ - struct opcode_obj_rewrite *ctl3_max, *lo, *hi, h, *rw; - - memset(&h, 0, sizeof(h)); - h.opcode = op; - - rw = (struct opcode_obj_rewrite *)bsearch(&h, ctl3_rewriters, - ctl3_rsize, sizeof(h), compare_opcodes); - if (rw == NULL) - return (1); - - /* Find the first element matching the same opcode */ - lo = rw; - for ( ; lo > ctl3_rewriters && (lo - 1)->opcode == op; lo--) - ; - - /* Find the last element matching the same opcode */ - hi = rw; - ctl3_max = ctl3_rewriters + ctl3_rsize; - for ( ; (hi + 1) < ctl3_max && (hi + 1)->opcode == op; hi++) - ; - - *plo = lo; - *phi = hi; - - return (0); -} - -/* - * Finds opcode object rewriter based on @code. - * - * Returns pointer to handler or NULL. 
- */ -static struct opcode_obj_rewrite * -find_op_rw(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) -{ - struct opcode_obj_rewrite *rw, *lo, *hi; - uint16_t uidx; - uint8_t subtype; - - if (find_op_rw_range(cmd->opcode, &lo, &hi) != 0) - return (NULL); - - for (rw = lo; rw <= hi; rw++) { - if (rw->classifier(cmd, &uidx, &subtype) == 0) { - if (puidx != NULL) - *puidx = uidx; - if (ptype != NULL) - *ptype = subtype; - return (rw); - } - } - - return (NULL); -} -int -classify_opcode_kidx(ipfw_insn *cmd, uint16_t *puidx) -{ - - if (find_op_rw(cmd, puidx, NULL) == NULL) - return (1); - return (0); -} - -void -update_opcode_kidx(ipfw_insn *cmd, uint16_t idx) -{ - struct opcode_obj_rewrite *rw; - - rw = find_op_rw(cmd, NULL, NULL); - KASSERT(rw != NULL, ("No handler to update opcode %d", cmd->opcode)); - rw->update(cmd, idx); -} - -void -ipfw_init_obj_rewriter() -{ - - ctl3_rewriters = NULL; - ctl3_rsize = 0; -} - -void -ipfw_destroy_obj_rewriter() -{ - - if (ctl3_rewriters != NULL) - free(ctl3_rewriters, M_IPFW); - ctl3_rewriters = NULL; - ctl3_rsize = 0; -} - -/* - * Adds one or more opcode object rewrite handlers to the global array. - * Function may sleep. - */ -void -ipfw_add_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count) -{ - size_t sz; - struct opcode_obj_rewrite *tmp; - - CTL3_LOCK(); - - for (;;) { - sz = ctl3_rsize + count; - CTL3_UNLOCK(); - tmp = malloc(sizeof(*rw) * sz, M_IPFW, M_WAITOK | M_ZERO); - CTL3_LOCK(); - if (ctl3_rsize + count <= sz) - break; - - /* Retry */ - free(tmp, M_IPFW); - } - - /* Merge old & new arrays */ - sz = ctl3_rsize + count; - memcpy(tmp, ctl3_rewriters, ctl3_rsize * sizeof(*rw)); - memcpy(&tmp[ctl3_rsize], rw, count * sizeof(*rw)); - qsort(tmp, sz, sizeof(*rw), compare_opcodes); - /* Switch new and free old */ - if (ctl3_rewriters != NULL) - free(ctl3_rewriters, M_IPFW); - ctl3_rewriters = tmp; - ctl3_rsize = sz; - - CTL3_UNLOCK(); -} - -/* - * Removes one or more object rewrite handlers from the global array. 
- */ -int -ipfw_del_obj_rewriter(struct opcode_obj_rewrite *rw, size_t count) -{ - size_t sz; - struct opcode_obj_rewrite *ctl3_max, *ktmp, *lo, *hi; - int i; - - CTL3_LOCK(); - - for (i = 0; i < count; i++) { - if (find_op_rw_range(rw[i].opcode, &lo, &hi) != 0) - continue; - - for (ktmp = lo; ktmp <= hi; ktmp++) { - if (ktmp->classifier != rw[i].classifier) - continue; - - ctl3_max = ctl3_rewriters + ctl3_rsize; - sz = (ctl3_max - (ktmp + 1)) * sizeof(*ktmp); - memmove(ktmp, ktmp + 1, sz); - ctl3_rsize--; - break; - } - - } - - if (ctl3_rsize == 0) { - if (ctl3_rewriters != NULL) - free(ctl3_rewriters, M_IPFW); - ctl3_rewriters = NULL; - } - - CTL3_UNLOCK(); - - return (0); -} - -static int -export_objhash_ntlv_internal(struct namedobj_instance *ni, - struct named_object *no, void *arg) -{ - struct sockopt_data *sd; - ipfw_obj_ntlv *ntlv; - - sd = (struct sockopt_data *)arg; - ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); - if (ntlv == NULL) - return (ENOMEM); - ipfw_export_obj_ntlv(no, ntlv); - return (0); -} - -/* - * Lists all service objects. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ] size = ipfw_obj_lheader.size - * Reply: [ ipfw_obj_lheader [ ipfw_obj_ntlv x N ] (optional) ] - * Returns 0 on success - */ -static int -dump_srvobjects(struct ip_fw_chain *chain, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_lheader *hdr; - int count; - - hdr = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr)); - if (hdr == NULL) - return (EINVAL); - - IPFW_UH_RLOCK(chain); - count = ipfw_objhash_count(CHAIN_TO_SRV(chain)); - hdr->size = sizeof(ipfw_obj_lheader) + count * sizeof(ipfw_obj_ntlv); - if (sd->valsize < hdr->size) { - IPFW_UH_RUNLOCK(chain); - return (ENOMEM); - } - hdr->count = count; - hdr->objsize = sizeof(ipfw_obj_ntlv); - if (count > 0) - ipfw_objhash_foreach(CHAIN_TO_SRV(chain), - export_objhash_ntlv_internal, sd); - IPFW_UH_RUNLOCK(chain); - return (0); -} - -/* - * Compares two sopt handlers (code, version and handler ptr). - * Used both as qsort() and bsearch(). - * Does not compare handler for latter case. - * - * Returns 0 if match is found. - */ -static int -compare_sh(const void *_a, const void *_b) -{ - const struct ipfw_sopt_handler *a, *b; - - a = (const struct ipfw_sopt_handler *)_a; - b = (const struct ipfw_sopt_handler *)_b; - - if (a->opcode < b->opcode) - return (-1); - else if (a->opcode > b->opcode) - return (1); - - if (a->version < b->version) - return (-1); - else if (a->version > b->version) - return (1); - - /* bsearch helper */ - if (a->handler == NULL) - return (0); - - if ((uintptr_t)a->handler < (uintptr_t)b->handler) - return (-1); - else if ((uintptr_t)a->handler > (uintptr_t)b->handler) - return (1); - - return (0); -} - -/* - * Finds sopt handler based on @code and @version. - * - * Returns pointer to handler or NULL. 
- */ -static struct ipfw_sopt_handler * -find_sh(uint16_t code, uint8_t version, sopt_handler_f *handler) -{ - struct ipfw_sopt_handler *sh, h; - - memset(&h, 0, sizeof(h)); - h.opcode = code; - h.version = version; - h.handler = handler; - - sh = (struct ipfw_sopt_handler *)bsearch(&h, ctl3_handlers, - ctl3_hsize, sizeof(h), compare_sh); - - return (sh); -} - -static int -find_ref_sh(uint16_t opcode, uint8_t version, struct ipfw_sopt_handler *psh) -{ - struct ipfw_sopt_handler *sh; - - CTL3_LOCK(); - if ((sh = find_sh(opcode, version, NULL)) == NULL) { - CTL3_UNLOCK(); - printf("ipfw: ipfw_ctl3 invalid option %d""v""%d\n", - opcode, version); - return (EINVAL); - } - sh->refcnt++; - ctl3_refct++; - /* Copy handler data to requested buffer */ - *psh = *sh; - CTL3_UNLOCK(); - - return (0); -} - -static void -find_unref_sh(struct ipfw_sopt_handler *psh) -{ - struct ipfw_sopt_handler *sh; - - CTL3_LOCK(); - sh = find_sh(psh->opcode, psh->version, NULL); - KASSERT(sh != NULL, ("ctl3 handler disappeared")); - sh->refcnt--; - ctl3_refct--; - CTL3_UNLOCK(); -} - -void -ipfw_init_sopt_handler() -{ - - CTL3_LOCK_INIT(); - IPFW_ADD_SOPT_HANDLER(1, scodes); -} - -void -ipfw_destroy_sopt_handler() -{ - - IPFW_DEL_SOPT_HANDLER(1, scodes); - CTL3_LOCK_DESTROY(); -} - -/* - * Adds one or more sockopt handlers to the global array. - * Function may sleep. 
- */ -void -ipfw_add_sopt_handler(struct ipfw_sopt_handler *sh, size_t count) -{ - size_t sz; - struct ipfw_sopt_handler *tmp; - - CTL3_LOCK(); - - for (;;) { - sz = ctl3_hsize + count; - CTL3_UNLOCK(); - tmp = malloc(sizeof(*sh) * sz, M_IPFW, M_WAITOK | M_ZERO); - CTL3_LOCK(); - if (ctl3_hsize + count <= sz) - break; - - /* Retry */ - free(tmp, M_IPFW); - } - - /* Merge old & new arrays */ - sz = ctl3_hsize + count; - memcpy(tmp, ctl3_handlers, ctl3_hsize * sizeof(*sh)); - memcpy(&tmp[ctl3_hsize], sh, count * sizeof(*sh)); - qsort(tmp, sz, sizeof(*sh), compare_sh); - /* Switch new and free old */ - if (ctl3_handlers != NULL) - free(ctl3_handlers, M_IPFW); - ctl3_handlers = tmp; - ctl3_hsize = sz; - ctl3_gencnt++; - - CTL3_UNLOCK(); -} - -/* - * Removes one or more sockopt handlers from the global array. - */ -int -ipfw_del_sopt_handler(struct ipfw_sopt_handler *sh, size_t count) -{ - size_t sz; - struct ipfw_sopt_handler *tmp, *h; - int i; - - CTL3_LOCK(); - - for (i = 0; i < count; i++) { - tmp = &sh[i]; - h = find_sh(tmp->opcode, tmp->version, tmp->handler); - if (h == NULL) - continue; - - sz = (ctl3_handlers + ctl3_hsize - (h + 1)) * sizeof(*h); - memmove(h, h + 1, sz); - ctl3_hsize--; - } - - if (ctl3_hsize == 0) { - if (ctl3_handlers != NULL) - free(ctl3_handlers, M_IPFW); - ctl3_handlers = NULL; - } - - ctl3_gencnt++; - - CTL3_UNLOCK(); - - return (0); -} - -/* - * Writes data accumulated in @sd to sockopt buffer. - * Zeroes internal @sd buffer. 
- */ -static int -ipfw_flush_sopt_data(struct sockopt_data *sd) -{ - struct sockopt *sopt; - int error; - size_t sz; - - sz = sd->koff; - if (sz == 0) - return (0); - - sopt = sd->sopt; - - if (sopt->sopt_dir == SOPT_GET) { - error = copyout(sd->kbuf, sopt->sopt_val, sz); - if (error != 0) - return (error); - } - - memset(sd->kbuf, 0, sd->ksize); - sd->ktotal += sz; - sd->koff = 0; - if (sd->ktotal + sd->ksize < sd->valsize) - sd->kavail = sd->ksize; - else - sd->kavail = sd->valsize - sd->ktotal; - - /* Update sopt buffer data */ - sopt->sopt_valsize = sd->ktotal; - sopt->sopt_val = sd->sopt_val + sd->ktotal; - - return (0); -} - -/* - * Ensures that @sd buffer has contiguous @neeeded number of - * bytes. - * - * Returns pointer to requested space or NULL. - */ -caddr_t -ipfw_get_sopt_space(struct sockopt_data *sd, size_t needed) -{ - int error; - caddr_t addr; - - if (sd->kavail < needed) { - /* - * Flush data and try another time. - */ - error = ipfw_flush_sopt_data(sd); - - if (sd->kavail < needed || error != 0) - return (NULL); - } - - addr = sd->kbuf + sd->koff; - sd->koff += needed; - sd->kavail -= needed; - return (addr); -} - -/* - * Requests @needed contiguous bytes from @sd buffer. - * Function is used to notify subsystem that we are - * interesed in first @needed bytes (request header) - * and the rest buffer can be safely zeroed. - * - * Returns pointer to requested space or NULL. - */ -caddr_t -ipfw_get_sopt_header(struct sockopt_data *sd, size_t needed) -{ - caddr_t addr; - - if ((addr = ipfw_get_sopt_space(sd, needed)) == NULL) - return (NULL); - - if (sd->kavail > 0) - memset(sd->kbuf + sd->koff, 0, sd->kavail); - - return (addr); -} - -/* - * New sockopt handler. 
- */ -int -ipfw_ctl3(struct sockopt *sopt) -{ - int error, locked; - size_t size, valsize; - struct ip_fw_chain *chain; - char xbuf[256]; - struct sockopt_data sdata; - struct ipfw_sopt_handler h; - ip_fw3_opheader *op3 = NULL; - - error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); - if (error != 0) - return (error); - - if (sopt->sopt_name != IP_FW3) - return (ipfw_ctl(sopt)); - - chain = &V_layer3_chain; - error = 0; - - /* Save original valsize before it is altered via sooptcopyin() */ - valsize = sopt->sopt_valsize; - memset(&sdata, 0, sizeof(sdata)); - /* Read op3 header first to determine actual operation */ - op3 = (ip_fw3_opheader *)xbuf; - error = sooptcopyin(sopt, op3, sizeof(*op3), sizeof(*op3)); - if (error != 0) - return (error); - sopt->sopt_valsize = valsize; - - /* - * Find and reference command. - */ - error = find_ref_sh(op3->opcode, op3->version, &h); - if (error != 0) - return (error); - - /* - * Disallow modifications in really-really secure mode, but still allow - * the logging counters to be reset. - */ - if ((h.dir & HDIR_SET) != 0 && h.opcode != IP_FW_XRESETLOG) { - error = securelevel_ge(sopt->sopt_td->td_ucred, 3); - if (error != 0) { - find_unref_sh(&h); - return (error); - } - } - - /* - * Fill in sockopt_data structure that may be useful for - * IP_FW3 get requests. - */ - locked = 0; - if (valsize <= sizeof(xbuf)) { - /* use on-stack buffer */ - sdata.kbuf = xbuf; - sdata.ksize = sizeof(xbuf); - sdata.kavail = valsize; - } else { - - /* - * Determine opcode type/buffer size: - * allocate sliding-window buf for data export or - * contiguous buffer for special ops. - */ - if ((h.dir & HDIR_SET) != 0) { - /* Set request. Allocate contigous buffer. */ - if (valsize > CTL3_LARGEBUF) { - find_unref_sh(&h); - return (EFBIG); - } - - size = valsize; - } else { - /* Get request. Allocate sliding window buffer */ - size = (valsize<CTL3_SMALLBUF) ? 
valsize:CTL3_SMALLBUF; - - if (size < valsize) { - /* We have to wire user buffer */ - error = vslock(sopt->sopt_val, valsize); - if (error != 0) - return (error); - locked = 1; - } - } - - sdata.kbuf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); - sdata.ksize = size; - sdata.kavail = size; - } - - sdata.sopt = sopt; - sdata.sopt_val = sopt->sopt_val; - sdata.valsize = valsize; - - /* - * Copy either all request (if valsize < bsize_max) - * or first bsize_max bytes to guarantee most consumers - * that all necessary data has been copied). - * Anyway, copy not less than sizeof(ip_fw3_opheader). - */ - if ((error = sooptcopyin(sopt, sdata.kbuf, sdata.ksize, - sizeof(ip_fw3_opheader))) != 0) - return (error); - op3 = (ip_fw3_opheader *)sdata.kbuf; - - /* Finally, run handler */ - error = h.handler(chain, op3, &sdata); - find_unref_sh(&h); - - /* Flush state and free buffers */ - if (error == 0) - error = ipfw_flush_sopt_data(&sdata); - else - ipfw_flush_sopt_data(&sdata); - - if (locked != 0) - vsunlock(sdata.sopt_val, valsize); - - /* Restore original pointer and set number of bytes written */ - sopt->sopt_val = sdata.sopt_val; - sopt->sopt_valsize = sdata.ktotal; - if (sdata.kbuf != xbuf) - free(sdata.kbuf, M_TEMP); - - return (error); -} - -/** - * {set|get}sockopt parser. - */ -int -ipfw_ctl(struct sockopt *sopt) -{ -#define RULE_MAXSIZE (512*sizeof(u_int32_t)) - int error; - size_t size, valsize; - struct ip_fw *buf; - struct ip_fw_rule0 *rule; - struct ip_fw_chain *chain; - u_int32_t rulenum[2]; - uint32_t opt; - struct rule_check_info ci; - IPFW_RLOCK_TRACKER; - - chain = &V_layer3_chain; - error = 0; - - /* Save original valsize before it is altered via sooptcopyin() */ - valsize = sopt->sopt_valsize; - opt = sopt->sopt_name; - - /* - * Disallow modifications in really-really secure mode, but still allow - * the logging counters to be reset. 
- */ - if (opt == IP_FW_ADD || - (sopt->sopt_dir == SOPT_SET && opt != IP_FW_RESETLOG)) { - error = securelevel_ge(sopt->sopt_td->td_ucred, 3); - if (error != 0) - return (error); - } - - switch (opt) { - case IP_FW_GET: - /* - * pass up a copy of the current rules. Static rules - * come first (the last of which has number IPFW_DEFAULT_RULE), - * followed by a possibly empty list of dynamic rule. - * The last dynamic rule has NULL in the "next" field. - * - * Note that the calculated size is used to bound the - * amount of data returned to the user. The rule set may - * change between calculating the size and returning the - * data in which case we'll just return what fits. - */ - for (;;) { - int len = 0, want; - - size = chain->static_len; - size += ipfw_dyn_len(); - if (size >= sopt->sopt_valsize) - break; - buf = malloc(size, M_TEMP, M_WAITOK | M_ZERO); - IPFW_UH_RLOCK(chain); - /* check again how much space we need */ - want = chain->static_len + ipfw_dyn_len(); - if (size >= want) - len = ipfw_getrules(chain, buf, size); - IPFW_UH_RUNLOCK(chain); - if (size >= want) - error = sooptcopyout(sopt, buf, len); - free(buf, M_TEMP); - if (size >= want) - break; - } - break; - - case IP_FW_FLUSH: - /* locking is done within del_entry() */ - error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */ - break; - - case IP_FW_ADD: - rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, rule, RULE_MAXSIZE, - sizeof(struct ip_fw7) ); - - memset(&ci, 0, sizeof(struct rule_check_info)); - - /* - * If the size of commands equals RULESIZE7 then we assume - * a FreeBSD7.2 binary is talking to us (set is7=1). - * is7 is persistent so the next 'ipfw list' command - * will use this format. - * NOTE: If wrong version is guessed (this can happen if - * the first ipfw command is 'ipfw [pipe] list') - * the ipfw binary may crash or loop infinitly... 
- */ - size = sopt->sopt_valsize; - if (size == RULESIZE7(rule)) { - is7 = 1; - error = convert_rule_to_8(rule); - if (error) { - free(rule, M_TEMP); - return error; - } - size = RULESIZE(rule); - } else - is7 = 0; - if (error == 0) - error = check_ipfw_rule0(rule, size, &ci); - if (error == 0) { - /* locking is done within add_rule() */ - struct ip_fw *krule; - krule = ipfw_alloc_rule(chain, RULEKSIZE0(rule)); - ci.urule = (caddr_t)rule; - ci.krule = krule; - import_rule0(&ci); - error = commit_rules(chain, &ci, 1); - if (error != 0) - free_rule(ci.krule); - else if (sopt->sopt_dir == SOPT_GET) { - if (is7) { - error = convert_rule_to_7(rule); - size = RULESIZE7(rule); - if (error) { - free(rule, M_TEMP); - return error; - } - } - error = sooptcopyout(sopt, rule, size); - } - } - free(rule, M_TEMP); - break; - - case IP_FW_DEL: - /* - * IP_FW_DEL is used for deleting single rules or sets, - * and (ab)used to atomically manipulate sets. Argument size - * is used to distinguish between the two: - * sizeof(u_int32_t) - * delete single rule or set of rules, - * or reassign rules (or sets) to a different set. - * 2*sizeof(u_int32_t) - * atomic disable/enable sets. - * first u_int32_t contains sets to be disabled, - * second u_int32_t contains sets to be enabled. 
- */ - error = sooptcopyin(sopt, rulenum, - 2*sizeof(u_int32_t), sizeof(u_int32_t)); - if (error) - break; - size = sopt->sopt_valsize; - if (size == sizeof(u_int32_t) && rulenum[0] != 0) { - /* delete or reassign, locking done in del_entry() */ - error = del_entry(chain, rulenum[0]); - } else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */ - IPFW_UH_WLOCK(chain); - V_set_disable = - (V_set_disable | rulenum[0]) & ~rulenum[1] & - ~(1<<RESVD_SET); /* set RESVD_SET always enabled */ - IPFW_UH_WUNLOCK(chain); - } else - error = EINVAL; - break; - - case IP_FW_ZERO: - case IP_FW_RESETLOG: /* argument is an u_int_32, the rule number */ - rulenum[0] = 0; - if (sopt->sopt_val != 0) { - error = sooptcopyin(sopt, rulenum, - sizeof(u_int32_t), sizeof(u_int32_t)); - if (error) - break; - } - error = zero_entry(chain, rulenum[0], - sopt->sopt_name == IP_FW_RESETLOG); - break; - - /*--- TABLE opcodes ---*/ - case IP_FW_TABLE_ADD: - case IP_FW_TABLE_DEL: - { - ipfw_table_entry ent; - struct tentry_info tei; - struct tid_info ti; - struct table_value v; - - error = sooptcopyin(sopt, &ent, - sizeof(ent), sizeof(ent)); - if (error) - break; - - memset(&tei, 0, sizeof(tei)); - tei.paddr = &ent.addr; - tei.subtype = AF_INET; - tei.masklen = ent.masklen; - ipfw_import_table_value_legacy(ent.value, &v); - tei.pvalue = &v; - memset(&ti, 0, sizeof(ti)); - ti.uidx = ent.tbl; - ti.type = IPFW_TABLE_CIDR; - - error = (opt == IP_FW_TABLE_ADD) ? 
- add_table_entry(chain, &ti, &tei, 0, 1) : - del_table_entry(chain, &ti, &tei, 0, 1); - } - break; - - - case IP_FW_TABLE_FLUSH: - { - u_int16_t tbl; - struct tid_info ti; - - error = sooptcopyin(sopt, &tbl, - sizeof(tbl), sizeof(tbl)); - if (error) - break; - memset(&ti, 0, sizeof(ti)); - ti.uidx = tbl; - error = flush_table(chain, &ti); - } - break; - - case IP_FW_TABLE_GETSIZE: - { - u_int32_t tbl, cnt; - struct tid_info ti; - - if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), - sizeof(tbl)))) - break; - memset(&ti, 0, sizeof(ti)); - ti.uidx = tbl; - IPFW_RLOCK(chain); - error = ipfw_count_table(chain, &ti, &cnt); - IPFW_RUNLOCK(chain); - if (error) - break; - error = sooptcopyout(sopt, &cnt, sizeof(cnt)); - } - break; - - case IP_FW_TABLE_LIST: - { - ipfw_table *tbl; - struct tid_info ti; - - if (sopt->sopt_valsize < sizeof(*tbl)) { - error = EINVAL; - break; - } - size = sopt->sopt_valsize; - tbl = malloc(size, M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, tbl, size, sizeof(*tbl)); - if (error) { - free(tbl, M_TEMP); - break; - } - tbl->size = (size - sizeof(*tbl)) / - sizeof(ipfw_table_entry); - memset(&ti, 0, sizeof(ti)); - ti.uidx = tbl->tbl; - IPFW_RLOCK(chain); - error = ipfw_dump_table_legacy(chain, &ti, tbl); - IPFW_RUNLOCK(chain); - if (error) { - free(tbl, M_TEMP); - break; - } - error = sooptcopyout(sopt, tbl, size); - free(tbl, M_TEMP); - } - break; - - /*--- NAT operations are protected by the IPFW_LOCK ---*/ - case IP_FW_NAT_CFG: - if (IPFW_NAT_LOADED) - error = ipfw_nat_cfg_ptr(sopt); - else { - printf("IP_FW_NAT_CFG: %s\n", - "ipfw_nat not present, please load it"); - error = EINVAL; - } - break; - - case IP_FW_NAT_DEL: - if (IPFW_NAT_LOADED) - error = ipfw_nat_del_ptr(sopt); - else { - printf("IP_FW_NAT_DEL: %s\n", - "ipfw_nat not present, please load it"); - error = EINVAL; - } - break; - - case IP_FW_NAT_GET_CONFIG: - if (IPFW_NAT_LOADED) - error = ipfw_nat_get_cfg_ptr(sopt); - else { - printf("IP_FW_NAT_GET_CFG: %s\n", - "ipfw_nat not 
present, please load it"); - error = EINVAL; - } - break; - - case IP_FW_NAT_GET_LOG: - if (IPFW_NAT_LOADED) - error = ipfw_nat_get_log_ptr(sopt); - else { - printf("IP_FW_NAT_GET_LOG: %s\n", - "ipfw_nat not present, please load it"); - error = EINVAL; - } - break; - - default: - printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name); - error = EINVAL; - } - - return (error); -#undef RULE_MAXSIZE -} -#define RULE_MAXSIZE (256*sizeof(u_int32_t)) - -/* Functions to convert rules 7.2 <==> 8.0 */ -static int -convert_rule_to_7(struct ip_fw_rule0 *rule) -{ - /* Used to modify original rule */ - struct ip_fw7 *rule7 = (struct ip_fw7 *)rule; - /* copy of original rule, version 8 */ - struct ip_fw_rule0 *tmp; - - /* Used to copy commands */ - ipfw_insn *ccmd, *dst; - int ll = 0, ccmdlen = 0; - - tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO); - if (tmp == NULL) { - return 1; //XXX error - } - bcopy(rule, tmp, RULE_MAXSIZE); - - /* Copy fields */ - //rule7->_pad = tmp->_pad; - rule7->set = tmp->set; - rule7->rulenum = tmp->rulenum; - rule7->cmd_len = tmp->cmd_len; - rule7->act_ofs = tmp->act_ofs; - rule7->next_rule = (struct ip_fw7 *)tmp->next_rule; - rule7->cmd_len = tmp->cmd_len; - rule7->pcnt = tmp->pcnt; - rule7->bcnt = tmp->bcnt; - rule7->timestamp = tmp->timestamp; - - /* Copy commands */ - for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule7->cmd ; - ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) { - ccmdlen = F_LEN(ccmd); - - bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t)); - - if (dst->opcode > O_NAT) - /* O_REASS doesn't exists in 7.2 version, so - * decrement opcode if it is after O_REASS - */ - dst->opcode--; - - if (ccmdlen > ll) { - printf("ipfw: opcode %d size truncated\n", - ccmd->opcode); - return EINVAL; - } - } - free(tmp, M_TEMP); - - return 0; -} - -static int -convert_rule_to_8(struct ip_fw_rule0 *rule) -{ - /* Used to modify original rule */ - struct ip_fw7 *rule7 = (struct ip_fw7 *) rule; - - /* Used to copy commands */ - 
ipfw_insn *ccmd, *dst; - int ll = 0, ccmdlen = 0; - - /* Copy of original rule */ - struct ip_fw7 *tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO); - if (tmp == NULL) { - return 1; //XXX error - } - - bcopy(rule7, tmp, RULE_MAXSIZE); - - for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule->cmd ; - ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) { - ccmdlen = F_LEN(ccmd); - - bcopy(ccmd, dst, F_LEN(ccmd)*sizeof(uint32_t)); - - if (dst->opcode > O_NAT) - /* O_REASS doesn't exists in 7.2 version, so - * increment opcode if it is after O_REASS - */ - dst->opcode++; - - if (ccmdlen > ll) { - printf("ipfw: opcode %d size truncated\n", - ccmd->opcode); - return EINVAL; - } - } - - rule->_pad = tmp->_pad; - rule->set = tmp->set; - rule->rulenum = tmp->rulenum; - rule->cmd_len = tmp->cmd_len; - rule->act_ofs = tmp->act_ofs; - rule->next_rule = (struct ip_fw *)tmp->next_rule; - rule->cmd_len = tmp->cmd_len; - rule->id = 0; /* XXX see if is ok = 0 */ - rule->pcnt = tmp->pcnt; - rule->bcnt = tmp->bcnt; - rule->timestamp = tmp->timestamp; - - free (tmp, M_TEMP); - return 0; -} - -/* - * Named object api - * - */ - -void -ipfw_init_srv(struct ip_fw_chain *ch) -{ - - ch->srvmap = ipfw_objhash_create(IPFW_OBJECTS_DEFAULT); - ch->srvstate = malloc(sizeof(void *) * IPFW_OBJECTS_DEFAULT, - M_IPFW, M_WAITOK | M_ZERO); -} - -void -ipfw_destroy_srv(struct ip_fw_chain *ch) -{ - - free(ch->srvstate, M_IPFW); - ipfw_objhash_destroy(ch->srvmap); -} - -/* - * Allocate new bitmask which can be used to enlarge/shrink - * named instance index. 
- */ -void -ipfw_objhash_bitmap_alloc(uint32_t items, void **idx, int *pblocks) -{ - size_t size; - int max_blocks; - u_long *idx_mask; - - KASSERT((items % BLOCK_ITEMS) == 0, - ("bitmask size needs to power of 2 and greater or equal to %zu", - BLOCK_ITEMS)); - - max_blocks = items / BLOCK_ITEMS; - size = items / 8; - idx_mask = malloc(size * IPFW_MAX_SETS, M_IPFW, M_WAITOK); - /* Mark all as free */ - memset(idx_mask, 0xFF, size * IPFW_MAX_SETS); - *idx_mask &= ~(u_long)1; /* Skip index 0 */ - - *idx = idx_mask; - *pblocks = max_blocks; -} - -/* - * Copy current bitmask index to new one. - */ -void -ipfw_objhash_bitmap_merge(struct namedobj_instance *ni, void **idx, int *blocks) -{ - int old_blocks, new_blocks; - u_long *old_idx, *new_idx; - int i; - - old_idx = ni->idx_mask; - old_blocks = ni->max_blocks; - new_idx = *idx; - new_blocks = *blocks; - - for (i = 0; i < IPFW_MAX_SETS; i++) { - memcpy(&new_idx[new_blocks * i], &old_idx[old_blocks * i], - old_blocks * sizeof(u_long)); - } -} - -/* - * Swaps current @ni index with new one. - */ -void -ipfw_objhash_bitmap_swap(struct namedobj_instance *ni, void **idx, int *blocks) -{ - int old_blocks; - u_long *old_idx; - - old_idx = ni->idx_mask; - old_blocks = ni->max_blocks; - - ni->idx_mask = *idx; - ni->max_blocks = *blocks; - - /* Save old values */ - *idx = old_idx; - *blocks = old_blocks; -} - -void -ipfw_objhash_bitmap_free(void *idx, int blocks) -{ - - free(idx, M_IPFW); -} - -/* - * Creates named hash instance. - * Must be called without holding any locks. - * Return pointer to new instance. 
- */ -struct namedobj_instance * -ipfw_objhash_create(uint32_t items) -{ - struct namedobj_instance *ni; - int i; - size_t size; - - size = sizeof(struct namedobj_instance) + - sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE + - sizeof(struct namedobjects_head) * NAMEDOBJ_HASH_SIZE; - - ni = malloc(size, M_IPFW, M_WAITOK | M_ZERO); - ni->nn_size = NAMEDOBJ_HASH_SIZE; - ni->nv_size = NAMEDOBJ_HASH_SIZE; - - ni->names = (struct namedobjects_head *)(ni +1); - ni->values = &ni->names[ni->nn_size]; - - for (i = 0; i < ni->nn_size; i++) - TAILQ_INIT(&ni->names[i]); - - for (i = 0; i < ni->nv_size; i++) - TAILQ_INIT(&ni->values[i]); - - /* Set default hashing/comparison functions */ - ni->hash_f = objhash_hash_name; - ni->cmp_f = objhash_cmp_name; - - /* Allocate bitmask separately due to possible resize */ - ipfw_objhash_bitmap_alloc(items, (void*)&ni->idx_mask, &ni->max_blocks); - - return (ni); -} - -void -ipfw_objhash_destroy(struct namedobj_instance *ni) -{ - - free(ni->idx_mask, M_IPFW); - free(ni, M_IPFW); -} - -void -ipfw_objhash_set_funcs(struct namedobj_instance *ni, objhash_hash_f *hash_f, - objhash_cmp_f *cmp_f) -{ - - ni->hash_f = hash_f; - ni->cmp_f = cmp_f; -} - -static uint32_t -objhash_hash_name(struct namedobj_instance *ni, const void *name, uint32_t set) -{ - - return (fnv_32_str((const char *)name, FNV1_32_INIT)); -} - -static int -objhash_cmp_name(struct named_object *no, const void *name, uint32_t set) -{ - - if ((strcmp(no->name, (const char *)name) == 0) && (no->set == set)) - return (0); - - return (1); -} - -static uint32_t -objhash_hash_idx(struct namedobj_instance *ni, uint32_t val) -{ - uint32_t v; - - v = val % (ni->nv_size - 1); - - return (v); -} - -struct named_object * -ipfw_objhash_lookup_name(struct namedobj_instance *ni, uint32_t set, char *name) -{ - struct named_object *no; - uint32_t hash; - - hash = ni->hash_f(ni, name, set) % ni->nn_size; - - TAILQ_FOREACH(no, &ni->names[hash], nn_next) { - if (ni->cmp_f(no, name, set) == 0) 
- return (no); - } - - return (NULL); -} - -/* - * Find named object by @uid. - * Check @tlvs for valid data inside. - * - * Returns pointer to found TLV or NULL. - */ -ipfw_obj_ntlv * -ipfw_find_name_tlv_type(void *tlvs, int len, uint16_t uidx, uint32_t etlv) -{ - ipfw_obj_ntlv *ntlv; - uintptr_t pa, pe; - int l; - - pa = (uintptr_t)tlvs; - pe = pa + len; - l = 0; - for (; pa < pe; pa += l) { - ntlv = (ipfw_obj_ntlv *)pa; - l = ntlv->head.length; - - if (l != sizeof(*ntlv)) - return (NULL); - - if (ntlv->idx != uidx) - continue; - /* - * When userland has specified zero TLV type, do - * not compare it with eltv. In some cases userland - * doesn't know what type should it have. Use only - * uidx and name for search named_object. - */ - if (ntlv->head.type != 0 && - ntlv->head.type != (uint16_t)etlv) - continue; - - if (ipfw_check_object_name_generic(ntlv->name) != 0) - return (NULL); - - return (ntlv); - } - - return (NULL); -} - -/* - * Finds object config based on either legacy index - * or name in ntlv. - * Note @ti structure contains unchecked data from userland. - * - * Returns 0 in success and fills in @pno with found config - */ -int -ipfw_objhash_find_type(struct namedobj_instance *ni, struct tid_info *ti, - uint32_t etlv, struct named_object **pno) -{ - char *name; - ipfw_obj_ntlv *ntlv; - uint32_t set; - - if (ti->tlvs == NULL) - return (EINVAL); - - ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, etlv); - if (ntlv == NULL) - return (EINVAL); - name = ntlv->name; - - /* - * Use set provided by @ti instead of @ntlv one. - * This is needed due to different sets behavior - * controlled by V_fw_tables_sets. - */ - set = ti->set; - *pno = ipfw_objhash_lookup_name(ni, set, name); - if (*pno == NULL) - return (ESRCH); - return (0); -} - -/* - * Find named object by name, considering also its TLV type. 
- */ -struct named_object * -ipfw_objhash_lookup_name_type(struct namedobj_instance *ni, uint32_t set, - uint32_t type, const char *name) -{ - struct named_object *no; - uint32_t hash; - - hash = ni->hash_f(ni, name, set) % ni->nn_size; - - TAILQ_FOREACH(no, &ni->names[hash], nn_next) { - if (ni->cmp_f(no, name, set) == 0 && - no->etlv == (uint16_t)type) - return (no); - } - - return (NULL); -} - -struct named_object * -ipfw_objhash_lookup_kidx(struct namedobj_instance *ni, uint16_t kidx) -{ - struct named_object *no; - uint32_t hash; - - hash = objhash_hash_idx(ni, kidx); - - TAILQ_FOREACH(no, &ni->values[hash], nv_next) { - if (no->kidx == kidx) - return (no); - } - - return (NULL); -} - -int -ipfw_objhash_same_name(struct namedobj_instance *ni, struct named_object *a, - struct named_object *b) -{ - - if ((strcmp(a->name, b->name) == 0) && a->set == b->set) - return (1); - - return (0); -} - -void -ipfw_objhash_add(struct namedobj_instance *ni, struct named_object *no) -{ - uint32_t hash; - - hash = ni->hash_f(ni, no->name, no->set) % ni->nn_size; - TAILQ_INSERT_HEAD(&ni->names[hash], no, nn_next); - - hash = objhash_hash_idx(ni, no->kidx); - TAILQ_INSERT_HEAD(&ni->values[hash], no, nv_next); - - ni->count++; -} - -void -ipfw_objhash_del(struct namedobj_instance *ni, struct named_object *no) -{ - uint32_t hash; - - hash = ni->hash_f(ni, no->name, no->set) % ni->nn_size; - TAILQ_REMOVE(&ni->names[hash], no, nn_next); - - hash = objhash_hash_idx(ni, no->kidx); - TAILQ_REMOVE(&ni->values[hash], no, nv_next); - - ni->count--; -} - -uint32_t -ipfw_objhash_count(struct namedobj_instance *ni) -{ - - return (ni->count); -} - -uint32_t -ipfw_objhash_count_type(struct namedobj_instance *ni, uint16_t type) -{ - struct named_object *no; - uint32_t count; - int i; - - count = 0; - for (i = 0; i < ni->nn_size; i++) { - TAILQ_FOREACH(no, &ni->names[i], nn_next) { - if (no->etlv == type) - count++; - } - } - return (count); -} - -/* - * Runs @func for each found named object. 
- * It is safe to delete objects from callback - */ -int -ipfw_objhash_foreach(struct namedobj_instance *ni, objhash_cb_t *f, void *arg) -{ - struct named_object *no, *no_tmp; - int i, ret; - - for (i = 0; i < ni->nn_size; i++) { - TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) { - ret = f(ni, no, arg); - if (ret != 0) - return (ret); - } - } - return (0); -} - -/* - * Runs @f for each found named object with type @type. - * It is safe to delete objects from callback - */ -int -ipfw_objhash_foreach_type(struct namedobj_instance *ni, objhash_cb_t *f, - void *arg, uint16_t type) -{ - struct named_object *no, *no_tmp; - int i, ret; - - for (i = 0; i < ni->nn_size; i++) { - TAILQ_FOREACH_SAFE(no, &ni->names[i], nn_next, no_tmp) { - if (no->etlv != type) - continue; - ret = f(ni, no, arg); - if (ret != 0) - return (ret); - } - } - return (0); -} - -/* - * Removes index from given set. - * Returns 0 on success. - */ -int -ipfw_objhash_free_idx(struct namedobj_instance *ni, uint16_t idx) -{ - u_long *mask; - int i, v; - - i = idx / BLOCK_ITEMS; - v = idx % BLOCK_ITEMS; - - if (i >= ni->max_blocks) - return (1); - - mask = &ni->idx_mask[i]; - - if ((*mask & ((u_long)1 << v)) != 0) - return (1); - - /* Mark as free */ - *mask |= (u_long)1 << v; - - /* Update free offset */ - if (ni->free_off[0] > i) - ni->free_off[0] = i; - - return (0); -} - -/* - * Allocate new index in given instance and stores in in @pidx. - * Returns 0 on success. 
- */ -int -ipfw_objhash_alloc_idx(void *n, uint16_t *pidx) -{ - struct namedobj_instance *ni; - u_long *mask; - int i, off, v; - - ni = (struct namedobj_instance *)n; - - off = ni->free_off[0]; - mask = &ni->idx_mask[off]; - - for (i = off; i < ni->max_blocks; i++, mask++) { - if ((v = ffsl(*mask)) == 0) - continue; - - /* Mark as busy */ - *mask &= ~ ((u_long)1 << (v - 1)); - - ni->free_off[0] = i; - - v = BLOCK_ITEMS * i + v - 1; - - *pidx = v; - return (0); - } - - return (1); -} - -/* end of file */ diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table.c b/freebsd/sys/netpfil/ipfw/ip_fw_table.c deleted file mode 100644 index 828d437a..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_table.c +++ /dev/null @@ -1,3365 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko. - * Copyright (c) 2014 Yandex LLC - * Copyright (c) 2014 Alexander V. Chernikov - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * Lookup table support for ipfw. - * - * This file contains handlers for all generic tables' operations: - * add/del/flush entries, list/dump tables etc.. - * - * Table data modification is protected by both UH and runtime lock - * while reading configuration/data is protected by UH lock. - * - * Lookup algorithms for all table types are located in ip_fw_table_algo.c - */ - -#include <rtems/bsd/local/opt_ipfw.h> - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/rwlock.h> -#include <sys/rmlock.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/queue.h> -#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ -#include <net/pfil.h> - -#include <netinet/in.h> -#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ -#include <netinet/ip_fw.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/ip_fw_table.h> - - /* - * Table has the following `type` concepts: - * - * `no.type` represents lookup key type (addr, ifp, uid, etc..) - * vmask represents bitmask of table values which are present at the moment. - * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old - * single-value-for-all approach. 
- */ -struct table_config { - struct named_object no; - uint8_t tflags; /* type flags */ - uint8_t locked; /* 1 if locked from changes */ - uint8_t linked; /* 1 if already linked */ - uint8_t ochanged; /* used by set swapping */ - uint8_t vshared; /* 1 if using shared value array */ - uint8_t spare[3]; - uint32_t count; /* Number of records */ - uint32_t limit; /* Max number of records */ - uint32_t vmask; /* bitmask with supported values */ - uint32_t ocount; /* used by set swapping */ - uint64_t gencnt; /* generation count */ - char tablename[64]; /* table name */ - struct table_algo *ta; /* Callbacks for given algo */ - void *astate; /* algorithm state */ - struct table_info ti_copy; /* data to put to table_info */ - struct namedobj_instance *vi; -}; - -static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti, - struct table_config **tc); -static struct table_config *find_table(struct namedobj_instance *ni, - struct tid_info *ti); -static struct table_config *alloc_table_config(struct ip_fw_chain *ch, - struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags); -static void free_table_config(struct namedobj_instance *ni, - struct table_config *tc); -static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, - char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref); -static void link_table(struct ip_fw_chain *ch, struct table_config *tc); -static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc); -static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc); -#define OP_ADD 1 -#define OP_DEL 0 -static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, - struct sockopt_data *sd); -static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc, - ipfw_xtable_info *i); -static int dump_table_tentry(void *e, void *arg); -static int dump_table_xentry(void *e, void *arg); - -static 
int swap_tables(struct ip_fw_chain *ch, struct tid_info *a, - struct tid_info *b); - -static int check_table_name(const char *name); -static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, - struct table_config *tc, struct table_info *ti, uint32_t count); -static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti); - -static struct table_algo *find_table_algo(struct tables_config *tableconf, - struct tid_info *ti, char *name); - -static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti); -static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti); - -#define CHAIN_TO_NI(chain) (CHAIN_TO_TCFG(chain)->namehash) -#define KIDX_TO_TI(ch, k) (&(((struct table_info *)(ch)->tablestate)[k])) - -#define TA_BUF_SZ 128 /* On-stack buffer for add/delete state */ - -void -rollback_toperation_state(struct ip_fw_chain *ch, void *object) -{ - struct tables_config *tcfg; - struct op_state *os; - - tcfg = CHAIN_TO_TCFG(ch); - TAILQ_FOREACH(os, &tcfg->state_list, next) - os->func(object, os); -} - -void -add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) -{ - struct tables_config *tcfg; - - tcfg = CHAIN_TO_TCFG(ch); - TAILQ_INSERT_HEAD(&tcfg->state_list, &ts->opstate, next); -} - -void -del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts) -{ - struct tables_config *tcfg; - - tcfg = CHAIN_TO_TCFG(ch); - TAILQ_REMOVE(&tcfg->state_list, &ts->opstate, next); -} - -void -tc_ref(struct table_config *tc) -{ - - tc->no.refcnt++; -} - -void -tc_unref(struct table_config *tc) -{ - - tc->no.refcnt--; -} - -static struct table_value * -get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx) -{ - struct table_value *pval; - - pval = (struct table_value *)ch->valuestate; - - return (&pval[kidx]); -} - - -/* - * Checks if we're able to insert/update entry @tei into table - * w.r.t @tc limits. - * May alter @tei to indicate insertion error / insert - * options. 
- * - * Returns 0 if operation can be performed/ - */ -static int -check_table_limit(struct table_config *tc, struct tentry_info *tei) -{ - - if (tc->limit == 0 || tc->count < tc->limit) - return (0); - - if ((tei->flags & TEI_FLAGS_UPDATE) == 0) { - /* Notify userland on error cause */ - tei->flags |= TEI_FLAGS_LIMIT; - return (EFBIG); - } - - /* - * We have UPDATE flag set. - * Permit updating record (if found), - * but restrict adding new one since we've - * already hit the limit. - */ - tei->flags |= TEI_FLAGS_DONTADD; - - return (0); -} - -/* - * Convert algorithm callback return code into - * one of pre-defined states known by userland. - */ -static void -store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num) -{ - int flag; - - flag = 0; - - switch (error) { - case 0: - if (op == OP_ADD && num != 0) - flag = TEI_FLAGS_ADDED; - if (op == OP_DEL) - flag = TEI_FLAGS_DELETED; - break; - case ENOENT: - flag = TEI_FLAGS_NOTFOUND; - break; - case EEXIST: - flag = TEI_FLAGS_EXISTS; - break; - default: - flag = TEI_FLAGS_ERROR; - } - - tei->flags |= flag; -} - -/* - * Creates and references table with default parameters. - * Saves table config, algo and allocated kidx info @ptc, @pta and - * @pkidx if non-zero. - * Used for table auto-creation to support old binaries. - * - * Returns 0 on success. - */ -static int -create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti, - uint16_t *pkidx) -{ - ipfw_xtable_info xi; - int error; - - memset(&xi, 0, sizeof(xi)); - /* Set default value mask for legacy clients */ - xi.vmask = IPFW_VTYPE_LEGACY; - - error = create_table_internal(ch, ti, NULL, &xi, pkidx, 1); - if (error != 0) - return (error); - - return (0); -} - -/* - * Find and reference existing table optionally - * creating new one. - * - * Saves found table config into @ptc. - * Note function may drop/acquire UH_WLOCK. - * Returns 0 if table was found/created and referenced - * or non-zero return code. 
- */ -static int -find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei, uint32_t count, int op, - struct table_config **ptc) -{ - struct namedobj_instance *ni; - struct table_config *tc; - uint16_t kidx; - int error; - - IPFW_UH_WLOCK_ASSERT(ch); - - ni = CHAIN_TO_NI(ch); - tc = NULL; - if ((tc = find_table(ni, ti)) != NULL) { - /* check table type */ - if (tc->no.subtype != ti->type) - return (EINVAL); - - if (tc->locked != 0) - return (EACCES); - - /* Try to exit early on limit hit */ - if (op == OP_ADD && count == 1 && - check_table_limit(tc, tei) != 0) - return (EFBIG); - - /* Reference and return */ - tc->no.refcnt++; - *ptc = tc; - return (0); - } - - if (op == OP_DEL) - return (ESRCH); - - /* Compatibility mode: create new table for old clients */ - if ((tei->flags & TEI_FLAGS_COMPAT) == 0) - return (ESRCH); - - IPFW_UH_WUNLOCK(ch); - error = create_table_compat(ch, ti, &kidx); - IPFW_UH_WLOCK(ch); - - if (error != 0) - return (error); - - tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); - KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx)); - - /* OK, now we've got referenced table. */ - *ptc = tc; - return (0); -} - -/* - * Rolls back already @added to @tc entries using state array @ta_buf_m. - * Assume the following layout: - * 1) ADD state (ta_buf_m[0] ... t_buf_m[added - 1]) for handling update cases - * 2) DEL state (ta_buf_m[count[ ... 
t_buf_m[count + added - 1]) - * for storing deleted state - */ -static void -rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc, - struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m, - uint32_t count, uint32_t added) -{ - struct table_algo *ta; - struct tentry_info *ptei; - caddr_t v, vv; - size_t ta_buf_sz; - int error, i; - uint32_t num; - - IPFW_UH_WLOCK_ASSERT(ch); - - ta = tc->ta; - ta_buf_sz = ta->ta_buf_size; - v = ta_buf_m; - vv = v + count * ta_buf_sz; - for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) { - ptei = &tei[i]; - if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) { - - /* - * We have old value stored by previous - * call in @ptei->value. Do add once again - * to restore it. - */ - error = ta->add(tc->astate, tinfo, ptei, v, &num); - KASSERT(error == 0, ("rollback UPDATE fail")); - KASSERT(num == 0, ("rollback UPDATE fail2")); - continue; - } - - error = ta->prepare_del(ch, ptei, vv); - KASSERT(error == 0, ("pre-rollback INSERT failed")); - error = ta->del(tc->astate, tinfo, ptei, vv, &num); - KASSERT(error == 0, ("rollback INSERT failed")); - tc->count -= num; - } -} - -/* - * Prepares add/del state for all @count entries in @tei. - * Uses either stack buffer (@ta_buf) or allocates a new one. - * Stores pointer to allocated buffer back to @ta_buf. - * - * Returns 0 on success. 
- */ -static int -prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, - struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf) -{ - caddr_t ta_buf_m, v; - size_t ta_buf_sz, sz; - struct tentry_info *ptei; - int error, i; - - error = 0; - ta_buf_sz = ta->ta_buf_size; - if (count == 1) { - /* Single add/delete, use on-stack buffer */ - memset(*ta_buf, 0, TA_BUF_SZ); - ta_buf_m = *ta_buf; - } else { - - /* - * Multiple adds/deletes, allocate larger buffer - * - * Note we need 2xcount buffer for add case: - * we have hold both ADD state - * and DELETE state (this may be needed - * if we need to rollback all changes) - */ - sz = count * ta_buf_sz; - ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP, - M_WAITOK | M_ZERO); - } - - v = ta_buf_m; - for (i = 0; i < count; i++, v += ta_buf_sz) { - ptei = &tei[i]; - error = (op == OP_ADD) ? - ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v); - - /* - * Some syntax error (incorrect mask, or address, or - * anything). Return error regardless of atomicity - * settings. - */ - if (error != 0) - break; - } - - *ta_buf = ta_buf_m; - return (error); -} - -/* - * Flushes allocated state for each @count entries in @tei. - * Frees @ta_buf_m if differs from stack buffer @ta_buf. 
- */ -static void -flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta, - struct tentry_info *tei, uint32_t count, int rollback, - caddr_t ta_buf_m, caddr_t ta_buf) -{ - caddr_t v; - struct tentry_info *ptei; - size_t ta_buf_sz; - int i; - - ta_buf_sz = ta->ta_buf_size; - - /* Run cleaning callback anyway */ - v = ta_buf_m; - for (i = 0; i < count; i++, v += ta_buf_sz) { - ptei = &tei[i]; - ta->flush_entry(ch, ptei, v); - if (ptei->ptv != NULL) { - free(ptei->ptv, M_IPFW); - ptei->ptv = NULL; - } - } - - /* Clean up "deleted" state in case of rollback */ - if (rollback != 0) { - v = ta_buf_m + count * ta_buf_sz; - for (i = 0; i < count; i++, v += ta_buf_sz) - ta->flush_entry(ch, &tei[i], v); - } - - if (ta_buf_m != ta_buf) - free(ta_buf_m, M_TEMP); -} - - -static void -rollback_add_entry(void *object, struct op_state *_state) -{ - struct ip_fw_chain *ch; - struct tableop_state *ts; - - ts = (struct tableop_state *)_state; - - if (ts->tc != object && ts->ch != object) - return; - - ch = ts->ch; - - IPFW_UH_WLOCK_ASSERT(ch); - - /* Call specifid unlockers */ - rollback_table_values(ts); - - /* Indicate we've called */ - ts->modified = 1; -} - -/* - * Adds/updates one or more entries in table @ti. - * - * Function may drop/reacquire UH wlock multiple times due to - * items alloc, algorithm callbacks (check_space), value linkage - * (new values, value storage realloc), etc.. - * Other processes like other adds (which may involve storage resize), - * table swaps (which changes table data and may change algo type), - * table modify (which may change value mask) may be executed - * simultaneously so we need to deal with it. - * - * The following approach was implemented: - * we have per-chain linked list, protected with UH lock. - * add_table_entry prepares special on-stack structure wthich is passed - * to its descendants. Users add this structure to this list before unlock. 
- * After performing needed operations and acquiring UH lock back, each user - * checks if structure has changed. If true, it rolls local state back and - * returns without error to the caller. - * add_table_entry() on its own checks if structure has changed and restarts - * its operation from the beginning (goto restart). - * - * Functions which are modifying fields of interest (currently - * resize_shared_value_storage() and swap_tables() ) - * traverses given list while holding UH lock immediately before - * performing their operations calling function provided be list entry - * ( currently rollback_add_entry ) which performs rollback for all necessary - * state and sets appropriate values in structure indicating rollback - * has happened. - * - * Algo interaction: - * Function references @ti first to ensure table won't - * disappear or change its type. - * After that, prepare_add callback is called for each @tei entry. - * Next, we try to add each entry under UH+WHLOCK - * using add() callback. - * Finally, we free all state by calling flush_entry callback - * for each @tei. - * - * Returns 0 on success. - */ -int -add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei, uint8_t flags, uint32_t count) -{ - struct table_config *tc; - struct table_algo *ta; - uint16_t kidx; - int error, first_error, i, rollback; - uint32_t num, numadd; - struct tentry_info *ptei; - struct tableop_state ts; - char ta_buf[TA_BUF_SZ]; - caddr_t ta_buf_m, v; - - memset(&ts, 0, sizeof(ts)); - ta = NULL; - IPFW_UH_WLOCK(ch); - - /* - * Find and reference existing table. 
- */ -restart: - if (ts.modified != 0) { - IPFW_UH_WUNLOCK(ch); - flush_batch_buffer(ch, ta, tei, count, rollback, - ta_buf_m, ta_buf); - memset(&ts, 0, sizeof(ts)); - ta = NULL; - IPFW_UH_WLOCK(ch); - } - - error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc); - if (error != 0) { - IPFW_UH_WUNLOCK(ch); - return (error); - } - ta = tc->ta; - - /* Fill in tablestate */ - ts.ch = ch; - ts.opstate.func = rollback_add_entry; - ts.tc = tc; - ts.vshared = tc->vshared; - ts.vmask = tc->vmask; - ts.ta = ta; - ts.tei = tei; - ts.count = count; - rollback = 0; - add_toperation_state(ch, &ts); - IPFW_UH_WUNLOCK(ch); - - /* Allocate memory and prepare record(s) */ - /* Pass stack buffer by default */ - ta_buf_m = ta_buf; - error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m); - - IPFW_UH_WLOCK(ch); - del_toperation_state(ch, &ts); - /* Drop reference we've used in first search */ - tc->no.refcnt--; - - /* Check prepare_batch_buffer() error */ - if (error != 0) - goto cleanup; - - /* - * Check if table swap has happened. - * (so table algo might be changed). - * Restart operation to achieve consistent behavior. - */ - if (ts.modified != 0) - goto restart; - - /* - * Link all values values to shared/per-table value array. - * - * May release/reacquire UH_WLOCK. - */ - error = ipfw_link_table_values(ch, &ts); - if (error != 0) - goto cleanup; - if (ts.modified != 0) - goto restart; - - /* - * Ensure we are able to add all entries without additional - * memory allocations. May release/reacquire UH_WLOCK. - */ - kidx = tc->no.kidx; - error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count); - if (error != 0) - goto cleanup; - if (ts.modified != 0) - goto restart; - - /* We've got valid table in @tc. 
Let's try to add data */ - kidx = tc->no.kidx; - ta = tc->ta; - numadd = 0; - first_error = 0; - - IPFW_WLOCK(ch); - - v = ta_buf_m; - for (i = 0; i < count; i++, v += ta->ta_buf_size) { - ptei = &tei[i]; - num = 0; - /* check limit before adding */ - if ((error = check_table_limit(tc, ptei)) == 0) { - error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx), - ptei, v, &num); - /* Set status flag to inform userland */ - store_tei_result(ptei, OP_ADD, error, num); - } - if (error == 0) { - /* Update number of records to ease limit checking */ - tc->count += num; - numadd += num; - continue; - } - - if (first_error == 0) - first_error = error; - - /* - * Some error have happened. Check our atomicity - * settings: continue if atomicity is not required, - * rollback changes otherwise. - */ - if ((flags & IPFW_CTF_ATOMIC) == 0) - continue; - - rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx), - tei, ta_buf_m, count, i); - - rollback = 1; - break; - } - - IPFW_WUNLOCK(ch); - - ipfw_garbage_table_values(ch, tc, tei, count, rollback); - - /* Permit post-add algorithm grow/rehash. */ - if (numadd != 0) - check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); - - /* Return first error to user, if any */ - error = first_error; - -cleanup: - IPFW_UH_WUNLOCK(ch); - - flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf); - - return (error); -} - -/* - * Deletes one or more entries in table @ti. - * - * Returns 0 on success. - */ -int -del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei, uint8_t flags, uint32_t count) -{ - struct table_config *tc; - struct table_algo *ta; - struct tentry_info *ptei; - uint16_t kidx; - int error, first_error, i; - uint32_t num, numdel; - char ta_buf[TA_BUF_SZ]; - caddr_t ta_buf_m, v; - - /* - * Find and reference existing table. 
- */ - IPFW_UH_WLOCK(ch); - error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc); - if (error != 0) { - IPFW_UH_WUNLOCK(ch); - return (error); - } - ta = tc->ta; - IPFW_UH_WUNLOCK(ch); - - /* Allocate memory and prepare record(s) */ - /* Pass stack buffer by default */ - ta_buf_m = ta_buf; - error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m); - if (error != 0) - goto cleanup; - - IPFW_UH_WLOCK(ch); - - /* Drop reference we've used in first search */ - tc->no.refcnt--; - - /* - * Check if table algo is still the same. - * (changed ta may be the result of table swap). - */ - if (ta != tc->ta) { - IPFW_UH_WUNLOCK(ch); - error = EINVAL; - goto cleanup; - } - - kidx = tc->no.kidx; - numdel = 0; - first_error = 0; - - IPFW_WLOCK(ch); - v = ta_buf_m; - for (i = 0; i < count; i++, v += ta->ta_buf_size) { - ptei = &tei[i]; - num = 0; - error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v, - &num); - /* Save state for userland */ - store_tei_result(ptei, OP_DEL, error, num); - if (error != 0 && first_error == 0) - first_error = error; - tc->count -= num; - numdel += num; - } - IPFW_WUNLOCK(ch); - - /* Unlink non-used values */ - ipfw_garbage_table_values(ch, tc, tei, count, 0); - - if (numdel != 0) { - /* Run post-del hook to permit shrinking */ - check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0); - } - - IPFW_UH_WUNLOCK(ch); - - /* Return first error to user, if any */ - error = first_error; - -cleanup: - flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf); - - return (error); -} - -/* - * Ensure that table @tc has enough space to add @count entries without - * need for reallocation. - * - * Callbacks order: - * 0) need_modify() (UH_WLOCK) - checks if @count items can be added w/o resize. - * - * 1) alloc_modify (no locks, M_WAITOK) - alloc new state based on @pflags. 
- * 2) prepare_modifyt (UH_WLOCK) - copy old data into new storage - * 3) modify (UH_WLOCK + WLOCK) - switch pointers - * 4) flush_modify (UH_WLOCK) - free state, if needed - * - * Returns 0 on success. - */ -static int -check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts, - struct table_config *tc, struct table_info *ti, uint32_t count) -{ - struct table_algo *ta; - uint64_t pflags; - char ta_buf[TA_BUF_SZ]; - int error; - - IPFW_UH_WLOCK_ASSERT(ch); - - error = 0; - ta = tc->ta; - if (ta->need_modify == NULL) - return (0); - - /* Acquire reference not to loose @tc between locks/unlocks */ - tc->no.refcnt++; - - /* - * TODO: think about avoiding race between large add/large delete - * operation on algorithm which implements shrinking along with - * growing. - */ - while (true) { - pflags = 0; - if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { - error = 0; - break; - } - - /* We have to shrink/grow table */ - if (ts != NULL) - add_toperation_state(ch, ts); - IPFW_UH_WUNLOCK(ch); - - memset(&ta_buf, 0, sizeof(ta_buf)); - error = ta->prepare_mod(ta_buf, &pflags); - - IPFW_UH_WLOCK(ch); - if (ts != NULL) - del_toperation_state(ch, ts); - - if (error != 0) - break; - - if (ts != NULL && ts->modified != 0) { - - /* - * Swap operation has happened - * so we're currently operating on other - * table data. Stop doing this. - */ - ta->flush_mod(ta_buf); - break; - } - - /* Check if we still need to alter table */ - ti = KIDX_TO_TI(ch, tc->no.kidx); - if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) { - IPFW_UH_WUNLOCK(ch); - - /* - * Other thread has already performed resize. - * Flush our state and return. 
- */ - ta->flush_mod(ta_buf); - break; - } - - error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags); - if (error == 0) { - /* Do actual modification */ - IPFW_WLOCK(ch); - ta->modify(tc->astate, ti, ta_buf, pflags); - IPFW_WUNLOCK(ch); - } - - /* Anyway, flush data and retry */ - ta->flush_mod(ta_buf); - } - - tc->no.refcnt--; - return (error); -} - -/* - * Adds or deletes record in table. - * Data layout (v0): - * Request: [ ip_fw3_opheader ipfw_table_xentry ] - * - * Returns 0 on success - */ -static int -manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_table_xentry *xent; - struct tentry_info tei; - struct tid_info ti; - struct table_value v; - int error, hdrlen, read; - - hdrlen = offsetof(ipfw_table_xentry, k); - - /* Check minimum header size */ - if (sd->valsize < (sizeof(*op3) + hdrlen)) - return (EINVAL); - - read = sizeof(ip_fw3_opheader); - - /* Check if xentry len field is valid */ - xent = (ipfw_table_xentry *)(op3 + 1); - if (xent->len < hdrlen || xent->len + read > sd->valsize) - return (EINVAL); - - memset(&tei, 0, sizeof(tei)); - tei.paddr = &xent->k; - tei.masklen = xent->masklen; - ipfw_import_table_value_legacy(xent->value, &v); - tei.pvalue = &v; - /* Old requests compatibility */ - tei.flags = TEI_FLAGS_COMPAT; - if (xent->type == IPFW_TABLE_ADDR) { - if (xent->len - hdrlen == sizeof(in_addr_t)) - tei.subtype = AF_INET; - else - tei.subtype = AF_INET6; - } - - memset(&ti, 0, sizeof(ti)); - ti.uidx = xent->tbl; - ti.type = xent->type; - - error = (op3->opcode == IP_FW_TABLE_XADD) ? - add_table_entry(ch, &ti, &tei, 0, 1) : - del_table_entry(ch, &ti, &tei, 0, 1); - - return (error); -} - -/* - * Adds or deletes record in table. 
- * Data layout (v1)(current): - * Request: [ ipfw_obj_header - * ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ] - * ] - * - * Returns 0 on success - */ -static int -manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_tentry *tent, *ptent; - ipfw_obj_ctlv *ctlv; - ipfw_obj_header *oh; - struct tentry_info *ptei, tei, *tei_buf; - struct tid_info ti; - int error, i, kidx, read; - - /* Check minimum header size */ - if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv))) - return (EINVAL); - - /* Check if passed data is too long */ - if (sd->valsize != sd->kavail) - return (EINVAL); - - oh = (ipfw_obj_header *)sd->kbuf; - - /* Basic length checks for TLVs */ - if (oh->ntlv.head.length != sizeof(oh->ntlv)) - return (EINVAL); - - read = sizeof(*oh); - - ctlv = (ipfw_obj_ctlv *)(oh + 1); - if (ctlv->head.length + read != sd->valsize) - return (EINVAL); - - read += sizeof(*ctlv); - tent = (ipfw_obj_tentry *)(ctlv + 1); - if (ctlv->count * sizeof(*tent) + read != sd->valsize) - return (EINVAL); - - if (ctlv->count == 0) - return (0); - - /* - * Mark entire buffer as "read". - * This instructs sopt api write it back - * after function return. 
- */ - ipfw_get_sopt_header(sd, sd->valsize); - - /* Perform basic checks for each entry */ - ptent = tent; - kidx = tent->idx; - for (i = 0; i < ctlv->count; i++, ptent++) { - if (ptent->head.length != sizeof(*ptent)) - return (EINVAL); - if (ptent->idx != kidx) - return (ENOTSUP); - } - - /* Convert data into kernel request objects */ - objheader_to_ti(oh, &ti); - ti.type = oh->ntlv.type; - ti.uidx = kidx; - - /* Use on-stack buffer for single add/del */ - if (ctlv->count == 1) { - memset(&tei, 0, sizeof(tei)); - tei_buf = &tei; - } else - tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP, - M_WAITOK | M_ZERO); - - ptei = tei_buf; - ptent = tent; - for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { - ptei->paddr = &ptent->k; - ptei->subtype = ptent->subtype; - ptei->masklen = ptent->masklen; - if (ptent->head.flags & IPFW_TF_UPDATE) - ptei->flags |= TEI_FLAGS_UPDATE; - - ipfw_import_table_value_v1(&ptent->v.value); - ptei->pvalue = (struct table_value *)&ptent->v.value; - } - - error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ? 
- add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) : - del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count); - - /* Translate result back to userland */ - ptei = tei_buf; - ptent = tent; - for (i = 0; i < ctlv->count; i++, ptent++, ptei++) { - if (ptei->flags & TEI_FLAGS_ADDED) - ptent->result = IPFW_TR_ADDED; - else if (ptei->flags & TEI_FLAGS_DELETED) - ptent->result = IPFW_TR_DELETED; - else if (ptei->flags & TEI_FLAGS_UPDATED) - ptent->result = IPFW_TR_UPDATED; - else if (ptei->flags & TEI_FLAGS_LIMIT) - ptent->result = IPFW_TR_LIMIT; - else if (ptei->flags & TEI_FLAGS_ERROR) - ptent->result = IPFW_TR_ERROR; - else if (ptei->flags & TEI_FLAGS_NOTFOUND) - ptent->result = IPFW_TR_NOTFOUND; - else if (ptei->flags & TEI_FLAGS_EXISTS) - ptent->result = IPFW_TR_EXISTS; - ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value); - } - - if (tei_buf != &tei) - free(tei_buf, M_TEMP); - - return (error); -} - -/* - * Looks up an entry in given table. - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_obj_tentry ] - * Reply: [ ipfw_obj_header ipfw_obj_tentry ] - * - * Returns 0 on success - */ -static int -find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_tentry *tent; - ipfw_obj_header *oh; - struct tid_info ti; - struct table_config *tc; - struct table_algo *ta; - struct table_info *kti; - struct table_value *pval; - struct namedobj_instance *ni; - int error; - size_t sz; - - /* Check minimum header size */ - sz = sizeof(*oh) + sizeof(*tent); - if (sd->valsize != sz) - return (EINVAL); - - oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); - tent = (ipfw_obj_tentry *)(oh + 1); - - /* Basic length checks for TLVs */ - if (oh->ntlv.head.length != sizeof(oh->ntlv)) - return (EINVAL); - - objheader_to_ti(oh, &ti); - ti.type = oh->ntlv.type; - ti.uidx = tent->idx; - - IPFW_UH_RLOCK(ch); - ni = CHAIN_TO_NI(ch); - - /* - * Find existing table and check its type . 
- */ - ta = NULL; - if ((tc = find_table(ni, &ti)) == NULL) { - IPFW_UH_RUNLOCK(ch); - return (ESRCH); - } - - /* check table type */ - if (tc->no.subtype != ti.type) { - IPFW_UH_RUNLOCK(ch); - return (EINVAL); - } - - kti = KIDX_TO_TI(ch, tc->no.kidx); - ta = tc->ta; - - if (ta->find_tentry == NULL) - return (ENOTSUP); - - error = ta->find_tentry(tc->astate, kti, tent); - if (error == 0) { - pval = get_table_value(ch, tc, tent->v.kidx); - ipfw_export_table_value_v1(pval, &tent->v.value); - } - IPFW_UH_RUNLOCK(ch); - - return (error); -} - -/* - * Flushes all entries or destroys given table. - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * - * Returns 0 on success - */ -static int -flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - int error; - struct _ipfw_obj_header *oh; - struct tid_info ti; - - if (sd->valsize != sizeof(*oh)) - return (EINVAL); - - oh = (struct _ipfw_obj_header *)op3; - objheader_to_ti(oh, &ti); - - if (op3->opcode == IP_FW_TABLE_XDESTROY) - error = destroy_table(ch, &ti); - else if (op3->opcode == IP_FW_TABLE_XFLUSH) - error = flush_table(ch, &ti); - else - return (ENOTSUP); - - return (error); -} - -static void -restart_flush(void *object, struct op_state *_state) -{ - struct tableop_state *ts; - - ts = (struct tableop_state *)_state; - - if (ts->tc != object) - return; - - /* Indicate we've called */ - ts->modified = 1; -} - -/* - * Flushes given table. - * - * Function create new table instance with the same - * parameters, swaps it with old one and - * flushes state without holding runtime WLOCK. - * - * Returns 0 on success. 
- */ -int -flush_table(struct ip_fw_chain *ch, struct tid_info *ti) -{ - struct namedobj_instance *ni; - struct table_config *tc; - struct table_algo *ta; - struct table_info ti_old, ti_new, *tablestate; - void *astate_old, *astate_new; - char algostate[64], *pstate; - struct tableop_state ts; - int error, need_gc; - uint16_t kidx; - uint8_t tflags; - - /* - * Stage 1: save table algorithm. - * Reference found table to ensure it won't disappear. - */ - IPFW_UH_WLOCK(ch); - ni = CHAIN_TO_NI(ch); - if ((tc = find_table(ni, ti)) == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - need_gc = 0; - astate_new = NULL; - memset(&ti_new, 0, sizeof(ti_new)); -restart: - /* Set up swap handler */ - memset(&ts, 0, sizeof(ts)); - ts.opstate.func = restart_flush; - ts.tc = tc; - - ta = tc->ta; - /* Do not flush readonly tables */ - if ((ta->flags & TA_FLAG_READONLY) != 0) { - IPFW_UH_WUNLOCK(ch); - return (EACCES); - } - /* Save startup algo parameters */ - if (ta->print_config != NULL) { - ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx), - algostate, sizeof(algostate)); - pstate = algostate; - } else - pstate = NULL; - tflags = tc->tflags; - tc->no.refcnt++; - add_toperation_state(ch, &ts); - IPFW_UH_WUNLOCK(ch); - - /* - * Stage 1.5: if this is not the first attempt, destroy previous state - */ - if (need_gc != 0) { - ta->destroy(astate_new, &ti_new); - need_gc = 0; - } - - /* - * Stage 2: allocate new table instance using same algo. - */ - memset(&ti_new, 0, sizeof(struct table_info)); - error = ta->init(ch, &astate_new, &ti_new, pstate, tflags); - - /* - * Stage 3: swap old state pointers with newly-allocated ones. - * Decrease refcount. - */ - IPFW_UH_WLOCK(ch); - tc->no.refcnt--; - del_toperation_state(ch, &ts); - - if (error != 0) { - IPFW_UH_WUNLOCK(ch); - return (error); - } - - /* - * Restart operation if table swap has happened: - * even if algo may be the same, algo init parameters - * may change. Restart operation instead of doing - * complex checks. 
- */ - if (ts.modified != 0) { - /* Delay destroying data since we're holding UH lock */ - need_gc = 1; - goto restart; - } - - ni = CHAIN_TO_NI(ch); - kidx = tc->no.kidx; - tablestate = (struct table_info *)ch->tablestate; - - IPFW_WLOCK(ch); - ti_old = tablestate[kidx]; - tablestate[kidx] = ti_new; - IPFW_WUNLOCK(ch); - - astate_old = tc->astate; - tc->astate = astate_new; - tc->ti_copy = ti_new; - tc->count = 0; - - /* Notify algo on real @ti address */ - if (ta->change_ti != NULL) - ta->change_ti(tc->astate, &tablestate[kidx]); - - /* - * Stage 4: unref values. - */ - ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old); - IPFW_UH_WUNLOCK(ch); - - /* - * Stage 5: perform real flush/destroy. - */ - ta->destroy(astate_old, &ti_old); - - return (0); -} - -/* - * Swaps two tables. - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_obj_ntlv ] - * - * Returns 0 on success - */ -static int -swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - int error; - struct _ipfw_obj_header *oh; - struct tid_info ti_a, ti_b; - - if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv)) - return (EINVAL); - - oh = (struct _ipfw_obj_header *)op3; - ntlv_to_ti(&oh->ntlv, &ti_a); - ntlv_to_ti((ipfw_obj_ntlv *)(oh + 1), &ti_b); - - error = swap_tables(ch, &ti_a, &ti_b); - - return (error); -} - -/* - * Swaps two tables of the same type/valtype. - * - * Checks if tables are compatible and limits - * permits swap, than actually perform swap. - * - * Each table consists of 2 different parts: - * config: - * @tc (with name, set, kidx) and rule bindings, which is "stable". - * number of items - * table algo - * runtime: - * runtime data @ti (ch->tablestate) - * runtime cache in @tc - * algo-specific data (@tc->astate) - * - * So we switch: - * all runtime data - * number of items - * table algo - * - * After that we call @ti change handler for each table. - * - * Note that referencing @tc won't protect tc->ta from change. 
- * XXX: Do we need to restrict swap between locked tables? - * XXX: Do we need to exchange ftype? - * - * Returns 0 on success. - */ -static int -swap_tables(struct ip_fw_chain *ch, struct tid_info *a, - struct tid_info *b) -{ - struct namedobj_instance *ni; - struct table_config *tc_a, *tc_b; - struct table_algo *ta; - struct table_info ti, *tablestate; - void *astate; - uint32_t count; - - /* - * Stage 1: find both tables and ensure they are of - * the same type. - */ - IPFW_UH_WLOCK(ch); - ni = CHAIN_TO_NI(ch); - if ((tc_a = find_table(ni, a)) == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - if ((tc_b = find_table(ni, b)) == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - - /* It is very easy to swap between the same table */ - if (tc_a == tc_b) { - IPFW_UH_WUNLOCK(ch); - return (0); - } - - /* Check type and value are the same */ - if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) { - IPFW_UH_WUNLOCK(ch); - return (EINVAL); - } - - /* Check limits before swap */ - if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) || - (tc_b->limit != 0 && tc_a->count > tc_b->limit)) { - IPFW_UH_WUNLOCK(ch); - return (EFBIG); - } - - /* Check if one of the tables is readonly */ - if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) { - IPFW_UH_WUNLOCK(ch); - return (EACCES); - } - - /* Notify we're going to swap */ - rollback_toperation_state(ch, tc_a); - rollback_toperation_state(ch, tc_b); - - /* Everything is fine, prepare to swap */ - tablestate = (struct table_info *)ch->tablestate; - ti = tablestate[tc_a->no.kidx]; - ta = tc_a->ta; - astate = tc_a->astate; - count = tc_a->count; - - IPFW_WLOCK(ch); - /* a <- b */ - tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx]; - tc_a->ta = tc_b->ta; - tc_a->astate = tc_b->astate; - tc_a->count = tc_b->count; - /* b <- a */ - tablestate[tc_b->no.kidx] = ti; - tc_b->ta = ta; - tc_b->astate = astate; - tc_b->count = count; - IPFW_WUNLOCK(ch); - - /* Ensure tc.ti copies are in sync 
*/ - tc_a->ti_copy = tablestate[tc_a->no.kidx]; - tc_b->ti_copy = tablestate[tc_b->no.kidx]; - - /* Notify both tables on @ti change */ - if (tc_a->ta->change_ti != NULL) - tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]); - if (tc_b->ta->change_ti != NULL) - tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]); - - IPFW_UH_WUNLOCK(ch); - - return (0); -} - -/* - * Destroys table specified by @ti. - * Data layout (v0)(current): - * Request: [ ip_fw3_opheader ] - * - * Returns 0 on success - */ -static int -destroy_table(struct ip_fw_chain *ch, struct tid_info *ti) -{ - struct namedobj_instance *ni; - struct table_config *tc; - - IPFW_UH_WLOCK(ch); - - ni = CHAIN_TO_NI(ch); - if ((tc = find_table(ni, ti)) == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - - /* Do not permit destroying referenced tables */ - if (tc->no.refcnt > 0) { - IPFW_UH_WUNLOCK(ch); - return (EBUSY); - } - - IPFW_WLOCK(ch); - unlink_table(ch, tc); - IPFW_WUNLOCK(ch); - - /* Free obj index */ - if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0) - printf("Error unlinking kidx %d from table %s\n", - tc->no.kidx, tc->tablename); - - /* Unref values used in tables while holding UH lock */ - ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy); - IPFW_UH_WUNLOCK(ch); - - free_table_config(ni, tc); - - return (0); -} - -static uint32_t -roundup2p(uint32_t v) -{ - - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - - return (v); -} - -/* - * Grow tables index. - * - * Returns 0 on success. 
- */ -int -ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables) -{ - unsigned int ntables_old, tbl; - struct namedobj_instance *ni; - void *new_idx, *old_tablestate, *tablestate; - struct table_info *ti; - struct table_config *tc; - int i, new_blocks; - - /* Check new value for validity */ - if (ntables == 0) - return (EINVAL); - if (ntables > IPFW_TABLES_MAX) - ntables = IPFW_TABLES_MAX; - /* Alight to nearest power of 2 */ - ntables = (unsigned int)roundup2p(ntables); - - /* Allocate new pointers */ - tablestate = malloc(ntables * sizeof(struct table_info), - M_IPFW, M_WAITOK | M_ZERO); - - ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks); - - IPFW_UH_WLOCK(ch); - - tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables; - ni = CHAIN_TO_NI(ch); - - /* Temporary restrict decreasing max_tables */ - if (ntables < V_fw_tables_max) { - - /* - * FIXME: Check if we really can shrink - */ - IPFW_UH_WUNLOCK(ch); - return (EINVAL); - } - - /* Copy table info/indices */ - memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl); - ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks); - - IPFW_WLOCK(ch); - - /* Change pointers */ - old_tablestate = ch->tablestate; - ch->tablestate = tablestate; - ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks); - - ntables_old = V_fw_tables_max; - V_fw_tables_max = ntables; - - IPFW_WUNLOCK(ch); - - /* Notify all consumers that their @ti pointer has changed */ - ti = (struct table_info *)ch->tablestate; - for (i = 0; i < tbl; i++, ti++) { - if (ti->lookup == NULL) - continue; - tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i); - if (tc == NULL || tc->ta->change_ti == NULL) - continue; - - tc->ta->change_ti(tc->astate, ti); - } - - IPFW_UH_WUNLOCK(ch); - - /* Free old pointers */ - free(old_tablestate, M_IPFW); - ipfw_objhash_bitmap_free(new_idx, new_blocks); - - return (0); -} - -/* - * Lookup table's named object by its @kidx. 
- */ -struct named_object * -ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx) -{ - - return (ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx)); -} - -/* - * Take reference to table specified in @ntlv. - * On success return its @kidx. - */ -int -ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx) -{ - struct tid_info ti; - struct table_config *tc; - int error; - - IPFW_UH_WLOCK_ASSERT(ch); - - ntlv_to_ti(ntlv, &ti); - error = find_table_err(CHAIN_TO_NI(ch), &ti, &tc); - if (error != 0) - return (error); - - if (tc == NULL) - return (ESRCH); - - tc_ref(tc); - *kidx = tc->no.kidx; - - return (0); -} - -void -ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx) -{ - - struct namedobj_instance *ni; - struct named_object *no; - - IPFW_UH_WLOCK_ASSERT(ch); - ni = CHAIN_TO_NI(ch); - no = ipfw_objhash_lookup_kidx(ni, kidx); - KASSERT(no != NULL, ("Table with index %d not found", kidx)); - no->refcnt--; -} - -/* - * Lookup an arbtrary key @paddr of legth @plen in table @tbl. - * Stores found value in @val. - * - * Returns 1 if key was found. - */ -int -ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen, - void *paddr, uint32_t *val) -{ - struct table_info *ti; - - ti = KIDX_TO_TI(ch, tbl); - - return (ti->lookup(ti, paddr, plen, val)); -} - -/* - * Info/List/dump support for tables. - * - */ - -/* - * High-level 'get' cmds sysctl handlers - */ - -/* - * Lists all tables currently available in kernel. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size - * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ] - * - * Returns 0 on success - */ -static int -list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct _ipfw_obj_lheader *olh; - int error; - - olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); - if (olh == NULL) - return (EINVAL); - if (sd->valsize < olh->size) - return (EINVAL); - - IPFW_UH_RLOCK(ch); - error = export_tables(ch, olh, sd); - IPFW_UH_RUNLOCK(ch); - - return (error); -} - -/* - * Store table info to buffer provided by @sd. - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_xtable_info(empty)] - * Reply: [ ipfw_obj_header ipfw_xtable_info ] - * - * Returns 0 on success. - */ -static int -describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct _ipfw_obj_header *oh; - struct table_config *tc; - struct tid_info ti; - size_t sz; - - sz = sizeof(*oh) + sizeof(ipfw_xtable_info); - oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); - if (oh == NULL) - return (EINVAL); - - objheader_to_ti(oh, &ti); - - IPFW_UH_RLOCK(ch); - if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { - IPFW_UH_RUNLOCK(ch); - return (ESRCH); - } - - export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1)); - IPFW_UH_RUNLOCK(ch); - - return (0); -} - -/* - * Modifies existing table. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_xtable_info ] - * - * Returns 0 on success - */ -static int -modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct _ipfw_obj_header *oh; - ipfw_xtable_info *i; - char *tname; - struct tid_info ti; - struct namedobj_instance *ni; - struct table_config *tc; - - if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) - return (EINVAL); - - oh = (struct _ipfw_obj_header *)sd->kbuf; - i = (ipfw_xtable_info *)(oh + 1); - - /* - * Verify user-supplied strings. - * Check for null-terminated/zero-length strings/ - */ - tname = oh->ntlv.name; - if (check_table_name(tname) != 0) - return (EINVAL); - - objheader_to_ti(oh, &ti); - ti.type = i->type; - - IPFW_UH_WLOCK(ch); - ni = CHAIN_TO_NI(ch); - if ((tc = find_table(ni, &ti)) == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - - /* Do not support any modifications for readonly tables */ - if ((tc->ta->flags & TA_FLAG_READONLY) != 0) { - IPFW_UH_WUNLOCK(ch); - return (EACCES); - } - - if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0) - tc->limit = i->limit; - if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0) - tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0); - IPFW_UH_WUNLOCK(ch); - - return (0); -} - -/* - * Creates new table. - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_xtable_info ] - * - * Returns 0 on success - */ -static int -create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct _ipfw_obj_header *oh; - ipfw_xtable_info *i; - char *tname, *aname; - struct tid_info ti; - struct namedobj_instance *ni; - - if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info)) - return (EINVAL); - - oh = (struct _ipfw_obj_header *)sd->kbuf; - i = (ipfw_xtable_info *)(oh + 1); - - /* - * Verify user-supplied strings. 
- * Check for null-terminated/zero-length strings/ - */ - tname = oh->ntlv.name; - aname = i->algoname; - if (check_table_name(tname) != 0 || - strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname)) - return (EINVAL); - - if (aname[0] == '\0') { - /* Use default algorithm */ - aname = NULL; - } - - objheader_to_ti(oh, &ti); - ti.type = i->type; - - ni = CHAIN_TO_NI(ch); - - IPFW_UH_RLOCK(ch); - if (find_table(ni, &ti) != NULL) { - IPFW_UH_RUNLOCK(ch); - return (EEXIST); - } - IPFW_UH_RUNLOCK(ch); - - return (create_table_internal(ch, &ti, aname, i, NULL, 0)); -} - -/* - * Creates new table based on @ti and @aname. - * - * Assume @aname to be checked and valid. - * Stores allocated table kidx inside @pkidx (if non-NULL). - * Reference created table if @compat is non-zero. - * - * Returns 0 on success. - */ -static int -create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti, - char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat) -{ - struct namedobj_instance *ni; - struct table_config *tc, *tc_new, *tmp; - struct table_algo *ta; - uint16_t kidx; - - ni = CHAIN_TO_NI(ch); - - ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname); - if (ta == NULL) - return (ENOTSUP); - - tc = alloc_table_config(ch, ti, ta, aname, i->tflags); - if (tc == NULL) - return (ENOMEM); - - tc->vmask = i->vmask; - tc->limit = i->limit; - if (ta->flags & TA_FLAG_READONLY) - tc->locked = 1; - else - tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0; - - IPFW_UH_WLOCK(ch); - - /* Check if table has been already created */ - tc_new = find_table(ni, ti); - if (tc_new != NULL) { - - /* - * Compat: do not fail if we're - * requesting to create existing table - * which has the same type - */ - if (compat == 0 || tc_new->no.subtype != tc->no.subtype) { - IPFW_UH_WUNLOCK(ch); - free_table_config(ni, tc); - return (EEXIST); - } - - /* Exchange tc and tc_new for proper refcounting & freeing */ - tmp = tc; - tc = tc_new; - tc_new = tmp; - } else { - /* New table */ - if 
(ipfw_objhash_alloc_idx(ni, &kidx) != 0) { - IPFW_UH_WUNLOCK(ch); - printf("Unable to allocate table index." - " Consider increasing net.inet.ip.fw.tables_max"); - free_table_config(ni, tc); - return (EBUSY); - } - tc->no.kidx = kidx; - tc->no.etlv = IPFW_TLV_TBL_NAME; - - IPFW_WLOCK(ch); - link_table(ch, tc); - IPFW_WUNLOCK(ch); - } - - if (compat != 0) - tc->no.refcnt++; - if (pkidx != NULL) - *pkidx = tc->no.kidx; - - IPFW_UH_WUNLOCK(ch); - - if (tc_new != NULL) - free_table_config(ni, tc_new); - - return (0); -} - -static void -ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti) -{ - - memset(ti, 0, sizeof(struct tid_info)); - ti->set = ntlv->set; - ti->uidx = ntlv->idx; - ti->tlvs = ntlv; - ti->tlen = ntlv->head.length; -} - -static void -objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti) -{ - - ntlv_to_ti(&oh->ntlv, ti); -} - -struct namedobj_instance * -ipfw_get_table_objhash(struct ip_fw_chain *ch) -{ - - return (CHAIN_TO_NI(ch)); -} - -/* - * Exports basic table info as name TLV. - * Used inside dump_static_rules() to provide info - * about all tables referenced by current ruleset. - * - * Returns 0 on success. 
- */ -int -ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, - struct sockopt_data *sd) -{ - struct namedobj_instance *ni; - struct named_object *no; - ipfw_obj_ntlv *ntlv; - - ni = CHAIN_TO_NI(ch); - - no = ipfw_objhash_lookup_kidx(ni, kidx); - KASSERT(no != NULL, ("invalid table kidx passed")); - - ntlv = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*ntlv)); - if (ntlv == NULL) - return (ENOMEM); - - ntlv->head.type = IPFW_TLV_TBL_NAME; - ntlv->head.length = sizeof(*ntlv); - ntlv->idx = no->kidx; - strlcpy(ntlv->name, no->name, sizeof(ntlv->name)); - - return (0); -} - -struct dump_args { - struct ip_fw_chain *ch; - struct table_info *ti; - struct table_config *tc; - struct sockopt_data *sd; - uint32_t cnt; - uint16_t uidx; - int error; - uint32_t size; - ipfw_table_entry *ent; - ta_foreach_f *f; - void *farg; - ipfw_obj_tentry tent; -}; - -static int -count_ext_entries(void *e, void *arg) -{ - struct dump_args *da; - - da = (struct dump_args *)arg; - da->cnt++; - - return (0); -} - -/* - * Gets number of items from table either using - * internal counter or calling algo callback for - * externally-managed tables. - * - * Returns number of records. - */ -static uint32_t -table_get_count(struct ip_fw_chain *ch, struct table_config *tc) -{ - struct table_info *ti; - struct table_algo *ta; - struct dump_args da; - - ti = KIDX_TO_TI(ch, tc->no.kidx); - ta = tc->ta; - - /* Use internal counter for self-managed tables */ - if ((ta->flags & TA_FLAG_READONLY) == 0) - return (tc->count); - - /* Use callback to quickly get number of items */ - if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0) - return (ta->get_count(tc->astate, ti)); - - /* Count number of iterms ourselves */ - memset(&da, 0, sizeof(da)); - ta->foreach(tc->astate, ti, count_ext_entries, &da); - - return (da.cnt); -} - -/* - * Exports table @tc info into standard ipfw_xtable_info format. 
- */ -static void -export_table_info(struct ip_fw_chain *ch, struct table_config *tc, - ipfw_xtable_info *i) -{ - struct table_info *ti; - struct table_algo *ta; - - i->type = tc->no.subtype; - i->tflags = tc->tflags; - i->vmask = tc->vmask; - i->set = tc->no.set; - i->kidx = tc->no.kidx; - i->refcnt = tc->no.refcnt; - i->count = table_get_count(ch, tc); - i->limit = tc->limit; - i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0; - i->size = i->count * sizeof(ipfw_obj_tentry); - i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); - strlcpy(i->tablename, tc->tablename, sizeof(i->tablename)); - ti = KIDX_TO_TI(ch, tc->no.kidx); - ta = tc->ta; - if (ta->print_config != NULL) { - /* Use algo function to print table config to string */ - ta->print_config(tc->astate, ti, i->algoname, - sizeof(i->algoname)); - } else - strlcpy(i->algoname, ta->name, sizeof(i->algoname)); - /* Dump algo-specific data, if possible */ - if (ta->dump_tinfo != NULL) { - ta->dump_tinfo(tc->astate, ti, &i->ta_info); - i->ta_info.flags |= IPFW_TATFLAGS_DATA; - } -} - -struct dump_table_args { - struct ip_fw_chain *ch; - struct sockopt_data *sd; -}; - -static int -export_table_internal(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - ipfw_xtable_info *i; - struct dump_table_args *dta; - - dta = (struct dump_table_args *)arg; - - i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i)); - KASSERT(i != NULL, ("previously checked buffer is not enough")); - - export_table_info(dta->ch, (struct table_config *)no, i); - return (0); -} - -/* - * Export all tables as ipfw_xtable_info structures to - * storage provided by @sd. - * - * If supplied buffer is too small, fills in required size - * and returns ENOMEM. - * Returns 0 on success. 
- */ -static int -export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh, - struct sockopt_data *sd) -{ - uint32_t size; - uint32_t count; - struct dump_table_args dta; - - count = ipfw_objhash_count(CHAIN_TO_NI(ch)); - size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader); - - /* Fill in header regadless of buffer size */ - olh->count = count; - olh->objsize = sizeof(ipfw_xtable_info); - - if (size > olh->size) { - olh->size = size; - return (ENOMEM); - } - - olh->size = size; - - dta.ch = ch; - dta.sd = sd; - - ipfw_objhash_foreach(CHAIN_TO_NI(ch), export_table_internal, &dta); - - return (0); -} - -/* - * Dumps all table data - * Data layout (v1)(current): - * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size - * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ] - * - * Returns 0 on success - */ -static int -dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct _ipfw_obj_header *oh; - ipfw_xtable_info *i; - struct tid_info ti; - struct table_config *tc; - struct table_algo *ta; - struct dump_args da; - uint32_t sz; - - sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info); - oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); - if (oh == NULL) - return (EINVAL); - - i = (ipfw_xtable_info *)(oh + 1); - objheader_to_ti(oh, &ti); - - IPFW_UH_RLOCK(ch); - if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { - IPFW_UH_RUNLOCK(ch); - return (ESRCH); - } - export_table_info(ch, tc, i); - - if (sd->valsize < i->size) { - - /* - * Submitted buffer size is not enough. - * WE've already filled in @i structure with - * relevant table info including size, so we - * can return. Buffer will be flushed automatically. 
- */ - IPFW_UH_RUNLOCK(ch); - return (ENOMEM); - } - - /* - * Do the actual dump in eXtended format - */ - memset(&da, 0, sizeof(da)); - da.ch = ch; - da.ti = KIDX_TO_TI(ch, tc->no.kidx); - da.tc = tc; - da.sd = sd; - - ta = tc->ta; - - ta->foreach(tc->astate, da.ti, dump_table_tentry, &da); - IPFW_UH_RUNLOCK(ch); - - return (da.error); -} - -/* - * Dumps all table data - * Data layout (version 0)(legacy): - * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE() - * Reply: [ ipfw_xtable ipfw_table_xentry x N ] - * - * Returns 0 on success - */ -static int -dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_xtable *xtbl; - struct tid_info ti; - struct table_config *tc; - struct table_algo *ta; - struct dump_args da; - size_t sz, count; - - xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable)); - if (xtbl == NULL) - return (EINVAL); - - memset(&ti, 0, sizeof(ti)); - ti.uidx = xtbl->tbl; - - IPFW_UH_RLOCK(ch); - if ((tc = find_table(CHAIN_TO_NI(ch), &ti)) == NULL) { - IPFW_UH_RUNLOCK(ch); - return (0); - } - count = table_get_count(ch, tc); - sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable); - - xtbl->cnt = count; - xtbl->size = sz; - xtbl->type = tc->no.subtype; - xtbl->tbl = ti.uidx; - - if (sd->valsize < sz) { - - /* - * Submitted buffer size is not enough. - * WE've already filled in @i structure with - * relevant table info including size, so we - * can return. Buffer will be flushed automatically. - */ - IPFW_UH_RUNLOCK(ch); - return (ENOMEM); - } - - /* Do the actual dump in eXtended format */ - memset(&da, 0, sizeof(da)); - da.ch = ch; - da.ti = KIDX_TO_TI(ch, tc->no.kidx); - da.tc = tc; - da.sd = sd; - - ta = tc->ta; - - ta->foreach(tc->astate, da.ti, dump_table_xentry, &da); - IPFW_UH_RUNLOCK(ch); - - return (0); -} - -/* - * Legacy function to retrieve number of items in table. 
- */ -static int -get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - uint32_t *tbl; - struct tid_info ti; - size_t sz; - int error; - - sz = sizeof(*op3) + sizeof(uint32_t); - op3 = (ip_fw3_opheader *)ipfw_get_sopt_header(sd, sz); - if (op3 == NULL) - return (EINVAL); - - tbl = (uint32_t *)(op3 + 1); - memset(&ti, 0, sizeof(ti)); - ti.uidx = *tbl; - IPFW_UH_RLOCK(ch); - error = ipfw_count_xtable(ch, &ti, tbl); - IPFW_UH_RUNLOCK(ch); - return (error); -} - -/* - * Legacy IP_FW_TABLE_GETSIZE handler - */ -int -ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) -{ - struct table_config *tc; - - if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) - return (ESRCH); - *cnt = table_get_count(ch, tc); - return (0); -} - -/* - * Legacy IP_FW_TABLE_XGETSIZE handler - */ -int -ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt) -{ - struct table_config *tc; - uint32_t count; - - if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) { - *cnt = 0; - return (0); /* 'table all list' requires success */ - } - - count = table_get_count(ch, tc); - *cnt = count * sizeof(ipfw_table_xentry); - if (count > 0) - *cnt += sizeof(ipfw_xtable); - return (0); -} - -static int -dump_table_entry(void *e, void *arg) -{ - struct dump_args *da; - struct table_config *tc; - struct table_algo *ta; - ipfw_table_entry *ent; - struct table_value *pval; - int error; - - da = (struct dump_args *)arg; - - tc = da->tc; - ta = tc->ta; - - /* Out of memory, returning */ - if (da->cnt == da->size) - return (1); - ent = da->ent++; - ent->tbl = da->uidx; - da->cnt++; - - error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); - if (error != 0) - return (error); - - ent->addr = da->tent.k.addr.s_addr; - ent->masklen = da->tent.masklen; - pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); - ent->value = ipfw_export_table_value_legacy(pval); - - return (0); -} - -/* - * Dumps table in pre-8.1 legacy format. 
- */ -int -ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, - ipfw_table *tbl) -{ - struct table_config *tc; - struct table_algo *ta; - struct dump_args da; - - tbl->cnt = 0; - - if ((tc = find_table(CHAIN_TO_NI(ch), ti)) == NULL) - return (0); /* XXX: We should return ESRCH */ - - ta = tc->ta; - - /* This dump format supports IPv4 only */ - if (tc->no.subtype != IPFW_TABLE_ADDR) - return (0); - - memset(&da, 0, sizeof(da)); - da.ch = ch; - da.ti = KIDX_TO_TI(ch, tc->no.kidx); - da.tc = tc; - da.ent = &tbl->ent[0]; - da.size = tbl->size; - - tbl->cnt = 0; - ta->foreach(tc->astate, da.ti, dump_table_entry, &da); - tbl->cnt = da.cnt; - - return (0); -} - -/* - * Dumps table entry in eXtended format (v1)(current). - */ -static int -dump_table_tentry(void *e, void *arg) -{ - struct dump_args *da; - struct table_config *tc; - struct table_algo *ta; - struct table_value *pval; - ipfw_obj_tentry *tent; - int error; - - da = (struct dump_args *)arg; - - tc = da->tc; - ta = tc->ta; - - tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent)); - /* Out of memory, returning */ - if (tent == NULL) { - da->error = ENOMEM; - return (1); - } - tent->head.length = sizeof(ipfw_obj_tentry); - tent->idx = da->uidx; - - error = ta->dump_tentry(tc->astate, da->ti, e, tent); - if (error != 0) - return (error); - - pval = get_table_value(da->ch, da->tc, tent->v.kidx); - ipfw_export_table_value_v1(pval, &tent->v.value); - - return (0); -} - -/* - * Dumps table entry in eXtended format (v0). 
- */ -static int -dump_table_xentry(void *e, void *arg) -{ - struct dump_args *da; - struct table_config *tc; - struct table_algo *ta; - ipfw_table_xentry *xent; - ipfw_obj_tentry *tent; - struct table_value *pval; - int error; - - da = (struct dump_args *)arg; - - tc = da->tc; - ta = tc->ta; - - xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent)); - /* Out of memory, returning */ - if (xent == NULL) - return (1); - xent->len = sizeof(ipfw_table_xentry); - xent->tbl = da->uidx; - - memset(&da->tent, 0, sizeof(da->tent)); - tent = &da->tent; - error = ta->dump_tentry(tc->astate, da->ti, e, tent); - if (error != 0) - return (error); - - /* Convert current format to previous one */ - xent->masklen = tent->masklen; - pval = get_table_value(da->ch, da->tc, da->tent.v.kidx); - xent->value = ipfw_export_table_value_legacy(pval); - /* Apply some hacks */ - if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) { - xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr; - xent->flags = IPFW_TCF_INET; - } else - memcpy(&xent->k, &tent->k, sizeof(xent->k)); - - return (0); -} - -/* - * Helper function to export table algo data - * to tentry format before calling user function. - * - * Returns 0 on success. - */ -static int -prepare_table_tentry(void *e, void *arg) -{ - struct dump_args *da; - struct table_config *tc; - struct table_algo *ta; - int error; - - da = (struct dump_args *)arg; - - tc = da->tc; - ta = tc->ta; - - error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent); - if (error != 0) - return (error); - - da->f(&da->tent, da->farg); - - return (0); -} - -/* - * Allow external consumers to read table entries in standard format. 
- */ -int -ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx, - ta_foreach_f *f, void *arg) -{ - struct namedobj_instance *ni; - struct table_config *tc; - struct table_algo *ta; - struct dump_args da; - - ni = CHAIN_TO_NI(ch); - - tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx); - if (tc == NULL) - return (ESRCH); - - ta = tc->ta; - - memset(&da, 0, sizeof(da)); - da.ch = ch; - da.ti = KIDX_TO_TI(ch, tc->no.kidx); - da.tc = tc; - da.f = f; - da.farg = arg; - - ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da); - - return (0); -} - -/* - * Table algorithms - */ - -/* - * Finds algorithm by index, table type or supplied name. - * - * Returns pointer to algo or NULL. - */ -static struct table_algo * -find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name) -{ - int i, l; - struct table_algo *ta; - - if (ti->type > IPFW_TABLE_MAXTYPE) - return (NULL); - - /* Search by index */ - if (ti->atype != 0) { - if (ti->atype > tcfg->algo_count) - return (NULL); - return (tcfg->algo[ti->atype]); - } - - if (name == NULL) { - /* Return default algorithm for given type if set */ - return (tcfg->def_algo[ti->type]); - } - - /* Search by name */ - /* TODO: better search */ - for (i = 1; i <= tcfg->algo_count; i++) { - ta = tcfg->algo[i]; - - /* - * One can supply additional algorithm - * parameters so we compare only the first word - * of supplied name: - * 'addr:chash hsize=32' - * '^^^^^^^^^' - * - */ - l = strlen(ta->name); - if (strncmp(name, ta->name, l) != 0) - continue; - if (name[l] != '\0' && name[l] != ' ') - continue; - /* Check if we're requesting proper table type */ - if (ti->type != 0 && ti->type != ta->type) - return (NULL); - return (ta); - } - - return (NULL); -} - -/* - * Register new table algo @ta. - * Stores algo id inside @idx. - * - * Returns 0 on success. 
- */ -int -ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size, - int *idx) -{ - struct tables_config *tcfg; - struct table_algo *ta_new; - size_t sz; - - if (size > sizeof(struct table_algo)) - return (EINVAL); - - /* Check for the required on-stack size for add/del */ - sz = roundup2(ta->ta_buf_size, sizeof(void *)); - if (sz > TA_BUF_SZ) - return (EINVAL); - - KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE")); - - /* Copy algorithm data to stable storage. */ - ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO); - memcpy(ta_new, ta, size); - - tcfg = CHAIN_TO_TCFG(ch); - - KASSERT(tcfg->algo_count < 255, ("Increase algo array size")); - - tcfg->algo[++tcfg->algo_count] = ta_new; - ta_new->idx = tcfg->algo_count; - - /* Set algorithm as default one for given type */ - if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 && - tcfg->def_algo[ta_new->type] == NULL) - tcfg->def_algo[ta_new->type] = ta_new; - - *idx = ta_new->idx; - - return (0); -} - -/* - * Unregisters table algo using @idx as id. - * XXX: It is NOT safe to call this function in any place - * other than ipfw instance destroy handler. - */ -void -ipfw_del_table_algo(struct ip_fw_chain *ch, int idx) -{ - struct tables_config *tcfg; - struct table_algo *ta; - - tcfg = CHAIN_TO_TCFG(ch); - - KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d", - idx, tcfg->algo_count)); - - ta = tcfg->algo[idx]; - KASSERT(ta != NULL, ("algo idx %d is NULL", idx)); - - if (tcfg->def_algo[ta->type] == ta) - tcfg->def_algo[ta->type] = NULL; - - free(ta, M_IPFW); -} - -/* - * Lists all table algorithms currently available. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size - * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ] - * - * Returns 0 on success - */ -static int -list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct _ipfw_obj_lheader *olh; - struct tables_config *tcfg; - ipfw_ta_info *i; - struct table_algo *ta; - uint32_t count, n, size; - - olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); - if (olh == NULL) - return (EINVAL); - if (sd->valsize < olh->size) - return (EINVAL); - - IPFW_UH_RLOCK(ch); - tcfg = CHAIN_TO_TCFG(ch); - count = tcfg->algo_count; - size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader); - - /* Fill in header regadless of buffer size */ - olh->count = count; - olh->objsize = sizeof(ipfw_ta_info); - - if (size > olh->size) { - olh->size = size; - IPFW_UH_RUNLOCK(ch); - return (ENOMEM); - } - olh->size = size; - - for (n = 1; n <= count; n++) { - i = (ipfw_ta_info *)ipfw_get_sopt_space(sd, sizeof(*i)); - KASSERT(i != NULL, ("previously checked buffer is not enough")); - ta = tcfg->algo[n]; - strlcpy(i->algoname, ta->name, sizeof(i->algoname)); - i->type = ta->type; - i->refcnt = ta->refcnt; - } - - IPFW_UH_RUNLOCK(ch); - - return (0); -} - -static int -classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) -{ - /* Basic IPv4/IPv6 or u32 lookups */ - *puidx = cmd->arg1; - /* Assume ADDR by default */ - *ptype = IPFW_TABLE_ADDR; - int v; - - if (F_LEN(cmd) > F_INSN_SIZE(ipfw_insn_u32)) { - /* - * generic lookup. The key must be - * in 32bit big-endian format. 
- */ - v = ((ipfw_insn_u32 *)cmd)->d[1]; - switch (v) { - case 0: - case 1: - /* IPv4 src/dst */ - break; - case 2: - case 3: - /* src/dst port */ - *ptype = IPFW_TABLE_NUMBER; - break; - case 4: - /* uid/gid */ - *ptype = IPFW_TABLE_NUMBER; - break; - case 5: - /* jid */ - *ptype = IPFW_TABLE_NUMBER; - break; - case 6: - /* dscp */ - *ptype = IPFW_TABLE_NUMBER; - break; - } - } - - return (0); -} - -static int -classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) -{ - ipfw_insn_if *cmdif; - - /* Interface table, possibly */ - cmdif = (ipfw_insn_if *)cmd; - if (cmdif->name[0] != '\1') - return (1); - - *ptype = IPFW_TABLE_INTERFACE; - *puidx = cmdif->p.kidx; - - return (0); -} - -static int -classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) -{ - - *puidx = cmd->arg1; - *ptype = IPFW_TABLE_FLOW; - - return (0); -} - -static void -update_arg1(ipfw_insn *cmd, uint16_t idx) -{ - - cmd->arg1 = idx; -} - -static void -update_via(ipfw_insn *cmd, uint16_t idx) -{ - ipfw_insn_if *cmdif; - - cmdif = (ipfw_insn_if *)cmd; - cmdif->p.kidx = idx; -} - -static int -table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, - struct named_object **pno) -{ - struct table_config *tc; - int error; - - IPFW_UH_WLOCK_ASSERT(ch); - - error = find_table_err(CHAIN_TO_NI(ch), ti, &tc); - if (error != 0) - return (error); - - *pno = &tc->no; - return (0); -} - -/* XXX: sets-sets! 
*/ -static struct named_object * -table_findbykidx(struct ip_fw_chain *ch, uint16_t idx) -{ - struct namedobj_instance *ni; - struct table_config *tc; - - IPFW_UH_WLOCK_ASSERT(ch); - ni = CHAIN_TO_NI(ch); - tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, idx); - KASSERT(tc != NULL, ("Table with index %d not found", idx)); - - return (&tc->no); -} - -static int -table_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, - enum ipfw_sets_cmd cmd) -{ - - switch (cmd) { - case SWAP_ALL: - case TEST_ALL: - case MOVE_ALL: - /* - * Always return success, the real action and decision - * should make table_manage_sets_all(). - */ - return (0); - case TEST_ONE: - case MOVE_ONE: - /* - * NOTE: we need to use ipfw_objhash_del/ipfw_objhash_add - * if set number will be used in hash function. Currently - * we can just use generic handler that replaces set value. - */ - if (V_fw_tables_sets == 0) - return (0); - break; - case COUNT_ONE: - /* - * Return EOPNOTSUPP for COUNT_ONE when per-set sysctl is - * disabled. This allow skip table's opcodes from additional - * checks when specific rules moved to another set. - */ - if (V_fw_tables_sets == 0) - return (EOPNOTSUPP); - } - /* Use generic sets handler when per-set sysctl is enabled. */ - return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME, - set, new_set, cmd)); -} - -/* - * We register several opcode rewriters for lookup tables. - * All tables opcodes have the same ETLV type, but different subtype. - * To avoid invoking sets handler several times for XXX_ALL commands, - * we use separate manage_sets handler. O_RECV has the lowest value, - * so it should be called first. - */ -static int -table_manage_sets_all(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, - enum ipfw_sets_cmd cmd) -{ - - switch (cmd) { - case SWAP_ALL: - case TEST_ALL: - /* - * Return success for TEST_ALL, since nothing prevents - * move rules from one set to another. 
All tables are - * accessible from all sets when per-set tables sysctl - * is disabled. - */ - case MOVE_ALL: - if (V_fw_tables_sets == 0) - return (0); - break; - default: - return (table_manage_sets(ch, set, new_set, cmd)); - } - /* Use generic sets handler when per-set sysctl is enabled. */ - return (ipfw_obj_manage_sets(CHAIN_TO_NI(ch), IPFW_TLV_TBL_NAME, - set, new_set, cmd)); -} - -static struct opcode_obj_rewrite opcodes[] = { - { - .opcode = O_IP_SRC_LOOKUP, - .etlv = IPFW_TLV_TBL_NAME, - .classifier = classify_srcdst, - .update = update_arg1, - .find_byname = table_findbyname, - .find_bykidx = table_findbykidx, - .create_object = create_table_compat, - .manage_sets = table_manage_sets, - }, - { - .opcode = O_IP_DST_LOOKUP, - .etlv = IPFW_TLV_TBL_NAME, - .classifier = classify_srcdst, - .update = update_arg1, - .find_byname = table_findbyname, - .find_bykidx = table_findbykidx, - .create_object = create_table_compat, - .manage_sets = table_manage_sets, - }, - { - .opcode = O_IP_FLOW_LOOKUP, - .etlv = IPFW_TLV_TBL_NAME, - .classifier = classify_flow, - .update = update_arg1, - .find_byname = table_findbyname, - .find_bykidx = table_findbykidx, - .create_object = create_table_compat, - .manage_sets = table_manage_sets, - }, - { - .opcode = O_XMIT, - .etlv = IPFW_TLV_TBL_NAME, - .classifier = classify_via, - .update = update_via, - .find_byname = table_findbyname, - .find_bykidx = table_findbykidx, - .create_object = create_table_compat, - .manage_sets = table_manage_sets, - }, - { - .opcode = O_RECV, - .etlv = IPFW_TLV_TBL_NAME, - .classifier = classify_via, - .update = update_via, - .find_byname = table_findbyname, - .find_bykidx = table_findbykidx, - .create_object = create_table_compat, - .manage_sets = table_manage_sets_all, - }, - { - .opcode = O_VIA, - .etlv = IPFW_TLV_TBL_NAME, - .classifier = classify_via, - .update = update_via, - .find_byname = table_findbyname, - .find_bykidx = table_findbykidx, - .create_object = create_table_compat, - 
.manage_sets = table_manage_sets, - }, -}; - -static int -test_sets_cb(struct namedobj_instance *ni __unused, struct named_object *no, - void *arg __unused) -{ - - /* Check that there aren't any tables in not default set */ - if (no->set != 0) - return (EBUSY); - return (0); -} - -/* - * Switch between "set 0" and "rule's set" table binding, - * Check all ruleset bindings and permits changing - * IFF each binding has both rule AND table in default set (set 0). - * - * Returns 0 on success. - */ -int -ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets) -{ - struct opcode_obj_rewrite *rw; - struct namedobj_instance *ni; - struct named_object *no; - struct ip_fw *rule; - ipfw_insn *cmd; - int cmdlen, i, l; - uint16_t kidx; - uint8_t subtype; - - IPFW_UH_WLOCK(ch); - - if (V_fw_tables_sets == sets) { - IPFW_UH_WUNLOCK(ch); - return (0); - } - ni = CHAIN_TO_NI(ch); - if (sets == 0) { - /* - * Prevent disabling sets support if we have some tables - * in not default sets. - */ - if (ipfw_objhash_foreach_type(ni, test_sets_cb, - NULL, IPFW_TLV_TBL_NAME) != 0) { - IPFW_UH_WUNLOCK(ch); - return (EBUSY); - } - } - /* - * Scan all rules and examine tables opcodes. - */ - for (i = 0; i < ch->n_rules; i++) { - rule = ch->map[i]; - - l = rule->cmd_len; - cmd = rule->cmd; - cmdlen = 0; - for ( ; l > 0 ; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - /* Check only tables opcodes */ - for (kidx = 0, rw = opcodes; - rw < opcodes + nitems(opcodes); rw++) { - if (rw->opcode != cmd->opcode) - continue; - if (rw->classifier(cmd, &kidx, &subtype) == 0) - break; - } - if (kidx == 0) - continue; - no = ipfw_objhash_lookup_kidx(ni, kidx); - /* Check if both table object and rule has the set 0 */ - if (no->set != 0 || rule->set != 0) { - IPFW_UH_WUNLOCK(ch); - return (EBUSY); - } - - } - } - V_fw_tables_sets = sets; - IPFW_UH_WUNLOCK(ch); - return (0); -} - -/* - * Checks table name for validity. 
- * Enforce basic length checks, the rest - * should be done in userland. - * - * Returns 0 if name is considered valid. - */ -static int -check_table_name(const char *name) -{ - - /* - * TODO: do some more complicated checks - */ - return (ipfw_check_object_name_generic(name)); -} - -/* - * Finds table config based on either legacy index - * or name in ntlv. - * Note @ti structure contains unchecked data from userland. - * - * Returns 0 in success and fills in @tc with found config - */ -static int -find_table_err(struct namedobj_instance *ni, struct tid_info *ti, - struct table_config **tc) -{ - char *name, bname[16]; - struct named_object *no; - ipfw_obj_ntlv *ntlv; - uint32_t set; - - if (ti->tlvs != NULL) { - ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, - IPFW_TLV_TBL_NAME); - if (ntlv == NULL) - return (EINVAL); - name = ntlv->name; - - /* - * Use set provided by @ti instead of @ntlv one. - * This is needed due to different sets behavior - * controlled by V_fw_tables_sets. - */ - set = (V_fw_tables_sets != 0) ? ti->set : 0; - } else { - snprintf(bname, sizeof(bname), "%d", ti->uidx); - name = bname; - set = 0; - } - - no = ipfw_objhash_lookup_name(ni, set, name); - *tc = (struct table_config *)no; - - return (0); -} - -/* - * Finds table config based on either legacy index - * or name in ntlv. - * Note @ti structure contains unchecked data from userland. - * - * Returns pointer to table_config or NULL. - */ -static struct table_config * -find_table(struct namedobj_instance *ni, struct tid_info *ti) -{ - struct table_config *tc; - - if (find_table_err(ni, ti, &tc) != 0) - return (NULL); - - return (tc); -} - -/* - * Allocate new table config structure using - * specified @algo and @aname. - * - * Returns pointer to config or NULL. 
- */ -static struct table_config * -alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti, - struct table_algo *ta, char *aname, uint8_t tflags) -{ - char *name, bname[16]; - struct table_config *tc; - int error; - ipfw_obj_ntlv *ntlv; - uint32_t set; - - if (ti->tlvs != NULL) { - ntlv = ipfw_find_name_tlv_type(ti->tlvs, ti->tlen, ti->uidx, - IPFW_TLV_TBL_NAME); - if (ntlv == NULL) - return (NULL); - name = ntlv->name; - set = ntlv->set; - } else { - /* Compat part: convert number to string representation */ - snprintf(bname, sizeof(bname), "%d", ti->uidx); - name = bname; - set = 0; - } - - tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO); - tc->no.name = tc->tablename; - tc->no.subtype = ta->type; - tc->no.set = set; - tc->tflags = tflags; - tc->ta = ta; - strlcpy(tc->tablename, name, sizeof(tc->tablename)); - /* Set "shared" value type by default */ - tc->vshared = 1; - - /* Preallocate data structures for new tables */ - error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags); - if (error != 0) { - free(tc, M_IPFW); - return (NULL); - } - - return (tc); -} - -/* - * Destroys table state and config. - */ -static void -free_table_config(struct namedobj_instance *ni, struct table_config *tc) -{ - - KASSERT(tc->linked == 0, ("free() on linked config")); - /* UH lock MUST NOT be held */ - - /* - * We're using ta without any locking/referencing. - * TODO: fix this if we're going to use unloadable algos. - */ - tc->ta->destroy(tc->astate, &tc->ti_copy); - free(tc, M_IPFW); -} - -/* - * Links @tc to @chain table named instance. - * Sets appropriate type/states in @chain table info. 
- */ -static void -link_table(struct ip_fw_chain *ch, struct table_config *tc) -{ - struct namedobj_instance *ni; - struct table_info *ti; - uint16_t kidx; - - IPFW_UH_WLOCK_ASSERT(ch); - IPFW_WLOCK_ASSERT(ch); - - ni = CHAIN_TO_NI(ch); - kidx = tc->no.kidx; - - ipfw_objhash_add(ni, &tc->no); - - ti = KIDX_TO_TI(ch, kidx); - *ti = tc->ti_copy; - - /* Notify algo on real @ti address */ - if (tc->ta->change_ti != NULL) - tc->ta->change_ti(tc->astate, ti); - - tc->linked = 1; - tc->ta->refcnt++; -} - -/* - * Unlinks @tc from @chain table named instance. - * Zeroes states in @chain and stores them in @tc. - */ -static void -unlink_table(struct ip_fw_chain *ch, struct table_config *tc) -{ - struct namedobj_instance *ni; - struct table_info *ti; - uint16_t kidx; - - IPFW_UH_WLOCK_ASSERT(ch); - IPFW_WLOCK_ASSERT(ch); - - ni = CHAIN_TO_NI(ch); - kidx = tc->no.kidx; - - /* Clear state. @ti copy is already saved inside @tc */ - ipfw_objhash_del(ni, &tc->no); - ti = KIDX_TO_TI(ch, kidx); - memset(ti, 0, sizeof(struct table_info)); - tc->linked = 0; - tc->ta->refcnt--; - - /* Notify algo on real @ti address */ - if (tc->ta->change_ti != NULL) - tc->ta->change_ti(tc->astate, NULL); -} - -static struct ipfw_sopt_handler scodes[] = { - { IP_FW_TABLE_XCREATE, 0, HDIR_SET, create_table }, - { IP_FW_TABLE_XDESTROY, 0, HDIR_SET, flush_table_v0 }, - { IP_FW_TABLE_XFLUSH, 0, HDIR_SET, flush_table_v0 }, - { IP_FW_TABLE_XMODIFY, 0, HDIR_BOTH, modify_table }, - { IP_FW_TABLE_XINFO, 0, HDIR_GET, describe_table }, - { IP_FW_TABLES_XLIST, 0, HDIR_GET, list_tables }, - { IP_FW_TABLE_XLIST, 0, HDIR_GET, dump_table_v0 }, - { IP_FW_TABLE_XLIST, 1, HDIR_GET, dump_table_v1 }, - { IP_FW_TABLE_XADD, 0, HDIR_BOTH, manage_table_ent_v0 }, - { IP_FW_TABLE_XADD, 1, HDIR_BOTH, manage_table_ent_v1 }, - { IP_FW_TABLE_XDEL, 0, HDIR_BOTH, manage_table_ent_v0 }, - { IP_FW_TABLE_XDEL, 1, HDIR_BOTH, manage_table_ent_v1 }, - { IP_FW_TABLE_XFIND, 0, HDIR_GET, find_table_entry }, - { IP_FW_TABLE_XSWAP, 0, HDIR_SET, 
swap_table }, - { IP_FW_TABLES_ALIST, 0, HDIR_GET, list_table_algo }, - { IP_FW_TABLE_XGETSIZE, 0, HDIR_GET, get_table_size }, -}; - -static int -destroy_table_locked(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - - unlink_table((struct ip_fw_chain *)arg, (struct table_config *)no); - if (ipfw_objhash_free_idx(ni, no->kidx) != 0) - printf("Error unlinking kidx %d from table %s\n", - no->kidx, no->name); - free_table_config(ni, (struct table_config *)no); - return (0); -} - -/* - * Shuts tables module down. - */ -void -ipfw_destroy_tables(struct ip_fw_chain *ch, int last) -{ - - IPFW_DEL_SOPT_HANDLER(last, scodes); - IPFW_DEL_OBJ_REWRITER(last, opcodes); - - /* Remove all tables from working set */ - IPFW_UH_WLOCK(ch); - IPFW_WLOCK(ch); - ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch); - IPFW_WUNLOCK(ch); - IPFW_UH_WUNLOCK(ch); - - /* Free pointers itself */ - free(ch->tablestate, M_IPFW); - - ipfw_table_value_destroy(ch, last); - ipfw_table_algo_destroy(ch); - - ipfw_objhash_destroy(CHAIN_TO_NI(ch)); - free(CHAIN_TO_TCFG(ch), M_IPFW); -} - -/* - * Starts tables module. 
- */ -int -ipfw_init_tables(struct ip_fw_chain *ch, int first) -{ - struct tables_config *tcfg; - - /* Allocate pointers */ - ch->tablestate = malloc(V_fw_tables_max * sizeof(struct table_info), - M_IPFW, M_WAITOK | M_ZERO); - - tcfg = malloc(sizeof(struct tables_config), M_IPFW, M_WAITOK | M_ZERO); - tcfg->namehash = ipfw_objhash_create(V_fw_tables_max); - ch->tblcfg = tcfg; - - ipfw_table_value_init(ch, first); - ipfw_table_algo_init(ch); - - IPFW_ADD_OBJ_REWRITER(first, opcodes); - IPFW_ADD_SOPT_HANDLER(first, scodes); - return (0); -} - - - diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table.h b/freebsd/sys/netpfil/ipfw/ip_fw_table.h deleted file mode 100644 index d6578482..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_table.h +++ /dev/null @@ -1,234 +0,0 @@ -/*- - * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _IPFW2_TABLE_H -#define _IPFW2_TABLE_H - -/* - * Internal constants and data structures used by ipfw tables - * not meant to be exported outside the kernel. - */ -#ifdef _KERNEL - -struct table_algo; -struct tables_config { - struct namedobj_instance *namehash; - struct namedobj_instance *valhash; - uint32_t val_size; - uint32_t algo_count; - struct table_algo *algo[256]; - struct table_algo *def_algo[IPFW_TABLE_MAXTYPE + 1]; - TAILQ_HEAD(op_state_l,op_state) state_list; -}; -#define CHAIN_TO_TCFG(chain) ((struct tables_config *)(chain)->tblcfg) - -struct table_info { - table_lookup_t *lookup; /* Lookup function */ - void *state; /* Lookup radix/other structure */ - void *xstate; /* eXtended state */ - u_long data; /* Hints for given func */ -}; - -struct table_value; -struct tentry_info { - void *paddr; - struct table_value *pvalue; - void *ptv; /* Temporary field to hold obj */ - uint8_t masklen; /* mask length */ - uint8_t subtype; - uint16_t flags; /* record flags */ - uint32_t value; /* value index */ -}; -#define TEI_FLAGS_UPDATE 0x0001 /* Add or update rec if exists */ -#define TEI_FLAGS_UPDATED 0x0002 /* Entry has been updated */ -#define TEI_FLAGS_COMPAT 0x0004 /* Called from old ABI */ -#define TEI_FLAGS_DONTADD 0x0008 /* Do not create new rec */ -#define TEI_FLAGS_ADDED 0x0010 /* Entry was added */ -#define TEI_FLAGS_DELETED 0x0020 /* Entry was deleted */ -#define TEI_FLAGS_LIMIT 0x0040 /* Limit 
was hit */ -#define TEI_FLAGS_ERROR 0x0080 /* Unknown request error */ -#define TEI_FLAGS_NOTFOUND 0x0100 /* Entry was not found */ -#define TEI_FLAGS_EXISTS 0x0200 /* Entry already exists */ - -typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state, - struct table_info *ti, char *data, uint8_t tflags); -typedef void (ta_destroy)(void *ta_state, struct table_info *ti); -typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -typedef int (ta_add)(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -typedef int (ta_del)(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -typedef void (ta_flush_entry)(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); - -typedef int (ta_need_modify)(void *ta_state, struct table_info *ti, - uint32_t count, uint64_t *pflags); -typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags); -typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti, - void *ta_buf, uint64_t *pflags); -typedef void (ta_modify)(void *ta_state, struct table_info *ti, - void *ta_buf, uint64_t pflags); -typedef void (ta_flush_mod)(void *ta_buf); - -typedef void (ta_change_ti)(void *ta_state, struct table_info *ti); -typedef void (ta_print_config)(void *ta_state, struct table_info *ti, char *buf, - size_t bufsize); - -typedef int ta_foreach_f(void *node, void *arg); -typedef void ta_foreach(void *ta_state, struct table_info *ti, ta_foreach_f *f, - void *arg); -typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e, - ipfw_obj_tentry *tent); -typedef int ta_find_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent); -typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti, - ipfw_ta_tinfo *tinfo); -typedef uint32_t ta_get_count(void *ta_state, struct table_info *ti); - 
-struct table_algo { - char name[16]; - uint32_t idx; - uint32_t type; - uint32_t refcnt; - uint32_t flags; - uint32_t vlimit; - size_t ta_buf_size; - ta_init *init; - ta_destroy *destroy; - ta_prepare_add *prepare_add; - ta_prepare_del *prepare_del; - ta_add *add; - ta_del *del; - ta_flush_entry *flush_entry; - ta_find_tentry *find_tentry; - ta_need_modify *need_modify; - ta_prepare_mod *prepare_mod; - ta_fill_mod *fill_mod; - ta_modify *modify; - ta_flush_mod *flush_mod; - ta_change_ti *change_ti; - ta_foreach *foreach; - ta_dump_tentry *dump_tentry; - ta_print_config *print_config; - ta_dump_tinfo *dump_tinfo; - ta_get_count *get_count; -}; -#define TA_FLAG_DEFAULT 0x01 /* Algo is default for given type */ -#define TA_FLAG_READONLY 0x02 /* Algo does not support modifications*/ -#define TA_FLAG_EXTCOUNTER 0x04 /* Algo has external counter available*/ - -int ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, - size_t size, int *idx); -void ipfw_del_table_algo(struct ip_fw_chain *ch, int idx); - -void ipfw_table_algo_init(struct ip_fw_chain *chain); -void ipfw_table_algo_destroy(struct ip_fw_chain *chain); - -MALLOC_DECLARE(M_IPFW_TBL); -/* Exported to support legacy opcodes */ -int add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei, uint8_t flags, uint32_t count); -int del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti, - struct tentry_info *tei, uint8_t flags, uint32_t count); -int flush_table(struct ip_fw_chain *ch, struct tid_info *ti); -void ipfw_import_table_value_legacy(uint32_t value, struct table_value *v); -uint32_t ipfw_export_table_value_legacy(struct table_value *v); -int ipfw_get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd); - -/* ipfw_table_value.c functions */ -struct table_config; -struct tableop_state; -void ipfw_table_value_init(struct ip_fw_chain *ch, int first); -void ipfw_table_value_destroy(struct ip_fw_chain *ch, int last); -int 
ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts); -void ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc, - struct tentry_info *tei, uint32_t count, int rollback); -void ipfw_import_table_value_v1(ipfw_table_value *iv); -void ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *iv); -void ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc, - struct table_algo *ta, void *astate, struct table_info *ti); -void rollback_table_values(struct tableop_state *ts); - -int ipfw_rewrite_table_uidx(struct ip_fw_chain *chain, - struct rule_check_info *ci); -int ipfw_mark_table_kidx(struct ip_fw_chain *chain, struct ip_fw *rule, - uint32_t *bmask); -int ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx, - struct sockopt_data *sd); -void ipfw_unref_rule_tables(struct ip_fw_chain *chain, struct ip_fw *rule); -struct namedobj_instance *ipfw_get_table_objhash(struct ip_fw_chain *ch); - -/* utility functions */ -int ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt, - uint32_t new_set); -void ipfw_swap_tables_sets(struct ip_fw_chain *ch, uint32_t old_set, - uint32_t new_set, int mv); -int ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx, - ta_foreach_f f, void *arg); - -/* internal functions */ -void tc_ref(struct table_config *tc); -void tc_unref(struct table_config *tc); - -struct op_state; -typedef void (op_rollback_f)(void *object, struct op_state *state); -struct op_state { - TAILQ_ENTRY(op_state) next; /* chain link */ - op_rollback_f *func; -}; - -struct tableop_state { - struct op_state opstate; - struct ip_fw_chain *ch; - struct table_config *tc; - struct table_algo *ta; - struct tentry_info *tei; - uint32_t count; - uint32_t vmask; - int vshared; - int modified; -}; - -void add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts); -void del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts); -void 
rollback_toperation_state(struct ip_fw_chain *ch, void *object); - -/* Legacy interfaces */ -int ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, - uint32_t *cnt); -int ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, - uint32_t *cnt); -int ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti, - ipfw_table *tbl); - - -#endif /* _KERNEL */ -#endif /* _IPFW2_TABLE_H */ diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c b/freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c deleted file mode 100644 index 2cf0bdcb..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_table_algo.c +++ /dev/null @@ -1,4112 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2014 Yandex LLC - * Copyright (c) 2014 Alexander V. Chernikov - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * Lookup table algorithms. - * - */ - -#include <rtems/bsd/local/opt_ipfw.h> -#include <rtems/bsd/local/opt_inet.h> -#ifndef INET -#error IPFIREWALL requires INET. -#endif /* INET */ -#include <rtems/bsd/local/opt_inet6.h> - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/rwlock.h> -#include <sys/rmlock.h> -#include <sys/socket.h> -#include <sys/queue.h> -#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ -#include <net/radix.h> -#include <net/route.h> -#include <net/route_var.h> - -#include <netinet/in.h> -#include <netinet/in_fib.h> -#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ -#include <netinet/ip_fw.h> -#include <netinet6/in6_fib.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/ip_fw_table.h> - - -/* - * IPFW table lookup algorithms. - * - * What is needed to add another table algo? - * - * Algo init: - * * struct table_algo has to be filled with: - * name: "type:algoname" format, e.g. "addr:radix". Currently - * there are the following types: "addr", "iface", "number" and "flow". - * type: one of IPFW_TABLE_* types - * flags: one or more TA_FLAGS_* - * ta_buf_size: size of structure used to store add/del item state. - * Needs to be less than TA_BUF_SZ. - * callbacks: see below for description. 
- * * ipfw_add_table_algo / ipfw_del_table_algo has to be called - * - * Callbacks description: - * - * -init: request to initialize new table instance. - * typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state, - * struct table_info *ti, char *data, uint8_t tflags); - * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success. - * - * Allocate all structures needed for normal operations. - * * Caller may want to parse @data for some algo-specific - * options provided by userland. - * * Caller may want to save configuration state pointer to @ta_state - * * Caller needs to save desired runtime structure pointer(s) - * inside @ti fields. Note that it is not correct to save - * @ti pointer at this moment. Use -change_ti hook for that. - * * Caller has to fill in ti->lookup to appropriate function - * pointer. - * - * - * - * -destroy: request to destroy table instance. - * typedef void (ta_destroy)(void *ta_state, struct table_info *ti); - * MANDATORY, unlocked. (M_WAITOK). - * - * Frees all table entries and all tables structures allocated by -init. - * - * - * - * -prepare_add: request to allocate state for adding new entry. - * typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei, - * void *ta_buf); - * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success. - * - * Allocates state and fills it in with all necessary data (EXCEPT value) - * from @tei to minimize operations needed to be done under WLOCK. - * "value" field has to be copied to new entry in @add callback. - * Buffer ta_buf of size ta->ta_buf_sz may be used to store - * allocated state. - * - * - * - * -prepare_del: request to set state for deleting existing entry. - * typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei, - * void *ta_buf); - * MANDATORY, locked, UH. (M_NOWAIT). Returns 0 on success. - * - * Buffer ta_buf of size ta->ta_buf_sz may be used to store - * allocated state. 
Caller should use on-stack ta_buf allocation - * instead of doing malloc(). - * - * - * - * -add: request to insert new entry into runtime/config structures. - * typedef int (ta_add)(void *ta_state, struct table_info *ti, - * struct tentry_info *tei, void *ta_buf, uint32_t *pnum); - * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success. - * - * Insert new entry using previously-allocated state in @ta_buf. - * * @tei may have the following flags: - * TEI_FLAGS_UPDATE: request to add or update entry. - * TEI_FLAGS_DONTADD: request to update (but not add) entry. - * * Caller is required to do the following: - * copy real entry value from @tei - * entry added: return 0, set 1 to @pnum - * entry updated: return 0, store 0 to @pnum, store old value in @tei, - * add TEI_FLAGS_UPDATED flag to @tei. - * entry exists: return EEXIST - * entry not found: return ENOENT - * other error: return non-zero error code. - * - * - * - * -del: request to delete existing entry from runtime/config structures. - * typedef int (ta_del)(void *ta_state, struct table_info *ti, - * struct tentry_info *tei, void *ta_buf, uint32_t *pnum); - * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success. - * - * Delete entry using previously set up in @ta_buf. - * * Caller is required to do the following: - * entry deleted: return 0, set 1 to @pnum, store old value in @tei. - * entry not found: return ENOENT - * other error: return non-zero error code. - * - * - * - * -flush_entry: flush entry state created by -prepare_add / -del / others - * typedef void (ta_flush_entry)(struct ip_fw_chain *ch, - * struct tentry_info *tei, void *ta_buf); - * MANDATORY, may be locked. (M_NOWAIT). - * - * Delete state allocated by: - * -prepare_add (-add returned EEXIST|UPDATED) - * -prepare_del (if any) - * -del - * * Caller is required to handle empty @ta_buf correctly. 
- * - * - * -find_tentry: finds entry specified by key @tei - * typedef int ta_find_tentry(void *ta_state, struct table_info *ti, - * ipfw_obj_tentry *tent); - * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 on success. - * - * Finds entry specified by given key. - * * Caller is required to do the following: - * entry found: returns 0, export entry to @tent - * entry not found: returns ENOENT - * - * - * -need_modify: checks if @ti has enough space to hold another @count items. - * typedef int (ta_need_modify)(void *ta_state, struct table_info *ti, - * uint32_t count, uint64_t *pflags); - * OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 if has. - * - * Checks if given table has enough space to add @count items without - * resize. Caller may use @pflags to store desired modification data. - * - * - * - * -prepare_mod: allocate structures for table modification. - * typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags); - * OPTIONAL(need_modify), unlocked. (M_WAITOK). Returns 0 on success. - * - * Allocate all needed state for table modification. Caller - * should use `struct mod_item` to store new state in @ta_buf. - * Up to TA_BUF_SZ (128 bytes) can be stored in @ta_buf. - * - * - * - * -fill_mod: copy some data to new state/ - * typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti, - * void *ta_buf, uint64_t *pflags); - * OPTIONAL(need_modify), locked (UH). (M_NOWAIT). Returns 0 on success. - * - * Copy as much data as we can to minimize changes under WLOCK. - * For example, array can be merged inside this callback. - * - * - * - * -modify: perform final modification. - * typedef void (ta_modify)(void *ta_state, struct table_info *ti, - * void *ta_buf, uint64_t pflags); - * OPTIONAL(need_modify), locked (UH+WLOCK). (M_NOWAIT). - * - * Performs all changes necessary to switch to new structures. - * * Caller should save old pointers to @ta_buf storage. - * - * - * - * -flush_mod: flush table modification state. 
- * typedef void (ta_flush_mod)(void *ta_buf); - * OPTIONAL(need_modify), unlocked. (M_WAITOK). - * - * Performs flush for the following: - * - prepare_mod (modification was not necessary) - * - modify (for the old state) - * - * - * - * -change_gi: monitor table info pointer changes - * typedef void (ta_change_ti)(void *ta_state, struct table_info *ti); - * OPTIONAL, locked (UH). (M_NOWAIT). - * - * Called on @ti pointer changed. Called immediately after -init - * to set initial state. - * - * - * - * -foreach: calls @f for each table entry - * typedef void ta_foreach(void *ta_state, struct table_info *ti, - * ta_foreach_f *f, void *arg); - * MANDATORY, locked(UH). (M_NOWAIT). - * - * Runs callback with specified argument for each table entry, - * Typically used for dumping table entries. - * - * - * - * -dump_tentry: dump table entry in current @tentry format. - * typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e, - * ipfw_obj_tentry *tent); - * MANDATORY, locked(UH). (M_NOWAIT). Returns 0 on success. - * - * Dumps entry @e to @tent. - * - * - * -print_config: prints custom algorithm options into buffer. - * typedef void (ta_print_config)(void *ta_state, struct table_info *ti, - * char *buf, size_t bufsize); - * OPTIONAL. locked(UH). (M_NOWAIT). - * - * Prints custom algorithm options in the format suitable to pass - * back to -init callback. - * - * - * - * -dump_tinfo: dumps algo-specific info. - * typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti, - * ipfw_ta_tinfo *tinfo); - * OPTIONAL. locked(UH). (M_NOWAIT). - * - * Dumps options like items size/hash size, etc. 
- */ - -MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); - -/* - * Utility structures/functions common to more than one algo - */ - -struct mod_item { - void *main_ptr; - size_t size; - void *main_ptr6; - size_t size6; -}; - -static int badd(const void *key, void *item, void *base, size_t nmemb, - size_t size, int (*compar) (const void *, const void *)); -static int bdel(const void *key, void *base, size_t nmemb, size_t size, - int (*compar) (const void *, const void *)); - - -/* - * ADDR implementation using radix - * - */ - -/* - * The radix code expects addr and mask to be array of bytes, - * with the first byte being the length of the array. rn_inithead - * is called with the offset in bits of the lookup key within the - * array. If we use a sockaddr_in as the underlying type, - * sin_len is conveniently located at offset 0, sin_addr is at - * offset 4 and normally aligned. - * But for portability, let's avoid assumption and make the code explicit - */ -#define KEY_LEN(v) *((uint8_t *)&(v)) -/* - * Do not require radix to compare more than actual IPv4/IPv6 address - */ -#define KEY_LEN_INET (offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t)) -#define KEY_LEN_INET6 (offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr)) - -#define OFF_LEN_INET (8 * offsetof(struct sockaddr_in, sin_addr)) -#define OFF_LEN_INET6 (8 * offsetof(struct sa_in6, sin6_addr)) - -struct radix_addr_entry { - struct radix_node rn[2]; - struct sockaddr_in addr; - uint32_t value; - uint8_t masklen; -}; - -struct sa_in6 { - uint8_t sin6_len; - uint8_t sin6_family; - uint8_t pad[2]; - struct in6_addr sin6_addr; -}; - -struct radix_addr_xentry { - struct radix_node rn[2]; - struct sa_in6 addr6; - uint32_t value; - uint8_t masklen; -}; - -struct radix_cfg { - struct radix_node_head *head4; - struct radix_node_head *head6; - size_t count4; - size_t count6; -}; - -struct ta_buf_radix -{ - void *ent_ptr; - struct sockaddr *addr_ptr; - struct sockaddr *mask_ptr; - union { - 
struct { - struct sockaddr_in sa; - struct sockaddr_in ma; - } a4; - struct { - struct sa_in6 sa; - struct sa_in6 ma; - } a6; - } addr; -}; - -static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); -static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state, - struct table_info *ti, char *data, uint8_t tflags); -static int flush_radix_entry(struct radix_node *rn, void *arg); -static void ta_destroy_radix(void *ta_state, struct table_info *ti); -static void ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, - ipfw_ta_tinfo *tinfo); -static int ta_dump_radix_tentry(void *ta_state, struct table_info *ti, - void *e, ipfw_obj_tentry *tent); -static int ta_find_radix_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent); -static void ta_foreach_radix(void *ta_state, struct table_info *ti, - ta_foreach_f *f, void *arg); -static void tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, - struct sockaddr *ma, int *set_mask); -static int ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -static int ta_add_radix(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -static int ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -static int ta_del_radix(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -static void ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -static int ta_need_modify_radix(void *ta_state, struct table_info *ti, - uint32_t count, uint64_t *pflags); - -static int -ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) -{ - struct radix_node_head *rnh; - - if (keylen == sizeof(in_addr_t)) { - struct radix_addr_entry *ent; - struct sockaddr_in sa; - KEY_LEN(sa) = KEY_LEN_INET; - sa.sin_addr.s_addr = *((in_addr_t *)key); - rnh = (struct 
radix_node_head *)ti->state; - ent = (struct radix_addr_entry *)(rnh->rnh_matchaddr(&sa, &rnh->rh)); - if (ent != NULL) { - *val = ent->value; - return (1); - } - } else { - struct radix_addr_xentry *xent; - struct sa_in6 sa6; - KEY_LEN(sa6) = KEY_LEN_INET6; - memcpy(&sa6.sin6_addr, key, sizeof(struct in6_addr)); - rnh = (struct radix_node_head *)ti->xstate; - xent = (struct radix_addr_xentry *)(rnh->rnh_matchaddr(&sa6, &rnh->rh)); - if (xent != NULL) { - *val = xent->value; - return (1); - } - } - - return (0); -} - -/* - * New table - */ -static int -ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, - char *data, uint8_t tflags) -{ - struct radix_cfg *cfg; - - if (!rn_inithead(&ti->state, OFF_LEN_INET)) - return (ENOMEM); - if (!rn_inithead(&ti->xstate, OFF_LEN_INET6)) { - rn_detachhead(&ti->state); - return (ENOMEM); - } - - cfg = malloc(sizeof(struct radix_cfg), M_IPFW, M_WAITOK | M_ZERO); - - *ta_state = cfg; - ti->lookup = ta_lookup_radix; - - return (0); -} - -static int -flush_radix_entry(struct radix_node *rn, void *arg) -{ - struct radix_node_head * const rnh = arg; - struct radix_addr_entry *ent; - - ent = (struct radix_addr_entry *) - rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, &rnh->rh); - if (ent != NULL) - free(ent, M_IPFW_TBL); - return (0); -} - -static void -ta_destroy_radix(void *ta_state, struct table_info *ti) -{ - struct radix_cfg *cfg; - struct radix_node_head *rnh; - - cfg = (struct radix_cfg *)ta_state; - - rnh = (struct radix_node_head *)(ti->state); - rnh->rnh_walktree(&rnh->rh, flush_radix_entry, rnh); - rn_detachhead(&ti->state); - - rnh = (struct radix_node_head *)(ti->xstate); - rnh->rnh_walktree(&rnh->rh, flush_radix_entry, rnh); - rn_detachhead(&ti->xstate); - - free(cfg, M_IPFW); -} - -/* - * Provide algo-specific table info - */ -static void -ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) -{ - struct radix_cfg *cfg; - - cfg = (struct radix_cfg *)ta_state; - - tinfo->flags 
= IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; - tinfo->taclass4 = IPFW_TACLASS_RADIX; - tinfo->count4 = cfg->count4; - tinfo->itemsize4 = sizeof(struct radix_addr_entry); - tinfo->taclass6 = IPFW_TACLASS_RADIX; - tinfo->count6 = cfg->count6; - tinfo->itemsize6 = sizeof(struct radix_addr_xentry); -} - -static int -ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e, - ipfw_obj_tentry *tent) -{ - struct radix_addr_entry *n; -#ifdef INET6 - struct radix_addr_xentry *xn; -#endif - - n = (struct radix_addr_entry *)e; - - /* Guess IPv4/IPv6 radix by sockaddr family */ - if (n->addr.sin_family == AF_INET) { - tent->k.addr.s_addr = n->addr.sin_addr.s_addr; - tent->masklen = n->masklen; - tent->subtype = AF_INET; - tent->v.kidx = n->value; -#ifdef INET6 - } else { - xn = (struct radix_addr_xentry *)e; - memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr)); - tent->masklen = xn->masklen; - tent->subtype = AF_INET6; - tent->v.kidx = xn->value; -#endif - } - - return (0); -} - -static int -ta_find_radix_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent) -{ - struct radix_node_head *rnh; - void *e; - - e = NULL; - if (tent->subtype == AF_INET) { - struct sockaddr_in sa; - KEY_LEN(sa) = KEY_LEN_INET; - sa.sin_addr.s_addr = tent->k.addr.s_addr; - rnh = (struct radix_node_head *)ti->state; - e = rnh->rnh_matchaddr(&sa, &rnh->rh); - } else { - struct sa_in6 sa6; - KEY_LEN(sa6) = KEY_LEN_INET6; - memcpy(&sa6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr)); - rnh = (struct radix_node_head *)ti->xstate; - e = rnh->rnh_matchaddr(&sa6, &rnh->rh); - } - - if (e != NULL) { - ta_dump_radix_tentry(ta_state, ti, e, tent); - return (0); - } - - return (ENOENT); -} - -static void -ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f, - void *arg) -{ - struct radix_node_head *rnh; - - rnh = (struct radix_node_head *)(ti->state); - rnh->rnh_walktree(&rnh->rh, (walktree_f_t *)f, arg); - - rnh = (struct radix_node_head 
*)(ti->xstate); - rnh->rnh_walktree(&rnh->rh, (walktree_f_t *)f, arg); -} - - -#ifdef INET6 -static inline void ipv6_writemask(struct in6_addr *addr6, uint8_t mask); - -static inline void -ipv6_writemask(struct in6_addr *addr6, uint8_t mask) -{ - uint32_t *cp; - - for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32) - *cp++ = 0xFFFFFFFF; - if (mask > 0) - *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0); -} -#endif - -static void -tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa, - struct sockaddr *ma, int *set_mask) -{ - int mlen; -#ifdef INET - struct sockaddr_in *addr, *mask; -#endif -#ifdef INET6 - struct sa_in6 *addr6, *mask6; -#endif - in_addr_t a4; - - mlen = tei->masklen; - - if (tei->subtype == AF_INET) { -#ifdef INET - addr = (struct sockaddr_in *)sa; - mask = (struct sockaddr_in *)ma; - /* Set 'total' structure length */ - KEY_LEN(*addr) = KEY_LEN_INET; - KEY_LEN(*mask) = KEY_LEN_INET; - addr->sin_family = AF_INET; - mask->sin_addr.s_addr = - htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); - a4 = *((in_addr_t *)tei->paddr); - addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr; - if (mlen != 32) - *set_mask = 1; - else - *set_mask = 0; -#endif -#ifdef INET6 - } else if (tei->subtype == AF_INET6) { - /* IPv6 case */ - addr6 = (struct sa_in6 *)sa; - mask6 = (struct sa_in6 *)ma; - /* Set 'total' structure length */ - KEY_LEN(*addr6) = KEY_LEN_INET6; - KEY_LEN(*mask6) = KEY_LEN_INET6; - addr6->sin6_family = AF_INET6; - ipv6_writemask(&mask6->sin6_addr, mlen); - memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr)); - APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr); - if (mlen != 128) - *set_mask = 1; - else - *set_mask = 0; -#endif - } -} - -static int -ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_radix *tb; - struct radix_addr_entry *ent; -#ifdef INET6 - struct radix_addr_xentry *xent; -#endif - struct sockaddr *addr, *mask; - int mlen, set_mask; - - tb = (struct ta_buf_radix 
*)ta_buf; - - mlen = tei->masklen; - set_mask = 0; - - if (tei->subtype == AF_INET) { -#ifdef INET - if (mlen > 32) - return (EINVAL); - ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); - ent->masklen = mlen; - - addr = (struct sockaddr *)&ent->addr; - mask = (struct sockaddr *)&tb->addr.a4.ma; - tb->ent_ptr = ent; -#endif -#ifdef INET6 - } else if (tei->subtype == AF_INET6) { - /* IPv6 case */ - if (mlen > 128) - return (EINVAL); - xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO); - xent->masklen = mlen; - - addr = (struct sockaddr *)&xent->addr6; - mask = (struct sockaddr *)&tb->addr.a6.ma; - tb->ent_ptr = xent; -#endif - } else { - /* Unknown CIDR type */ - return (EINVAL); - } - - tei_to_sockaddr_ent(tei, addr, mask, &set_mask); - /* Set pointers */ - tb->addr_ptr = addr; - if (set_mask != 0) - tb->mask_ptr = mask; - - return (0); -} - -static int -ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint32_t *pnum) -{ - struct radix_cfg *cfg; - struct radix_node_head *rnh; - struct radix_node *rn; - struct ta_buf_radix *tb; - uint32_t *old_value, value; - - cfg = (struct radix_cfg *)ta_state; - tb = (struct ta_buf_radix *)ta_buf; - - /* Save current entry value from @tei */ - if (tei->subtype == AF_INET) { - rnh = ti->state; - ((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value; - } else { - rnh = ti->xstate; - ((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value; - } - - /* Search for an entry first */ - rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, &rnh->rh); - if (rn != NULL) { - if ((tei->flags & TEI_FLAGS_UPDATE) == 0) - return (EEXIST); - /* Record already exists. 
Update value if we're asked to */ - if (tei->subtype == AF_INET) - old_value = &((struct radix_addr_entry *)rn)->value; - else - old_value = &((struct radix_addr_xentry *)rn)->value; - - value = *old_value; - *old_value = tei->value; - tei->value = value; - - /* Indicate that update has happened instead of addition */ - tei->flags |= TEI_FLAGS_UPDATED; - *pnum = 0; - - return (0); - } - - if ((tei->flags & TEI_FLAGS_DONTADD) != 0) - return (EFBIG); - - rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, &rnh->rh,tb->ent_ptr); - if (rn == NULL) { - /* Unknown error */ - return (EINVAL); - } - - if (tei->subtype == AF_INET) - cfg->count4++; - else - cfg->count6++; - tb->ent_ptr = NULL; - *pnum = 1; - - return (0); -} - -static int -ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_radix *tb; - struct sockaddr *addr, *mask; - int mlen, set_mask; - - tb = (struct ta_buf_radix *)ta_buf; - - mlen = tei->masklen; - set_mask = 0; - - if (tei->subtype == AF_INET) { - if (mlen > 32) - return (EINVAL); - - addr = (struct sockaddr *)&tb->addr.a4.sa; - mask = (struct sockaddr *)&tb->addr.a4.ma; -#ifdef INET6 - } else if (tei->subtype == AF_INET6) { - if (mlen > 128) - return (EINVAL); - - addr = (struct sockaddr *)&tb->addr.a6.sa; - mask = (struct sockaddr *)&tb->addr.a6.ma; -#endif - } else - return (EINVAL); - - tei_to_sockaddr_ent(tei, addr, mask, &set_mask); - tb->addr_ptr = addr; - if (set_mask != 0) - tb->mask_ptr = mask; - - return (0); -} - -static int -ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint32_t *pnum) -{ - struct radix_cfg *cfg; - struct radix_node_head *rnh; - struct radix_node *rn; - struct ta_buf_radix *tb; - - cfg = (struct radix_cfg *)ta_state; - tb = (struct ta_buf_radix *)ta_buf; - - if (tei->subtype == AF_INET) - rnh = ti->state; - else - rnh = ti->xstate; - - rn = rnh->rnh_deladdr(tb->addr_ptr, tb->mask_ptr, &rnh->rh); - - if (rn == NULL) - return 
(ENOENT); - - /* Save entry value to @tei */ - if (tei->subtype == AF_INET) - tei->value = ((struct radix_addr_entry *)rn)->value; - else - tei->value = ((struct radix_addr_xentry *)rn)->value; - - tb->ent_ptr = rn; - - if (tei->subtype == AF_INET) - cfg->count4--; - else - cfg->count6--; - *pnum = 1; - - return (0); -} - -static void -ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_radix *tb; - - tb = (struct ta_buf_radix *)ta_buf; - - if (tb->ent_ptr != NULL) - free(tb->ent_ptr, M_IPFW_TBL); -} - -static int -ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count, - uint64_t *pflags) -{ - - /* - * radix does not require additional memory allocations - * other than nodes itself. Adding new masks to the tree do - * but we don't have any API to call (and we don't known which - * sizes do we need). - */ - return (0); -} - -struct table_algo addr_radix = { - .name = "addr:radix", - .type = IPFW_TABLE_ADDR, - .flags = TA_FLAG_DEFAULT, - .ta_buf_size = sizeof(struct ta_buf_radix), - .init = ta_init_radix, - .destroy = ta_destroy_radix, - .prepare_add = ta_prepare_add_radix, - .prepare_del = ta_prepare_del_radix, - .add = ta_add_radix, - .del = ta_del_radix, - .flush_entry = ta_flush_radix_entry, - .foreach = ta_foreach_radix, - .dump_tentry = ta_dump_radix_tentry, - .find_tentry = ta_find_radix_tentry, - .dump_tinfo = ta_dump_radix_tinfo, - .need_modify = ta_need_modify_radix, -}; - - -/* - * addr:hash cmds - * - * - * ti->data: - * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] - * [ 8][ 8[ 8][ 8] - * - * inv.mask4: 32 - mask - * inv.mask6: - * 1) _slow lookup: mask - * 2) _aligned: (128 - mask) / 8 - * 3) _64: 8 - * - * - * pflags: - * [v4=1/v6=0][hsize] - * [ 32][ 32] - */ - -struct chashentry; - -SLIST_HEAD(chashbhead, chashentry); - -struct chash_cfg { - struct chashbhead *head4; - struct chashbhead *head6; - size_t size4; - size_t size6; - size_t items4; - size_t items6; - uint8_t mask4; 
- uint8_t mask6; -}; - -struct chashentry { - SLIST_ENTRY(chashentry) next; - uint32_t value; - uint32_t type; - union { - uint32_t a4; /* Host format */ - struct in6_addr a6; /* Network format */ - } a; -}; - -struct ta_buf_chash -{ - void *ent_ptr; - struct chashentry ent; -}; - -#ifdef INET -static __inline uint32_t hash_ip(uint32_t addr, int hsize); -#endif -#ifdef INET6 -static __inline uint32_t hash_ip6(struct in6_addr *addr6, int hsize); -static __inline uint16_t hash_ip64(struct in6_addr *addr6, int hsize); -static __inline uint32_t hash_ip6_slow(struct in6_addr *addr6, void *key, - int mask, int hsize); -static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask, - int hsize); -#endif -static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); -static int ta_lookup_chash_aligned(struct table_info *ti, void *key, - uint32_t keylen, uint32_t *val); -static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); -static int chash_parse_opts(struct chash_cfg *cfg, char *data); -static void ta_print_chash_config(void *ta_state, struct table_info *ti, - char *buf, size_t bufsize); -static int ta_log2(uint32_t v); -static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state, - struct table_info *ti, char *data, uint8_t tflags); -static void ta_destroy_chash(void *ta_state, struct table_info *ti); -static void ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, - ipfw_ta_tinfo *tinfo); -static int ta_dump_chash_tentry(void *ta_state, struct table_info *ti, - void *e, ipfw_obj_tentry *tent); -static uint32_t hash_ent(struct chashentry *ent, int af, int mlen, - uint32_t size); -static int tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent); -static int ta_find_chash_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent); -static void ta_foreach_chash(void *ta_state, struct table_info *ti, - ta_foreach_f *f, void *arg); -static 
int ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -static int ta_add_chash(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -static int ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -static int ta_del_chash(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -static void ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -static int ta_need_modify_chash(void *ta_state, struct table_info *ti, - uint32_t count, uint64_t *pflags); -static int ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags); -static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t *pflags); -static void ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t pflags); -static void ta_flush_mod_chash(void *ta_buf); - - -#ifdef INET -static __inline uint32_t -hash_ip(uint32_t addr, int hsize) -{ - - return (addr % (hsize - 1)); -} -#endif - -#ifdef INET6 -static __inline uint32_t -hash_ip6(struct in6_addr *addr6, int hsize) -{ - uint32_t i; - - i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1] ^ - addr6->s6_addr32[2] ^ addr6->s6_addr32[3]; - - return (i % (hsize - 1)); -} - - -static __inline uint16_t -hash_ip64(struct in6_addr *addr6, int hsize) -{ - uint32_t i; - - i = addr6->s6_addr32[0] ^ addr6->s6_addr32[1]; - - return (i % (hsize - 1)); -} - - -static __inline uint32_t -hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize) -{ - struct in6_addr mask6; - - ipv6_writemask(&mask6, mask); - memcpy(addr6, key, sizeof(struct in6_addr)); - APPLY_MASK(addr6, &mask6); - return (hash_ip6(addr6, hsize)); -} - -static __inline uint32_t -hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize) -{ - uint64_t *paddr; - - paddr = (uint64_t *)addr6; - *paddr = 0; - *(paddr + 1) = 0; - memcpy(addr6, key, mask); - return 
(hash_ip6(addr6, hsize)); -} -#endif - -static int -ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) -{ - struct chashbhead *head; - struct chashentry *ent; - uint16_t hash, hsize; - uint8_t imask; - - if (keylen == sizeof(in_addr_t)) { -#ifdef INET - head = (struct chashbhead *)ti->state; - imask = ti->data >> 24; - hsize = 1 << ((ti->data & 0xFFFF) >> 8); - uint32_t a; - a = ntohl(*((in_addr_t *)key)); - a = a >> imask; - hash = hash_ip(a, hsize); - SLIST_FOREACH(ent, &head[hash], next) { - if (ent->a.a4 == a) { - *val = ent->value; - return (1); - } - } -#endif - } else { -#ifdef INET6 - /* IPv6: worst scenario: non-round mask */ - struct in6_addr addr6; - head = (struct chashbhead *)ti->xstate; - imask = (ti->data & 0xFF0000) >> 16; - hsize = 1 << (ti->data & 0xFF); - hash = hash_ip6_slow(&addr6, key, imask, hsize); - SLIST_FOREACH(ent, &head[hash], next) { - if (memcmp(&ent->a.a6, &addr6, 16) == 0) { - *val = ent->value; - return (1); - } - } -#endif - } - - return (0); -} - -static int -ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) -{ - struct chashbhead *head; - struct chashentry *ent; - uint16_t hash, hsize; - uint8_t imask; - - if (keylen == sizeof(in_addr_t)) { -#ifdef INET - head = (struct chashbhead *)ti->state; - imask = ti->data >> 24; - hsize = 1 << ((ti->data & 0xFFFF) >> 8); - uint32_t a; - a = ntohl(*((in_addr_t *)key)); - a = a >> imask; - hash = hash_ip(a, hsize); - SLIST_FOREACH(ent, &head[hash], next) { - if (ent->a.a4 == a) { - *val = ent->value; - return (1); - } - } -#endif - } else { -#ifdef INET6 - /* IPv6: aligned to 8bit mask */ - struct in6_addr addr6; - uint64_t *paddr, *ptmp; - head = (struct chashbhead *)ti->xstate; - imask = (ti->data & 0xFF0000) >> 16; - hsize = 1 << (ti->data & 0xFF); - - hash = hash_ip6_al(&addr6, key, imask, hsize); - paddr = (uint64_t *)&addr6; - SLIST_FOREACH(ent, &head[hash], next) { - ptmp = (uint64_t *)&ent->a.a6; - if 
(paddr[0] == ptmp[0] && paddr[1] == ptmp[1]) { - *val = ent->value; - return (1); - } - } -#endif - } - - return (0); -} - -static int -ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) -{ - struct chashbhead *head; - struct chashentry *ent; - uint16_t hash, hsize; - uint8_t imask; - - if (keylen == sizeof(in_addr_t)) { -#ifdef INET - head = (struct chashbhead *)ti->state; - imask = ti->data >> 24; - hsize = 1 << ((ti->data & 0xFFFF) >> 8); - uint32_t a; - a = ntohl(*((in_addr_t *)key)); - a = a >> imask; - hash = hash_ip(a, hsize); - SLIST_FOREACH(ent, &head[hash], next) { - if (ent->a.a4 == a) { - *val = ent->value; - return (1); - } - } -#endif - } else { -#ifdef INET6 - /* IPv6: /64 */ - uint64_t a6, *paddr; - head = (struct chashbhead *)ti->xstate; - paddr = (uint64_t *)key; - hsize = 1 << (ti->data & 0xFF); - a6 = *paddr; - hash = hash_ip64((struct in6_addr *)key, hsize); - SLIST_FOREACH(ent, &head[hash], next) { - paddr = (uint64_t *)&ent->a.a6; - if (a6 == *paddr) { - *val = ent->value; - return (1); - } - } -#endif - } - - return (0); -} - -static int -chash_parse_opts(struct chash_cfg *cfg, char *data) -{ - char *pdel, *pend, *s; - int mask4, mask6; - - mask4 = cfg->mask4; - mask6 = cfg->mask6; - - if (data == NULL) - return (0); - if ((pdel = strchr(data, ' ')) == NULL) - return (0); - while (*pdel == ' ') - pdel++; - if (strncmp(pdel, "masks=", 6) != 0) - return (EINVAL); - if ((s = strchr(pdel, ' ')) != NULL) - *s++ = '\0'; - - pdel += 6; - /* Need /XX[,/YY] */ - if (*pdel++ != '/') - return (EINVAL); - mask4 = strtol(pdel, &pend, 10); - if (*pend == ',') { - /* ,/YY */ - pdel = pend + 1; - if (*pdel++ != '/') - return (EINVAL); - mask6 = strtol(pdel, &pend, 10); - if (*pend != '\0') - return (EINVAL); - } else if (*pend != '\0') - return (EINVAL); - - if (mask4 < 0 || mask4 > 32 || mask6 < 0 || mask6 > 128) - return (EINVAL); - - cfg->mask4 = mask4; - cfg->mask6 = mask6; - - return (0); -} - -static void 
-ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf, - size_t bufsize) -{ - struct chash_cfg *cfg; - - cfg = (struct chash_cfg *)ta_state; - - if (cfg->mask4 != 32 || cfg->mask6 != 128) - snprintf(buf, bufsize, "%s masks=/%d,/%d", "addr:hash", - cfg->mask4, cfg->mask6); - else - snprintf(buf, bufsize, "%s", "addr:hash"); -} - -static int -ta_log2(uint32_t v) -{ - uint32_t r; - - r = 0; - while (v >>= 1) - r++; - - return (r); -} - -/* - * New table. - * We assume 'data' to be either NULL or the following format: - * 'addr:hash [masks=/32[,/128]]' - */ -static int -ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, - char *data, uint8_t tflags) -{ - int error, i; - uint32_t hsize; - struct chash_cfg *cfg; - - cfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO); - - cfg->mask4 = 32; - cfg->mask6 = 128; - - if ((error = chash_parse_opts(cfg, data)) != 0) { - free(cfg, M_IPFW); - return (error); - } - - cfg->size4 = 128; - cfg->size6 = 128; - - cfg->head4 = malloc(sizeof(struct chashbhead) * cfg->size4, M_IPFW, - M_WAITOK | M_ZERO); - cfg->head6 = malloc(sizeof(struct chashbhead) * cfg->size6, M_IPFW, - M_WAITOK | M_ZERO); - for (i = 0; i < cfg->size4; i++) - SLIST_INIT(&cfg->head4[i]); - for (i = 0; i < cfg->size6; i++) - SLIST_INIT(&cfg->head6[i]); - - - *ta_state = cfg; - ti->state = cfg->head4; - ti->xstate = cfg->head6; - - /* Store data depending on v6 mask length */ - hsize = ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6); - if (cfg->mask6 == 64) { - ti->data = (32 - cfg->mask4) << 24 | (128 - cfg->mask6) << 16| - hsize; - ti->lookup = ta_lookup_chash_64; - } else if ((cfg->mask6 % 8) == 0) { - ti->data = (32 - cfg->mask4) << 24 | - cfg->mask6 << 13 | hsize; - ti->lookup = ta_lookup_chash_aligned; - } else { - /* don't do that! 
*/ - ti->data = (32 - cfg->mask4) << 24 | - cfg->mask6 << 16 | hsize; - ti->lookup = ta_lookup_chash_slow; - } - - return (0); -} - -static void -ta_destroy_chash(void *ta_state, struct table_info *ti) -{ - struct chash_cfg *cfg; - struct chashentry *ent, *ent_next; - int i; - - cfg = (struct chash_cfg *)ta_state; - - for (i = 0; i < cfg->size4; i++) - SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next) - free(ent, M_IPFW_TBL); - - for (i = 0; i < cfg->size6; i++) - SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next) - free(ent, M_IPFW_TBL); - - free(cfg->head4, M_IPFW); - free(cfg->head6, M_IPFW); - - free(cfg, M_IPFW); -} - -static void -ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) -{ - struct chash_cfg *cfg; - - cfg = (struct chash_cfg *)ta_state; - - tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM; - tinfo->taclass4 = IPFW_TACLASS_HASH; - tinfo->size4 = cfg->size4; - tinfo->count4 = cfg->items4; - tinfo->itemsize4 = sizeof(struct chashentry); - tinfo->taclass6 = IPFW_TACLASS_HASH; - tinfo->size6 = cfg->size6; - tinfo->count6 = cfg->items6; - tinfo->itemsize6 = sizeof(struct chashentry); -} - -static int -ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e, - ipfw_obj_tentry *tent) -{ - struct chash_cfg *cfg; - struct chashentry *ent; - - cfg = (struct chash_cfg *)ta_state; - ent = (struct chashentry *)e; - - if (ent->type == AF_INET) { - tent->k.addr.s_addr = htonl(ent->a.a4 << (32 - cfg->mask4)); - tent->masklen = cfg->mask4; - tent->subtype = AF_INET; - tent->v.kidx = ent->value; -#ifdef INET6 - } else { - memcpy(&tent->k, &ent->a.a6, sizeof(struct in6_addr)); - tent->masklen = cfg->mask6; - tent->subtype = AF_INET6; - tent->v.kidx = ent->value; -#endif - } - - return (0); -} - -static uint32_t -hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size) -{ - uint32_t hash; - - hash = 0; - - if (af == AF_INET) { -#ifdef INET - hash = hash_ip(ent->a.a4, size); -#endif - } else { 
-#ifdef INET6 - if (mlen == 64) - hash = hash_ip64(&ent->a.a6, size); - else - hash = hash_ip6(&ent->a.a6, size); -#endif - } - - return (hash); -} - -static int -tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent) -{ - int mlen; -#ifdef INET6 - struct in6_addr mask6; -#endif - - - mlen = tei->masklen; - - if (tei->subtype == AF_INET) { -#ifdef INET - if (mlen > 32) - return (EINVAL); - ent->type = AF_INET; - - /* Calculate masked address */ - ent->a.a4 = ntohl(*((in_addr_t *)tei->paddr)) >> (32 - mlen); -#endif -#ifdef INET6 - } else if (tei->subtype == AF_INET6) { - /* IPv6 case */ - if (mlen > 128) - return (EINVAL); - ent->type = AF_INET6; - - ipv6_writemask(&mask6, mlen); - memcpy(&ent->a.a6, tei->paddr, sizeof(struct in6_addr)); - APPLY_MASK(&ent->a.a6, &mask6); -#endif - } else { - /* Unknown CIDR type */ - return (EINVAL); - } - - return (0); -} - -static int -ta_find_chash_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent) -{ - struct chash_cfg *cfg; - struct chashbhead *head; - struct chashentry ent, *tmp; - struct tentry_info tei; - int error; - uint32_t hash; - - cfg = (struct chash_cfg *)ta_state; - - memset(&ent, 0, sizeof(ent)); - memset(&tei, 0, sizeof(tei)); - - if (tent->subtype == AF_INET) { - tei.paddr = &tent->k.addr; - tei.masklen = cfg->mask4; - tei.subtype = AF_INET; - - if ((error = tei_to_chash_ent(&tei, &ent)) != 0) - return (error); - - head = cfg->head4; - hash = hash_ent(&ent, AF_INET, cfg->mask4, cfg->size4); - /* Check for existence */ - SLIST_FOREACH(tmp, &head[hash], next) { - if (tmp->a.a4 != ent.a.a4) - continue; - - ta_dump_chash_tentry(ta_state, ti, tmp, tent); - return (0); - } - } else { - tei.paddr = &tent->k.addr6; - tei.masklen = cfg->mask6; - tei.subtype = AF_INET6; - - if ((error = tei_to_chash_ent(&tei, &ent)) != 0) - return (error); - - head = cfg->head6; - hash = hash_ent(&ent, AF_INET6, cfg->mask6, cfg->size6); - /* Check for existence */ - SLIST_FOREACH(tmp, &head[hash], next) { - 
if (memcmp(&tmp->a.a6, &ent.a.a6, 16) != 0) - continue; - ta_dump_chash_tentry(ta_state, ti, tmp, tent); - return (0); - } - } - - return (ENOENT); -} - -static void -ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f, - void *arg) -{ - struct chash_cfg *cfg; - struct chashentry *ent, *ent_next; - int i; - - cfg = (struct chash_cfg *)ta_state; - - for (i = 0; i < cfg->size4; i++) - SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next) - f(ent, arg); - - for (i = 0; i < cfg->size6; i++) - SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next) - f(ent, arg); -} - -static int -ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_chash *tb; - struct chashentry *ent; - int error; - - tb = (struct ta_buf_chash *)ta_buf; - - ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO); - - error = tei_to_chash_ent(tei, ent); - if (error != 0) { - free(ent, M_IPFW_TBL); - return (error); - } - tb->ent_ptr = ent; - - return (0); -} - -static int -ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint32_t *pnum) -{ - struct chash_cfg *cfg; - struct chashbhead *head; - struct chashentry *ent, *tmp; - struct ta_buf_chash *tb; - int exists; - uint32_t hash, value; - - cfg = (struct chash_cfg *)ta_state; - tb = (struct ta_buf_chash *)ta_buf; - ent = (struct chashentry *)tb->ent_ptr; - hash = 0; - exists = 0; - - /* Read current value from @tei */ - ent->value = tei->value; - - /* Read cuurrent value */ - if (tei->subtype == AF_INET) { - if (tei->masklen != cfg->mask4) - return (EINVAL); - head = cfg->head4; - hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4); - - /* Check for existence */ - SLIST_FOREACH(tmp, &head[hash], next) { - if (tmp->a.a4 == ent->a.a4) { - exists = 1; - break; - } - } - } else { - if (tei->masklen != cfg->mask6) - return (EINVAL); - head = cfg->head6; - hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6); - /* Check for existence */ - 
SLIST_FOREACH(tmp, &head[hash], next) { - if (memcmp(&tmp->a.a6, &ent->a.a6, 16) == 0) { - exists = 1; - break; - } - } - } - - if (exists == 1) { - if ((tei->flags & TEI_FLAGS_UPDATE) == 0) - return (EEXIST); - /* Record already exists. Update value if we're asked to */ - value = tmp->value; - tmp->value = tei->value; - tei->value = value; - /* Indicate that update has happened instead of addition */ - tei->flags |= TEI_FLAGS_UPDATED; - *pnum = 0; - } else { - if ((tei->flags & TEI_FLAGS_DONTADD) != 0) - return (EFBIG); - SLIST_INSERT_HEAD(&head[hash], ent, next); - tb->ent_ptr = NULL; - *pnum = 1; - - /* Update counters */ - if (tei->subtype == AF_INET) - cfg->items4++; - else - cfg->items6++; - } - - return (0); -} - -static int -ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_chash *tb; - - tb = (struct ta_buf_chash *)ta_buf; - - return (tei_to_chash_ent(tei, &tb->ent)); -} - -static int -ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint32_t *pnum) -{ - struct chash_cfg *cfg; - struct chashbhead *head; - struct chashentry *tmp, *tmp_next, *ent; - struct ta_buf_chash *tb; - uint32_t hash; - - cfg = (struct chash_cfg *)ta_state; - tb = (struct ta_buf_chash *)ta_buf; - ent = &tb->ent; - - if (tei->subtype == AF_INET) { - if (tei->masklen != cfg->mask4) - return (EINVAL); - head = cfg->head4; - hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4); - - SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) { - if (tmp->a.a4 != ent->a.a4) - continue; - - SLIST_REMOVE(&head[hash], tmp, chashentry, next); - cfg->items4--; - tb->ent_ptr = tmp; - tei->value = tmp->value; - *pnum = 1; - return (0); - } - } else { - if (tei->masklen != cfg->mask6) - return (EINVAL); - head = cfg->head6; - hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6); - SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) { - if (memcmp(&tmp->a.a6, &ent->a.a6, 16) != 0) - continue; - - 
SLIST_REMOVE(&head[hash], tmp, chashentry, next); - cfg->items6--; - tb->ent_ptr = tmp; - tei->value = tmp->value; - *pnum = 1; - return (0); - } - } - - return (ENOENT); -} - -static void -ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_chash *tb; - - tb = (struct ta_buf_chash *)ta_buf; - - if (tb->ent_ptr != NULL) - free(tb->ent_ptr, M_IPFW_TBL); -} - -/* - * Hash growing callbacks. - */ - -static int -ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count, - uint64_t *pflags) -{ - struct chash_cfg *cfg; - uint64_t data; - - /* - * Since we don't know exact number of IPv4/IPv6 records in @count, - * ignore non-zero @count value at all. Check current hash sizes - * and return appropriate data. - */ - - cfg = (struct chash_cfg *)ta_state; - - data = 0; - if (cfg->items4 > cfg->size4 && cfg->size4 < 65536) - data |= (cfg->size4 * 2) << 16; - if (cfg->items6 > cfg->size6 && cfg->size6 < 65536) - data |= cfg->size6 * 2; - - if (data != 0) { - *pflags = data; - return (1); - } - - return (0); -} - -/* - * Allocate new, larger chash. - */ -static int -ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags) -{ - struct mod_item *mi; - struct chashbhead *head; - int i; - - mi = (struct mod_item *)ta_buf; - - memset(mi, 0, sizeof(struct mod_item)); - mi->size = (*pflags >> 16) & 0xFFFF; - mi->size6 = *pflags & 0xFFFF; - if (mi->size > 0) { - head = malloc(sizeof(struct chashbhead) * mi->size, - M_IPFW, M_WAITOK | M_ZERO); - for (i = 0; i < mi->size; i++) - SLIST_INIT(&head[i]); - mi->main_ptr = head; - } - - if (mi->size6 > 0) { - head = malloc(sizeof(struct chashbhead) * mi->size6, - M_IPFW, M_WAITOK | M_ZERO); - for (i = 0; i < mi->size6; i++) - SLIST_INIT(&head[i]); - mi->main_ptr6 = head; - } - - return (0); -} - -/* - * Copy data from old runtime array to new one. 
- */ -static int -ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t *pflags) -{ - - /* In is not possible to do rehash if we're not holidng WLOCK. */ - return (0); -} - -/* - * Switch old & new arrays. - */ -static void -ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t pflags) -{ - struct mod_item *mi; - struct chash_cfg *cfg; - struct chashbhead *old_head, *new_head; - struct chashentry *ent, *ent_next; - int af, i, mlen; - uint32_t nhash; - size_t old_size, new_size; - - mi = (struct mod_item *)ta_buf; - cfg = (struct chash_cfg *)ta_state; - - /* Check which hash we need to grow and do we still need that */ - if (mi->size > 0 && cfg->size4 < mi->size) { - new_head = (struct chashbhead *)mi->main_ptr; - new_size = mi->size; - old_size = cfg->size4; - old_head = ti->state; - mlen = cfg->mask4; - af = AF_INET; - - for (i = 0; i < old_size; i++) { - SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { - nhash = hash_ent(ent, af, mlen, new_size); - SLIST_INSERT_HEAD(&new_head[nhash], ent, next); - } - } - - ti->state = new_head; - cfg->head4 = new_head; - cfg->size4 = mi->size; - mi->main_ptr = old_head; - } - - if (mi->size6 > 0 && cfg->size6 < mi->size6) { - new_head = (struct chashbhead *)mi->main_ptr6; - new_size = mi->size6; - old_size = cfg->size6; - old_head = ti->xstate; - mlen = cfg->mask6; - af = AF_INET6; - - for (i = 0; i < old_size; i++) { - SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { - nhash = hash_ent(ent, af, mlen, new_size); - SLIST_INSERT_HEAD(&new_head[nhash], ent, next); - } - } - - ti->xstate = new_head; - cfg->head6 = new_head; - cfg->size6 = mi->size6; - mi->main_ptr6 = old_head; - } - - /* Update lower 32 bits with new values */ - ti->data &= 0xFFFFFFFF00000000; - ti->data |= ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6); -} - -/* - * Free unneded array. 
- */ -static void -ta_flush_mod_chash(void *ta_buf) -{ - struct mod_item *mi; - - mi = (struct mod_item *)ta_buf; - if (mi->main_ptr != NULL) - free(mi->main_ptr, M_IPFW); - if (mi->main_ptr6 != NULL) - free(mi->main_ptr6, M_IPFW); -} - -struct table_algo addr_hash = { - .name = "addr:hash", - .type = IPFW_TABLE_ADDR, - .ta_buf_size = sizeof(struct ta_buf_chash), - .init = ta_init_chash, - .destroy = ta_destroy_chash, - .prepare_add = ta_prepare_add_chash, - .prepare_del = ta_prepare_del_chash, - .add = ta_add_chash, - .del = ta_del_chash, - .flush_entry = ta_flush_chash_entry, - .foreach = ta_foreach_chash, - .dump_tentry = ta_dump_chash_tentry, - .find_tentry = ta_find_chash_tentry, - .print_config = ta_print_chash_config, - .dump_tinfo = ta_dump_chash_tinfo, - .need_modify = ta_need_modify_chash, - .prepare_mod = ta_prepare_mod_chash, - .fill_mod = ta_fill_mod_chash, - .modify = ta_modify_chash, - .flush_mod = ta_flush_mod_chash, -}; - - -/* - * Iface table cmds. - * - * Implementation: - * - * Runtime part: - * - sorted array of "struct ifidx" pointed by ti->state. - * Array is allocated with rounding up to IFIDX_CHUNK. Only existing - * interfaces are stored in array, however its allocated size is - * sufficient to hold all table records if needed. - * - current array size is stored in ti->data - * - * Table data: - * - "struct iftable_cfg" is allocated to store table state (ta_state). - * - All table records are stored inside namedobj instance. 
- * - */ - -struct ifidx { - uint16_t kidx; - uint16_t spare; - uint32_t value; -}; -#define DEFAULT_IFIDX_SIZE 64 - -struct iftable_cfg; - -struct ifentry { - struct named_object no; - struct ipfw_ifc ic; - struct iftable_cfg *icfg; - uint32_t value; - int linked; -}; - -struct iftable_cfg { - struct namedobj_instance *ii; - struct ip_fw_chain *ch; - struct table_info *ti; - void *main_ptr; - size_t size; /* Number of items allocated in array */ - size_t count; /* Number of all items */ - size_t used; /* Number of items _active_ now */ -}; - -struct ta_buf_ifidx -{ - struct ifentry *ife; - uint32_t value; -}; - -int compare_ifidx(const void *k, const void *v); -static struct ifidx * ifidx_find(struct table_info *ti, void *key); -static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); -static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, - struct table_info *ti, char *data, uint8_t tflags); -static void ta_change_ti_ifidx(void *ta_state, struct table_info *ti); -static int destroy_ifidx_locked(struct namedobj_instance *ii, - struct named_object *no, void *arg); -static void ta_destroy_ifidx(void *ta_state, struct table_info *ti); -static void ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, - ipfw_ta_tinfo *tinfo); -static int ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -static int ta_add_ifidx(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -static int ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -static int ta_del_ifidx(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -static void ta_flush_ifidx_entry(struct ip_fw_chain *ch, - struct tentry_info *tei, void *ta_buf); -static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex); -static int ta_need_modify_ifidx(void *ta_state, struct table_info *ti, - 
uint32_t count, uint64_t *pflags); -static int ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags); -static int ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, - void *ta_buf, uint64_t *pflags); -static void ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t pflags); -static void ta_flush_mod_ifidx(void *ta_buf); -static int ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, - ipfw_obj_tentry *tent); -static int ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent); -static int foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, - void *arg); -static void ta_foreach_ifidx(void *ta_state, struct table_info *ti, - ta_foreach_f *f, void *arg); - -int -compare_ifidx(const void *k, const void *v) -{ - const struct ifidx *ifidx; - uint16_t key; - - key = *((const uint16_t *)k); - ifidx = (const struct ifidx *)v; - - if (key < ifidx->kidx) - return (-1); - else if (key > ifidx->kidx) - return (1); - - return (0); -} - -/* - * Adds item @item with key @key into ascending-sorted array @base. - * Assumes @base has enough additional storage. - * - * Returns 1 on success, 0 on duplicate key. - */ -static int -badd(const void *key, void *item, void *base, size_t nmemb, - size_t size, int (*compar) (const void *, const void *)) -{ - int min, max, mid, shift, res; - caddr_t paddr; - - if (nmemb == 0) { - memcpy(base, item, size); - return (1); - } - - /* Binary search */ - min = 0; - max = nmemb - 1; - mid = 0; - while (min <= max) { - mid = (min + max) / 2; - res = compar(key, (const void *)((caddr_t)base + mid * size)); - if (res == 0) - return (0); - - if (res > 0) - min = mid + 1; - else - max = mid - 1; - } - - /* Item not found. 
*/ - res = compar(key, (const void *)((caddr_t)base + mid * size)); - if (res > 0) - shift = mid + 1; - else - shift = mid; - - paddr = (caddr_t)base + shift * size; - if (nmemb > shift) - memmove(paddr + size, paddr, (nmemb - shift) * size); - - memcpy(paddr, item, size); - - return (1); -} - -/* - * Deletes item with key @key from ascending-sorted array @base. - * - * Returns 1 on success, 0 for non-existent key. - */ -static int -bdel(const void *key, void *base, size_t nmemb, size_t size, - int (*compar) (const void *, const void *)) -{ - caddr_t item; - size_t sz; - - item = (caddr_t)bsearch(key, base, nmemb, size, compar); - - if (item == NULL) - return (0); - - sz = (caddr_t)base + nmemb * size - item; - - if (sz > 0) - memmove(item, item + size, sz); - - return (1); -} - -static struct ifidx * -ifidx_find(struct table_info *ti, void *key) -{ - struct ifidx *ifi; - - ifi = bsearch(key, ti->state, ti->data, sizeof(struct ifidx), - compare_ifidx); - - return (ifi); -} - -static int -ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) -{ - struct ifidx *ifi; - - ifi = ifidx_find(ti, key); - - if (ifi != NULL) { - *val = ifi->value; - return (1); - } - - return (0); -} - -static int -ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, - char *data, uint8_t tflags) -{ - struct iftable_cfg *icfg; - - icfg = malloc(sizeof(struct iftable_cfg), M_IPFW, M_WAITOK | M_ZERO); - - icfg->ii = ipfw_objhash_create(DEFAULT_IFIDX_SIZE); - icfg->size = DEFAULT_IFIDX_SIZE; - icfg->main_ptr = malloc(sizeof(struct ifidx) * icfg->size, M_IPFW, - M_WAITOK | M_ZERO); - icfg->ch = ch; - - *ta_state = icfg; - ti->state = icfg->main_ptr; - ti->lookup = ta_lookup_ifidx; - - return (0); -} - -/* - * Handle tableinfo @ti pointer change (on table array resize). 
- */ -static void -ta_change_ti_ifidx(void *ta_state, struct table_info *ti) -{ - struct iftable_cfg *icfg; - - icfg = (struct iftable_cfg *)ta_state; - icfg->ti = ti; -} - -static int -destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no, - void *arg) -{ - struct ifentry *ife; - struct ip_fw_chain *ch; - - ch = (struct ip_fw_chain *)arg; - ife = (struct ifentry *)no; - - ipfw_iface_del_notify(ch, &ife->ic); - ipfw_iface_unref(ch, &ife->ic); - free(ife, M_IPFW_TBL); - return (0); -} - - -/* - * Destroys table @ti - */ -static void -ta_destroy_ifidx(void *ta_state, struct table_info *ti) -{ - struct iftable_cfg *icfg; - struct ip_fw_chain *ch; - - icfg = (struct iftable_cfg *)ta_state; - ch = icfg->ch; - - if (icfg->main_ptr != NULL) - free(icfg->main_ptr, M_IPFW); - - IPFW_UH_WLOCK(ch); - ipfw_objhash_foreach(icfg->ii, destroy_ifidx_locked, ch); - IPFW_UH_WUNLOCK(ch); - - ipfw_objhash_destroy(icfg->ii); - - free(icfg, M_IPFW); -} - -/* - * Provide algo-specific table info - */ -static void -ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) -{ - struct iftable_cfg *cfg; - - cfg = (struct iftable_cfg *)ta_state; - - tinfo->taclass4 = IPFW_TACLASS_ARRAY; - tinfo->size4 = cfg->size; - tinfo->count4 = cfg->used; - tinfo->itemsize4 = sizeof(struct ifidx); -} - -/* - * Prepare state to add to the table: - * allocate ifentry and reference needed interface. 
- */ -static int -ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_ifidx *tb; - char *ifname; - struct ifentry *ife; - - tb = (struct ta_buf_ifidx *)ta_buf; - - /* Check if string is terminated */ - ifname = (char *)tei->paddr; - if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) - return (EINVAL); - - ife = malloc(sizeof(struct ifentry), M_IPFW_TBL, M_WAITOK | M_ZERO); - ife->ic.cb = if_notifier; - ife->ic.cbdata = ife; - - if (ipfw_iface_ref(ch, ifname, &ife->ic) != 0) { - free(ife, M_IPFW_TBL); - return (EINVAL); - } - - /* Use ipfw_iface 'ifname' field as stable storage */ - ife->no.name = ife->ic.iface->ifname; - - tb->ife = ife; - - return (0); -} - -static int -ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint32_t *pnum) -{ - struct iftable_cfg *icfg; - struct ifentry *ife, *tmp; - struct ta_buf_ifidx *tb; - struct ipfw_iface *iif; - struct ifidx *ifi; - char *ifname; - uint32_t value; - - tb = (struct ta_buf_ifidx *)ta_buf; - ifname = (char *)tei->paddr; - icfg = (struct iftable_cfg *)ta_state; - ife = tb->ife; - - ife->icfg = icfg; - ife->value = tei->value; - - tmp = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); - - if (tmp != NULL) { - if ((tei->flags & TEI_FLAGS_UPDATE) == 0) - return (EEXIST); - - /* Exchange values in @tmp and @tei */ - value = tmp->value; - tmp->value = tei->value; - tei->value = value; - - iif = tmp->ic.iface; - if (iif->resolved != 0) { - /* We have to update runtime value, too */ - ifi = ifidx_find(ti, &iif->ifindex); - ifi->value = ife->value; - } - - /* Indicate that update has happened instead of addition */ - tei->flags |= TEI_FLAGS_UPDATED; - *pnum = 0; - return (0); - } - - if ((tei->flags & TEI_FLAGS_DONTADD) != 0) - return (EFBIG); - - /* Link to internal list */ - ipfw_objhash_add(icfg->ii, &ife->no); - - /* Link notifier (possible running its callback) */ - ipfw_iface_add_notify(icfg->ch, &ife->ic); - 
icfg->count++; - - tb->ife = NULL; - *pnum = 1; - - return (0); -} - -/* - * Prepare to delete key from table. - * Do basic interface name checks. - */ -static int -ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_ifidx *tb; - char *ifname; - - tb = (struct ta_buf_ifidx *)ta_buf; - - /* Check if string is terminated */ - ifname = (char *)tei->paddr; - if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) - return (EINVAL); - - return (0); -} - -/* - * Remove key from both configuration list and - * runtime array. Removed interface notification. - */ -static int -ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint32_t *pnum) -{ - struct iftable_cfg *icfg; - struct ifentry *ife; - struct ta_buf_ifidx *tb; - char *ifname; - uint16_t ifindex; - int res; - - tb = (struct ta_buf_ifidx *)ta_buf; - ifname = (char *)tei->paddr; - icfg = (struct iftable_cfg *)ta_state; - ife = tb->ife; - - ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); - - if (ife == NULL) - return (ENOENT); - - if (ife->linked != 0) { - /* We have to remove item from runtime */ - ifindex = ife->ic.iface->ifindex; - - res = bdel(&ifindex, icfg->main_ptr, icfg->used, - sizeof(struct ifidx), compare_ifidx); - - KASSERT(res == 1, ("index %d does not exist", ifindex)); - icfg->used--; - ti->data = icfg->used; - ife->linked = 0; - } - - /* Unlink from local list */ - ipfw_objhash_del(icfg->ii, &ife->no); - /* Unlink notifier and deref */ - ipfw_iface_del_notify(icfg->ch, &ife->ic); - ipfw_iface_unref(icfg->ch, &ife->ic); - - icfg->count--; - tei->value = ife->value; - - tb->ife = ife; - *pnum = 1; - - return (0); -} - -/* - * Flush deleted entry. - * Drops interface reference and frees entry. 
- */ -static void -ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_ifidx *tb; - - tb = (struct ta_buf_ifidx *)ta_buf; - - if (tb->ife != NULL) - free(tb->ife, M_IPFW_TBL); -} - - -/* - * Handle interface announce/withdrawal for particular table. - * Every real runtime array modification happens here. - */ -static void -if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex) -{ - struct ifentry *ife; - struct ifidx ifi; - struct iftable_cfg *icfg; - struct table_info *ti; - int res; - - ife = (struct ifentry *)cbdata; - icfg = ife->icfg; - ti = icfg->ti; - - KASSERT(ti != NULL, ("ti=NULL, check change_ti handler")); - - if (ife->linked == 0 && ifindex != 0) { - /* Interface announce */ - ifi.kidx = ifindex; - ifi.spare = 0; - ifi.value = ife->value; - res = badd(&ifindex, &ifi, icfg->main_ptr, icfg->used, - sizeof(struct ifidx), compare_ifidx); - KASSERT(res == 1, ("index %d already exists", ifindex)); - icfg->used++; - ti->data = icfg->used; - ife->linked = 1; - } else if (ife->linked != 0 && ifindex == 0) { - /* Interface withdrawal */ - ifindex = ife->ic.iface->ifindex; - - res = bdel(&ifindex, icfg->main_ptr, icfg->used, - sizeof(struct ifidx), compare_ifidx); - - KASSERT(res == 1, ("index %d does not exist", ifindex)); - icfg->used--; - ti->data = icfg->used; - ife->linked = 0; - } -} - - -/* - * Table growing callbacks. - */ - -static int -ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count, - uint64_t *pflags) -{ - struct iftable_cfg *cfg; - uint32_t size; - - cfg = (struct iftable_cfg *)ta_state; - - size = cfg->size; - while (size < cfg->count + count) - size *= 2; - - if (size != cfg->size) { - *pflags = size; - return (1); - } - - return (0); -} - -/* - * Allocate ned, larger runtime ifidx array. 
- */ -static int -ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags) -{ - struct mod_item *mi; - - mi = (struct mod_item *)ta_buf; - - memset(mi, 0, sizeof(struct mod_item)); - mi->size = *pflags; - mi->main_ptr = malloc(sizeof(struct ifidx) * mi->size, M_IPFW, - M_WAITOK | M_ZERO); - - return (0); -} - -/* - * Copy data from old runtime array to new one. - */ -static int -ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t *pflags) -{ - struct mod_item *mi; - struct iftable_cfg *icfg; - - mi = (struct mod_item *)ta_buf; - icfg = (struct iftable_cfg *)ta_state; - - /* Check if we still need to grow array */ - if (icfg->size >= mi->size) { - *pflags = 0; - return (0); - } - - memcpy(mi->main_ptr, icfg->main_ptr, icfg->used * sizeof(struct ifidx)); - - return (0); -} - -/* - * Switch old & new arrays. - */ -static void -ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t pflags) -{ - struct mod_item *mi; - struct iftable_cfg *icfg; - void *old_ptr; - - mi = (struct mod_item *)ta_buf; - icfg = (struct iftable_cfg *)ta_state; - - old_ptr = icfg->main_ptr; - icfg->main_ptr = mi->main_ptr; - icfg->size = mi->size; - ti->state = icfg->main_ptr; - - mi->main_ptr = old_ptr; -} - -/* - * Free unneded array. 
- */ -static void -ta_flush_mod_ifidx(void *ta_buf) -{ - struct mod_item *mi; - - mi = (struct mod_item *)ta_buf; - if (mi->main_ptr != NULL) - free(mi->main_ptr, M_IPFW); -} - -static int -ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e, - ipfw_obj_tentry *tent) -{ - struct ifentry *ife; - - ife = (struct ifentry *)e; - - tent->masklen = 8 * IF_NAMESIZE; - memcpy(&tent->k, ife->no.name, IF_NAMESIZE); - tent->v.kidx = ife->value; - - return (0); -} - -static int -ta_find_ifidx_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent) -{ - struct iftable_cfg *icfg; - struct ifentry *ife; - char *ifname; - - icfg = (struct iftable_cfg *)ta_state; - ifname = tent->k.iface; - - if (strnlen(ifname, IF_NAMESIZE) == IF_NAMESIZE) - return (EINVAL); - - ife = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname); - - if (ife != NULL) { - ta_dump_ifidx_tentry(ta_state, ti, ife, tent); - return (0); - } - - return (ENOENT); -} - -struct wa_ifidx { - ta_foreach_f *f; - void *arg; -}; - -static int -foreach_ifidx(struct namedobj_instance *ii, struct named_object *no, - void *arg) -{ - struct ifentry *ife; - struct wa_ifidx *wa; - - ife = (struct ifentry *)no; - wa = (struct wa_ifidx *)arg; - - wa->f(ife, wa->arg); - return (0); -} - -static void -ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f, - void *arg) -{ - struct iftable_cfg *icfg; - struct wa_ifidx wa; - - icfg = (struct iftable_cfg *)ta_state; - - wa.f = f; - wa.arg = arg; - - ipfw_objhash_foreach(icfg->ii, foreach_ifidx, &wa); -} - -struct table_algo iface_idx = { - .name = "iface:array", - .type = IPFW_TABLE_INTERFACE, - .flags = TA_FLAG_DEFAULT, - .ta_buf_size = sizeof(struct ta_buf_ifidx), - .init = ta_init_ifidx, - .destroy = ta_destroy_ifidx, - .prepare_add = ta_prepare_add_ifidx, - .prepare_del = ta_prepare_del_ifidx, - .add = ta_add_ifidx, - .del = ta_del_ifidx, - .flush_entry = ta_flush_ifidx_entry, - .foreach = ta_foreach_ifidx, - .dump_tentry 
= ta_dump_ifidx_tentry, - .find_tentry = ta_find_ifidx_tentry, - .dump_tinfo = ta_dump_ifidx_tinfo, - .need_modify = ta_need_modify_ifidx, - .prepare_mod = ta_prepare_mod_ifidx, - .fill_mod = ta_fill_mod_ifidx, - .modify = ta_modify_ifidx, - .flush_mod = ta_flush_mod_ifidx, - .change_ti = ta_change_ti_ifidx, -}; - -/* - * Number array cmds. - * - * Implementation: - * - * Runtime part: - * - sorted array of "struct numarray" pointed by ti->state. - * Array is allocated with rounding up to NUMARRAY_CHUNK. - * - current array size is stored in ti->data - * - */ - -struct numarray { - uint32_t number; - uint32_t value; -}; - -struct numarray_cfg { - void *main_ptr; - size_t size; /* Number of items allocated in array */ - size_t used; /* Number of items _active_ now */ -}; - -struct ta_buf_numarray -{ - struct numarray na; -}; - -int compare_numarray(const void *k, const void *v); -static struct numarray *numarray_find(struct table_info *ti, void *key); -static int ta_lookup_numarray(struct table_info *ti, void *key, - uint32_t keylen, uint32_t *val); -static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, - struct table_info *ti, char *data, uint8_t tflags); -static void ta_destroy_numarray(void *ta_state, struct table_info *ti); -static void ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, - ipfw_ta_tinfo *tinfo); -static int ta_prepare_add_numarray(struct ip_fw_chain *ch, - struct tentry_info *tei, void *ta_buf); -static int ta_add_numarray(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -static int ta_del_numarray(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -static void ta_flush_numarray_entry(struct ip_fw_chain *ch, - struct tentry_info *tei, void *ta_buf); -static int ta_need_modify_numarray(void *ta_state, struct table_info *ti, - uint32_t count, uint64_t *pflags); -static int ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags); 
-static int ta_fill_mod_numarray(void *ta_state, struct table_info *ti, - void *ta_buf, uint64_t *pflags); -static void ta_modify_numarray(void *ta_state, struct table_info *ti, - void *ta_buf, uint64_t pflags); -static void ta_flush_mod_numarray(void *ta_buf); -static int ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, - void *e, ipfw_obj_tentry *tent); -static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent); -static void ta_foreach_numarray(void *ta_state, struct table_info *ti, - ta_foreach_f *f, void *arg); - -int -compare_numarray(const void *k, const void *v) -{ - const struct numarray *na; - uint32_t key; - - key = *((const uint32_t *)k); - na = (const struct numarray *)v; - - if (key < na->number) - return (-1); - else if (key > na->number) - return (1); - - return (0); -} - -static struct numarray * -numarray_find(struct table_info *ti, void *key) -{ - struct numarray *ri; - - ri = bsearch(key, ti->state, ti->data, sizeof(struct numarray), - compare_ifidx); - - return (ri); -} - -static int -ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) -{ - struct numarray *ri; - - ri = numarray_find(ti, key); - - if (ri != NULL) { - *val = ri->value; - return (1); - } - - return (0); -} - -static int -ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, - char *data, uint8_t tflags) -{ - struct numarray_cfg *cfg; - - cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO); - - cfg->size = 16; - cfg->main_ptr = malloc(sizeof(struct numarray) * cfg->size, M_IPFW, - M_WAITOK | M_ZERO); - - *ta_state = cfg; - ti->state = cfg->main_ptr; - ti->lookup = ta_lookup_numarray; - - return (0); -} - -/* - * Destroys table @ti - */ -static void -ta_destroy_numarray(void *ta_state, struct table_info *ti) -{ - struct numarray_cfg *cfg; - - cfg = (struct numarray_cfg *)ta_state; - - if (cfg->main_ptr != NULL) - free(cfg->main_ptr, M_IPFW); - - free(cfg, M_IPFW); 
-} - -/* - * Provide algo-specific table info - */ -static void -ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) -{ - struct numarray_cfg *cfg; - - cfg = (struct numarray_cfg *)ta_state; - - tinfo->taclass4 = IPFW_TACLASS_ARRAY; - tinfo->size4 = cfg->size; - tinfo->count4 = cfg->used; - tinfo->itemsize4 = sizeof(struct numarray); -} - -/* - * Prepare for addition/deletion to an array. - */ -static int -ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_numarray *tb; - - tb = (struct ta_buf_numarray *)ta_buf; - - tb->na.number = *((uint32_t *)tei->paddr); - - return (0); -} - -static int -ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint32_t *pnum) -{ - struct numarray_cfg *cfg; - struct ta_buf_numarray *tb; - struct numarray *ri; - int res; - uint32_t value; - - tb = (struct ta_buf_numarray *)ta_buf; - cfg = (struct numarray_cfg *)ta_state; - - /* Read current value from @tei */ - tb->na.value = tei->value; - - ri = numarray_find(ti, &tb->na.number); - - if (ri != NULL) { - if ((tei->flags & TEI_FLAGS_UPDATE) == 0) - return (EEXIST); - - /* Exchange values between ri and @tei */ - value = ri->value; - ri->value = tei->value; - tei->value = value; - /* Indicate that update has happened instead of addition */ - tei->flags |= TEI_FLAGS_UPDATED; - *pnum = 0; - return (0); - } - - if ((tei->flags & TEI_FLAGS_DONTADD) != 0) - return (EFBIG); - - res = badd(&tb->na.number, &tb->na, cfg->main_ptr, cfg->used, - sizeof(struct numarray), compare_numarray); - - KASSERT(res == 1, ("number %d already exists", tb->na.number)); - cfg->used++; - ti->data = cfg->used; - *pnum = 1; - - return (0); -} - -/* - * Remove key from both configuration list and - * runtime array. Removed interface notification. 
- */ -static int -ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint32_t *pnum) -{ - struct numarray_cfg *cfg; - struct ta_buf_numarray *tb; - struct numarray *ri; - int res; - - tb = (struct ta_buf_numarray *)ta_buf; - cfg = (struct numarray_cfg *)ta_state; - - ri = numarray_find(ti, &tb->na.number); - if (ri == NULL) - return (ENOENT); - - tei->value = ri->value; - - res = bdel(&tb->na.number, cfg->main_ptr, cfg->used, - sizeof(struct numarray), compare_numarray); - - KASSERT(res == 1, ("number %u does not exist", tb->na.number)); - cfg->used--; - ti->data = cfg->used; - *pnum = 1; - - return (0); -} - -static void -ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - - /* We don't have any state, do nothing */ -} - - -/* - * Table growing callbacks. - */ - -static int -ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count, - uint64_t *pflags) -{ - struct numarray_cfg *cfg; - size_t size; - - cfg = (struct numarray_cfg *)ta_state; - - size = cfg->size; - while (size < cfg->used + count) - size *= 2; - - if (size != cfg->size) { - *pflags = size; - return (1); - } - - return (0); -} - -/* - * Allocate new, larger runtime array. - */ -static int -ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags) -{ - struct mod_item *mi; - - mi = (struct mod_item *)ta_buf; - - memset(mi, 0, sizeof(struct mod_item)); - mi->size = *pflags; - mi->main_ptr = malloc(sizeof(struct numarray) * mi->size, M_IPFW, - M_WAITOK | M_ZERO); - - return (0); -} - -/* - * Copy data from old runtime array to new one. 
- */ -static int -ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t *pflags) -{ - struct mod_item *mi; - struct numarray_cfg *cfg; - - mi = (struct mod_item *)ta_buf; - cfg = (struct numarray_cfg *)ta_state; - - /* Check if we still need to grow array */ - if (cfg->size >= mi->size) { - *pflags = 0; - return (0); - } - - memcpy(mi->main_ptr, cfg->main_ptr, cfg->used * sizeof(struct numarray)); - - return (0); -} - -/* - * Switch old & new arrays. - */ -static void -ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t pflags) -{ - struct mod_item *mi; - struct numarray_cfg *cfg; - void *old_ptr; - - mi = (struct mod_item *)ta_buf; - cfg = (struct numarray_cfg *)ta_state; - - old_ptr = cfg->main_ptr; - cfg->main_ptr = mi->main_ptr; - cfg->size = mi->size; - ti->state = cfg->main_ptr; - - mi->main_ptr = old_ptr; -} - -/* - * Free unneded array. - */ -static void -ta_flush_mod_numarray(void *ta_buf) -{ - struct mod_item *mi; - - mi = (struct mod_item *)ta_buf; - if (mi->main_ptr != NULL) - free(mi->main_ptr, M_IPFW); -} - -static int -ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e, - ipfw_obj_tentry *tent) -{ - struct numarray *na; - - na = (struct numarray *)e; - - tent->k.key = na->number; - tent->v.kidx = na->value; - - return (0); -} - -static int -ta_find_numarray_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent) -{ - struct numarray_cfg *cfg; - struct numarray *ri; - - cfg = (struct numarray_cfg *)ta_state; - - ri = numarray_find(ti, &tent->k.key); - - if (ri != NULL) { - ta_dump_numarray_tentry(ta_state, ti, ri, tent); - return (0); - } - - return (ENOENT); -} - -static void -ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f, - void *arg) -{ - struct numarray_cfg *cfg; - struct numarray *array; - int i; - - cfg = (struct numarray_cfg *)ta_state; - array = cfg->main_ptr; - - for (i = 0; i < cfg->used; i++) - f(&array[i], arg); -} 
- -struct table_algo number_array = { - .name = "number:array", - .type = IPFW_TABLE_NUMBER, - .ta_buf_size = sizeof(struct ta_buf_numarray), - .init = ta_init_numarray, - .destroy = ta_destroy_numarray, - .prepare_add = ta_prepare_add_numarray, - .prepare_del = ta_prepare_add_numarray, - .add = ta_add_numarray, - .del = ta_del_numarray, - .flush_entry = ta_flush_numarray_entry, - .foreach = ta_foreach_numarray, - .dump_tentry = ta_dump_numarray_tentry, - .find_tentry = ta_find_numarray_tentry, - .dump_tinfo = ta_dump_numarray_tinfo, - .need_modify = ta_need_modify_numarray, - .prepare_mod = ta_prepare_mod_numarray, - .fill_mod = ta_fill_mod_numarray, - .modify = ta_modify_numarray, - .flush_mod = ta_flush_mod_numarray, -}; - -/* - * flow:hash cmds - * - * - * ti->data: - * [inv.mask4][inv.mask6][log2hsize4][log2hsize6] - * [ 8][ 8[ 8][ 8] - * - * inv.mask4: 32 - mask - * inv.mask6: - * 1) _slow lookup: mask - * 2) _aligned: (128 - mask) / 8 - * 3) _64: 8 - * - * - * pflags: - * [hsize4][hsize6] - * [ 16][ 16] - */ - -struct fhashentry; - -SLIST_HEAD(fhashbhead, fhashentry); - -struct fhashentry { - SLIST_ENTRY(fhashentry) next; - uint8_t af; - uint8_t proto; - uint16_t spare0; - uint16_t dport; - uint16_t sport; - uint32_t value; - uint32_t spare1; -}; - -struct fhashentry4 { - struct fhashentry e; - struct in_addr dip; - struct in_addr sip; -}; - -struct fhashentry6 { - struct fhashentry e; - struct in6_addr dip6; - struct in6_addr sip6; -}; - -struct fhash_cfg { - struct fhashbhead *head; - size_t size; - size_t items; - struct fhashentry4 fe4; - struct fhashentry6 fe6; -}; - -struct ta_buf_fhash { - void *ent_ptr; - struct fhashentry6 fe6; -}; - -static __inline int cmp_flow_ent(struct fhashentry *a, - struct fhashentry *b, size_t sz); -static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize); -static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize); -static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size); -static int 
ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); -static int ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, -struct table_info *ti, char *data, uint8_t tflags); -static void ta_destroy_fhash(void *ta_state, struct table_info *ti); -static void ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, - ipfw_ta_tinfo *tinfo); -static int ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, - void *e, ipfw_obj_tentry *tent); -static int tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent); -static int ta_find_fhash_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent); -static void ta_foreach_fhash(void *ta_state, struct table_info *ti, - ta_foreach_f *f, void *arg); -static int ta_prepare_add_fhash(struct ip_fw_chain *ch, - struct tentry_info *tei, void *ta_buf); -static int ta_add_fhash(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -static int ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -static int ta_del_fhash(void *ta_state, struct table_info *ti, - struct tentry_info *tei, void *ta_buf, uint32_t *pnum); -static void ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf); -static int ta_need_modify_fhash(void *ta_state, struct table_info *ti, - uint32_t count, uint64_t *pflags); -static int ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags); -static int ta_fill_mod_fhash(void *ta_state, struct table_info *ti, - void *ta_buf, uint64_t *pflags); -static void ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t pflags); -static void ta_flush_mod_fhash(void *ta_buf); - -static __inline int -cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz) -{ - uint64_t *ka, *kb; - - ka = (uint64_t *)(&a->next + 1); - kb = (uint64_t *)(&b->next + 1); - - if (*ka == *kb && (memcmp(a + 1, b + 1, sz) == 0)) - return (1); - - return (0); 
-} - -static __inline uint32_t -hash_flow4(struct fhashentry4 *f, int hsize) -{ - uint32_t i; - - i = (f->dip.s_addr) ^ (f->sip.s_addr) ^ (f->e.dport) ^ (f->e.sport); - - return (i % (hsize - 1)); -} - -static __inline uint32_t -hash_flow6(struct fhashentry6 *f, int hsize) -{ - uint32_t i; - - i = (f->dip6.__u6_addr.__u6_addr32[2]) ^ - (f->dip6.__u6_addr.__u6_addr32[3]) ^ - (f->sip6.__u6_addr.__u6_addr32[2]) ^ - (f->sip6.__u6_addr.__u6_addr32[3]) ^ - (f->e.dport) ^ (f->e.sport); - - return (i % (hsize - 1)); -} - -static uint32_t -hash_flow_ent(struct fhashentry *ent, uint32_t size) -{ - uint32_t hash; - - if (ent->af == AF_INET) { - hash = hash_flow4((struct fhashentry4 *)ent, size); - } else { - hash = hash_flow6((struct fhashentry6 *)ent, size); - } - - return (hash); -} - -static int -ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) -{ - struct fhashbhead *head; - struct fhashentry *ent; - struct fhashentry4 *m4; - struct ipfw_flow_id *id; - uint16_t hash, hsize; - - id = (struct ipfw_flow_id *)key; - head = (struct fhashbhead *)ti->state; - hsize = ti->data; - m4 = (struct fhashentry4 *)ti->xstate; - - if (id->addr_type == 4) { - struct fhashentry4 f; - - /* Copy hash mask */ - f = *m4; - - f.dip.s_addr &= id->dst_ip; - f.sip.s_addr &= id->src_ip; - f.e.dport &= id->dst_port; - f.e.sport &= id->src_port; - f.e.proto &= id->proto; - hash = hash_flow4(&f, hsize); - SLIST_FOREACH(ent, &head[hash], next) { - if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) { - *val = ent->value; - return (1); - } - } - } else if (id->addr_type == 6) { - struct fhashentry6 f; - uint64_t *fp, *idp; - - /* Copy hash mask */ - f = *((struct fhashentry6 *)(m4 + 1)); - - /* Handle lack of __u6_addr.__u6_addr64 */ - fp = (uint64_t *)&f.dip6; - idp = (uint64_t *)&id->dst_ip6; - /* src IPv6 is stored after dst IPv6 */ - *fp++ &= *idp++; - *fp++ &= *idp++; - *fp++ &= *idp++; - *fp &= *idp; - f.e.dport &= id->dst_port; - f.e.sport &= id->src_port; - f.e.proto &= 
id->proto; - hash = hash_flow6(&f, hsize); - SLIST_FOREACH(ent, &head[hash], next) { - if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) { - *val = ent->value; - return (1); - } - } - } - - return (0); -} - -/* - * New table. - */ -static int -ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, - char *data, uint8_t tflags) -{ - int i; - struct fhash_cfg *cfg; - struct fhashentry4 *fe4; - struct fhashentry6 *fe6; - - cfg = malloc(sizeof(struct fhash_cfg), M_IPFW, M_WAITOK | M_ZERO); - - cfg->size = 512; - - cfg->head = malloc(sizeof(struct fhashbhead) * cfg->size, M_IPFW, - M_WAITOK | M_ZERO); - for (i = 0; i < cfg->size; i++) - SLIST_INIT(&cfg->head[i]); - - /* Fill in fe masks based on @tflags */ - fe4 = &cfg->fe4; - fe6 = &cfg->fe6; - if (tflags & IPFW_TFFLAG_SRCIP) { - memset(&fe4->sip, 0xFF, sizeof(fe4->sip)); - memset(&fe6->sip6, 0xFF, sizeof(fe6->sip6)); - } - if (tflags & IPFW_TFFLAG_DSTIP) { - memset(&fe4->dip, 0xFF, sizeof(fe4->dip)); - memset(&fe6->dip6, 0xFF, sizeof(fe6->dip6)); - } - if (tflags & IPFW_TFFLAG_SRCPORT) { - memset(&fe4->e.sport, 0xFF, sizeof(fe4->e.sport)); - memset(&fe6->e.sport, 0xFF, sizeof(fe6->e.sport)); - } - if (tflags & IPFW_TFFLAG_DSTPORT) { - memset(&fe4->e.dport, 0xFF, sizeof(fe4->e.dport)); - memset(&fe6->e.dport, 0xFF, sizeof(fe6->e.dport)); - } - if (tflags & IPFW_TFFLAG_PROTO) { - memset(&fe4->e.proto, 0xFF, sizeof(fe4->e.proto)); - memset(&fe6->e.proto, 0xFF, sizeof(fe6->e.proto)); - } - - fe4->e.af = AF_INET; - fe6->e.af = AF_INET6; - - *ta_state = cfg; - ti->state = cfg->head; - ti->xstate = &cfg->fe4; - ti->data = cfg->size; - ti->lookup = ta_lookup_fhash; - - return (0); -} - -static void -ta_destroy_fhash(void *ta_state, struct table_info *ti) -{ - struct fhash_cfg *cfg; - struct fhashentry *ent, *ent_next; - int i; - - cfg = (struct fhash_cfg *)ta_state; - - for (i = 0; i < cfg->size; i++) - SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) - free(ent, M_IPFW_TBL); - - free(cfg->head, M_IPFW); 
- free(cfg, M_IPFW); -} - -/* - * Provide algo-specific table info - */ -static void -ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) -{ - struct fhash_cfg *cfg; - - cfg = (struct fhash_cfg *)ta_state; - - tinfo->flags = IPFW_TATFLAGS_AFITEM; - tinfo->taclass4 = IPFW_TACLASS_HASH; - tinfo->size4 = cfg->size; - tinfo->count4 = cfg->items; - tinfo->itemsize4 = sizeof(struct fhashentry4); - tinfo->itemsize6 = sizeof(struct fhashentry6); -} - -static int -ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e, - ipfw_obj_tentry *tent) -{ - struct fhash_cfg *cfg; - struct fhashentry *ent; - struct fhashentry4 *fe4; -#ifdef INET6 - struct fhashentry6 *fe6; -#endif - struct tflow_entry *tfe; - - cfg = (struct fhash_cfg *)ta_state; - ent = (struct fhashentry *)e; - tfe = &tent->k.flow; - - tfe->af = ent->af; - tfe->proto = ent->proto; - tfe->dport = htons(ent->dport); - tfe->sport = htons(ent->sport); - tent->v.kidx = ent->value; - tent->subtype = ent->af; - - if (ent->af == AF_INET) { - fe4 = (struct fhashentry4 *)ent; - tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr); - tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr); - tent->masklen = 32; -#ifdef INET6 - } else { - fe6 = (struct fhashentry6 *)ent; - tfe->a.a6.sip6 = fe6->sip6; - tfe->a.a6.dip6 = fe6->dip6; - tent->masklen = 128; -#endif - } - - return (0); -} - -static int -tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent) -{ -#ifdef INET - struct fhashentry4 *fe4; -#endif -#ifdef INET6 - struct fhashentry6 *fe6; -#endif - struct tflow_entry *tfe; - - tfe = (struct tflow_entry *)tei->paddr; - - ent->af = tei->subtype; - ent->proto = tfe->proto; - ent->dport = ntohs(tfe->dport); - ent->sport = ntohs(tfe->sport); - - if (tei->subtype == AF_INET) { -#ifdef INET - fe4 = (struct fhashentry4 *)ent; - fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr); - fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr); -#endif -#ifdef INET6 - } else if (tei->subtype == AF_INET6) { - fe6 = 
(struct fhashentry6 *)ent; - fe6->sip6 = tfe->a.a6.sip6; - fe6->dip6 = tfe->a.a6.dip6; -#endif - } else { - /* Unknown CIDR type */ - return (EINVAL); - } - - return (0); -} - - -static int -ta_find_fhash_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent) -{ - struct fhash_cfg *cfg; - struct fhashbhead *head; - struct fhashentry *ent, *tmp; - struct fhashentry6 fe6; - struct tentry_info tei; - int error; - uint32_t hash; - size_t sz; - - cfg = (struct fhash_cfg *)ta_state; - - ent = &fe6.e; - - memset(&fe6, 0, sizeof(fe6)); - memset(&tei, 0, sizeof(tei)); - - tei.paddr = &tent->k.flow; - tei.subtype = tent->subtype; - - if ((error = tei_to_fhash_ent(&tei, ent)) != 0) - return (error); - - head = cfg->head; - hash = hash_flow_ent(ent, cfg->size); - - if (tei.subtype == AF_INET) - sz = 2 * sizeof(struct in_addr); - else - sz = 2 * sizeof(struct in6_addr); - - /* Check for existence */ - SLIST_FOREACH(tmp, &head[hash], next) { - if (cmp_flow_ent(tmp, ent, sz) != 0) { - ta_dump_fhash_tentry(ta_state, ti, tmp, tent); - return (0); - } - } - - return (ENOENT); -} - -static void -ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f, - void *arg) -{ - struct fhash_cfg *cfg; - struct fhashentry *ent, *ent_next; - int i; - - cfg = (struct fhash_cfg *)ta_state; - - for (i = 0; i < cfg->size; i++) - SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next) - f(ent, arg); -} - -static int -ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_fhash *tb; - struct fhashentry *ent; - size_t sz; - int error; - - tb = (struct ta_buf_fhash *)ta_buf; - - if (tei->subtype == AF_INET) - sz = sizeof(struct fhashentry4); - else if (tei->subtype == AF_INET6) - sz = sizeof(struct fhashentry6); - else - return (EINVAL); - - ent = malloc(sz, M_IPFW_TBL, M_WAITOK | M_ZERO); - - error = tei_to_fhash_ent(tei, ent); - if (error != 0) { - free(ent, M_IPFW_TBL); - return (error); - } - tb->ent_ptr = ent; - - 
return (0); -} - -static int -ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint32_t *pnum) -{ - struct fhash_cfg *cfg; - struct fhashbhead *head; - struct fhashentry *ent, *tmp; - struct ta_buf_fhash *tb; - int exists; - uint32_t hash, value; - size_t sz; - - cfg = (struct fhash_cfg *)ta_state; - tb = (struct ta_buf_fhash *)ta_buf; - ent = (struct fhashentry *)tb->ent_ptr; - exists = 0; - - /* Read current value from @tei */ - ent->value = tei->value; - - head = cfg->head; - hash = hash_flow_ent(ent, cfg->size); - - if (tei->subtype == AF_INET) - sz = 2 * sizeof(struct in_addr); - else - sz = 2 * sizeof(struct in6_addr); - - /* Check for existence */ - SLIST_FOREACH(tmp, &head[hash], next) { - if (cmp_flow_ent(tmp, ent, sz) != 0) { - exists = 1; - break; - } - } - - if (exists == 1) { - if ((tei->flags & TEI_FLAGS_UPDATE) == 0) - return (EEXIST); - /* Record already exists. Update value if we're asked to */ - /* Exchange values between tmp and @tei */ - value = tmp->value; - tmp->value = tei->value; - tei->value = value; - /* Indicate that update has happened instead of addition */ - tei->flags |= TEI_FLAGS_UPDATED; - *pnum = 0; - } else { - if ((tei->flags & TEI_FLAGS_DONTADD) != 0) - return (EFBIG); - - SLIST_INSERT_HEAD(&head[hash], ent, next); - tb->ent_ptr = NULL; - *pnum = 1; - - /* Update counters and check if we need to grow hash */ - cfg->items++; - } - - return (0); -} - -static int -ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_fhash *tb; - - tb = (struct ta_buf_fhash *)ta_buf; - - return (tei_to_fhash_ent(tei, &tb->fe6.e)); -} - -static int -ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei, - void *ta_buf, uint32_t *pnum) -{ - struct fhash_cfg *cfg; - struct fhashbhead *head; - struct fhashentry *ent, *tmp; - struct ta_buf_fhash *tb; - uint32_t hash; - size_t sz; - - cfg = (struct fhash_cfg *)ta_state; - tb = (struct 
ta_buf_fhash *)ta_buf; - ent = &tb->fe6.e; - - head = cfg->head; - hash = hash_flow_ent(ent, cfg->size); - - if (tei->subtype == AF_INET) - sz = 2 * sizeof(struct in_addr); - else - sz = 2 * sizeof(struct in6_addr); - - /* Check for existence */ - SLIST_FOREACH(tmp, &head[hash], next) { - if (cmp_flow_ent(tmp, ent, sz) == 0) - continue; - - SLIST_REMOVE(&head[hash], tmp, fhashentry, next); - tei->value = tmp->value; - *pnum = 1; - cfg->items--; - tb->ent_ptr = tmp; - return (0); - } - - return (ENOENT); -} - -static void -ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei, - void *ta_buf) -{ - struct ta_buf_fhash *tb; - - tb = (struct ta_buf_fhash *)ta_buf; - - if (tb->ent_ptr != NULL) - free(tb->ent_ptr, M_IPFW_TBL); -} - -/* - * Hash growing callbacks. - */ - -static int -ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count, - uint64_t *pflags) -{ - struct fhash_cfg *cfg; - - cfg = (struct fhash_cfg *)ta_state; - - if (cfg->items > cfg->size && cfg->size < 65536) { - *pflags = cfg->size * 2; - return (1); - } - - return (0); -} - -/* - * Allocate new, larger fhash. - */ -static int -ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags) -{ - struct mod_item *mi; - struct fhashbhead *head; - int i; - - mi = (struct mod_item *)ta_buf; - - memset(mi, 0, sizeof(struct mod_item)); - mi->size = *pflags; - head = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW, - M_WAITOK | M_ZERO); - for (i = 0; i < mi->size; i++) - SLIST_INIT(&head[i]); - - mi->main_ptr = head; - - return (0); -} - -/* - * Copy data from old runtime array to new one. - */ -static int -ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t *pflags) -{ - - /* In is not possible to do rehash if we're not holidng WLOCK. */ - return (0); -} - -/* - * Switch old & new arrays. 
- */ -static void -ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf, - uint64_t pflags) -{ - struct mod_item *mi; - struct fhash_cfg *cfg; - struct fhashbhead *old_head, *new_head; - struct fhashentry *ent, *ent_next; - int i; - uint32_t nhash; - size_t old_size; - - mi = (struct mod_item *)ta_buf; - cfg = (struct fhash_cfg *)ta_state; - - old_size = cfg->size; - old_head = ti->state; - - new_head = (struct fhashbhead *)mi->main_ptr; - for (i = 0; i < old_size; i++) { - SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) { - nhash = hash_flow_ent(ent, mi->size); - SLIST_INSERT_HEAD(&new_head[nhash], ent, next); - } - } - - ti->state = new_head; - ti->data = mi->size; - cfg->head = new_head; - cfg->size = mi->size; - - mi->main_ptr = old_head; -} - -/* - * Free unneded array. - */ -static void -ta_flush_mod_fhash(void *ta_buf) -{ - struct mod_item *mi; - - mi = (struct mod_item *)ta_buf; - if (mi->main_ptr != NULL) - free(mi->main_ptr, M_IPFW); -} - -struct table_algo flow_hash = { - .name = "flow:hash", - .type = IPFW_TABLE_FLOW, - .flags = TA_FLAG_DEFAULT, - .ta_buf_size = sizeof(struct ta_buf_fhash), - .init = ta_init_fhash, - .destroy = ta_destroy_fhash, - .prepare_add = ta_prepare_add_fhash, - .prepare_del = ta_prepare_del_fhash, - .add = ta_add_fhash, - .del = ta_del_fhash, - .flush_entry = ta_flush_fhash_entry, - .foreach = ta_foreach_fhash, - .dump_tentry = ta_dump_fhash_tentry, - .find_tentry = ta_find_fhash_tentry, - .dump_tinfo = ta_dump_fhash_tinfo, - .need_modify = ta_need_modify_fhash, - .prepare_mod = ta_prepare_mod_fhash, - .fill_mod = ta_fill_mod_fhash, - .modify = ta_modify_fhash, - .flush_mod = ta_flush_mod_fhash, -}; - -/* - * Kernel fibs bindings. 
- * - * Implementation: - * - * Runtime part: - * - fully relies on route API - * - fib number is stored in ti->data - * - */ - -static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val); -static int kfib_parse_opts(int *pfib, char *data); -static void ta_print_kfib_config(void *ta_state, struct table_info *ti, - char *buf, size_t bufsize); -static int ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, - struct table_info *ti, char *data, uint8_t tflags); -static void ta_destroy_kfib(void *ta_state, struct table_info *ti); -static void ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, - ipfw_ta_tinfo *tinfo); -static int contigmask(uint8_t *p, int len); -static int ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e, - ipfw_obj_tentry *tent); -static int ta_dump_kfib_tentry_int(struct sockaddr *paddr, - struct sockaddr *pmask, ipfw_obj_tentry *tent); -static int ta_find_kfib_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent); -static void ta_foreach_kfib(void *ta_state, struct table_info *ti, - ta_foreach_f *f, void *arg); - - -static int -ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen, - uint32_t *val) -{ -#ifdef INET - struct nhop4_basic nh4; - struct in_addr in; -#endif -#ifdef INET6 - struct nhop6_basic nh6; -#endif - int error; - - error = ENOENT; -#ifdef INET - if (keylen == 4) { - in.s_addr = *(in_addr_t *)key; - error = fib4_lookup_nh_basic(ti->data, - in, 0, 0, &nh4); - } -#endif -#ifdef INET6 - if (keylen == 6) - error = fib6_lookup_nh_basic(ti->data, - (struct in6_addr *)key, 0, 0, 0, &nh6); -#endif - - if (error != 0) - return (0); - - *val = 0; - - return (1); -} - -/* Parse 'fib=%d' */ -static int -kfib_parse_opts(int *pfib, char *data) -{ - char *pdel, *pend, *s; - int fibnum; - - if (data == NULL) - return (0); - if ((pdel = strchr(data, ' ')) == NULL) - return (0); - while (*pdel == ' ') - pdel++; - if (strncmp(pdel, "fib=", 4) != 0) - return 
(EINVAL); - if ((s = strchr(pdel, ' ')) != NULL) - *s++ = '\0'; - - pdel += 4; - /* Need \d+ */ - fibnum = strtol(pdel, &pend, 10); - if (*pend != '\0') - return (EINVAL); - - *pfib = fibnum; - - return (0); -} - -static void -ta_print_kfib_config(void *ta_state, struct table_info *ti, char *buf, - size_t bufsize) -{ - - if (ti->data != 0) - snprintf(buf, bufsize, "%s fib=%lu", "addr:kfib", ti->data); - else - snprintf(buf, bufsize, "%s", "addr:kfib"); -} - -static int -ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti, - char *data, uint8_t tflags) -{ - int error, fibnum; - - fibnum = 0; - if ((error = kfib_parse_opts(&fibnum, data)) != 0) - return (error); - - if (fibnum >= rt_numfibs) - return (E2BIG); - - ti->data = fibnum; - ti->lookup = ta_lookup_kfib; - - return (0); -} - -/* - * Destroys table @ti - */ -static void -ta_destroy_kfib(void *ta_state, struct table_info *ti) -{ - -} - -/* - * Provide algo-specific table info - */ -static void -ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo) -{ - - tinfo->flags = IPFW_TATFLAGS_AFDATA; - tinfo->taclass4 = IPFW_TACLASS_RADIX; - tinfo->count4 = 0; - tinfo->itemsize4 = sizeof(struct rtentry); - tinfo->taclass6 = IPFW_TACLASS_RADIX; - tinfo->count6 = 0; - tinfo->itemsize6 = sizeof(struct rtentry); -} - -static int -contigmask(uint8_t *p, int len) -{ - int i, n; - - for (i = 0; i < len ; i++) - if ( (p[i/8] & (1 << (7 - (i%8)))) == 0) /* first bit unset */ - break; - for (n= i + 1; n < len; n++) - if ( (p[n/8] & (1 << (7 - (n % 8)))) != 0) - return (-1); /* mask not contiguous */ - return (i); -} - - -static int -ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e, - ipfw_obj_tentry *tent) -{ - struct rtentry *rte; - - rte = (struct rtentry *)e; - - return ta_dump_kfib_tentry_int(rt_key(rte), rt_mask(rte), tent); -} - -static int -ta_dump_kfib_tentry_int(struct sockaddr *paddr, struct sockaddr *pmask, - ipfw_obj_tentry *tent) -{ -#ifdef INET - 
struct sockaddr_in *addr, *mask; -#endif -#ifdef INET6 - struct sockaddr_in6 *addr6, *mask6; -#endif - int len; - - len = 0; - - /* Guess IPv4/IPv6 radix by sockaddr family */ -#ifdef INET - if (paddr->sa_family == AF_INET) { - addr = (struct sockaddr_in *)paddr; - mask = (struct sockaddr_in *)pmask; - tent->k.addr.s_addr = addr->sin_addr.s_addr; - len = 32; - if (mask != NULL) - len = contigmask((uint8_t *)&mask->sin_addr, 32); - if (len == -1) - len = 0; - tent->masklen = len; - tent->subtype = AF_INET; - tent->v.kidx = 0; /* Do we need to put GW here? */ - } -#endif -#ifdef INET6 - if (paddr->sa_family == AF_INET6) { - addr6 = (struct sockaddr_in6 *)paddr; - mask6 = (struct sockaddr_in6 *)pmask; - memcpy(&tent->k, &addr6->sin6_addr, sizeof(struct in6_addr)); - len = 128; - if (mask6 != NULL) - len = contigmask((uint8_t *)&mask6->sin6_addr, 128); - if (len == -1) - len = 0; - tent->masklen = len; - tent->subtype = AF_INET6; - tent->v.kidx = 0; - } -#endif - - return (0); -} - -static int -ta_find_kfib_tentry(void *ta_state, struct table_info *ti, - ipfw_obj_tentry *tent) -{ - struct rt_addrinfo info; - struct sockaddr_in6 key6, dst6, mask6; - struct sockaddr *dst, *key, *mask; - - /* Prepare sockaddr for prefix/mask and info */ - bzero(&dst6, sizeof(dst6)); - dst6.sin6_len = sizeof(dst6); - dst = (struct sockaddr *)&dst6; - bzero(&mask6, sizeof(mask6)); - mask6.sin6_len = sizeof(mask6); - mask = (struct sockaddr *)&mask6; - - bzero(&info, sizeof(info)); - info.rti_info[RTAX_DST] = dst; - info.rti_info[RTAX_NETMASK] = mask; - - /* Prepare the lookup key */ - bzero(&key6, sizeof(key6)); - key6.sin6_family = tent->subtype; - key = (struct sockaddr *)&key6; - - if (tent->subtype == AF_INET) { - ((struct sockaddr_in *)&key6)->sin_addr = tent->k.addr; - key6.sin6_len = sizeof(struct sockaddr_in); - } else { - key6.sin6_addr = tent->k.addr6; - key6.sin6_len = sizeof(struct sockaddr_in6); - } - - if (rib_lookup_info(ti->data, key, 0, 0, &info) != 0) - return (ENOENT); - 
if ((info.rti_addrs & RTA_NETMASK) == 0) - mask = NULL; - - ta_dump_kfib_tentry_int(dst, mask, tent); - - return (0); -} - -static void -ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f, - void *arg) -{ - struct rib_head *rh; - int error; - - rh = rt_tables_get_rnh(ti->data, AF_INET); - if (rh != NULL) { - RIB_RLOCK(rh); - error = rh->rnh_walktree(&rh->head, (walktree_f_t *)f, arg); - RIB_RUNLOCK(rh); - } - - rh = rt_tables_get_rnh(ti->data, AF_INET6); - if (rh != NULL) { - RIB_RLOCK(rh); - error = rh->rnh_walktree(&rh->head, (walktree_f_t *)f, arg); - RIB_RUNLOCK(rh); - } -} - -struct table_algo addr_kfib = { - .name = "addr:kfib", - .type = IPFW_TABLE_ADDR, - .flags = TA_FLAG_READONLY, - .ta_buf_size = 0, - .init = ta_init_kfib, - .destroy = ta_destroy_kfib, - .foreach = ta_foreach_kfib, - .dump_tentry = ta_dump_kfib_tentry, - .find_tentry = ta_find_kfib_tentry, - .dump_tinfo = ta_dump_kfib_tinfo, - .print_config = ta_print_kfib_config, -}; - -void -ipfw_table_algo_init(struct ip_fw_chain *ch) -{ - size_t sz; - - /* - * Register all algorithms presented here. 
- */ - sz = sizeof(struct table_algo); - ipfw_add_table_algo(ch, &addr_radix, sz, &addr_radix.idx); - ipfw_add_table_algo(ch, &addr_hash, sz, &addr_hash.idx); - ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx); - ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx); - ipfw_add_table_algo(ch, &flow_hash, sz, &flow_hash.idx); - ipfw_add_table_algo(ch, &addr_kfib, sz, &addr_kfib.idx); -} - -void -ipfw_table_algo_destroy(struct ip_fw_chain *ch) -{ - - ipfw_del_table_algo(ch, addr_radix.idx); - ipfw_del_table_algo(ch, addr_hash.idx); - ipfw_del_table_algo(ch, iface_idx.idx); - ipfw_del_table_algo(ch, number_array.idx); - ipfw_del_table_algo(ch, flow_hash.idx); - ipfw_del_table_algo(ch, addr_kfib.idx); -} - - diff --git a/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c b/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c deleted file mode 100644 index d9228f6b..00000000 --- a/freebsd/sys/netpfil/ipfw/ip_fw_table_value.c +++ /dev/null @@ -1,811 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2014 Yandex LLC - * Copyright (c) 2014 Alexander V. Chernikov - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * Multi-field value support for ipfw tables. - * - * This file contains necessary functions to convert - * large multi-field values into u32 indices suitable to be fed - * to various table algorithms. Other machinery like proper refcounting, - * internal structures resizing are also kept here. - */ - -#include <rtems/bsd/local/opt_ipfw.h> - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/hash.h> -#include <sys/lock.h> -#include <sys/rwlock.h> -#include <sys/rmlock.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/queue.h> -#include <net/if.h> /* ip_fw.h requires IFNAMSIZ */ -#include <net/pfil.h> - -#include <netinet/in.h> -#include <netinet/ip_var.h> /* struct ipfw_rule_ref */ -#include <netinet/ip_fw.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/ip_fw_table.h> - -static uint32_t hash_table_value(struct namedobj_instance *ni, const void *key, - uint32_t kopt); -static int cmp_table_value(struct named_object *no, const void *key, - uint32_t kopt); - -static int list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd); - -static struct ipfw_sopt_handler scodes[] = { - { IP_FW_TABLE_VLIST, 0, HDIR_GET, list_table_values }, -}; - -#define CHAIN_TO_VI(chain) (CHAIN_TO_TCFG(chain)->valhash) - -struct table_val_link -{ - struct named_object no; 
- struct table_value *pval; /* Pointer to real table value */ -}; -#define VALDATA_START_SIZE 64 /* Allocate 64-items array by default */ - -struct vdump_args { - struct ip_fw_chain *ch; - struct sockopt_data *sd; - struct table_value *pval; - int error; -}; - - -static uint32_t -hash_table_value(struct namedobj_instance *ni, const void *key, uint32_t kopt) -{ - - return (hash32_buf(key, 56, 0)); -} - -static int -cmp_table_value(struct named_object *no, const void *key, uint32_t kopt) -{ - - return (memcmp(((struct table_val_link *)no)->pval, key, 56)); -} - -static void -mask_table_value(struct table_value *src, struct table_value *dst, - uint32_t mask) -{ -#define _MCPY(f, b) if ((mask & (b)) != 0) { dst->f = src->f; } - - memset(dst, 0, sizeof(*dst)); - _MCPY(tag, IPFW_VTYPE_TAG); - _MCPY(pipe, IPFW_VTYPE_PIPE); - _MCPY(divert, IPFW_VTYPE_DIVERT); - _MCPY(skipto, IPFW_VTYPE_SKIPTO); - _MCPY(netgraph, IPFW_VTYPE_NETGRAPH); - _MCPY(fib, IPFW_VTYPE_FIB); - _MCPY(nat, IPFW_VTYPE_NAT); - _MCPY(dscp, IPFW_VTYPE_DSCP); - _MCPY(nh4, IPFW_VTYPE_NH4); - _MCPY(nh6, IPFW_VTYPE_NH6); - _MCPY(zoneid, IPFW_VTYPE_NH6); -#undef _MCPY -} - -static void -get_value_ptrs(struct ip_fw_chain *ch, struct table_config *tc, int vshared, - struct table_value **ptv, struct namedobj_instance **pvi) -{ - struct table_value *pval; - struct namedobj_instance *vi; - - if (vshared != 0) { - pval = (struct table_value *)ch->valuestate; - vi = CHAIN_TO_VI(ch); - } else { - pval = NULL; - vi = NULL; - //pval = (struct table_value *)&tc->ti.data; - } - - if (ptv != NULL) - *ptv = pval; - if (pvi != NULL) - *pvi = vi; -} - -/* - * Update pointers to real vaues after @pval change. 
- */ -static int -update_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg) -{ - struct vdump_args *da; - struct table_val_link *ptv; - struct table_value *pval; - - da = (struct vdump_args *)arg; - ptv = (struct table_val_link *)no; - - pval = da->pval; - ptv->pval = &pval[ptv->no.kidx]; - ptv->no.name = (char *)&pval[ptv->no.kidx]; - return (0); -} - -/* - * Grows value storage shared among all tables. - * Drops/reacquires UH locks. - * Notifies other running adds on @ch shared storage resize. - * Note function does not guarantee that free space - * will be available after invocation, so one caller needs - * to roll cycle himself. - * - * Returns 0 if case of no errors. - */ -static int -resize_shared_value_storage(struct ip_fw_chain *ch) -{ - struct tables_config *tcfg; - struct namedobj_instance *vi; - struct table_value *pval, *valuestate, *old_valuestate; - void *new_idx; - struct vdump_args da; - int new_blocks; - int val_size, val_size_old; - - IPFW_UH_WLOCK_ASSERT(ch); - - valuestate = NULL; - new_idx = NULL; - - pval = (struct table_value *)ch->valuestate; - vi = CHAIN_TO_VI(ch); - tcfg = CHAIN_TO_TCFG(ch); - - val_size = tcfg->val_size * 2; - - if (val_size == (1 << 30)) - return (ENOSPC); - - IPFW_UH_WUNLOCK(ch); - - valuestate = malloc(sizeof(struct table_value) * val_size, M_IPFW, - M_WAITOK | M_ZERO); - ipfw_objhash_bitmap_alloc(val_size, (void *)&new_idx, - &new_blocks); - - IPFW_UH_WLOCK(ch); - - /* - * Check if we still need to resize - */ - if (tcfg->val_size >= val_size) - goto done; - - /* Update pointers and notify everyone we're changing @ch */ - pval = (struct table_value *)ch->valuestate; - rollback_toperation_state(ch, ch); - - /* Good. 
Let's merge */ - memcpy(valuestate, pval, sizeof(struct table_value) * tcfg->val_size); - ipfw_objhash_bitmap_merge(CHAIN_TO_VI(ch), &new_idx, &new_blocks); - - IPFW_WLOCK(ch); - /* Change pointers */ - old_valuestate = ch->valuestate; - ch->valuestate = valuestate; - valuestate = old_valuestate; - ipfw_objhash_bitmap_swap(CHAIN_TO_VI(ch), &new_idx, &new_blocks); - - val_size_old = tcfg->val_size; - tcfg->val_size = val_size; - val_size = val_size_old; - IPFW_WUNLOCK(ch); - /* Update pointers to reflect resize */ - memset(&da, 0, sizeof(da)); - da.pval = (struct table_value *)ch->valuestate; - ipfw_objhash_foreach(vi, update_tvalue, &da); - -done: - free(valuestate, M_IPFW); - ipfw_objhash_bitmap_free(new_idx, new_blocks); - - return (0); -} - -/* - * Drops reference for table value with index @kidx, stored in @pval and - * @vi. Frees value if it has no references. - */ -static void -unref_table_value(struct namedobj_instance *vi, struct table_value *pval, - uint32_t kidx) -{ - struct table_val_link *ptvl; - - KASSERT(pval[kidx].refcnt > 0, ("Refcount is 0 on kidx %d", kidx)); - if (--pval[kidx].refcnt > 0) - return; - - /* Last reference, delete item */ - ptvl = (struct table_val_link *)ipfw_objhash_lookup_kidx(vi, kidx); - KASSERT(ptvl != NULL, ("lookup on value kidx %d failed", kidx)); - ipfw_objhash_del(vi, &ptvl->no); - ipfw_objhash_free_idx(vi, kidx); - free(ptvl, M_IPFW); -} - -struct flush_args { - struct ip_fw_chain *ch; - struct table_algo *ta; - struct table_info *ti; - void *astate; - ipfw_obj_tentry tent; -}; - -static int -unref_table_value_cb(void *e, void *arg) -{ - struct flush_args *fa; - struct ip_fw_chain *ch; - struct table_algo *ta; - ipfw_obj_tentry *tent; - int error; - - fa = (struct flush_args *)arg; - - ta = fa->ta; - memset(&fa->tent, 0, sizeof(fa->tent)); - tent = &fa->tent; - error = ta->dump_tentry(fa->astate, fa->ti, e, tent); - if (error != 0) - return (error); - - ch = fa->ch; - - unref_table_value(CHAIN_TO_VI(ch), - (struct 
table_value *)ch->valuestate, tent->v.kidx); - - return (0); -} - -/* - * Drop references for each value used in @tc. - */ -void -ipfw_unref_table_values(struct ip_fw_chain *ch, struct table_config *tc, - struct table_algo *ta, void *astate, struct table_info *ti) -{ - struct flush_args fa; - - IPFW_UH_WLOCK_ASSERT(ch); - - memset(&fa, 0, sizeof(fa)); - fa.ch = ch; - fa.ta = ta; - fa.astate = astate; - fa.ti = ti; - - ta->foreach(astate, ti, unref_table_value_cb, &fa); -} - -/* - * Table operation state handler. - * Called when we are going to change something in @tc which - * may lead to inconsistencies in on-going table data addition. - * - * Here we rollback all already committed state (table values, currently) - * and set "modified" field to non-zero value to indicate - * that we need to restart original operation. - */ -void -rollback_table_values(struct tableop_state *ts) -{ - struct ip_fw_chain *ch; - struct table_value *pval; - struct tentry_info *ptei; - struct namedobj_instance *vi; - int i; - - ch = ts->ch; - - IPFW_UH_WLOCK_ASSERT(ch); - - /* Get current table value pointer */ - get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi); - - for (i = 0; i < ts->count; i++) { - ptei = &ts->tei[i]; - - if (ptei->value == 0) - continue; - - unref_table_value(vi, pval, ptei->value); - } -} - -/* - * Allocate new value index in either shared or per-table array. - * Function may drop/reacquire UH lock. - * - * Returns 0 on success. - */ -static int -alloc_table_vidx(struct ip_fw_chain *ch, struct tableop_state *ts, - struct namedobj_instance *vi, uint16_t *pvidx) -{ - int error, vlimit; - uint16_t vidx; - - IPFW_UH_WLOCK_ASSERT(ch); - - error = ipfw_objhash_alloc_idx(vi, &vidx); - if (error != 0) { - - /* - * We need to resize array. This involves - * lock/unlock, so we need to check "modified" - * state. 
- */ - ts->opstate.func(ts->tc, &ts->opstate); - error = resize_shared_value_storage(ch); - return (error); /* ts->modified should be set, we will restart */ - } - - vlimit = ts->ta->vlimit; - if (vlimit != 0 && vidx >= vlimit) { - - /* - * Algorithm is not able to store given index. - * We have to rollback state, start using - * per-table value array or return error - * if we're already using it. - * - * TODO: do not rollback state if - * atomicity is not required. - */ - if (ts->vshared != 0) { - /* shared -> per-table */ - return (ENOSPC); /* TODO: proper error */ - } - - /* per-table. Fail for now. */ - return (ENOSPC); /* TODO: proper error */ - } - - *pvidx = vidx; - return (0); -} - -/* - * Drops value reference for unused values (updates, deletes, partially - * successful adds or rollbacks). - */ -void -ipfw_garbage_table_values(struct ip_fw_chain *ch, struct table_config *tc, - struct tentry_info *tei, uint32_t count, int rollback) -{ - int i; - struct tentry_info *ptei; - struct table_value *pval; - struct namedobj_instance *vi; - - /* - * We have two slightly different ADD cases here: - * either (1) we are successful / partially successful, - * in that case we need - * * to ignore ADDED entries values - * * rollback every other values (either UPDATED since - * old value has been stored there, or some failure like - * EXISTS or LIMIT or simply "ignored" case. - * - * (2): atomic rollback of partially successful operation - * in that case we simply need to unref all entries. - * - * DELETE case is simpler: no atomic support there, so - * we simply unref all non-zero values. - */ - - /* - * Get current table value pointers. - * XXX: Properly read vshared - */ - get_value_ptrs(ch, tc, 1, &pval, &vi); - - for (i = 0; i < count; i++) { - ptei = &tei[i]; - - if (ptei->value == 0) { - - /* - * We may be deleting non-existing record. - * Skip. 
- */ - continue; - } - - if ((ptei->flags & TEI_FLAGS_ADDED) != 0 && rollback == 0) { - ptei->value = 0; - continue; - } - - unref_table_value(vi, pval, ptei->value); - ptei->value = 0; - } -} - -/* - * Main function used to link values of entries going to be added, - * to the index. Since we may perform many UH locks drops/acquires, - * handle changes by checking tablestate "modified" field. - * - * Success: return 0. - */ -int -ipfw_link_table_values(struct ip_fw_chain *ch, struct tableop_state *ts) -{ - int error, i, found; - struct namedobj_instance *vi; - struct table_config *tc; - struct tentry_info *tei, *ptei; - uint32_t count, vlimit; - uint16_t vidx; - struct table_val_link *ptv; - struct table_value tval, *pval; - - /* - * Stage 1: reference all existing values and - * save their indices. - */ - IPFW_UH_WLOCK_ASSERT(ch); - get_value_ptrs(ch, ts->tc, ts->vshared, &pval, &vi); - - error = 0; - found = 0; - vlimit = ts->ta->vlimit; - vidx = 0; - tc = ts->tc; - tei = ts->tei; - count = ts->count; - for (i = 0; i < count; i++) { - ptei = &tei[i]; - ptei->value = 0; /* Ensure value is always 0 in the beginning */ - mask_table_value(ptei->pvalue, &tval, ts->vmask); - ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0, - (char *)&tval); - if (ptv == NULL) - continue; - /* Deal with vlimit later */ - if (vlimit > 0 && vlimit <= ptv->no.kidx) - continue; - - /* Value found. Bump refcount */ - ptv->pval->refcnt++; - ptei->value = ptv->no.kidx; - found++; - } - - if (ts->count == found) { - /* We've found all values , no need ts create new ones */ - return (0); - } - - /* - * we have added some state here, let's attach operation - * state ts the list ts be able ts rollback if necessary. - */ - add_toperation_state(ch, ts); - /* Ensure table won't disappear */ - tc_ref(tc); - IPFW_UH_WUNLOCK(ch); - - /* - * Stage 2: allocate objects for non-existing values. 
- */ - for (i = 0; i < count; i++) { - ptei = &tei[i]; - if (ptei->value != 0) - continue; - if (ptei->ptv != NULL) - continue; - ptei->ptv = malloc(sizeof(struct table_val_link), M_IPFW, - M_WAITOK | M_ZERO); - } - - /* - * Stage 3: allocate index numbers for new values - * and link them to index. - */ - IPFW_UH_WLOCK(ch); - tc_unref(tc); - del_toperation_state(ch, ts); - if (ts->modified != 0) { - - /* - * In general, we should free all state/indexes here - * and return. However, we keep allocated state instead - * to ensure we achieve some progress on each restart. - */ - return (0); - } - - KASSERT(pval == ch->valuestate, ("resize_storage() notify failure")); - - /* Let's try to link values */ - for (i = 0; i < count; i++) { - ptei = &tei[i]; - - /* Check if record has appeared */ - mask_table_value(ptei->pvalue, &tval, ts->vmask); - ptv = (struct table_val_link *)ipfw_objhash_lookup_name(vi, 0, - (char *)&tval); - if (ptv != NULL) { - ptv->pval->refcnt++; - ptei->value = ptv->no.kidx; - continue; - } - - /* May perform UH unlock/lock */ - error = alloc_table_vidx(ch, ts, vi, &vidx); - if (error != 0) { - ts->opstate.func(ts->tc, &ts->opstate); - return (error); - } - /* value storage resize has happened, return */ - if (ts->modified != 0) - return (0); - - /* Finally, we have allocated valid index, let's add entry */ - ptei->value = vidx; - ptv = (struct table_val_link *)ptei->ptv; - ptei->ptv = NULL; - - ptv->no.kidx = vidx; - ptv->no.name = (char *)&pval[vidx]; - ptv->pval = &pval[vidx]; - memcpy(ptv->pval, &tval, sizeof(struct table_value)); - pval[vidx].refcnt = 1; - ipfw_objhash_add(vi, &ptv->no); - } - - return (0); -} - -/* - * Compatibility function used to import data from old - * IP_FW_TABLE_ADD / IP_FW_TABLE_XADD opcodes. 
- */ -void -ipfw_import_table_value_legacy(uint32_t value, struct table_value *v) -{ - - memset(v, 0, sizeof(*v)); - v->tag = value; - v->pipe = value; - v->divert = value; - v->skipto = value; - v->netgraph = value; - v->fib = value; - v->nat = value; - v->nh4 = value; /* host format */ - v->dscp = value; - v->limit = value; -} - -/* - * Export data to legacy table dumps opcodes. - */ -uint32_t -ipfw_export_table_value_legacy(struct table_value *v) -{ - - /* - * TODO: provide more compatibility depending on - * vmask value. - */ - return (v->tag); -} - -/* - * Imports table value from current userland format. - * Saves value in kernel format to the same place. - */ -void -ipfw_import_table_value_v1(ipfw_table_value *iv) -{ - struct table_value v; - - memset(&v, 0, sizeof(v)); - v.tag = iv->tag; - v.pipe = iv->pipe; - v.divert = iv->divert; - v.skipto = iv->skipto; - v.netgraph = iv->netgraph; - v.fib = iv->fib; - v.nat = iv->nat; - v.dscp = iv->dscp; - v.nh4 = iv->nh4; - v.nh6 = iv->nh6; - v.limit = iv->limit; - v.zoneid = iv->zoneid; - - memcpy(iv, &v, sizeof(ipfw_table_value)); -} - -/* - * Export real table value @v to current userland format. - * Note that @v and @piv may point to the same memory. - */ -void -ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *piv) -{ - ipfw_table_value iv; - - memset(&iv, 0, sizeof(iv)); - iv.tag = v->tag; - iv.pipe = v->pipe; - iv.divert = v->divert; - iv.skipto = v->skipto; - iv.netgraph = v->netgraph; - iv.fib = v->fib; - iv.nat = v->nat; - iv.dscp = v->dscp; - iv.limit = v->limit; - iv.nh4 = v->nh4; - iv.nh6 = v->nh6; - iv.zoneid = v->zoneid; - - memcpy(piv, &iv, sizeof(iv)); -} - -/* - * Exports real value data into ipfw_table_value structure. - * Utilizes "spare1" field to store kernel index. 
- */ -static int -dump_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg) -{ - struct vdump_args *da; - struct table_val_link *ptv; - struct table_value *v; - - da = (struct vdump_args *)arg; - ptv = (struct table_val_link *)no; - - v = (struct table_value *)ipfw_get_sopt_space(da->sd, sizeof(*v)); - /* Out of memory, returning */ - if (v == NULL) { - da->error = ENOMEM; - return (ENOMEM); - } - - memcpy(v, ptv->pval, sizeof(*v)); - v->spare1 = ptv->no.kidx; - return (0); -} - -/* - * Dumps all shared/table value data - * Data layout (v1)(current): - * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size - * Reply: [ ipfw_obj_lheader ipfw_table_value x N ] - * - * Returns 0 on success - */ -static int -list_table_values(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct _ipfw_obj_lheader *olh; - struct namedobj_instance *vi; - struct vdump_args da; - uint32_t count, size; - - olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh)); - if (olh == NULL) - return (EINVAL); - if (sd->valsize < olh->size) - return (EINVAL); - - IPFW_UH_RLOCK(ch); - vi = CHAIN_TO_VI(ch); - - count = ipfw_objhash_count(vi); - size = count * sizeof(ipfw_table_value) + sizeof(ipfw_obj_lheader); - - /* Fill in header regadless of buffer size */ - olh->count = count; - olh->objsize = sizeof(ipfw_table_value); - - if (size > olh->size) { - olh->size = size; - IPFW_UH_RUNLOCK(ch); - return (ENOMEM); - } - olh->size = size; - - /* - * Do the actual value dump - */ - memset(&da, 0, sizeof(da)); - da.ch = ch; - da.sd = sd; - ipfw_objhash_foreach(vi, dump_tvalue, &da); - - IPFW_UH_RUNLOCK(ch); - - return (0); -} - -void -ipfw_table_value_init(struct ip_fw_chain *ch, int first) -{ - struct tables_config *tcfg; - - ch->valuestate = malloc(VALDATA_START_SIZE * sizeof(struct table_value), - M_IPFW, M_WAITOK | M_ZERO); - - tcfg = ch->tblcfg; - - tcfg->val_size = VALDATA_START_SIZE; - tcfg->valhash = 
ipfw_objhash_create(tcfg->val_size); - ipfw_objhash_set_funcs(tcfg->valhash, hash_table_value, - cmp_table_value); - - IPFW_ADD_SOPT_HANDLER(first, scodes); -} - -static int -destroy_value(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - - free(no, M_IPFW); - return (0); -} - -void -ipfw_table_value_destroy(struct ip_fw_chain *ch, int last) -{ - - IPFW_DEL_SOPT_HANDLER(last, scodes); - - free(ch->valuestate, M_IPFW); - ipfw_objhash_foreach(CHAIN_TO_VI(ch), destroy_value, ch); - ipfw_objhash_destroy(CHAIN_TO_VI(ch)); -} - diff --git a/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c b/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c deleted file mode 100644 index 0af8d7c4..00000000 --- a/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.c +++ /dev/null @@ -1,131 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2015-2016 Yandex LLC - * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/module.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/sysctl.h> - -#include <net/if.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/nat64/ip_fw_nat64.h> -#include <netpfil/ipfw/nat64/nat64_translate.h> - - -int nat64_debug = 0; -SYSCTL_DECL(_net_inet_ip_fw); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_debug, CTLFLAG_RW, - &nat64_debug, 0, "Debug level for NAT64 module"); - -int nat64_allow_private = 0; -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_allow_private, CTLFLAG_RW, - &nat64_allow_private, 0, - "Allow use of non-global IPv4 addresses with NAT64"); - -static int -vnet_ipfw_nat64_init(const void *arg __unused) -{ - struct ip_fw_chain *ch; - int first, error; - - ch = &V_layer3_chain; - first = IS_DEFAULT_VNET(curvnet) ? 1: 0; - error = nat64stl_init(ch, first); - if (error != 0) - return (error); - error = nat64lsn_init(ch, first); - if (error != 0) { - nat64stl_uninit(ch, first); - return (error); - } - return (0); -} - -static int -vnet_ipfw_nat64_uninit(const void *arg __unused) -{ - struct ip_fw_chain *ch; - int last; - - ch = &V_layer3_chain; - last = IS_DEFAULT_VNET(curvnet) ? 
1: 0; - nat64stl_uninit(ch, last); - nat64lsn_uninit(ch, last); - return (0); -} - -static int -ipfw_nat64_modevent(module_t mod, int type, void *unused) -{ - - switch (type) { - case MOD_LOAD: - case MOD_UNLOAD: - break; - default: - return (EOPNOTSUPP); - } - return (0); -} - -static moduledata_t ipfw_nat64_mod = { - "ipfw_nat64", - ipfw_nat64_modevent, - 0 -}; - -/* Define startup order. */ -#define IPFW_NAT64_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN -#define IPFW_NAT64_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */ -#define IPFW_NAT64_MODULE_ORDER (IPFW_NAT64_MODEVENT_ORDER + 1) -#define IPFW_NAT64_VNET_ORDER (IPFW_NAT64_MODEVENT_ORDER + 2) - -DECLARE_MODULE(ipfw_nat64, ipfw_nat64_mod, IPFW_NAT64_SI_SUB_FIREWALL, - SI_ORDER_ANY); -MODULE_DEPEND(ipfw_nat64, ipfw, 3, 3, 3); -MODULE_VERSION(ipfw_nat64, 1); - -VNET_SYSINIT(vnet_ipfw_nat64_init, IPFW_NAT64_SI_SUB_FIREWALL, - IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_init, NULL); -VNET_SYSUNINIT(vnet_ipfw_nat64_uninit, IPFW_NAT64_SI_SUB_FIREWALL, - IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_uninit, NULL); diff --git a/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.h b/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.h deleted file mode 100644 index 1d2bb774..00000000 --- a/freebsd/sys/netpfil/ipfw/nat64/ip_fw_nat64.h +++ /dev/null @@ -1,117 +0,0 @@ -/*- - * Copyright (c) 2015-2016 Yandex LLC - * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _IP_FW_NAT64_H_ -#define _IP_FW_NAT64_H_ - -#define DPRINTF(mask, fmt, ...) \ - if (nat64_debug & (mask)) \ - printf("NAT64: %s: " fmt "\n", __func__, ## __VA_ARGS__) -#define DP_GENERIC 0x0001 -#define DP_OBJ 0x0002 -#define DP_JQUEUE 0x0004 -#define DP_STATE 0x0008 -#define DP_DROPS 0x0010 -#define DP_ALL 0xFFFF -extern int nat64_debug; - -#if 0 -#define NAT64NOINLINE __noinline -#else -#define NAT64NOINLINE -#endif - -int nat64stl_init(struct ip_fw_chain *ch, int first); -void nat64stl_uninit(struct ip_fw_chain *ch, int last); -int nat64lsn_init(struct ip_fw_chain *ch, int first); -void nat64lsn_uninit(struct ip_fw_chain *ch, int last); - -struct ip_fw_nat64_stats { - counter_u64_t opcnt64; /* 6to4 of packets translated */ - counter_u64_t opcnt46; /* 4to6 of packets translated */ - counter_u64_t ofrags; /* number of fragments generated */ - counter_u64_t ifrags; /* number of fragments received */ - counter_u64_t oerrors; /* number of output errors */ - counter_u64_t noroute4; - counter_u64_t noroute6; - counter_u64_t nomatch4; /* No addr/port match */ - counter_u64_t noproto; /* Protocol not supported */ - counter_u64_t nomem; /* mbufs allocation failed */ - counter_u64_t dropped; /* number of 
packets silently - * dropped due to some errors/ - * unsupported/etc. - */ - - counter_u64_t jrequests; /* number of jobs requests queued */ - counter_u64_t jcalls; /* number of jobs handler calls */ - counter_u64_t jhostsreq; /* number of hosts requests */ - counter_u64_t jportreq; - counter_u64_t jhostfails; - counter_u64_t jportfails; - counter_u64_t jmaxlen; - counter_u64_t jnomem; - counter_u64_t jreinjected; - - counter_u64_t screated; - counter_u64_t sdeleted; - counter_u64_t spgcreated; - counter_u64_t spgdeleted; -}; - -#define IPFW_NAT64_VERSION 1 -#define NAT64STATS (sizeof(struct ip_fw_nat64_stats) / sizeof(uint64_t)) -typedef struct _nat64_stats_block { - counter_u64_t stats[NAT64STATS]; -} nat64_stats_block; -#define NAT64STAT_ADD(s, f, v) \ - counter_u64_add((s)->stats[ \ - offsetof(struct ip_fw_nat64_stats, f) / sizeof(uint64_t)], (v)) -#define NAT64STAT_INC(s, f) NAT64STAT_ADD(s, f, 1) -#define NAT64STAT_FETCH(s, f) \ - counter_u64_fetch((s)->stats[ \ - offsetof(struct ip_fw_nat64_stats, f) / sizeof(uint64_t)]) - -#define L3HDR(_ip, _t) ((_t)((u_int32_t *)(_ip) + (_ip)->ip_hl)) -#define TCP(p) ((struct tcphdr *)(p)) -#define UDP(p) ((struct udphdr *)(p)) -#define ICMP(p) ((struct icmphdr *)(p)) -#define ICMP6(p) ((struct icmp6_hdr *)(p)) - -#define NAT64SKIP 0 -#define NAT64RETURN 1 -#define NAT64MFREE -1 - -/* Well-known prefix 64:ff9b::/96 */ -#define IPV6_ADDR_INT32_WKPFX htonl(0x64ff9b) -#define IN6_IS_ADDR_WKPFX(a) \ - ((a)->s6_addr32[0] == IPV6_ADDR_INT32_WKPFX && \ - (a)->s6_addr32[1] == 0 && (a)->s6_addr32[2] == 0) - -#endif - diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c b/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c deleted file mode 100644 index 142d19ab..00000000 --- a/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.c +++ /dev/null @@ -1,1574 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2015-2016 Yandex LLC - * Copyright (c) 2015-2016 Andrey V. 
Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <rtems/bsd/local/opt_ipfw.h> - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/counter.h> -#include <sys/errno.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <sys/rmlock.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/queue.h> - -#include <net/if.h> -#include <net/if_var.h> -#include <net/if_pflog.h> -#include <net/pfil.h> -#include <net/netisr.h> -#include <net/route.h> - -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> -#include <netinet/ip6.h> -#include <netinet/icmp6.h> -#include <netinet/ip_icmp.h> -#include <netinet/tcp.h> -#include <netinet/udp.h> -#include <netinet6/in6_var.h> -#include <netinet6/ip6_var.h> - -#include <netpfil/pf/pf.h> -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/nat64/ip_fw_nat64.h> -#include <netpfil/ipfw/nat64/nat64_translate.h> -#include <machine/in_cksum.h> - -static void -nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family) -{ - - logdata->dir = PF_OUT; - logdata->af = family; - ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m); -} -#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT -static NAT64NOINLINE struct sockaddr* nat64_find_route4(struct route *ro, - in_addr_t dest, struct mbuf *m); -static NAT64NOINLINE struct sockaddr* nat64_find_route6(struct route_in6 *ro, - struct in6_addr *dest, struct mbuf *m); - -static NAT64NOINLINE int -nat64_output(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, struct route *ro, nat64_stats_block *stats, - void *logdata) -{ - int error; - - if (logdata != NULL) - nat64_log(logdata, m, dst->sa_family); - error = (*ifp->if_output)(ifp, m, dst, ro); - if (error != 0) - NAT64STAT_INC(stats, oerrors); - return (error); -} - -static NAT64NOINLINE int -nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata) -{ - struct route_in6 ro6; - 
struct route ro4, *ro; - struct sockaddr *dst; - struct ifnet *ifp; - struct ip6_hdr *ip6; - struct ip *ip4; - int error; - - ip4 = mtod(m, struct ip *); - switch (ip4->ip_v) { - case IPVERSION: - ro = &ro4; - dst = nat64_find_route4(&ro4, ip4->ip_dst.s_addr, m); - if (dst == NULL) - NAT64STAT_INC(stats, noroute4); - break; - case (IPV6_VERSION >> 4): - ip6 = (struct ip6_hdr *)ip4; - ro = (struct route *)&ro6; - dst = nat64_find_route6(&ro6, &ip6->ip6_dst, m); - if (dst == NULL) - NAT64STAT_INC(stats, noroute6); - break; - default: - m_freem(m); - NAT64STAT_INC(stats, dropped); - DPRINTF(DP_DROPS, "dropped due to unknown IP version"); - return (EAFNOSUPPORT); - } - if (dst == NULL) { - FREE_ROUTE(ro); - m_freem(m); - return (EHOSTUNREACH); - } - if (logdata != NULL) - nat64_log(logdata, m, dst->sa_family); - ifp = ro->ro_rt->rt_ifp; - error = (*ifp->if_output)(ifp, m, dst, ro); - if (error != 0) - NAT64STAT_INC(stats, oerrors); - FREE_ROUTE(ro); - return (error); -} -#else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ -static NAT64NOINLINE int -nat64_output(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *dst, struct route *ro, nat64_stats_block *stats, - void *logdata) -{ - struct ip *ip4; - int ret, af; - - ip4 = mtod(m, struct ip *); - switch (ip4->ip_v) { - case IPVERSION: - af = AF_INET; - ret = NETISR_IP; - break; - case (IPV6_VERSION >> 4): - af = AF_INET6; - ret = NETISR_IPV6; - break; - default: - m_freem(m); - NAT64STAT_INC(stats, dropped); - DPRINTF(DP_DROPS, "unknown IP version"); - return (EAFNOSUPPORT); - } - if (logdata != NULL) - nat64_log(logdata, m, af); - ret = netisr_queue(ret, m); - if (ret != 0) - NAT64STAT_INC(stats, oerrors); - return (ret); -} - -static NAT64NOINLINE int -nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata) -{ - - return (nat64_output(NULL, m, NULL, NULL, stats, logdata)); -} -#endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ - - -#if 0 -void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize); - 
-void -print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize) -{ - char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; - - inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf)); - inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf)); - snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt); -} - - -static NAT64NOINLINE int -nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6) -{ - - /* assume the prefix is properly filled with zeros */ - bcopy(&cfg->prefix, ip6, sizeof(*ip6)); - switch (cfg->plen) { - case 32: - case 96: - ip6->s6_addr32[cfg->plen / 32] = ia; - break; - case 40: - case 48: - case 56: -#if BYTE_ORDER == BIG_ENDIAN - ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] | - (ia >> (cfg->plen % 32)); - ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32); -#elif BYTE_ORDER == LITTLE_ENDIAN - ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] | - (ia << (cfg->plen % 32)); - ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32); -#endif - break; - case 64: -#if BYTE_ORDER == BIG_ENDIAN - ip6->s6_addr32[2] = ia >> 8; - ip6->s6_addr32[3] = ia << 24; -#elif BYTE_ORDER == LITTLE_ENDIAN - ip6->s6_addr32[2] = ia << 8; - ip6->s6_addr32[3] = ia >> 24; -#endif - break; - default: - return (0); - }; - ip6->s6_addr8[8] = 0; - return (1); -} - -static NAT64NOINLINE in_addr_t -nat64_extract_ip4(struct in6_addr *ip6, int plen) -{ - in_addr_t ia; - - /* - * According to RFC 6052 p2.2: - * IPv4-embedded IPv6 addresses are composed of a variable-length - * prefix, the embedded IPv4 address, and a variable length suffix. - * The suffix bits are reserved for future extensions and SHOULD - * be set to zero. 
- */ - switch (plen) { - case 32: - if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0) - goto badip6; - break; - case 40: - if (ip6->s6_addr32[3] != 0 || - (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0) - goto badip6; - break; - case 48: - if (ip6->s6_addr32[3] != 0 || - (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0) - goto badip6; - break; - case 56: - if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0) - goto badip6; - break; - case 64: - if (ip6->s6_addr8[8] != 0 || - (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0) - goto badip6; - }; - switch (plen) { - case 32: - case 96: - ia = ip6->s6_addr32[plen / 32]; - break; - case 40: - case 48: - case 56: -#if BYTE_ORDER == BIG_ENDIAN - ia = (ip6->s6_addr32[1] << (plen % 32)) | - (ip6->s6_addr32[2] >> (24 - plen % 32)); -#elif BYTE_ORDER == LITTLE_ENDIAN - ia = (ip6->s6_addr32[1] >> (plen % 32)) | - (ip6->s6_addr32[2] << (24 - plen % 32)); -#endif - break; - case 64: -#if BYTE_ORDER == BIG_ENDIAN - ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24); -#elif BYTE_ORDER == LITTLE_ENDIAN - ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24); -#endif - break; - default: - return (0); - }; - if (nat64_check_ip4(ia) != 0 || - nat64_check_private_ip4(ia) != 0) - goto badip4; - - return (ia); -badip4: - DPRINTF(DP_GENERIC, "invalid destination address: %08x", ia); - return (0); -badip6: - DPRINTF(DP_GENERIC, "invalid IPv4-embedded IPv6 address"); - return (0); -} -#endif - -/* - * According to RFC 1624 the equation for incremental checksum update is: - * HC' = ~(~HC + ~m + m') -- [Eqn. 3] - * HC' = HC - ~m - m' -- [Eqn. 4] - * So, when we are replacing IPv4 addresses to IPv6, we - * can assume, that new bytes previously were zeros, and vise versa - - * when we replacing IPv6 addresses to IPv4, now unused bytes become - * zeros. The payload length in pseudo header has bigger size, but one - * half of it should be zero. 
Using the equation 4 we get: - * HC' = HC - (~m0 + m0') -- m0 is first changed word - * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word - * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... = - * = HC - sum(~m[i] + m'[i]) - * - * The function result should be used as follows: - * IPv6 to IPv4: HC' = cksum_add(HC, result) - * IPv4 to IPv6: HC' = cksum_add(HC, ~result) - */ -static NAT64NOINLINE uint16_t -nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip) -{ - uint32_t sum; - uint16_t *p; - - sum = ~ip->ip_src.s_addr >> 16; - sum += ~ip->ip_src.s_addr & 0xffff; - sum += ~ip->ip_dst.s_addr >> 16; - sum += ~ip->ip_dst.s_addr & 0xffff; - - for (p = (uint16_t *)&ip6->ip6_src; - p < (uint16_t *)(&ip6->ip6_src + 2); p++) - sum += *p; - - while (sum >> 16) - sum = (sum & 0xffff) + (sum >> 16); - return (sum); -} - -#if __FreeBSD_version < 1100000 -#define ip_fillid(ip) (ip)->ip_id = ip_newid() -#endif -static NAT64NOINLINE void -nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag, - uint16_t plen, uint8_t proto, struct ip *ip) -{ - - /* assume addresses are already initialized */ - ip->ip_v = IPVERSION; - ip->ip_hl = sizeof(*ip) >> 2; - ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; - ip->ip_len = htons(sizeof(*ip) + plen); -#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT - ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC; -#else - /* Forwarding code will decrement TTL. */ - ip->ip_ttl = ip6->ip6_hlim; -#endif - ip->ip_sum = 0; - ip->ip_p = (proto == IPPROTO_ICMPV6) ? 
IPPROTO_ICMP: proto; - ip_fillid(ip); - if (frag != NULL) { - ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3); - if (frag->ip6f_offlg & IP6F_MORE_FRAG) - ip->ip_off |= htons(IP_MF); - } else { - ip->ip_off = htons(IP_DF); - } - ip->ip_sum = in_cksum_hdr(ip); -} - -#define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag)) -static NAT64NOINLINE int -nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq, - struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off) -{ - struct ip6_frag ip6f; - struct mbuf *n; - uint16_t hlen, len, offset; - int plen; - - plen = ntohs(ip6->ip6_plen); - hlen = sizeof(struct ip6_hdr); - - /* Fragmentation isn't needed */ - if (ip_off == 0 && plen <= mtu - hlen) { - M_PREPEND(m, hlen, M_NOWAIT); - if (m == NULL) { - NAT64STAT_INC(stats, nomem); - return (ENOMEM); - } - bcopy(ip6, mtod(m, void *), hlen); - if (mbufq_enqueue(mq, m) != 0) { - m_freem(m); - NAT64STAT_INC(stats, dropped); - DPRINTF(DP_DROPS, "dropped due to mbufq overflow"); - return (ENOBUFS); - } - return (0); - } - - hlen += sizeof(struct ip6_frag); - ip6f.ip6f_reserved = 0; - ip6f.ip6f_nxt = ip6->ip6_nxt; - ip6->ip6_nxt = IPPROTO_FRAGMENT; - if (ip_off != 0) { - /* - * We have got an IPv4 fragment. - * Use offset value and ip_id from original fragment. 
- */ - ip6f.ip6f_ident = htonl(ntohs(ip_id)); - offset = (ntohs(ip_off) & IP_OFFMASK) << 3; - NAT64STAT_INC(stats, ifrags); - } else { - /* The packet size exceeds interface MTU */ - ip6f.ip6f_ident = htonl(ip6_randomid()); - offset = 0; /* First fragment*/ - } - while (plen > 0 && m != NULL) { - n = NULL; - len = FRAGSZ(mtu) & ~7; - if (len > plen) - len = plen; - ip6->ip6_plen = htons(len + sizeof(ip6f)); - ip6f.ip6f_offlg = ntohs(offset); - if (len < plen || (ip_off & htons(IP_MF)) != 0) - ip6f.ip6f_offlg |= IP6F_MORE_FRAG; - offset += len; - plen -= len; - if (plen > 0) { - n = m_split(m, len, M_NOWAIT); - if (n == NULL) - goto fail; - } - M_PREPEND(m, hlen, M_NOWAIT); - if (m == NULL) - goto fail; - bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr)); - bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)), - sizeof(struct ip6_frag)); - if (mbufq_enqueue(mq, m) != 0) - goto fail; - m = n; - } - NAT64STAT_ADD(stats, ofrags, mbufq_len(mq)); - return (0); -fail: - if (m != NULL) - m_freem(m); - if (n != NULL) - m_freem(n); - mbufq_drain(mq); - NAT64STAT_INC(stats, nomem); - return (ENOMEM); -} - -#if __FreeBSD_version < 1100000 -#define rt_expire rt_rmx.rmx_expire -#define rt_mtu rt_rmx.rmx_mtu -#endif -static NAT64NOINLINE struct sockaddr* -nat64_find_route6(struct route_in6 *ro, struct in6_addr *dest, struct mbuf *m) -{ - struct sockaddr_in6 *dst; - struct rtentry *rt; - - bzero(ro, sizeof(*ro)); - dst = (struct sockaddr_in6 *)&ro->ro_dst; - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof(*dst); - dst->sin6_addr = *dest; - IN6_LOOKUP_ROUTE(ro, M_GETFIB(m)); - rt = ro->ro_rt; - if (rt && (rt->rt_flags & RTF_UP) && - (rt->rt_ifp->if_flags & IFF_UP) && - (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { - if (rt->rt_flags & RTF_GATEWAY) - dst = (struct sockaddr_in6 *)rt->rt_gateway; - } else - return (NULL); - if (((rt->rt_flags & RTF_REJECT) && - (rt->rt_expire == 0 || - time_uptime < rt->rt_expire)) || - rt->rt_ifp->if_link_state == LINK_STATE_DOWN) - return (NULL); 
- return ((struct sockaddr *)dst); -} - -#define NAT64_ICMP6_PLEN 64 -static NAT64NOINLINE void -nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu, - nat64_stats_block *stats, void *logdata) -{ - struct icmp6_hdr *icmp6; - struct ip6_hdr *ip6, *oip6; - struct mbuf *n; - int len, plen; - - len = 0; - plen = nat64_getlasthdr(m, &len); - if (plen < 0) { - DPRINTF(DP_DROPS, "mbuf isn't contigious"); - goto freeit; - } - /* - * Do not send ICMPv6 in reply to ICMPv6 errors. - */ - if (plen == IPPROTO_ICMPV6) { - if (m->m_len < len + sizeof(*icmp6)) { - DPRINTF(DP_DROPS, "mbuf isn't contigious"); - goto freeit; - } - icmp6 = mtodo(m, len); - if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST || - icmp6->icmp6_type == ND_REDIRECT) { - DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to " - "ICMPv6 errors"); - goto freeit; - } - } - /* - if (icmp6_ratelimit(&ip6->ip6_src, type, code)) - goto freeit; - */ - ip6 = mtod(m, struct ip6_hdr *); - switch (type) { - case ICMP6_DST_UNREACH: - case ICMP6_PACKET_TOO_BIG: - case ICMP6_TIME_EXCEEDED: - case ICMP6_PARAM_PROB: - break; - default: - goto freeit; - } - /* Calculate length of ICMPv6 payload */ - len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN: - m->m_pkthdr.len; - - /* Create new ICMPv6 datagram */ - plen = len + sizeof(struct icmp6_hdr); - n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT, - MT_HEADER, M_PKTHDR); - if (n == NULL) { - NAT64STAT_INC(stats, nomem); - m_freem(m); - return; - } - /* - * Move pkthdr from original mbuf. We should have initialized some - * fields, because we can reinject this mbuf to netisr and it will - * go trough input path (it requires at least rcvif should be set). - * Also do M_ALIGN() to reduce chances of need to allocate new mbuf - * in the chain, when we will do M_PREPEND() or make some type of - * tunneling. 
- */ - m_move_pkthdr(n, m); - M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr); - - n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; - oip6 = mtod(n, struct ip6_hdr *); - oip6->ip6_src = ip6->ip6_dst; - oip6->ip6_dst = ip6->ip6_src; - oip6->ip6_nxt = IPPROTO_ICMPV6; - oip6->ip6_flow = 0; - oip6->ip6_vfc |= IPV6_VERSION; - oip6->ip6_hlim = V_ip6_defhlim; - oip6->ip6_plen = htons(plen); - - icmp6 = mtodo(n, sizeof(struct ip6_hdr)); - icmp6->icmp6_cksum = 0; - icmp6->icmp6_type = type; - icmp6->icmp6_code = code; - icmp6->icmp6_mtu = htonl(mtu); - - m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) + - sizeof(struct icmp6_hdr))); - icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6, - sizeof(struct ip6_hdr), plen); - m_freem(m); - nat64_output_one(n, stats, logdata); - return; -freeit: - NAT64STAT_INC(stats, dropped); - m_freem(m); -} - -static NAT64NOINLINE struct sockaddr* -nat64_find_route4(struct route *ro, in_addr_t dest, struct mbuf *m) -{ - struct sockaddr_in *dst; - struct rtentry *rt; - - bzero(ro, sizeof(*ro)); - dst = (struct sockaddr_in *)&ro->ro_dst; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr.s_addr = dest; - IN_LOOKUP_ROUTE(ro, M_GETFIB(m)); - rt = ro->ro_rt; - if (rt && (rt->rt_flags & RTF_UP) && - (rt->rt_ifp->if_flags & IFF_UP) && - (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { - if (rt->rt_flags & RTF_GATEWAY) - dst = (struct sockaddr_in *)rt->rt_gateway; - } else - return (NULL); - if (((rt->rt_flags & RTF_REJECT) && - (rt->rt_expire == 0 || - time_uptime < rt->rt_expire)) || - rt->rt_ifp->if_link_state == LINK_STATE_DOWN) - return (NULL); - return ((struct sockaddr *)dst); -} - -#define NAT64_ICMP_PLEN 64 -static NAT64NOINLINE void -nat64_icmp_reflect(struct mbuf *m, uint8_t type, - uint8_t code, uint16_t mtu, nat64_stats_block *stats, void *logdata) -{ - struct icmp *icmp; - struct ip *ip, *oip; - struct mbuf *n; - int len, plen; - - ip = mtod(m, struct ip *); - /* Do not send ICMP error if packet 
is not the first fragment */ - if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) { - DPRINTF(DP_DROPS, "not first fragment"); - goto freeit; - } - /* Do not send ICMP in reply to ICMP errors */ - if (ip->ip_p == IPPROTO_ICMP) { - if (m->m_len < (ip->ip_hl << 2)) { - DPRINTF(DP_DROPS, "mbuf isn't contigious"); - goto freeit; - } - icmp = mtodo(m, ip->ip_hl << 2); - if (!ICMP_INFOTYPE(icmp->icmp_type)) { - DPRINTF(DP_DROPS, "do not send ICMP in reply to " - "ICMP errors"); - goto freeit; - } - } - switch (type) { - case ICMP_UNREACH: - case ICMP_TIMXCEED: - case ICMP_PARAMPROB: - break; - default: - goto freeit; - } - /* Calculate length of ICMP payload */ - len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8: - m->m_pkthdr.len; - - /* Create new ICMPv4 datagram */ - plen = len + sizeof(struct icmphdr) + sizeof(uint32_t); - n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT, - MT_HEADER, M_PKTHDR); - if (n == NULL) { - NAT64STAT_INC(stats, nomem); - m_freem(m); - return; - } - m_move_pkthdr(n, m); - M_ALIGN(n, sizeof(struct ip) + plen + max_hdr); - - n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen; - oip = mtod(n, struct ip *); - oip->ip_v = IPVERSION; - oip->ip_hl = sizeof(struct ip) >> 2; - oip->ip_tos = 0; - oip->ip_len = htons(n->m_pkthdr.len); - oip->ip_ttl = V_ip_defttl; - oip->ip_p = IPPROTO_ICMP; - ip_fillid(oip); - oip->ip_off = htons(IP_DF); - oip->ip_src = ip->ip_dst; - oip->ip_dst = ip->ip_src; - oip->ip_sum = 0; - oip->ip_sum = in_cksum_hdr(oip); - - icmp = mtodo(n, sizeof(struct ip)); - icmp->icmp_type = type; - icmp->icmp_code = code; - icmp->icmp_cksum = 0; - icmp->icmp_pmvoid = 0; - icmp->icmp_nextmtu = htons(mtu); - m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) + - sizeof(struct icmphdr) + sizeof(uint32_t))); - icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen, - sizeof(struct ip)); - m_freem(m); - nat64_output_one(n, stats, logdata); - return; -freeit: - NAT64STAT_INC(stats, dropped); - m_freem(m); -} - -/* Translate ICMP 
echo request/reply into ICMPv6 */ -static void -nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6, - uint16_t id, uint8_t type) -{ - uint16_t old; - - old = *(uint16_t *)icmp6; /* save type+code in one word */ - icmp6->icmp6_type = type; - /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */ - icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, - old, *(uint16_t *)icmp6); - if (id != 0) { - old = icmp6->icmp6_id; - icmp6->icmp6_id = id; - /* Reflect ICMP id translation in the cksum */ - icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, - old, id); - } - /* Reflect IPv6 pseudo header in the cksum */ - icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), - IPPROTO_ICMPV6, ~icmp6->icmp6_cksum); -} - -static NAT64NOINLINE struct mbuf * -nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid, - int offset, nat64_stats_block *stats) -{ - struct ip ip; - struct icmp *icmp; - struct tcphdr *tcp; - struct udphdr *udp; - struct ip6_hdr *eip6; - struct mbuf *n; - uint32_t mtu; - int len, hlen, plen; - uint8_t type, code; - - if (m->m_len < offset + ICMP_MINLEN) - m = m_pullup(m, offset + ICMP_MINLEN); - if (m == NULL) { - NAT64STAT_INC(stats, nomem); - return (m); - } - mtu = 0; - icmp = mtodo(m, offset); - /* RFC 7915 p4.2 */ - switch (icmp->icmp_type) { - case ICMP_ECHOREPLY: - type = ICMP6_ECHO_REPLY; - code = 0; - break; - case ICMP_UNREACH: - type = ICMP6_DST_UNREACH; - switch (icmp->icmp_code) { - case ICMP_UNREACH_NET: - case ICMP_UNREACH_HOST: - case ICMP_UNREACH_SRCFAIL: - case ICMP_UNREACH_NET_UNKNOWN: - case ICMP_UNREACH_HOST_UNKNOWN: - case ICMP_UNREACH_TOSNET: - case ICMP_UNREACH_TOSHOST: - code = ICMP6_DST_UNREACH_NOROUTE; - break; - case ICMP_UNREACH_PROTOCOL: - type = ICMP6_PARAM_PROB; - code = ICMP6_PARAMPROB_NEXTHEADER; - break; - case ICMP_UNREACH_PORT: - code = ICMP6_DST_UNREACH_NOPORT; - break; - case ICMP_UNREACH_NEEDFRAG: - type = ICMP6_PACKET_TOO_BIG; - code = 0; - /* XXX: needs an 
additional look */ - mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20); - break; - case ICMP_UNREACH_NET_PROHIB: - case ICMP_UNREACH_HOST_PROHIB: - case ICMP_UNREACH_FILTER_PROHIB: - case ICMP_UNREACH_PRECEDENCE_CUTOFF: - code = ICMP6_DST_UNREACH_ADMIN; - break; - default: - DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", - icmp->icmp_type, icmp->icmp_code); - goto freeit; - } - break; - case ICMP_TIMXCEED: - type = ICMP6_TIME_EXCEEDED; - code = icmp->icmp_code; - break; - case ICMP_ECHO: - type = ICMP6_ECHO_REQUEST; - code = 0; - break; - case ICMP_PARAMPROB: - type = ICMP6_PARAM_PROB; - switch (icmp->icmp_code) { - case ICMP_PARAMPROB_ERRATPTR: - case ICMP_PARAMPROB_LENGTH: - code = ICMP6_PARAMPROB_HEADER; - switch (icmp->icmp_pptr) { - case 0: /* Version/IHL */ - case 1: /* Type Of Service */ - mtu = icmp->icmp_pptr; - break; - case 2: /* Total Length */ - case 3: mtu = 4; /* Payload Length */ - break; - case 8: /* Time to Live */ - mtu = 7; /* Hop Limit */ - break; - case 9: /* Protocol */ - mtu = 6; /* Next Header */ - break; - case 12: /* Source address */ - case 13: - case 14: - case 15: - mtu = 8; - break; - case 16: /* Destination address */ - case 17: - case 18: - case 19: - mtu = 24; - break; - default: /* Silently drop */ - DPRINTF(DP_DROPS, "Unsupported ICMP type %d," - " code %d, pptr %d", icmp->icmp_type, - icmp->icmp_code, icmp->icmp_pptr); - goto freeit; - } - break; - default: - DPRINTF(DP_DROPS, "Unsupported ICMP type %d," - " code %d, pptr %d", icmp->icmp_type, - icmp->icmp_code, icmp->icmp_pptr); - goto freeit; - } - break; - default: - DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", - icmp->icmp_type, icmp->icmp_code); - goto freeit; - } - /* - * For echo request/reply we can use original payload, - * but we need adjust icmp_cksum, because ICMPv6 cksum covers - * IPv6 pseudo header and ICMPv6 types differs from ICMPv4. 
- */ - if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) { - nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type); - return (m); - } - /* - * For other types of ICMP messages we need to translate inner - * IPv4 header to IPv6 header. - * Assume ICMP src is the same as payload dst - * E.g. we have ( GWsrc1 , NATIP1 ) in outer header - * and ( NATIP1, Hostdst1 ) in ICMP copy header. - * In that case, we already have map for NATIP1 and GWsrc1. - * The only thing we need is to copy IPv6 map prefix to - * Hostdst1. - */ - hlen = offset + ICMP_MINLEN; - if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) { - DPRINTF(DP_DROPS, "Message is too short %d", - m->m_pkthdr.len); - goto freeit; - } - m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); - if (ip.ip_v != IPVERSION) { - DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v); - goto freeit; - } - hlen += ip.ip_hl << 2; /* Skip inner IP header */ - if (nat64_check_ip4(ip.ip_src.s_addr) != 0 || - nat64_check_ip4(ip.ip_dst.s_addr) != 0 || - nat64_check_private_ip4(ip.ip_src.s_addr) != 0 || - nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) { - DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x", - ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr)); - goto freeit; - } - if (m->m_pkthdr.len < hlen + ICMP_MINLEN) { - DPRINTF(DP_DROPS, "Message is too short %d", - m->m_pkthdr.len); - goto freeit; - } -#if 0 - /* - * Check that inner source matches the outer destination. - * XXX: We need some method to convert IPv4 into IPv6 address here, - * and compare IPv6 addresses. - */ - if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) { - DPRINTF(DP_GENERIC, "Inner source doesn't match destination ", - "%04x vs %04x", ip.ip_src.s_addr, - nat64_get_ip4(&ip6->ip6_dst)); - goto freeit; - } -#endif - /* - * Create new mbuf for ICMPv6 datagram. - * NOTE: len is data length just after inner IP header. 
- */ - len = m->m_pkthdr.len - hlen; - if (sizeof(struct ip6_hdr) + - sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN) - len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) - - sizeof(struct ip6_hdr); - plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len; - n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR); - if (n == NULL) { - NAT64STAT_INC(stats, nomem); - m_freem(m); - return (NULL); - } - m_move_pkthdr(n, m); - M_ALIGN(n, offset + plen + max_hdr); - n->m_len = n->m_pkthdr.len = offset + plen; - /* Adjust ip6_plen in outer header */ - ip6->ip6_plen = htons(plen); - /* Construct new inner IPv6 header */ - eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr)); - eip6->ip6_src = ip6->ip6_dst; - /* Use the fact that we have single /96 prefix for IPv4 map */ - eip6->ip6_dst = ip6->ip6_src; - nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr); - - eip6->ip6_flow = htonl(ip.ip_tos << 20); - eip6->ip6_vfc |= IPV6_VERSION; - eip6->ip6_hlim = ip.ip_ttl; - eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2)); - eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p; - m_copydata(m, hlen, len, (char *)(eip6 + 1)); - /* - * We need to translate source port in the inner ULP header, - * and adjust ULP checksum. - */ - switch (ip.ip_p) { - case IPPROTO_TCP: - if (len < offsetof(struct tcphdr, th_sum)) - break; - tcp = TCP(eip6 + 1); - if (icmpid != 0) { - tcp->th_sum = cksum_adjust(tcp->th_sum, - tcp->th_sport, icmpid); - tcp->th_sport = icmpid; - } - tcp->th_sum = cksum_add(tcp->th_sum, - ~nat64_cksum_convert(eip6, &ip)); - break; - case IPPROTO_UDP: - if (len < offsetof(struct udphdr, uh_sum)) - break; - udp = UDP(eip6 + 1); - if (icmpid != 0) { - udp->uh_sum = cksum_adjust(udp->uh_sum, - udp->uh_sport, icmpid); - udp->uh_sport = icmpid; - } - udp->uh_sum = cksum_add(udp->uh_sum, - ~nat64_cksum_convert(eip6, &ip)); - break; - case IPPROTO_ICMP: - /* - * Check if this is an ICMP error message for echo request - * that we sent. I.e. 
ULP in the data containing invoking - * packet is IPPROTO_ICMP and its type is ICMP_ECHO. - */ - icmp = (struct icmp *)(eip6 + 1); - if (icmp->icmp_type != ICMP_ECHO) { - m_freem(n); - goto freeit; - } - /* - * For our client this original datagram should looks - * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST. - * Thus we need adjust icmp_cksum and convert type from - * ICMP_ECHO to ICMP6_ECHO_REQUEST. - */ - nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid, - ICMP6_ECHO_REQUEST); - } - m_freem(m); - /* Convert ICMPv4 into ICMPv6 header */ - icmp = mtodo(n, offset); - ICMP6(icmp)->icmp6_type = type; - ICMP6(icmp)->icmp6_code = code; - ICMP6(icmp)->icmp6_mtu = htonl(mtu); - ICMP6(icmp)->icmp6_cksum = 0; - ICMP6(icmp)->icmp6_cksum = cksum_add( - ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0), - in_cksum_skip(n, n->m_pkthdr.len, offset)); - return (n); -freeit: - m_freem(m); - NAT64STAT_INC(stats, dropped); - return (NULL); -} - -int -nat64_getlasthdr(struct mbuf *m, int *offset) -{ - struct ip6_hdr *ip6; - struct ip6_hbh *hbh; - int proto, hlen; - - if (offset != NULL) - hlen = *offset; - else - hlen = 0; - - if (m->m_len < hlen + sizeof(*ip6)) - return (-1); - - ip6 = mtodo(m, hlen); - hlen += sizeof(*ip6); - proto = ip6->ip6_nxt; - /* Skip extension headers */ - while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || - proto == IPPROTO_DSTOPTS) { - hbh = mtodo(m, hlen); - /* - * We expect mbuf has contigious data up to - * upper level header. - */ - if (m->m_len < hlen) - return (-1); - /* - * We doesn't support Jumbo payload option, - * so return error. 
- */ - if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0) - return (-1); - proto = hbh->ip6h_nxt; - hlen += hbh->ip6h_len << 3; - } - if (offset != NULL) - *offset = hlen; - return (proto); -} - -int -nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, - struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats, - void *logdata) -{ - struct route_in6 ro; - struct ip6_hdr ip6; - struct ifnet *ifp; - struct ip *ip; - struct mbufq mq; - struct sockaddr *dst; - uint32_t mtu; - uint16_t ip_id, ip_off; - uint16_t *csum; - int plen, hlen; - uint8_t proto; - - ip = mtod(m, struct ip*); - - if (ip->ip_ttl <= IPTTLDEC) { - nat64_icmp_reflect(m, ICMP_TIMXCEED, - ICMP_TIMXCEED_INTRANS, 0, stats, logdata); - return (NAT64RETURN); - } - - ip6.ip6_dst = *daddr; - ip6.ip6_src = *saddr; - - hlen = ip->ip_hl << 2; - plen = ntohs(ip->ip_len) - hlen; - proto = ip->ip_p; - - /* Save ip_id and ip_off, both are in network byte order */ - ip_id = ip->ip_id; - ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF); - - /* Fragment length must be multiple of 8 octets */ - if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) { - nat64_icmp_reflect(m, ICMP_PARAMPROB, - ICMP_PARAMPROB_LENGTH, 0, stats, logdata); - return (NAT64RETURN); - } - /* Fragmented ICMP is unsupported */ - if (proto == IPPROTO_ICMP && ip_off != 0) { - DPRINTF(DP_DROPS, "dropped due to fragmented ICMP"); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - - dst = nat64_find_route6(&ro, &ip6.ip6_dst, m); - if (dst == NULL) { - FREE_ROUTE(&ro); - NAT64STAT_INC(stats, noroute6); - nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, - stats, logdata); - return (NAT64RETURN); - } - ifp = ro.ro_rt->rt_ifp; - if (ro.ro_rt->rt_mtu != 0) - mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu); - else - mtu = ifp->if_mtu; - if (mtu < plen + sizeof(ip6) && (ip->ip_off & htons(IP_DF)) != 0) { - FREE_ROUTE(&ro); - nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, - FRAGSZ(mtu) + sizeof(struct ip), stats, 
logdata); - return (NAT64RETURN); - } - - ip6.ip6_flow = htonl(ip->ip_tos << 20); - ip6.ip6_vfc |= IPV6_VERSION; -#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT - ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC; -#else - /* Forwarding code will decrement HLIM. */ - ip6.ip6_hlim = ip->ip_ttl; -#endif - ip6.ip6_plen = htons(plen); - ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto; - /* Convert checksums. */ - switch (proto) { - case IPPROTO_TCP: - csum = &TCP(mtodo(m, hlen))->th_sum; - if (lport != 0) { - struct tcphdr *tcp = TCP(mtodo(m, hlen)); - *csum = cksum_adjust(*csum, tcp->th_dport, lport); - tcp->th_dport = lport; - } - *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); - break; - case IPPROTO_UDP: - csum = &UDP(mtodo(m, hlen))->uh_sum; - if (lport != 0) { - struct udphdr *udp = UDP(mtodo(m, hlen)); - *csum = cksum_adjust(*csum, udp->uh_dport, lport); - udp->uh_dport = lport; - } - *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); - break; - case IPPROTO_ICMP: - m = nat64_icmp_translate(m, &ip6, lport, hlen, stats); - if (m == NULL) { - FREE_ROUTE(&ro); - /* stats already accounted */ - return (NAT64RETURN); - } - } - - m_adj(m, hlen); - mbufq_init(&mq, 255); - nat64_fragment6(stats, &ip6, &mq, m, mtu, ip_id, ip_off); - while ((m = mbufq_dequeue(&mq)) != NULL) { - if (nat64_output(ifp, m, dst, (struct route *)&ro, stats, - logdata) != 0) - break; - NAT64STAT_INC(stats, opcnt46); - } - mbufq_drain(&mq); - FREE_ROUTE(&ro); - return (NAT64RETURN); -} - -int -nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, - nat64_stats_block *stats, void *logdata) -{ - struct ip ip; - struct icmp6_hdr *icmp6; - struct ip6_frag *ip6f; - struct ip6_hdr *ip6, *ip6i; - uint32_t mtu; - int plen, proto; - uint8_t type, code; - - if (hlen == 0) { - ip6 = mtod(m, struct ip6_hdr *); - if (nat64_check_ip6(&ip6->ip6_src) != 0 || - nat64_check_ip6(&ip6->ip6_dst) != 0) - return (NAT64SKIP); - - proto = nat64_getlasthdr(m, &hlen); - if (proto != 
IPPROTO_ICMPV6) { - DPRINTF(DP_DROPS, - "dropped due to mbuf isn't contigious"); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - } - - /* - * Translate ICMPv6 type and code to ICMPv4 (RFC7915). - * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6(). - */ - icmp6 = mtodo(m, hlen); - mtu = 0; - switch (icmp6->icmp6_type) { - case ICMP6_DST_UNREACH: - type = ICMP_UNREACH; - switch (icmp6->icmp6_code) { - case ICMP6_DST_UNREACH_NOROUTE: - case ICMP6_DST_UNREACH_BEYONDSCOPE: - case ICMP6_DST_UNREACH_ADDR: - code = ICMP_UNREACH_HOST; - break; - case ICMP6_DST_UNREACH_ADMIN: - code = ICMP_UNREACH_HOST_PROHIB; - break; - case ICMP6_DST_UNREACH_NOPORT: - code = ICMP_UNREACH_PORT; - break; - default: - DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," - " code %d", icmp6->icmp6_type, - icmp6->icmp6_code); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - break; - case ICMP6_PACKET_TOO_BIG: - type = ICMP_UNREACH; - code = ICMP_UNREACH_NEEDFRAG; - mtu = ntohl(icmp6->icmp6_mtu); - if (mtu < IPV6_MMTU) { - DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d," - " code %d", mtu, icmp6->icmp6_type, - icmp6->icmp6_code); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - /* - * Adjust MTU to reflect difference between - * IPv6 an IPv4 headers. 
- */ - mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip); - break; - case ICMP6_TIME_EXCEEDED: - type = ICMP_TIMXCEED; - code = icmp6->icmp6_code; - break; - case ICMP6_PARAM_PROB: - switch (icmp6->icmp6_code) { - case ICMP6_PARAMPROB_HEADER: - type = ICMP_PARAMPROB; - code = ICMP_PARAMPROB_ERRATPTR; - mtu = ntohl(icmp6->icmp6_pptr); - switch (mtu) { - case 0: /* Version/Traffic Class */ - case 1: /* Traffic Class/Flow Label */ - break; - case 4: /* Payload Length */ - case 5: - mtu = 2; - break; - case 6: /* Next Header */ - mtu = 9; - break; - case 7: /* Hop Limit */ - mtu = 8; - break; - default: - if (mtu >= 8 && mtu <= 23) { - mtu = 12; /* Source address */ - break; - } - if (mtu >= 24 && mtu <= 39) { - mtu = 16; /* Destination address */ - break; - } - DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," - " code %d, pptr %d", icmp6->icmp6_type, - icmp6->icmp6_code, mtu); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - case ICMP6_PARAMPROB_NEXTHEADER: - type = ICMP_UNREACH; - code = ICMP_UNREACH_PROTOCOL; - break; - default: - DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," - " code %d, pptr %d", icmp6->icmp6_type, - icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr)); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - break; - default: - DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d", - icmp6->icmp6_type, icmp6->icmp6_code); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - - hlen += sizeof(struct icmp6_hdr); - if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { - NAT64STAT_INC(stats, dropped); - DPRINTF(DP_DROPS, "Message is too short %d", - m->m_pkthdr.len); - return (NAT64MFREE); - } - /* - * We need at least ICMP_MINLEN bytes of original datagram payload - * to generate ICMP message. It is nice that ICMP_MINLEN is equal - * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment - * header we will not have to do m_pullup() again. 
- * - * What we have here: - * Outer header: (IPv6iGW, v4mapPRefix+v4exthost) - * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport] - * We need to translate it to: - * - * Outer header: (alias_host, v4exthost) - * Inner header: (v4exthost, alias_host) [sport, alias_port] - * - * Assume caller function has checked if v4mapPRefix+v4host - * matches configured prefix. - * The only two things we should be provided with are mapping between - * IPv6iHost <> alias_host and between dport and alias_port. - */ - if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) - m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); - if (m == NULL) { - NAT64STAT_INC(stats, nomem); - return (NAT64RETURN); - } - ip6 = mtod(m, struct ip6_hdr *); - ip6i = mtodo(m, hlen); - ip6f = NULL; - proto = ip6i->ip6_nxt; - plen = ntohs(ip6i->ip6_plen); - hlen += sizeof(struct ip6_hdr); - if (proto == IPPROTO_FRAGMENT) { - if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) + - ICMP_MINLEN) - goto fail; - ip6f = mtodo(m, hlen); - proto = ip6f->ip6f_nxt; - plen -= sizeof(struct ip6_frag); - hlen += sizeof(struct ip6_frag); - /* Ajust MTU to reflect frag header size */ - if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG) - mtu -= sizeof(struct ip6_frag); - } - if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { - DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header", - proto); - goto fail; - } - if (nat64_check_ip6(&ip6i->ip6_src) != 0 || - nat64_check_ip6(&ip6i->ip6_dst) != 0) { - DPRINTF(DP_DROPS, "Inner addresses do not passes the check"); - goto fail; - } - /* Check if outer dst is the same as inner src */ - if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) { - DPRINTF(DP_DROPS, "Inner src doesn't match outer dst"); - goto fail; - } - - /* Now we need to make a fake IPv4 packet to generate ICMP message */ - ip.ip_dst.s_addr = aaddr; - ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src); - /* XXX: Make fake ulp header */ -#ifdef 
IPFIREWALL_NAT64_DIRECT_OUTPUT - ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */ -#endif - nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip); - m_adj(m, hlen - sizeof(struct ip)); - bcopy(&ip, mtod(m, void *), sizeof(ip)); - nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats, logdata); - return (NAT64RETURN); -fail: - /* - * We must call m_freem() because mbuf pointer could be - * changed with m_pullup(). - */ - m_freem(m); - NAT64STAT_INC(stats, dropped); - return (NAT64RETURN); -} - -int -nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, - nat64_stats_block *stats, void *logdata) -{ - struct route ro; - struct ip ip; - struct ifnet *ifp; - struct ip6_frag *frag; - struct ip6_hdr *ip6; - struct icmp6_hdr *icmp6; - struct sockaddr *dst; - uint16_t *csum; - uint32_t mtu; - int plen, hlen, proto; - - /* - * XXX: we expect ipfw_chk() did m_pullup() up to upper level - * protocol's headers. Also we skip some checks, that ip6_input(), - * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. - */ - ip6 = mtod(m, struct ip6_hdr *); - if (nat64_check_ip6(&ip6->ip6_src) != 0 || - nat64_check_ip6(&ip6->ip6_dst) != 0) { - return (NAT64SKIP); - } - - /* Starting from this point we must not return zero */ - ip.ip_src.s_addr = aaddr; - if (nat64_check_ip4(ip.ip_src.s_addr) != 0) { - DPRINTF(DP_GENERIC, "invalid source address: %08x", - ip.ip_src.s_addr); - /* XXX: stats? */ - return (NAT64MFREE); - } - - ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst); - if (ip.ip_dst.s_addr == 0) { - /* XXX: stats? 
*/ - return (NAT64MFREE); - } - - if (ip6->ip6_hlim <= IPV6_HLIMDEC) { - nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_TRANSIT, 0, stats, logdata); - return (NAT64RETURN); - } - - hlen = 0; - plen = ntohs(ip6->ip6_plen); - proto = nat64_getlasthdr(m, &hlen); - if (proto < 0) { - DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - frag = NULL; - if (proto == IPPROTO_FRAGMENT) { - /* ipfw_chk should m_pullup up to frag header */ - if (m->m_len < hlen + sizeof(*frag)) { - DPRINTF(DP_DROPS, - "dropped due to mbuf isn't contigious"); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - frag = mtodo(m, hlen); - proto = frag->ip6f_nxt; - hlen += sizeof(*frag); - /* Fragmented ICMPv6 is unsupported */ - if (proto == IPPROTO_ICMPV6) { - DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6"); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - /* Fragment length must be multiple of 8 octets */ - if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 && - ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) { - nat64_icmp6_reflect(m, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_HEADER, - offsetof(struct ip6_hdr, ip6_plen), stats, - logdata); - return (NAT64RETURN); - } - } - plen -= hlen - sizeof(struct ip6_hdr); - if (plen < 0 || m->m_pkthdr.len < plen + hlen) { - DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d", - plen, m->m_pkthdr.len, hlen); - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - - icmp6 = NULL; /* Make gcc happy */ - if (proto == IPPROTO_ICMPV6) { - icmp6 = mtodo(m, hlen); - if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST && - icmp6->icmp6_type != ICMP6_ECHO_REPLY) - return (nat64_handle_icmp6(m, hlen, aaddr, aport, - stats, logdata)); - } - dst = nat64_find_route4(&ro, ip.ip_dst.s_addr, m); - if (dst == NULL) { - FREE_ROUTE(&ro); - NAT64STAT_INC(stats, noroute4); - nat64_icmp6_reflect(m, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_NOROUTE, 0, stats, logdata); - return 
(NAT64RETURN); - } - - ifp = ro.ro_rt->rt_ifp; - if (ro.ro_rt->rt_mtu != 0) - mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu); - else - mtu = ifp->if_mtu; - if (mtu < plen + sizeof(ip)) { - FREE_ROUTE(&ro); - nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, mtu, stats, - logdata); - return (NAT64RETURN); - } - nat64_init_ip4hdr(ip6, frag, plen, proto, &ip); - /* Convert checksums. */ - switch (proto) { - case IPPROTO_TCP: - csum = &TCP(mtodo(m, hlen))->th_sum; - if (aport != 0) { - struct tcphdr *tcp = TCP(mtodo(m, hlen)); - *csum = cksum_adjust(*csum, tcp->th_sport, aport); - tcp->th_sport = aport; - } - *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); - break; - case IPPROTO_UDP: - csum = &UDP(mtodo(m, hlen))->uh_sum; - if (aport != 0) { - struct udphdr *udp = UDP(mtodo(m, hlen)); - *csum = cksum_adjust(*csum, udp->uh_sport, aport); - udp->uh_sport = aport; - } - *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); - break; - case IPPROTO_ICMPV6: - /* Checksum in ICMPv6 covers pseudo header */ - csum = &icmp6->icmp6_cksum; - *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen, - IPPROTO_ICMPV6, 0)); - /* Convert ICMPv6 types to ICMP */ - mtu = *(uint16_t *)icmp6; /* save old word for cksum_adjust */ - if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST) - icmp6->icmp6_type = ICMP_ECHO; - else /* ICMP6_ECHO_REPLY */ - icmp6->icmp6_type = ICMP_ECHOREPLY; - *csum = cksum_adjust(*csum, (uint16_t)mtu, *(uint16_t *)icmp6); - if (aport != 0) { - uint16_t old_id = icmp6->icmp6_id; - icmp6->icmp6_id = aport; - *csum = cksum_adjust(*csum, old_id, aport); - } - break; - }; - - m_adj(m, hlen - sizeof(ip)); - bcopy(&ip, mtod(m, void *), sizeof(ip)); - if (nat64_output(ifp, m, dst, &ro, stats, logdata) == 0) - NAT64STAT_INC(stats, opcnt64); - FREE_ROUTE(&ro); - return (NAT64RETURN); -} - diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.h b/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.h deleted file mode 100644 index 9f653954..00000000 --- 
a/freebsd/sys/netpfil/ipfw/nat64/nat64_translate.h +++ /dev/null @@ -1,116 +0,0 @@ -/*- - * Copyright (c) 2015-2016 Yandex LLC - * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
extern int nat64_allow_private;

/*
 * Return 1 when 'ia' (network byte order) lies in one of the private-use or
 * special-purpose IPv4 blocks listed below, 0 otherwise.  Pure predicate:
 * it does not look at nat64_allow_private.
 */
static inline int
nat64_is_nonglobal_ip4(in_addr_t ia)
{

	/* IN_PRIVATE: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 */
	if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
	    (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
	    (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
		return (1);
	/*
	 * RFC 5735:
	 *  192.0.0.0/24 - reserved for IETF protocol assignments
	 *  192.88.99.0/24 - for use as 6to4 relay anycast addresses
	 *  198.18.0.0/15 - for use in benchmark tests
	 *  192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
	 *  in documentation and example code
	 *
	 * Each mask below must match the prefix length given above:
	 * /15 is 0xfffe0000 and /24 is 0xffffff00.
	 */
	if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
	    (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
	    (ia & htonl(0xfffe0000)) == htonl(0xc6120000) ||
	    (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
	    (ia & htonl(0xffffff00)) == htonl(0xc6336400) ||
	    (ia & htonl(0xffffff00)) == htonl(0xcb007100))
		return (1);
	return (0);
}

/*
 * Decide whether IPv4 address 'ia' (network byte order) has to be rejected
 * as non-global.
 *
 * Returns 0 for every address when nat64_allow_private is non-zero;
 * otherwise returns 1 for private and special-purpose addresses and 0 for
 * everything else.
 */
static inline int
nat64_check_private_ip4(in_addr_t ia)
{

	if (nat64_allow_private)
		return (0);
	/*
	 * WKPFX must not be used to represent non-global IPv4 addresses.
	 * NOTE(review): the check is applied for every configuration; a
	 * gate on (cfg->flags & NAT64_WKPFX) was left disabled here because
	 * no cfg is available in this function.
	 */
	return (nat64_is_nonglobal_ip4(ia));
}
htonl(0xa9fe0000)) - return (1); - /* IN_MULTICAST & IN_EXPERIMENTAL */ - if ((ia & htonl(0xe0000000)) == htonl(0xe0000000)) - return (1); - return (0); -} - -#define nat64_get_ip4(_ip6) ((_ip6)->s6_addr32[3]) -#define nat64_set_ip4(_ip6, _ip4) (_ip6)->s6_addr32[3] = (_ip4) - -int nat64_getlasthdr(struct mbuf *m, int *offset); -int nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, - struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats, - void *logdata); -int nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, - nat64_stats_block *stats, void *logdata); -int nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, - nat64_stats_block *stats, void *logdata); - -#endif - diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c deleted file mode 100644 index c058fe8d..00000000 --- a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.c +++ /dev/null @@ -1,1772 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2015-2016 Yandex LLC - * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> - * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/counter.h> -#include <sys/errno.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <sys/rmlock.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/queue.h> -#include <sys/syslog.h> -#include <sys/sysctl.h> - -#include <net/if.h> -#include <net/if_var.h> -#include <net/if_pflog.h> -#include <net/pfil.h> - -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> -#include <netinet/ip6.h> -#include <netinet/icmp6.h> -#include <netinet/ip_icmp.h> -#include <netinet/tcp.h> -#include <netinet/udp.h> -#include <netinet6/in6_var.h> -#include <netinet6/ip6_var.h> -#include <netinet6/ip_fw_nat64.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/nat64/ip_fw_nat64.h> -#include <netpfil/ipfw/nat64/nat64lsn.h> -#include <netpfil/ipfw/nat64/nat64_translate.h> -#include <netpfil/pf/pf.h> - -MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN"); - -static void nat64lsn_periodic(void *data); -#define PERIODIC_DELAY 4 -static uint8_t nat64lsn_proto_map[256]; -uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO]; - -#define NAT64_FLAG_FIN 0x01 /* FIN was seen */ -#define NAT64_FLAG_SYN 0x02 /* First syn in->out */ -#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */ -#define NAT64_FLAGS_TCP 
(NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN) - -#define NAT64_FLAG_RDR 0x80 /* Port redirect */ -#define NAT64_LOOKUP(chain, cmd) \ - (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1) -/* - * Delayed job queue, used to create new hosts - * and new portgroups - */ -enum nat64lsn_jtype { - JTYPE_NEWHOST = 1, - JTYPE_NEWPORTGROUP, - JTYPE_DELPORTGROUP, -}; - -struct nat64lsn_job_item { - TAILQ_ENTRY(nat64lsn_job_item) next; - enum nat64lsn_jtype jtype; - struct nat64lsn_host *nh; - struct nat64lsn_portgroup *pg; - void *spare_idx; - struct in6_addr haddr; - uint8_t nat_proto; - uint8_t done; - int needs_idx; - int delcount; - unsigned int fhash; /* Flow hash */ - uint32_t aaddr; /* Last used address (net) */ - struct mbuf *m; - struct ipfw_flow_id f_id; - uint64_t delmask[NAT64LSN_PGPTRNMASK]; -}; - -static struct mtx jmtx; -#define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF) -#define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx) -#define JQUEUE_LOCK() mtx_lock(&jmtx) -#define JQUEUE_UNLOCK() mtx_unlock(&jmtx) - -static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, - struct nat64lsn_job_item *ji); -static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg, - struct nat64lsn_job_head *jhead, int jlen); - -static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg, - const struct ipfw_flow_id *f_id, int jtype); -static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg, - const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr, - int needs_idx); -static int nat64lsn_request_host(struct nat64lsn_cfg *cfg, - const struct ipfw_flow_id *f_id, struct mbuf **pm); -static int nat64lsn_translate4(struct nat64lsn_cfg *cfg, - const struct ipfw_flow_id *f_id, struct mbuf **pm); -static int nat64lsn_translate6(struct nat64lsn_cfg *cfg, - struct ipfw_flow_id *f_id, struct mbuf **pm); - -static int alloc_portgroup(struct nat64lsn_job_item *ji); -static void destroy_portgroup(struct nat64lsn_portgroup *pg); -static void 
destroy_host6(struct nat64lsn_host *nh); -static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji); - -static int attach_portgroup(struct nat64lsn_cfg *cfg, - struct nat64lsn_job_item *ji); -static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji); - - -/* XXX tmp */ -static uma_zone_t nat64lsn_host_zone; -static uma_zone_t nat64lsn_pg_zone; -static uma_zone_t nat64lsn_pgidx_zone; - -static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, - struct nat64lsn_host *nh); - -#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16)) -#define I6_first(_ph, h) (_ph)[h] -#define I6_next(x) (x)->next -#define I6_val(x) (&(x)->addr) -#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b) -#define I6_lock(a, b) -#define I6_unlock(a, b) - -#define I6HASH_FIND(_cfg, _res, _a) \ - CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a) -#define I6HASH_INSERT(_cfg, _i) \ - CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i) -#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \ - CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a) - -#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \ - CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg) - -#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8) - -static unsigned -djb_hash(const unsigned char *h, const int len) -{ - unsigned int result = 0; - int i; - - for (i = 0; i < len; i++) - result = 33 * result ^ h[i]; - - return (result); -} - -/* -static size_t -bitmask_size(size_t num, int *level) -{ - size_t x; - int c; - - for (c = 0, x = num; num > 1; num /= 64, c++) - ; - - return (x); -} - -static void -bitmask_prepare(uint64_t *pmask, size_t bufsize, int level) -{ - size_t x, z; - - memset(pmask, 0xFF, bufsize); - for (x = 0, z = 1; level > 1; x += z, z *= 64, level--) - ; - pmask[x] ~= 0x01; -} -*/ - -static void -nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family, - uint32_t n, uint32_t sn) -{ - - memset(plog, 0, sizeof(*plog)); - plog->length 
= PFLOG_REAL_HDRLEN; - plog->af = family; - plog->action = PF_NAT; - plog->dir = PF_IN; - plog->rulenr = htonl(n); - plog->subrulenr = htonl(sn); - plog->ruleset[0] = '\0'; - strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname)); - ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m); -} -/* - * Inspects icmp packets to see if the message contains different - * packet header so we need to alter @addr and @port. - */ -static int -inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr, - uint16_t *port) -{ - struct ip *ip; - struct tcphdr *tcp; - struct udphdr *udp; - struct icmphdr *icmp; - int off; - uint8_t proto; - - ip = mtod(*m, struct ip *); /* Outer IP header */ - off = (ip->ip_hl << 2) + ICMP_MINLEN; - if ((*m)->m_len < off) - *m = m_pullup(*m, off); - if (*m == NULL) - return (ENOMEM); - - ip = mtod(*m, struct ip *); /* Outer IP header */ - icmp = L3HDR(ip, struct icmphdr *); - switch (icmp->icmp_type) { - case ICMP_ECHO: - case ICMP_ECHOREPLY: - /* Use icmp ID as distinguisher */ - *port = ntohs(*((uint16_t *)(icmp + 1))); - return (0); - case ICMP_UNREACH: - case ICMP_TIMXCEED: - break; - default: - return (EOPNOTSUPP); - } - /* - * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits - * of ULP header. 
- */ - if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN) - return (EINVAL); - if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN) - *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN); - if (*m == NULL) - return (ENOMEM); - ip = mtodo(*m, off); /* Inner IP header */ - proto = ip->ip_p; - off += ip->ip_hl << 2; /* Skip inner IP header */ - *addr = ntohl(ip->ip_src.s_addr); - if ((*m)->m_len < off + ICMP_MINLEN) - *m = m_pullup(*m, off + ICMP_MINLEN); - if (*m == NULL) - return (ENOMEM); - switch (proto) { - case IPPROTO_TCP: - tcp = mtodo(*m, off); - *nat_proto = NAT_PROTO_TCP; - *port = ntohs(tcp->th_sport); - return (0); - case IPPROTO_UDP: - udp = mtodo(*m, off); - *nat_proto = NAT_PROTO_UDP; - *port = ntohs(udp->uh_sport); - return (0); - case IPPROTO_ICMP: - /* - * We will translate only ICMP errors for our ICMP - * echo requests. - */ - icmp = mtodo(*m, off); - if (icmp->icmp_type != ICMP_ECHO) - return (EOPNOTSUPP); - *port = ntohs(*((uint16_t *)(icmp + 1))); - return (0); - }; - return (EOPNOTSUPP); -} - -static inline uint8_t -convert_tcp_flags(uint8_t flags) -{ - uint8_t result; - - result = flags & (TH_FIN|TH_SYN); - result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */ - result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */ - - return (result); -} - -static NAT64NOINLINE int -nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id, - struct mbuf **pm) -{ - struct pfloghdr loghdr, *logdata; - struct in6_addr src6; - struct nat64lsn_portgroup *pg; - struct nat64lsn_host *nh; - struct nat64lsn_state *st; - struct ip *ip; - uint32_t addr; - uint16_t state_flags, state_ts; - uint16_t port, lport; - uint8_t nat_proto; - int ret; - - addr = f_id->dst_ip; - port = f_id->dst_port; - if (addr < cfg->prefix4 || addr > cfg->pmask4) { - NAT64STAT_INC(&cfg->stats, nomatch4); - return (cfg->nomatch_verdict); - } - - /* Check if protocol is supported and get its short id */ - nat_proto = 
nat64lsn_proto_map[f_id->proto]; - if (nat_proto == 0) { - NAT64STAT_INC(&cfg->stats, noproto); - return (cfg->nomatch_verdict); - } - - /* We might need to handle icmp differently */ - if (nat_proto == NAT_PROTO_ICMP) { - ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port); - if (ret != 0) { - if (ret == ENOMEM) - NAT64STAT_INC(&cfg->stats, nomem); - else - NAT64STAT_INC(&cfg->stats, noproto); - return (cfg->nomatch_verdict); - } - /* XXX: Check addr for validity */ - if (addr < cfg->prefix4 || addr > cfg->pmask4) { - NAT64STAT_INC(&cfg->stats, nomatch4); - return (cfg->nomatch_verdict); - } - } - - /* Calc portgroup offset w.r.t protocol */ - pg = GET_PORTGROUP(cfg, addr, nat_proto, port); - - /* Check if this port is occupied by any portgroup */ - if (pg == NULL) { - NAT64STAT_INC(&cfg->stats, nomatch4); -#if 0 - DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port, - _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port)); -#endif - return (cfg->nomatch_verdict); - } - - /* TODO: Check flags to see if we need to do some static mapping */ - nh = pg->host; - - /* Prepare some fields we might need to update */ - SET_AGE(state_ts); - ip = mtod(*pm, struct ip *); - if (ip->ip_p == IPPROTO_TCP) - state_flags = convert_tcp_flags( - L3HDR(ip, struct tcphdr *)->th_flags); - else - state_flags = 0; - - /* Lock host and get port mapping */ - NAT64_LOCK(nh); - - st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)]; - if (st->timestamp != state_ts) - st->timestamp = state_ts; - if ((st->flags & state_flags) != state_flags) - st->flags |= state_flags; - lport = htons(st->u.s.lport); - - NAT64_UNLOCK(nh); - - if (cfg->flags & NAT64_LOG) { - logdata = &loghdr; - nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off); - } else - logdata = NULL; - - src6.s6_addr32[0] = cfg->prefix6.s6_addr32[0]; - src6.s6_addr32[1] = cfg->prefix6.s6_addr32[1]; - src6.s6_addr32[2] = cfg->prefix6.s6_addr32[2]; - src6.s6_addr32[3] = htonl(f_id->src_ip); - - ret = nat64_do_handle_ip4(*pm, &src6, 
&nh->addr, lport, - &cfg->stats, logdata); - - if (ret == NAT64SKIP) - return (IP_FW_PASS); - if (ret == NAT64MFREE) - m_freem(*pm); - *pm = NULL; - - return (IP_FW_DENY); -} - -void -nat64lsn_dump_state(const struct nat64lsn_cfg *cfg, - const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st, - const char *px, int off) -{ - char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN]; - - if ((nat64_debug & DP_STATE) == 0) - return; - inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s)); - inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a)); - inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d)); - - DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> " - "%s:%d AGE %d", px, pg->idx, st, off, - s, st->u.s.lport, pg->nat_proto, a, pg->aport + off, - d, st->u.s.fport, GET_AGE(st->timestamp)); -} - -/* - * Check if particular TCP state is stale and should be deleted. - * Return 1 if true, 0 otherwise. - */ -static int -nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg, - const struct nat64lsn_state *st, int age) -{ - int ttl; - - if (st->flags & NAT64_FLAG_FIN) - ttl = cfg->st_close_ttl; - else if (st->flags & NAT64_FLAG_ESTAB) - ttl = cfg->st_estab_ttl; - else if (st->flags & NAT64_FLAG_SYN) - ttl = cfg->st_syn_ttl; - else - ttl = cfg->st_syn_ttl; - - if (age > ttl) - return (1); - return (0); -} - -/* - * Check if nat state @st is stale and should be deleted. - * Return 1 if true, 0 otherwise. 
- */ -static NAT64NOINLINE int -nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg, - const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st) -{ - int age, delete; - - age = GET_AGE(st->timestamp); - delete = 0; - - /* Skip immutable records */ - if (st->flags & NAT64_FLAG_RDR) - return (0); - - switch (pg->nat_proto) { - case NAT_PROTO_TCP: - delete = nat64lsn_periodic_check_tcp(cfg, st, age); - break; - case NAT_PROTO_UDP: - if (age > cfg->st_udp_ttl) - delete = 1; - break; - case NAT_PROTO_ICMP: - if (age > cfg->st_icmp_ttl) - delete = 1; - break; - } - - return (delete); -} - - -/* - * The following structures and functions - * are used to perform SLIST_FOREACH_SAFE() - * analog for states identified by struct st_ptr. - */ - -struct st_idx { - struct nat64lsn_portgroup *pg; - struct nat64lsn_state *st; - struct st_ptr sidx_next; -}; - -static struct st_idx * -st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh, - struct st_ptr *sidx, struct st_idx *si) -{ - struct nat64lsn_portgroup *pg; - struct nat64lsn_state *st; - - if (sidx->idx == 0) { - memset(si, 0, sizeof(*si)); - return (si); - } - - pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx); - st = &pg->states[sidx->off]; - - si->pg = pg; - si->st = st; - si->sidx_next = st->next; - - return (si); -} - -static struct st_idx * -st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh, - struct st_idx *si) -{ - struct st_ptr sidx; - struct nat64lsn_portgroup *pg; - struct nat64lsn_state *st; - - sidx = si->sidx_next; - if (sidx.idx == 0) { - memset(si, 0, sizeof(*si)); - si->st = NULL; - si->pg = NULL; - return (si); - } - - pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); - st = &pg->states[sidx.off]; - - si->pg = pg; - si->st = st; - si->sidx_next = st->next; - - return (si); -} - -static struct st_idx * -st_save_cond(struct st_idx *si_dst, struct st_idx *si) -{ - if (si->st != NULL) - *si_dst = *si; - - return (si_dst); -} - -unsigned int 
-nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh) -{ - struct st_idx si, si_prev; - int i; - unsigned int delcount; - - delcount = 0; - for (i = 0; i < nh->hsize; i++) { - memset(&si_prev, 0, sizeof(si_prev)); - for (st_first(cfg, nh, &nh->phash[i], &si); - si.st != NULL; - st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) { - if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0) - continue; - nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE", - si.st->cur.off); - /* Unlink from hash */ - if (si_prev.st != NULL) - si_prev.st->next = si.st->next; - else - nh->phash[i] = si.st->next; - /* Delete state and free its data */ - PG_MARK_FREE_IDX(si.pg, si.st->cur.off); - memset(si.st, 0, sizeof(struct nat64lsn_state)); - si.st = NULL; - delcount++; - - /* Update portgroup timestamp */ - SET_AGE(si.pg->timestamp); - } - } - NAT64STAT_ADD(&cfg->stats, sdeleted, delcount); - return (delcount); -} - -/* - * Checks if portgroup is not used and can be deleted, - * Returns 1 if stale, 0 otherwise - */ -static int -stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg) -{ - - if (!PG_IS_EMPTY(pg)) - return (0); - if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay) - return (0); - return (1); -} - -/* - * Checks if host record is not used and can be deleted, - * Returns 1 if stale, 0 otherwise - */ -static int -stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh) -{ - - if (nh->pg_used != 0) - return (0); - if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay) - return (0); - return (1); -} - -struct nat64lsn_periodic_data { - struct nat64lsn_cfg *cfg; - struct nat64lsn_job_head jhead; - int jlen; -}; - -static NAT64NOINLINE int -nat64lsn_periodic_chkhost(struct nat64lsn_host *nh, - struct nat64lsn_periodic_data *d) -{ - char a[INET6_ADDRSTRLEN]; - struct nat64lsn_portgroup *pg; - struct nat64lsn_job_item *ji; - uint64_t delmask[NAT64LSN_PGPTRNMASK]; - int delcount, i; - - delcount = 0; - memset(delmask, 
0, sizeof(delmask)); - - inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); - DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d", - stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu); - if (!stale_nh(d->cfg, nh)) { - /* Non-stale host. Inspect internals */ - NAT64_LOCK(nh); - - /* Stage 1: Check&expire states */ - if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0) - SET_AGE(nh->timestamp); - - /* Stage 2: Check if we need to expire */ - for (i = 0; i < nh->pg_used; i++) { - pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1); - if (pg == NULL) - continue; - - /* Check if we can delete portgroup */ - if (stale_pg(d->cfg, pg) == 0) - continue; - - DPRINTF(DP_JQUEUE, "Check PG %d", i); - delmask[i / 64] |= ((uint64_t)1 << (i % 64)); - delcount++; - } - - NAT64_UNLOCK(nh); - if (delcount == 0) - return (0); - } - - DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount); - /* We have something to delete - add it to queue */ - ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP); - if (ji == NULL) - return (0); - - ji->haddr = nh->addr; - ji->delcount = delcount; - memcpy(ji->delmask, delmask, sizeof(ji->delmask)); - - TAILQ_INSERT_TAIL(&d->jhead, ji, next); - d->jlen++; - return (0); -} - -/* - * This procedure is used to perform various maintance - * on dynamic hash list. Currently it is called every second. 
- */ -static void -nat64lsn_periodic(void *data) -{ - struct ip_fw_chain *ch; - IPFW_RLOCK_TRACKER; - struct nat64lsn_cfg *cfg; - struct nat64lsn_periodic_data d; - struct nat64lsn_host *nh, *tmp; - - cfg = (struct nat64lsn_cfg *) data; - ch = cfg->ch; - CURVNET_SET(cfg->vp); - - memset(&d, 0, sizeof(d)); - d.cfg = cfg; - TAILQ_INIT(&d.jhead); - - IPFW_RLOCK(ch); - - /* Stage 1: foreach host, check all its portgroups */ - I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d); - - /* Enqueue everything we have requested */ - nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen); - - callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY); - - IPFW_RUNLOCK(ch); - - CURVNET_RESTORE(); -} - -static NAT64NOINLINE void -reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) -{ - - if (ji->m == NULL) - return; - - /* Request has failed or packet type is wrong */ - if (ji->f_id.addr_type != 6 || ji->done == 0) { - m_freem(ji->m); - ji->m = NULL; - NAT64STAT_INC(&cfg->stats, dropped); - DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d", - ji->jtype, ji->done); - return; - } - - /* - * XXX: Limit recursion level - */ - - NAT64STAT_INC(&cfg->stats, jreinjected); - DPRINTF(DP_JQUEUE, "Reinject mbuf"); - nat64lsn_translate6(cfg, &ji->f_id, &ji->m); -} - -static void -destroy_portgroup(struct nat64lsn_portgroup *pg) -{ - - DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg); - uma_zfree(nat64lsn_pg_zone, pg); -} - -static NAT64NOINLINE int -alloc_portgroup(struct nat64lsn_job_item *ji) -{ - struct nat64lsn_portgroup *pg; - - pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT); - if (pg == NULL) - return (1); - - if (ji->needs_idx != 0) { - ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT); - /* Failed alloc isn't always fatal, so don't check */ - } - memset(&pg->freemask, 0xFF, sizeof(pg->freemask)); - pg->nat_proto = ji->nat_proto; - ji->pg = pg; - return (0); - -} - -static void -destroy_host6(struct nat64lsn_host *nh) -{ - char a[INET6_ADDRSTRLEN]; - int 
i; - - inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); - DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh, - nh->pg_used); - NAT64_LOCK_DESTROY(nh); - for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++) - uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i)); - uma_zfree(nat64lsn_host_zone, nh); -} - -static NAT64NOINLINE int -alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) -{ - struct nat64lsn_host *nh; - char a[INET6_ADDRSTRLEN]; - - nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT); - if (nh == NULL) - return (1); - PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT); - if (PORTGROUP_CHUNK(nh, 0) == NULL) { - uma_zfree(nat64lsn_host_zone, nh); - return (2); - } - if (alloc_portgroup(ji) != 0) { - NAT64STAT_INC(&cfg->stats, jportfails); - uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0)); - uma_zfree(nat64lsn_host_zone, nh); - return (3); - } - - NAT64_LOCK_INIT(nh); - nh->addr = ji->haddr; - nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */ - nh->pg_allocated = NAT64LSN_PGIDX_CHUNK; - nh->pg_used = 0; - ji->nh = nh; - - inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); - DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh); - return (0); -} - -/* - * Finds free @pg index inside @nh - */ -static NAT64NOINLINE int -find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx) -{ - int i; - - for (i = 0; i < nh->pg_allocated; i++) { - if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) { - *idx = i; - return (0); - } - } - return (1); -} - -static NAT64NOINLINE int -attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) -{ - char a[INET6_ADDRSTRLEN]; - struct nat64lsn_host *nh; - - I6HASH_FIND(cfg, nh, &ji->haddr); - if (nh == NULL) { - /* Add new host to list */ - nh = ji->nh; - I6HASH_INSERT(cfg, nh); - cfg->ihcount++; - ji->nh = NULL; - - inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); - DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh); - /* - * Try to add portgroup. 
- * Note it will automatically set - * 'done' on ji if successful. - */ - if (attach_portgroup(cfg, ji) != 0) { - DPRINTF(DP_DROPS, "%s %p failed to attach PG", - a, nh); - NAT64STAT_INC(&cfg->stats, jportfails); - return (1); - } - return (0); - } - - /* - * nh isn't NULL. This probably means we had several simultaneous - * host requests. The previous one request has already attached - * this host. Requeue attached mbuf and mark job as done, but - * leave nh and pg pointers not changed, so nat64lsn_do_request() - * will release all allocated resources. - */ - inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); - DPRINTF(DP_OBJ, "%s %p is already attached as %p", - a, ji->nh, nh); - ji->done = 1; - return (0); -} - -static NAT64NOINLINE int -find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off, - int nat_proto, uint16_t *aport, int *ppg_idx) -{ - int j, pg_idx; - - pg_idx = addr_off * _ADDR_PG_COUNT + - (nat_proto - 1) * _ADDR_PG_PROTO_COUNT; - - for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) { - if (cfg->pg[pg_idx + j] != NULL) - continue; - - *aport = j * NAT64_CHUNK_SIZE; - *ppg_idx = pg_idx + j; - return (1); - } - - return (0); -} - -/* - * XXX: This function needs to be rewritten to - * use free bitmask for faster pg finding, - * additionally, it should take into consideration - * a) randomization and - * b) previous addresses allocated to given nat instance - * - */ -static NAT64NOINLINE int -find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji, - uint32_t *aaddr, uint16_t *aport, int *ppg_idx) -{ - int i, nat_proto; - - /* - * XXX: Use bitmask index to be able to find/check if IP address - * has some spare pg's - */ - nat_proto = ji->nat_proto; - - /* First, try to use same address */ - if (ji->aaddr != 0) { - i = ntohl(ji->aaddr) - cfg->prefix4; - if (find_pg_place_addr(cfg, i, nat_proto, aport, - ppg_idx) != 0){ - /* Found! 
*/ - *aaddr = htonl(cfg->prefix4 + i); - return (0); - } - } - - /* Next, try to use random address based on flow hash */ - i = ji->fhash % (1 << (32 - cfg->plen4)); - if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) { - /* Found! */ - *aaddr = htonl(cfg->prefix4 + i); - return (0); - } - - - /* Last one: simply find ANY available */ - for (i = 0; i < (1 << (32 - cfg->plen4)); i++) { - if (find_pg_place_addr(cfg, i, nat_proto, aport, - ppg_idx) != 0){ - /* Found! */ - *aaddr = htonl(cfg->prefix4 + i); - return (0); - } - } - - return (1); -} - -static NAT64NOINLINE int -attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) -{ - char a[INET6_ADDRSTRLEN]; - struct nat64lsn_portgroup *pg; - struct nat64lsn_host *nh; - uint32_t aaddr; - uint16_t aport; - int nh_pg_idx, pg_idx; - - pg = ji->pg; - - /* - * Find source host and bind: we can't rely on - * pg->host - */ - I6HASH_FIND(cfg, nh, &ji->haddr); - if (nh == NULL) - return (1); - - /* Find spare port chunk */ - if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) { - inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); - DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a); - return (2); - } - - /* Expand PG indexes if needed */ - if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) { - PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) = - ji->spare_idx; - nh->pg_allocated += NAT64LSN_PGIDX_CHUNK; - ji->spare_idx = NULL; - } - - /* Find empty index to store PG in the @nh */ - if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) { - inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); - DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s", - a); - return (3); - } - - cfg->pg[pg_idx] = pg; - cfg->protochunks[pg->nat_proto]++; - NAT64STAT_INC(&cfg->stats, spgcreated); - - pg->aaddr = aaddr; - pg->aport = aport; - pg->host = nh; - pg->idx = pg_idx; - SET_AGE(pg->timestamp); - - PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg; - if (nh->pg_used == nh_pg_idx) - 
nh->pg_used++; - SET_AGE(nh->timestamp); - - ji->pg = NULL; - ji->done = 1; - - return (0); -} - -static NAT64NOINLINE void -consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) -{ - struct nat64lsn_host *nh, *nh_tmp; - struct nat64lsn_portgroup *pg, *pg_list[256]; - int i, pg_lidx, idx; - - /* Find source host */ - I6HASH_FIND(cfg, nh, &ji->haddr); - if (nh == NULL || nh->pg_used == 0) - return; - - memset(pg_list, 0, sizeof(pg_list)); - pg_lidx = 0; - - NAT64_LOCK(nh); - - for (i = nh->pg_used - 1; i >= 0; i--) { - if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0) - continue; - pg = PORTGROUP_BYSIDX(cfg, nh, i + 1); - - /* Check that PG isn't busy. */ - if (stale_pg(cfg, pg) == 0) - continue; - - /* DO delete */ - pg_list[pg_lidx++] = pg; - PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL; - - idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto, - pg->aport); - KASSERT(cfg->pg[idx] == pg, ("Non matched pg")); - cfg->pg[idx] = NULL; - cfg->protochunks[pg->nat_proto]--; - NAT64STAT_INC(&cfg->stats, spgdeleted); - - /* Decrease pg_used */ - while (nh->pg_used > 0 && - PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL) - nh->pg_used--; - - /* Check if on-stack buffer has ended */ - if (pg_lidx == nitems(pg_list)) - break; - } - - NAT64_UNLOCK(nh); - - if (stale_nh(cfg, nh)) { - I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr); - KASSERT(nh != NULL, ("Unable to find address")); - cfg->ihcount--; - ji->nh = nh; - I6HASH_FIND(cfg, nh, &ji->haddr); - KASSERT(nh == NULL, ("Failed to delete address")); - } - - /* TODO: Delay freeing portgroups */ - while (pg_lidx > 0) { - pg_lidx--; - NAT64STAT_INC(&cfg->stats, spgdeleted); - destroy_portgroup(pg_list[pg_lidx]); - } -} - -/* - * Main request handler. - * Responsible for handling jqueue, e.g. - * creating new hosts, addind/deleting portgroups. 
- */ -static NAT64NOINLINE void -nat64lsn_do_request(void *data) -{ - IPFW_RLOCK_TRACKER; - struct nat64lsn_job_head jhead; - struct nat64lsn_job_item *ji; - int jcount, nhsize; - struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data; - struct ip_fw_chain *ch; - int delcount; - - CURVNET_SET(cfg->vp); - - TAILQ_INIT(&jhead); - - /* XXX: We're running unlocked here */ - - ch = cfg->ch; - delcount = 0; - IPFW_RLOCK(ch); - - /* Grab queue */ - JQUEUE_LOCK(); - TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next); - jcount = cfg->jlen; - cfg->jlen = 0; - JQUEUE_UNLOCK(); - - /* check if we need to resize hash */ - nhsize = 0; - if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) { - nhsize = cfg->ihsize; - for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2) - ; - } else if (cfg->ihcount < cfg->ihsize * 4) { - nhsize = cfg->ihsize; - for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2) - ; - } - - IPFW_RUNLOCK(ch); - - if (TAILQ_EMPTY(&jhead)) { - CURVNET_RESTORE(); - return; - } - - NAT64STAT_INC(&cfg->stats, jcalls); - DPRINTF(DP_JQUEUE, "count=%d", jcount); - - /* - * TODO: - * What we should do here is to build a hash - * to ensure we don't have lots of duplicate requests. - * Skip this for now. 
- * - * TODO: Limit per-call number of items - */ - - /* Pre-allocate everything for entire chain */ - TAILQ_FOREACH(ji, &jhead, next) { - switch (ji->jtype) { - case JTYPE_NEWHOST: - if (alloc_host6(cfg, ji) != 0) - NAT64STAT_INC(&cfg->stats, jhostfails); - break; - case JTYPE_NEWPORTGROUP: - if (alloc_portgroup(ji) != 0) - NAT64STAT_INC(&cfg->stats, jportfails); - break; - case JTYPE_DELPORTGROUP: - delcount += ji->delcount; - break; - default: - break; - } - } - - /* - * TODO: Alloc hew hash - */ - nhsize = 0; - if (nhsize > 0) { - /* XXX: */ - } - - /* Apply all changes in batch */ - IPFW_UH_WLOCK(ch); - IPFW_WLOCK(ch); - - TAILQ_FOREACH(ji, &jhead, next) { - switch (ji->jtype) { - case JTYPE_NEWHOST: - if (ji->nh != NULL) - attach_host6(cfg, ji); - break; - case JTYPE_NEWPORTGROUP: - if (ji->pg != NULL && - attach_portgroup(cfg, ji) != 0) - NAT64STAT_INC(&cfg->stats, jportfails); - break; - case JTYPE_DELPORTGROUP: - consider_del_portgroup(cfg, ji); - break; - } - } - - if (nhsize > 0) { - /* XXX: Move everything to new hash */ - } - - IPFW_WUNLOCK(ch); - IPFW_UH_WUNLOCK(ch); - - /* Flush unused entries */ - while (!TAILQ_EMPTY(&jhead)) { - ji = TAILQ_FIRST(&jhead); - TAILQ_REMOVE(&jhead, ji, next); - if (ji->nh != NULL) - destroy_host6(ji->nh); - if (ji->pg != NULL) - destroy_portgroup(ji->pg); - if (ji->m != NULL) - reinject_mbuf(cfg, ji); - if (ji->spare_idx != NULL) - uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx); - free(ji, M_IPFW); - } - CURVNET_RESTORE(); -} - -static NAT64NOINLINE struct nat64lsn_job_item * -nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id, - int jtype) -{ - struct nat64lsn_job_item *ji; - struct in6_addr haddr; - uint8_t nat_proto; - - /* - * Do not try to lock possibly contested mutex if we're near the limit. - * Drop packet instead. 
- */ - if (cfg->jlen >= cfg->jmaxlen) { - NAT64STAT_INC(&cfg->stats, jmaxlen); - return (NULL); - } - - memset(&haddr, 0, sizeof(haddr)); - nat_proto = 0; - if (f_id != NULL) { - haddr = f_id->src_ip6; - nat_proto = nat64lsn_proto_map[f_id->proto]; - - DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d", - nat_proto, f_id->proto); - - if (nat_proto == 0) - return (NULL); - } - - ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW, - M_NOWAIT | M_ZERO); - - if (ji == NULL) { - NAT64STAT_INC(&cfg->stats, jnomem); - return (NULL); - } - - ji->jtype = jtype; - - if (f_id != NULL) { - ji->f_id = *f_id; - ji->haddr = haddr; - ji->nat_proto = nat_proto; - } - - return (ji); -} - -static NAT64NOINLINE void -nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) -{ - - if (ji == NULL) - return; - - JQUEUE_LOCK(); - TAILQ_INSERT_TAIL(&cfg->jhead, ji, next); - cfg->jlen++; - NAT64STAT_INC(&cfg->stats, jrequests); - - if (callout_pending(&cfg->jcallout) == 0) - callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg); - JQUEUE_UNLOCK(); -} - -static NAT64NOINLINE void -nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg, - struct nat64lsn_job_head *jhead, int jlen) -{ - - if (TAILQ_EMPTY(jhead)) - return; - - /* Attach current queue to execution one */ - JQUEUE_LOCK(); - TAILQ_CONCAT(&cfg->jhead, jhead, next); - cfg->jlen += jlen; - NAT64STAT_ADD(&cfg->stats, jrequests, jlen); - - if (callout_pending(&cfg->jcallout) == 0) - callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg); - JQUEUE_UNLOCK(); -} - -static unsigned int -flow6_hash(const struct ipfw_flow_id *f_id) -{ - unsigned char hbuf[36]; - - memcpy(hbuf, &f_id->dst_ip6, 16); - memcpy(&hbuf[16], &f_id->src_ip6, 16); - memcpy(&hbuf[32], &f_id->dst_port, 2); - memcpy(&hbuf[32], &f_id->src_port, 2); - - return (djb_hash(hbuf, sizeof(hbuf))); -} - -static NAT64NOINLINE int -nat64lsn_request_host(struct nat64lsn_cfg *cfg, - const struct ipfw_flow_id *f_id, struct mbuf **pm) -{ - struct 
nat64lsn_job_item *ji; - struct mbuf *m; - - m = *pm; - *pm = NULL; - - ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST); - if (ji == NULL) { - m_freem(m); - NAT64STAT_INC(&cfg->stats, dropped); - DPRINTF(DP_DROPS, "failed to create job"); - } else { - ji->m = m; - /* Provide pseudo-random value based on flow */ - ji->fhash = flow6_hash(f_id); - nat64lsn_enqueue_job(cfg, ji); - NAT64STAT_INC(&cfg->stats, jhostsreq); - } - - return (IP_FW_PASS); -} - -static NAT64NOINLINE int -nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg, - const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr, - int needs_idx) -{ - struct nat64lsn_job_item *ji; - struct mbuf *m; - - m = *pm; - *pm = NULL; - - ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP); - if (ji == NULL) { - m_freem(m); - NAT64STAT_INC(&cfg->stats, dropped); - DPRINTF(DP_DROPS, "failed to create job"); - } else { - ji->m = m; - /* Provide pseudo-random value based on flow */ - ji->fhash = flow6_hash(f_id); - ji->aaddr = aaddr; - ji->needs_idx = needs_idx; - nat64lsn_enqueue_job(cfg, ji); - NAT64STAT_INC(&cfg->stats, jportreq); - } - - return (IP_FW_PASS); -} - -static NAT64NOINLINE struct nat64lsn_state * -nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, - int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr) -{ - struct nat64lsn_portgroup *pg; - struct nat64lsn_state *st; - int i, hval, off; - - /* XXX: create additional bitmask for selecting proper portgroup */ - for (i = 0; i < nh->pg_used; i++) { - pg = PORTGROUP_BYSIDX(cfg, nh, i + 1); - if (pg == NULL) - continue; - if (*aaddr == 0) - *aaddr = pg->aaddr; - if (pg->nat_proto != nat_proto) - continue; - - off = PG_GET_FREE_IDX(pg); - if (off != 0) { - /* We have found spare state. Use it */ - off--; - PG_MARK_BUSY_IDX(pg, off); - st = &pg->states[off]; - - /* - * Fill in new info. Assume state was zeroed. - * Timestamp and flags will be filled by caller. 
- */ - st->u.s = kst->u.s; - st->cur.idx = i + 1; - st->cur.off = off; - - /* Insert into host hash table */ - hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1); - st->next = nh->phash[hval]; - nh->phash[hval] = st->cur; - - nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off); - - NAT64STAT_INC(&cfg->stats, screated); - - return (st); - } - /* Saev last used alias affress */ - *aaddr = pg->aaddr; - } - - return (NULL); -} - -static NAT64NOINLINE int -nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id, - struct mbuf **pm) -{ - struct pfloghdr loghdr, *logdata; - char a[INET6_ADDRSTRLEN]; - struct nat64lsn_host *nh; - struct st_ptr sidx; - struct nat64lsn_state *st, kst; - struct nat64lsn_portgroup *pg; - struct icmp6_hdr *icmp6; - uint32_t aaddr; - int action, hval, nat_proto, proto; - uint16_t aport, state_ts, state_flags; - - /* Check if af/protocol is supported and get it short id */ - nat_proto = nat64lsn_proto_map[f_id->proto]; - if (nat_proto == 0) { - /* - * Since we can be called from jobs handler, we need - * to free mbuf by self, do not leave this task to - * ipfw_check_packet(). 
- */ - NAT64STAT_INC(&cfg->stats, noproto); - m_freem(*pm); - *pm = NULL; - return (IP_FW_DENY); - } - - /* Try to find host first */ - I6HASH_FIND(cfg, nh, &f_id->src_ip6); - - if (nh == NULL) - return (nat64lsn_request_host(cfg, f_id, pm)); - - /* Fill-in on-stack state structure */ - kst.u.s.faddr = f_id->dst_ip6.s6_addr32[3]; - kst.u.s.fport = f_id->dst_port; - kst.u.s.lport = f_id->src_port; - - /* Prepare some fields we might need to update */ - hval = 0; - proto = nat64_getlasthdr(*pm, &hval); - if (proto < 0) { - NAT64STAT_INC(&cfg->stats, dropped); - DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); - m_freem(*pm); - *pm = NULL; - return (IP_FW_DENY); - } - - SET_AGE(state_ts); - if (proto == IPPROTO_TCP) - state_flags = convert_tcp_flags( - TCP(mtodo(*pm, hval))->th_flags); - else - state_flags = 0; - if (proto == IPPROTO_ICMPV6) { - /* Alter local port data */ - icmp6 = mtodo(*pm, hval); - if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST || - icmp6->icmp6_type == ICMP6_ECHO_REPLY) - kst.u.s.lport = ntohs(icmp6->icmp6_id); - } - - hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1); - pg = NULL; - st = NULL; - - /* OK, let's find state in host hash */ - NAT64_LOCK(nh); - sidx = nh->phash[hval]; - int k = 0; - while (sidx.idx != 0) { - pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); - st = &pg->states[sidx.off]; - //DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off, - //st->next.idx, st->next.off); - if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto) - break; - if (k++ > 1000) { - DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n", - sidx.idx, sidx.off, st->next.idx, st->next.off); - inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); - DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d", - a, nh, curcpu); - k = 0; - } - sidx = st->next; - } - - if (sidx.idx == 0) { - aaddr = 0; - st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr); - if (st == NULL) { - /* No free states. 
Request more if we can */ - if (nh->pg_used >= cfg->max_chunks) { - /* Limit reached */ - NAT64STAT_INC(&cfg->stats, dropped); - inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); - DPRINTF(DP_DROPS, "PG limit reached " - " for host %s (used %u, allocated %u, " - "limit %u)", a, - nh->pg_used * NAT64_CHUNK_SIZE, - nh->pg_allocated * NAT64_CHUNK_SIZE, - cfg->max_chunks * NAT64_CHUNK_SIZE); - m_freem(*pm); - *pm = NULL; - NAT64_UNLOCK(nh); - return (IP_FW_DENY); - } - if ((nh->pg_allocated <= - nh->pg_used + NAT64LSN_REMAININGPG) && - nh->pg_allocated < cfg->max_chunks) - action = 1; /* Request new indexes */ - else - action = 0; - NAT64_UNLOCK(nh); - //DPRINTF("No state, unlock for %p", nh); - return (nat64lsn_request_portgroup(cfg, f_id, - pm, aaddr, action)); - } - - /* We've got new state. */ - sidx = st->cur; - pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); - } - - /* Okay, state found */ - - /* Update necessary fileds */ - if (st->timestamp != state_ts) - st->timestamp = state_ts; - if ((st->flags & state_flags) != 0) - st->flags |= state_flags; - - /* Copy needed state data */ - aaddr = pg->aaddr; - aport = htons(pg->aport + sidx.off); - - NAT64_UNLOCK(nh); - - if (cfg->flags & NAT64_LOG) { - logdata = &loghdr; - nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off); - } else - logdata = NULL; - - action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->stats, logdata); - if (action == NAT64SKIP) - return (IP_FW_PASS); - if (action == NAT64MFREE) - m_freem(*pm); - *pm = NULL; /* mark mbuf as consumed */ - return (IP_FW_DENY); -} - -/* - * Main dataplane entry point. 
- */ -int -ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args, - ipfw_insn *cmd, int *done) -{ - ipfw_insn *icmd; - struct nat64lsn_cfg *cfg; - int ret; - - IPFW_RLOCK_ASSERT(ch); - - *done = 1; /* terminate the search */ - icmd = cmd + 1; - if (cmd->opcode != O_EXTERNAL_ACTION || - cmd->arg1 != V_nat64lsn_eid || - icmd->opcode != O_EXTERNAL_INSTANCE || - (cfg = NAT64_LOOKUP(ch, icmd)) == NULL) - return (0); - - switch (args->f_id.addr_type) { - case 4: - ret = nat64lsn_translate4(cfg, &args->f_id, &args->m); - break; - case 6: - ret = nat64lsn_translate6(cfg, &args->f_id, &args->m); - break; - default: - return (0); - } - return (ret); -} - -static int -nat64lsn_ctor_host(void *mem, int size, void *arg, int flags) -{ - struct nat64lsn_host *nh; - - nh = (struct nat64lsn_host *)mem; - memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr)); - memset(nh->phash, 0, sizeof(nh->phash)); - return (0); -} - -static int -nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags) -{ - - memset(mem, 0, size); - return (0); -} - -void -nat64lsn_init_internal(void) -{ - - memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map)); - /* Set up supported protocol map */ - nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP; - nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP; - nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP; - nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP; - /* Fill in reverse proto map */ - memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map)); - nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP; - nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP; - nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6; - - JQUEUE_LOCK_INIT(); - nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone", - sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL, - NULL, NULL, UMA_ALIGN_PTR, 0); - nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone", - sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - nat64lsn_pgidx_zone = uma_zcreate("NAT64 
portgroup indexes zone", - sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK, - nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); -} - -void -nat64lsn_uninit_internal(void) -{ - - JQUEUE_LOCK_DESTROY(); - uma_zdestroy(nat64lsn_host_zone); - uma_zdestroy(nat64lsn_pg_zone); - uma_zdestroy(nat64lsn_pgidx_zone); -} - -void -nat64lsn_start_instance(struct nat64lsn_cfg *cfg) -{ - - callout_reset(&cfg->periodic, hz * PERIODIC_DELAY, - nat64lsn_periodic, cfg); -} - -struct nat64lsn_cfg * -nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr) -{ - struct nat64lsn_cfg *cfg; - - cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO); - TAILQ_INIT(&cfg->jhead); - cfg->vp = curvnet; - cfg->ch = ch; - COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK); - - cfg->ihsize = NAT64LSN_HSIZE; - cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW, - M_WAITOK | M_ZERO); - - cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW, - M_WAITOK | M_ZERO); - - callout_init(&cfg->periodic, CALLOUT_MPSAFE); - callout_init(&cfg->jcallout, CALLOUT_MPSAFE); - - return (cfg); -} - -/* - * Destroy all hosts callback. - * Called on module unload when all activity already finished, so - * can work without any locks. 
- */ -static NAT64NOINLINE int -nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg) -{ - struct nat64lsn_portgroup *pg; - int i; - - for (i = nh->pg_used; i > 0; i--) { - pg = PORTGROUP_BYSIDX(cfg, nh, i); - if (pg == NULL) - continue; - cfg->pg[pg->idx] = NULL; - destroy_portgroup(pg); - nh->pg_used--; - } - destroy_host6(nh); - cfg->ihcount--; - return (0); -} - -void -nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg) -{ - struct nat64lsn_host *nh, *tmp; - - JQUEUE_LOCK(); - callout_drain(&cfg->jcallout); - JQUEUE_UNLOCK(); - - callout_drain(&cfg->periodic); - I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg); - DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount); - - COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS); - free(cfg->ih, M_IPFW); - free(cfg->pg, M_IPFW); - free(cfg, M_IPFW); -} - diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.h b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.h deleted file mode 100644 index e6ceb1dd..00000000 --- a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn.h +++ /dev/null @@ -1,351 +0,0 @@ -/*- - * Copyright (c) 2015 Yandex LLC - * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> - * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _IP_FW_NAT64LSN_H_ -#define _IP_FW_NAT64LSN_H_ - -#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */ -#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS) - -#define NAT64_MIN_PORT 1024 -#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS) - -struct st_ptr { - uint8_t idx; /* index in nh->pg_ptr array. - * NOTE: it starts from 1. - */ - uint8_t off; -}; -#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1) -#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY) -#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \ - NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS) - -struct nat64lsn_portgroup; -/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */ -struct nat64lsn_host { - struct rwlock h_lock; /* Host states lock */ - - struct in6_addr addr; - struct nat64lsn_host *next; - uint16_t timestamp; /* Last altered */ - uint16_t hsize; /* ports hash size */ - uint16_t pg_used; /* Number of portgroups used */ -#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before - * requesting of new chunk of indexes. - */ - uint16_t pg_allocated; /* Number of portgroups indexes - * allocated. 
- */ -#define NAT64LSN_HSIZE 64 - struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */ - /* - * PG indexes are stored in chunks with 32 elements. - * The maximum count is limited to 255 due to st_ptr->idx is uint8_t. - */ -#define NAT64LSN_PGIDX_CHUNK 32 -#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \ - NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK) - struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */ -}; - -#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED) -#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED) - -#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock) -#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock) -#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock) -#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock) -#define NAT64_LOCK(h) NAT64_WLOCK(h) -#define NAT64_UNLOCK(h) NAT64_WUNLOCK(h) -#define NAT64_LOCK_INIT(h) do { \ - rw_init(&(h)->h_lock, "NAT64 host lock"); \ - } while (0) - -#define NAT64_LOCK_DESTROY(h) do { \ - rw_destroy(&(h)->h_lock); \ - } while (0) - -/* Internal proto index */ -#define NAT_PROTO_TCP 1 -#define NAT_PROTO_UDP 2 -#define NAT_PROTO_ICMP 3 - -#define NAT_MAX_PROTO 4 -extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO]; - -VNET_DECLARE(uint16_t, nat64lsn_eid); -#define V_nat64lsn_eid VNET(nat64lsn_eid) -#define IPFW_TLV_NAT64LSN_NAME IPFW_TLV_EACTION_NAME(V_nat64lsn_eid) - -/* Timestamp macro */ -#define _CT ((int)time_uptime % 65536) -#define SET_AGE(x) (x) = _CT -#define GET_AGE(x) ((_CT >= (x)) ? 
_CT - (x) : \ - (int)65536 + _CT - (x)) - -#ifdef __LP64__ -/* ffsl() is capable of checking 64-bit ints */ -#define _FFS64 -#endif - -/* 16 bytes */ -struct nat64lsn_state { - union { - struct { - in_addr_t faddr; /* Remote IPv4 address */ - uint16_t fport; /* Remote IPv4 port */ - uint16_t lport; /* Local IPv6 port */ - }s; - uint64_t hkey; - } u; - uint8_t nat_proto; - uint8_t flags; - uint16_t timestamp; - struct st_ptr cur; /* Index of portgroup in nat64lsn_host */ - struct st_ptr next; /* Next entry index */ -}; - -/* - * 1024+32 bytes per 64 states, used to store state - * AND for outside-in state lookup - */ -struct nat64lsn_portgroup { - struct nat64lsn_host *host; /* IPv6 source host info */ - in_addr_t aaddr; /* Alias addr, network format */ - uint16_t aport; /* Base port */ - uint16_t timestamp; - uint8_t nat_proto; - uint8_t spare[3]; - uint32_t idx; -#ifdef _FFS64 - uint64_t freemask; /* Mask of free entries */ -#else - uint32_t freemask[2]; /* Mask of free entries */ -#endif - struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */ -}; -#ifdef _FFS64 -#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx)) -#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx)) -#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx))) -#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0) -#define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask)) -#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0) -#else -#define PG_MARK_BUSY_IDX(_pg, _idx) \ - (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32)) -#define PG_MARK_FREE_IDX(_pg, _idx) \ - (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32)) -#define PG_IS_FREE_IDX(_pg, _idx) \ - ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32))) -#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0) -#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg) -#define PG_IS_EMPTY(_pg) \ - ((((_pg)->freemask[0] + 1) == 0 && 
((_pg)->freemask[1] + 1) == 0)) - -static inline int -_pg_get_free_idx(const struct nat64lsn_portgroup *pg) -{ - int i; - - if ((i = ffsl(pg->freemask[0])) != 0) - return (i); - if ((i = ffsl(pg->freemask[1])) != 0) - return (i + 32); - return (0); -} - -#endif - -TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item); - -#define NAT64LSN_FLAGSMASK (NAT64_LOG) -struct nat64lsn_cfg { - struct named_object no; - //struct nat64_exthost *ex; /* Pointer to external addr array */ - struct nat64lsn_portgroup **pg; /* XXX: array of pointers */ - struct nat64lsn_host **ih; /* Host hash */ - uint32_t prefix4; /* IPv4 prefix */ - uint32_t pmask4; /* IPv4 prefix mask */ - uint32_t ihsize; /* IPv6 host hash size */ - uint8_t plen4; - uint8_t plen6; - uint8_t nomatch_verdict;/* What to return to ipfw on no-match */ - uint8_t nomatch_final; /* Exit outer loop? */ - struct in6_addr prefix6; /* IPv6 prefix to embed IPv4 hosts */ - - uint32_t ihcount; /* Number of items in host hash */ - int max_chunks; /* Max chunks per client */ - int agg_prefix_len; /* Prefix length to count */ - int agg_prefix_max; /* Max hosts per agg prefix */ - uint32_t jmaxlen; /* Max jobqueue length */ - uint32_t flags; - uint16_t min_chunk; /* Min port group # to use */ - uint16_t max_chunk; /* Max port group # to use */ - uint16_t nh_delete_delay; /* Stale host delete delay */ - uint16_t pg_delete_delay; /* Stale portgroup del delay */ - uint16_t st_syn_ttl; /* TCP syn expire */ - uint16_t st_close_ttl; /* TCP fin expire */ - uint16_t st_estab_ttl; /* TCP established expire */ - uint16_t st_udp_ttl; /* UDP expire */ - uint16_t st_icmp_ttl; /* ICMP expire */ - uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */ - - struct callout periodic; - struct callout jcallout; - struct ip_fw_chain *ch; - struct vnet *vp; - struct nat64lsn_job_head jhead; - int jlen; - char name[64]; /* Nat instance name */ - nat64_stats_block stats; -}; - -struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch, - 
size_t numaddr); -void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg); -void nat64lsn_start_instance(struct nat64lsn_cfg *cfg); -void nat64lsn_init_internal(void); -void nat64lsn_uninit_internal(void); -int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args, - ipfw_insn *cmd, int *done); - -void -nat64lsn_dump_state(const struct nat64lsn_cfg *cfg, - const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st, - const char *px, int off); -/* - * Portgroup layout - * addr x nat_proto x port_off - * - */ - -#define _ADDR_PG_PROTO_COUNT (65536 >> NAT64_CHUNK_SIZE_BITS) -#define _ADDR_PG_COUNT (_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO) - -#define GET_ADDR_IDX(_cfg, _addr) ((_addr) - ((_cfg)->prefix4)) -#define __GET_PORTGROUP_IDX(_proto, _port) \ - ((_proto - 1) * _ADDR_PG_PROTO_COUNT + \ - ((_port) >> NAT64_CHUNK_SIZE_BITS)) - -#define _GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port) \ - GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT + \ - __GET_PORTGROUP_IDX(_proto, _port) -#define GET_PORTGROUP(_cfg, _addr, _proto, _port) \ - ((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)]) - -#define PORTGROUP_CHUNK(_nh, _idx) \ - ((_nh)->pg_ptr[(_idx)]) -#define PORTGROUP_BYSIDX(_cfg, _nh, _idx) \ - (PORTGROUP_CHUNK(_nh, (_idx - 1) / NAT64LSN_PGIDX_CHUNK) \ - [((_idx) - 1) % NAT64LSN_PGIDX_CHUNK]) - - -/* Chained hash table */ -#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \ - unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ - _PX##lock(_ph, _buck); \ - _x = _PX##first(_ph, _buck); \ - for ( ; _x != NULL; _x = _PX##next(_x)) { \ - if (_PX##cmp(_key, _PX##val(_x))) \ - break; \ - } \ - if (_x == NULL) \ - _PX##unlock(_ph, _buck); \ -} while(0) - -#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \ - _PX##unlock(_ph, _buck); - -#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \ - unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ - _PX##unlock(_ph, _buck); \ -} while(0) - -#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \ - unsigned int _buck = 
_PX##hash(_PX##val(_i)) & (_hsize - 1); \ - _PX##lock(_ph, _buck); \ - _PX##next(_i) = _PX##first(_ph, _buck); \ - _PX##first(_ph, _buck) = _i; \ - _PX##unlock(_ph, _buck); \ -} while(0) - -#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \ - unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ - _PX##lock(_ph, _buck); \ - _x = _PX##first(_ph, _buck); \ - _tmp = NULL; \ - for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \ - if (_PX##cmp(_key, _PX##val(_x))) \ - break; \ - } \ - if (_x != NULL) { \ - if (_tmp == NULL) \ - _PX##first(_ph, _buck) = _PX##next(_x); \ - else \ - _PX##next(_tmp) = _PX##next(_x); \ - } \ - _PX##unlock(_ph, _buck); \ -} while(0) - -#define CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do { \ - for (unsigned int _i = 0; _i < _hsize; _i++) { \ - _PX##lock(_ph, _i); \ - _x = _PX##first(_ph, _i); \ - _tmp = NULL; \ - for (; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \ - if (_cb(_x, _arg) == 0) \ - continue; \ - if (_tmp == NULL) \ - _PX##first(_ph, _i) = _PX##next(_x); \ - else \ - _tmp = _PX##next(_x); \ - } \ - _PX##unlock(_ph, _i); \ - } \ -} while(0) - -#define CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do { \ - unsigned int _buck; \ - for (unsigned int _i = 0; _i < _hsize; _i++) { \ - _x = _PX##first(_ph, _i); \ - _y = _x; \ - while (_y != NULL) { \ - _buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\ - _y = _PX##next(_x); \ - _PX##next(_x) = _PX##first(_nph, _buck); \ - _PX##first(_nph, _buck) = _x; \ - } \ - } \ -} while(0) - -#endif /* _IP_FW_NAT64LSN_H_ */ - diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c b/freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c deleted file mode 100644 index f8278c0c..00000000 --- a/freebsd/sys/netpfil/ipfw/nat64/nat64lsn_control.c +++ /dev/null @@ -1,919 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2015 Yandex LLC - * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> - * Copyright (c) 2016 Andrey V. 
Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/counter.h> -#include <sys/errno.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <sys/rmlock.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/sockopt.h> -#include <sys/queue.h> - -#include <net/if.h> -#include <net/pfil.h> - -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/nat64/ip_fw_nat64.h> -#include <netpfil/ipfw/nat64/nat64lsn.h> -#include <netinet6/ip_fw_nat64.h> - -VNET_DEFINE(uint16_t, nat64lsn_eid) = 0; - -static struct nat64lsn_cfg * -nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set) -{ - struct nat64lsn_cfg *cfg; - - cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set, - IPFW_TLV_NAT64LSN_NAME, name); - - return (cfg); -} - -static void -nat64lsn_default_config(ipfw_nat64lsn_cfg *uc) -{ - - if (uc->max_ports == 0) - uc->max_ports = NAT64LSN_MAX_PORTS; - else - uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE); - if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR) - uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR; - if (uc->jmaxlen == 0) - uc->jmaxlen = NAT64LSN_JMAXLEN; - if (uc->jmaxlen > 65536) - uc->jmaxlen = 65536; - if (uc->nh_delete_delay == 0) - uc->nh_delete_delay = NAT64LSN_HOST_AGE; - if (uc->pg_delete_delay == 0) - uc->pg_delete_delay = NAT64LSN_PG_AGE; - if (uc->st_syn_ttl == 0) - uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE; - if (uc->st_close_ttl == 0) - uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE; - if (uc->st_estab_ttl == 0) - uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE; - if (uc->st_udp_ttl == 0) - uc->st_udp_ttl = NAT64LSN_UDP_AGE; - if (uc->st_icmp_ttl == 0) - uc->st_icmp_ttl = NAT64LSN_ICMP_AGE; -} - -/* - * Creates new nat64lsn instance. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ] - * - * Returns 0 on success - */ -static int -nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_lheader *olh; - ipfw_nat64lsn_cfg *uc; - struct nat64lsn_cfg *cfg; - struct namedobj_instance *ni; - uint32_t addr4, mask4; - - if (sd->valsize != sizeof(*olh) + sizeof(*uc)) - return (EINVAL); - - olh = (ipfw_obj_lheader *)sd->kbuf; - uc = (ipfw_nat64lsn_cfg *)(olh + 1); - - if (ipfw_check_object_name_generic(uc->name) != 0) - return (EINVAL); - - if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS) - return (EINVAL); - - if (uc->plen4 > 32) - return (EINVAL); - if (uc->plen6 > 128 || ((uc->plen6 % 8) != 0)) - return (EINVAL); - - /* XXX: Check prefix4 to be global */ - addr4 = ntohl(uc->prefix4.s_addr); - mask4 = ~((1 << (32 - uc->plen4)) - 1); - if ((addr4 & mask4) != addr4) - return (EINVAL); - - /* XXX: Check prefix6 */ - if (uc->min_port == 0) - uc->min_port = NAT64_MIN_PORT; - if (uc->max_port == 0) - uc->max_port = 65535; - if (uc->min_port > uc->max_port) - return (EINVAL); - uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE); - uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE); - - nat64lsn_default_config(uc); - - ni = CHAIN_TO_SRV(ch); - IPFW_UH_RLOCK(ch); - if (nat64lsn_find(ni, uc->name, uc->set) != NULL) { - IPFW_UH_RUNLOCK(ch); - return (EEXIST); - } - IPFW_UH_RUNLOCK(ch); - - cfg = nat64lsn_init_instance(ch, 1 << (32 - uc->plen4)); - strlcpy(cfg->name, uc->name, sizeof(cfg->name)); - cfg->no.name = cfg->name; - cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME; - cfg->no.set = uc->set; - - cfg->prefix4 = addr4; - cfg->pmask4 = addr4 | ~mask4; - /* XXX: Copy 96 bits */ - cfg->plen6 = 96; - memcpy(&cfg->prefix6, &uc->prefix6, cfg->plen6 / 8); - cfg->plen4 = uc->plen4; - cfg->flags = uc->flags & NAT64LSN_FLAGSMASK; - cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE; - cfg->agg_prefix_len = uc->agg_prefix_len; - 
cfg->agg_prefix_max = uc->agg_prefix_max; - - cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE; - cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE; - - cfg->jmaxlen = uc->jmaxlen; - cfg->nh_delete_delay = uc->nh_delete_delay; - cfg->pg_delete_delay = uc->pg_delete_delay; - cfg->st_syn_ttl = uc->st_syn_ttl; - cfg->st_close_ttl = uc->st_close_ttl; - cfg->st_estab_ttl = uc->st_estab_ttl; - cfg->st_udp_ttl = uc->st_udp_ttl; - cfg->st_icmp_ttl = uc->st_icmp_ttl; - - cfg->nomatch_verdict = IP_FW_DENY; - cfg->nomatch_final = 1; /* Exit outer loop by default */ - - IPFW_UH_WLOCK(ch); - - if (nat64lsn_find(ni, uc->name, uc->set) != NULL) { - IPFW_UH_WUNLOCK(ch); - nat64lsn_destroy_instance(cfg); - return (EEXIST); - } - - if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) { - IPFW_UH_WUNLOCK(ch); - nat64lsn_destroy_instance(cfg); - return (ENOSPC); - } - ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no); - - /* Okay, let's link data */ - IPFW_WLOCK(ch); - SRV_OBJECT(ch, cfg->no.kidx) = cfg; - IPFW_WUNLOCK(ch); - - nat64lsn_start_instance(cfg); - - IPFW_UH_WUNLOCK(ch); - return (0); -} - -static void -nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg) -{ - - IPFW_UH_WLOCK_ASSERT(ch); - - ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); - ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); -} - -/* - * Destroys nat64 instance. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * - * Returns 0 on success - */ -static int -nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct nat64lsn_cfg *cfg; - ipfw_obj_header *oh; - - if (sd->valsize != sizeof(*oh)) - return (EINVAL); - - oh = (ipfw_obj_header *)op3; - - IPFW_UH_WLOCK(ch); - cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - - if (cfg->no.refcnt > 0) { - IPFW_UH_WUNLOCK(ch); - return (EBUSY); - } - - IPFW_WLOCK(ch); - SRV_OBJECT(ch, cfg->no.kidx) = NULL; - IPFW_WUNLOCK(ch); - - nat64lsn_detach_config(ch, cfg); - IPFW_UH_WUNLOCK(ch); - - nat64lsn_destroy_instance(cfg); - return (0); -} - -#define __COPY_STAT_FIELD(_cfg, _stats, _field) \ - (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field) -static void -export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, - struct ipfw_nat64lsn_stats *stats) -{ - - __COPY_STAT_FIELD(cfg, stats, opcnt64); - __COPY_STAT_FIELD(cfg, stats, opcnt46); - __COPY_STAT_FIELD(cfg, stats, ofrags); - __COPY_STAT_FIELD(cfg, stats, ifrags); - __COPY_STAT_FIELD(cfg, stats, oerrors); - __COPY_STAT_FIELD(cfg, stats, noroute4); - __COPY_STAT_FIELD(cfg, stats, noroute6); - __COPY_STAT_FIELD(cfg, stats, nomatch4); - __COPY_STAT_FIELD(cfg, stats, noproto); - __COPY_STAT_FIELD(cfg, stats, nomem); - __COPY_STAT_FIELD(cfg, stats, dropped); - - __COPY_STAT_FIELD(cfg, stats, jcalls); - __COPY_STAT_FIELD(cfg, stats, jrequests); - __COPY_STAT_FIELD(cfg, stats, jhostsreq); - __COPY_STAT_FIELD(cfg, stats, jportreq); - __COPY_STAT_FIELD(cfg, stats, jhostfails); - __COPY_STAT_FIELD(cfg, stats, jportfails); - __COPY_STAT_FIELD(cfg, stats, jmaxlen); - __COPY_STAT_FIELD(cfg, stats, jnomem); - __COPY_STAT_FIELD(cfg, stats, jreinjected); - __COPY_STAT_FIELD(cfg, stats, screated); - __COPY_STAT_FIELD(cfg, stats, sdeleted); - __COPY_STAT_FIELD(cfg, stats, spgcreated); - 
__COPY_STAT_FIELD(cfg, stats, spgdeleted); - - stats->hostcount = cfg->ihcount; - stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP]; - stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP]; - stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP]; -} -#undef __COPY_STAT_FIELD - -static void -nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, - ipfw_nat64lsn_cfg *uc) -{ - - uc->flags = cfg->flags & NAT64LSN_FLAGSMASK; - uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE; - uc->agg_prefix_len = cfg->agg_prefix_len; - uc->agg_prefix_max = cfg->agg_prefix_max; - - uc->jmaxlen = cfg->jmaxlen; - uc->nh_delete_delay = cfg->nh_delete_delay; - uc->pg_delete_delay = cfg->pg_delete_delay; - uc->st_syn_ttl = cfg->st_syn_ttl; - uc->st_close_ttl = cfg->st_close_ttl; - uc->st_estab_ttl = cfg->st_estab_ttl; - uc->st_udp_ttl = cfg->st_udp_ttl; - uc->st_icmp_ttl = cfg->st_icmp_ttl; - uc->prefix4.s_addr = htonl(cfg->prefix4); - uc->prefix6 = cfg->prefix6; - uc->plen4 = cfg->plen4; - uc->plen6 = cfg->plen6; - uc->set = cfg->no.set; - strlcpy(uc->name, cfg->no.name, sizeof(uc->name)); -} - -struct nat64_dump_arg { - struct ip_fw_chain *ch; - struct sockopt_data *sd; -}; - -static int -export_config_cb(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg; - ipfw_nat64lsn_cfg *uc; - - uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd, - sizeof(*uc)); - nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc); - return (0); -} - -/* - * Lists all nat64 lsn instances currently available in kernel. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ] - * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ] - * - * Returns 0 on success - */ -static int -nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_lheader *olh; - struct nat64_dump_arg da; - - /* Check minimum header size */ - if (sd->valsize < sizeof(ipfw_obj_lheader)) - return (EINVAL); - - olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); - - IPFW_UH_RLOCK(ch); - olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), - IPFW_TLV_NAT64LSN_NAME); - olh->objsize = sizeof(ipfw_nat64lsn_cfg); - olh->size = sizeof(*olh) + olh->count * olh->objsize; - - if (sd->valsize < olh->size) { - IPFW_UH_RUNLOCK(ch); - return (ENOMEM); - } - memset(&da, 0, sizeof(da)); - da.ch = ch; - da.sd = sd; - ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da, - IPFW_TLV_NAT64LSN_NAME); - IPFW_UH_RUNLOCK(ch); - - return (0); -} - -/* - * Change existing nat64lsn instance configuration. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ] - * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ] - * - * Returns 0 on success - */ -static int -nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, - struct sockopt_data *sd) -{ - ipfw_obj_header *oh; - ipfw_nat64lsn_cfg *uc; - struct nat64lsn_cfg *cfg; - struct namedobj_instance *ni; - - if (sd->valsize != sizeof(*oh) + sizeof(*uc)) - return (EINVAL); - - oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, - sizeof(*oh) + sizeof(*uc)); - uc = (ipfw_nat64lsn_cfg *)(oh + 1); - - if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || - oh->ntlv.set >= IPFW_MAX_SETS) - return (EINVAL); - - ni = CHAIN_TO_SRV(ch); - if (sd->sopt->sopt_dir == SOPT_GET) { - IPFW_UH_RLOCK(ch); - cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_RUNLOCK(ch); - return (EEXIST); - } - nat64lsn_export_config(ch, cfg, uc); - IPFW_UH_RUNLOCK(ch); - return (0); - } - - nat64lsn_default_config(uc); - - IPFW_UH_WLOCK(ch); - cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_WUNLOCK(ch); - return (EEXIST); - } - - /* - * For now allow to change only following values: - * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age, - * tcp_est_age, udp_age, icmp_age, flags, max_ports. - */ - - cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE; - cfg->jmaxlen = uc->jmaxlen; - cfg->nh_delete_delay = uc->nh_delete_delay; - cfg->pg_delete_delay = uc->pg_delete_delay; - cfg->st_syn_ttl = uc->st_syn_ttl; - cfg->st_close_ttl = uc->st_close_ttl; - cfg->st_estab_ttl = uc->st_estab_ttl; - cfg->st_udp_ttl = uc->st_udp_ttl; - cfg->st_icmp_ttl = uc->st_icmp_ttl; - cfg->flags = uc->flags & NAT64LSN_FLAGSMASK; - - IPFW_UH_WUNLOCK(ch); - - return (0); -} - -/* - * Get nat64lsn statistics. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * Reply: [ ipfw_obj_header ipfw_counter_tlv ] - * - * Returns 0 on success - */ -static int -nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, - struct sockopt_data *sd) -{ - struct ipfw_nat64lsn_stats stats; - struct nat64lsn_cfg *cfg; - ipfw_obj_header *oh; - ipfw_obj_ctlv *ctlv; - size_t sz; - - sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); - if (sd->valsize % sizeof(uint64_t)) - return (EINVAL); - if (sd->valsize < sz) - return (ENOMEM); - oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); - if (oh == NULL) - return (EINVAL); - memset(&stats, 0, sizeof(stats)); - - IPFW_UH_RLOCK(ch); - cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_RUNLOCK(ch); - return (ESRCH); - } - - export_stats(ch, cfg, &stats); - IPFW_UH_RUNLOCK(ch); - - ctlv = (ipfw_obj_ctlv *)(oh + 1); - memset(ctlv, 0, sizeof(*ctlv)); - ctlv->head.type = IPFW_TLV_COUNTERS; - ctlv->head.length = sz - sizeof(ipfw_obj_header); - ctlv->count = sizeof(stats) / sizeof(uint64_t); - ctlv->objsize = sizeof(uint64_t); - ctlv->version = IPFW_NAT64_VERSION; - memcpy(ctlv + 1, &stats, sizeof(stats)); - return (0); -} - -/* - * Reset nat64lsn statistics. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * - * Returns 0 on success - */ -static int -nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, - struct sockopt_data *sd) -{ - struct nat64lsn_cfg *cfg; - ipfw_obj_header *oh; - - if (sd->valsize != sizeof(*oh)) - return (EINVAL); - oh = (ipfw_obj_header *)sd->kbuf; - if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || - oh->ntlv.set >= IPFW_MAX_SETS) - return (EINVAL); - - IPFW_UH_WLOCK(ch); - cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS); - IPFW_UH_WUNLOCK(ch); - return (0); -} - -/* - * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg - * ipfw_nat64lsn_state x count, ... ] ] - */ -static int -export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg, - ipfw_nat64lsn_stg *stg, struct sockopt_data *sd) -{ - ipfw_nat64lsn_state *ste; - struct nat64lsn_state *st; - int i, count; - - NAT64_LOCK(pg->host); - count = 0; - for (i = 0; i < 64; i++) { - if (PG_IS_BUSY_IDX(pg, i)) - count++; - } - DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count); - - if (count == 0) { - stg->count = 0; - NAT64_UNLOCK(pg->host); - return (0); - } - ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd, - count * sizeof(ipfw_nat64lsn_state)); - if (ste == NULL) { - NAT64_UNLOCK(pg->host); - return (1); - } - - stg->alias4.s_addr = pg->aaddr; - stg->proto = nat64lsn_rproto_map[pg->nat_proto]; - stg->flags = 0; - stg->host6 = pg->host->addr; - stg->count = count; - for (i = 0; i < 64; i++) { - if (PG_IS_FREE_IDX(pg, i)) - continue; - st = &pg->states[i]; - ste->daddr.s_addr = st->u.s.faddr; - ste->dport = st->u.s.fport; - ste->aport = pg->aport + i; - ste->sport = st->u.s.lport; - ste->flags = st->flags; /* XXX filter flags */ - ste->idle = GET_AGE(st->timestamp); - ste++; - } - NAT64_UNLOCK(pg->host); - - return (0); -} - -static int 
-get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, - uint16_t *port) -{ - - if (*port < 65536 - NAT64_CHUNK_SIZE) { - *port += NAT64_CHUNK_SIZE; - return (0); - } - *port = 0; - - if (*nat_proto < NAT_MAX_PROTO - 1) { - *nat_proto += 1; - return (0); - } - *nat_proto = 1; - - if (*addr < cfg->pmask4) { - *addr += 1; - return (0); - } - - /* End of space. */ - return (1); -} - -#define PACK_IDX(addr, proto, port) \ - ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8) -#define UNPACK_IDX(idx, addr, proto, port) \ - (addr) = (uint32_t)((idx) >> 32); \ - (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \ - (proto) = (uint8_t)(((idx) >> 8) & 0xFF) - -static struct nat64lsn_portgroup * -get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, - uint16_t *port) -{ - struct nat64lsn_portgroup *pg; - uint64_t pre_pack, post_pack; - - pg = NULL; - pre_pack = PACK_IDX(*addr, *nat_proto, *port); - for (;;) { - if (get_next_idx(cfg, addr, nat_proto, port) != 0) { - /* End of states */ - return (pg); - } - - pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port); - if (pg != NULL) - break; - } - - post_pack = PACK_IDX(*addr, *nat_proto, *port); - if (pre_pack == post_pack) - DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d", - *addr, *nat_proto, *port); - return (pg); -} - -static NAT64NOINLINE struct nat64lsn_portgroup * -get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, - uint16_t *port) -{ - struct nat64lsn_portgroup *pg; - - pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port); - if (pg == NULL) - pg = get_next_pg(cfg, addr, nat_proto, port); - - return (pg); -} - -/* - * Lists nat64lsn states. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]] - * Reply: [ ipfw_obj_header ipfw_obj_data [ - * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ] - * - * Returns 0 on success - */ -static int -nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_header *oh; - ipfw_obj_data *od; - ipfw_nat64lsn_stg *stg; - struct nat64lsn_cfg *cfg; - struct nat64lsn_portgroup *pg, *pg_next; - uint64_t next_idx; - size_t sz; - uint32_t addr, states; - uint16_t port; - uint8_t nat_proto; - - sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + - sizeof(uint64_t); - /* Check minimum header size */ - if (sd->valsize < sz) - return (EINVAL); - - oh = (ipfw_obj_header *)sd->kbuf; - od = (ipfw_obj_data *)(oh + 1); - if (od->head.type != IPFW_TLV_OBJDATA || - od->head.length != sz - sizeof(ipfw_obj_header)) - return (EINVAL); - - next_idx = *(uint64_t *)(od + 1); - /* Translate index to the request position to start from */ - UNPACK_IDX(next_idx, addr, nat_proto, port); - if (nat_proto >= NAT_MAX_PROTO) - return (EINVAL); - if (nat_proto == 0 && addr != 0) - return (EINVAL); - - IPFW_UH_RLOCK(ch); - cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_RUNLOCK(ch); - return (ESRCH); - } - /* Fill in starting point */ - if (addr == 0) { - addr = cfg->prefix4; - nat_proto = 1; - port = 0; - } - if (addr < cfg->prefix4 || addr > cfg->pmask4) { - IPFW_UH_RUNLOCK(ch); - DPRINTF(DP_GENERIC | DP_STATE, "XXX: %ju %u %u", - (uintmax_t)next_idx, addr, cfg->pmask4); - return (EINVAL); - } - - sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + - sizeof(ipfw_nat64lsn_stg); - if (sd->valsize < sz) - return (ENOMEM); - oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz); - od = (ipfw_obj_data *)(oh + 1); - od->head.type = IPFW_TLV_OBJDATA; - od->head.length = sz - sizeof(ipfw_obj_header); - stg = (ipfw_nat64lsn_stg *)(od + 1); - - pg = get_first_pg(cfg, &addr, 
&nat_proto, &port); - if (pg == NULL) { - /* No states */ - stg->next_idx = 0xFF; - stg->count = 0; - IPFW_UH_RUNLOCK(ch); - return (0); - } - states = 0; - pg_next = NULL; - while (pg != NULL) { - pg_next = get_next_pg(cfg, &addr, &nat_proto, &port); - if (pg_next == NULL) - stg->next_idx = 0xFF; - else - stg->next_idx = PACK_IDX(addr, nat_proto, port); - - if (export_pg_states(cfg, pg, stg, sd) != 0) { - IPFW_UH_RUNLOCK(ch); - return (states == 0 ? ENOMEM: 0); - } - states += stg->count; - od->head.length += stg->count * sizeof(ipfw_nat64lsn_state); - sz += stg->count * sizeof(ipfw_nat64lsn_state); - if (pg_next != NULL) { - sz += sizeof(ipfw_nat64lsn_stg); - if (sd->valsize < sz) - break; - stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd, - sizeof(ipfw_nat64lsn_stg)); - } - pg = pg_next; - } - IPFW_UH_RUNLOCK(ch); - return (0); -} - -static struct ipfw_sopt_handler scodes[] = { - { IP_FW_NAT64LSN_CREATE, 0, HDIR_BOTH, nat64lsn_create }, - { IP_FW_NAT64LSN_DESTROY,0, HDIR_SET, nat64lsn_destroy }, - { IP_FW_NAT64LSN_CONFIG, 0, HDIR_BOTH, nat64lsn_config }, - { IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list }, - { IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats }, - { IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats }, - { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states }, -}; - -static int -nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) -{ - ipfw_insn *icmd; - - icmd = cmd - 1; - if (icmd->opcode != O_EXTERNAL_ACTION || - icmd->arg1 != V_nat64lsn_eid) - return (1); - - *puidx = cmd->arg1; - *ptype = 0; - return (0); -} - -static void -nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx) -{ - - cmd->arg1 = idx; -} - -static int -nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, - struct named_object **pno) -{ - int err; - - err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, - IPFW_TLV_NAT64LSN_NAME, pno); - return (err); -} - -static struct named_object * -nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t 
idx) -{ - struct namedobj_instance *ni; - struct named_object *no; - - IPFW_UH_WLOCK_ASSERT(ch); - ni = CHAIN_TO_SRV(ch); - no = ipfw_objhash_lookup_kidx(ni, idx); - KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx)); - - return (no); -} - -static int -nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, - enum ipfw_sets_cmd cmd) -{ - - return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME, - set, new_set, cmd)); -} - -static struct opcode_obj_rewrite opcodes[] = { - { - .opcode = O_EXTERNAL_INSTANCE, - .etlv = IPFW_TLV_EACTION /* just show it isn't table */, - .classifier = nat64lsn_classify, - .update = nat64lsn_update_arg1, - .find_byname = nat64lsn_findbyname, - .find_bykidx = nat64lsn_findbykidx, - .manage_sets = nat64lsn_manage_sets, - }, -}; - -static int -destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - struct nat64lsn_cfg *cfg; - struct ip_fw_chain *ch; - - ch = (struct ip_fw_chain *)arg; - cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx); - SRV_OBJECT(ch, no->kidx) = NULL; - nat64lsn_detach_config(ch, cfg); - nat64lsn_destroy_instance(cfg); - return (0); -} - -int -nat64lsn_init(struct ip_fw_chain *ch, int first) -{ - - if (first != 0) - nat64lsn_init_internal(); - V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn"); - if (V_nat64lsn_eid == 0) - return (ENXIO); - IPFW_ADD_SOPT_HANDLER(first, scodes); - IPFW_ADD_OBJ_REWRITER(first, opcodes); - return (0); -} - -void -nat64lsn_uninit(struct ip_fw_chain *ch, int last) -{ - - IPFW_DEL_OBJ_REWRITER(last, opcodes); - IPFW_DEL_SOPT_HANDLER(last, scodes); - ipfw_del_eaction(ch, V_nat64lsn_eid); - /* - * Since we already have deregistered external action, - * our named objects become unaccessible via rules, because - * all rules were truncated by ipfw_del_eaction(). - * So, we can unlink and destroy our named objects without holding - * IPFW_WLOCK(). 
- */ - IPFW_UH_WLOCK(ch); - ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, - IPFW_TLV_NAT64LSN_NAME); - V_nat64lsn_eid = 0; - IPFW_UH_WUNLOCK(ch); - if (last != 0) - nat64lsn_uninit_internal(); -} - diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c deleted file mode 100644 index 8419b6d2..00000000 --- a/freebsd/sys/netpfil/ipfw/nat64/nat64stl.c +++ /dev/null @@ -1,264 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2015-2016 Yandex LLC - * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/counter.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <sys/rmlock.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/sysctl.h> - -#include <net/if.h> -#include <net/if_var.h> -#include <net/if_pflog.h> -#include <net/pfil.h> - -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/ip_icmp.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> -#include <netinet/ip6.h> -#include <netinet/icmp6.h> -#include <netinet6/ip_fw_nat64.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/nat64/ip_fw_nat64.h> -#include <netpfil/ipfw/nat64/nat64_translate.h> -#include <netpfil/ipfw/nat64/nat64stl.h> -#include <netpfil/pf/pf.h> - -#define NAT64_LOOKUP(chain, cmd) \ - (struct nat64stl_cfg *)SRV_OBJECT((chain), (cmd)->arg1) - -static void -nat64stl_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family, - uint32_t kidx) -{ - static uint32_t pktid = 0; - - memset(plog, 0, sizeof(*plog)); - plog->length = PFLOG_REAL_HDRLEN; - plog->af = family; - plog->action = PF_NAT; - plog->dir = PF_IN; - plog->rulenr = htonl(kidx); - plog->subrulenr = htonl(++pktid); - plog->ruleset[0] = '\0'; - strlcpy(plog->ifname, "NAT64STL", sizeof(plog->ifname)); - ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m); -} - -static int -nat64stl_handle_ip4(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg, - struct mbuf *m, uint32_t tablearg) -{ - struct pfloghdr loghdr, *logdata; - struct in6_addr saddr, daddr; - struct ip *ip; - - ip = mtod(m, struct ip*); - if (nat64_check_ip4(ip->ip_src.s_addr) != 0 || - nat64_check_ip4(ip->ip_dst.s_addr) != 0 || - nat64_check_private_ip4(ip->ip_src.s_addr) != 0 || - nat64_check_private_ip4(ip->ip_dst.s_addr) != 0) - return (NAT64SKIP); - - daddr = TARG_VAL(chain, tablearg, nh6); - if (nat64_check_ip6(&daddr) != 0) - return (NAT64MFREE); - 
saddr = cfg->prefix6; - nat64_set_ip4(&saddr, ip->ip_src.s_addr); - - if (cfg->flags & NAT64_LOG) { - logdata = &loghdr; - nat64stl_log(logdata, m, AF_INET, cfg->no.kidx); - } else - logdata = NULL; - return (nat64_do_handle_ip4(m, &saddr, &daddr, 0, &cfg->stats, - logdata)); -} - -static int -nat64stl_handle_ip6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg, - struct mbuf *m, uint32_t tablearg) -{ - struct pfloghdr loghdr, *logdata; - struct ip6_hdr *ip6; - uint32_t aaddr; - - aaddr = htonl(TARG_VAL(chain, tablearg, nh4)); - - /* - * NOTE: we expect ipfw_chk() did m_pullup() up to upper level - * protocol's headers. Also we skip some checks, that ip6_input(), - * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. - */ - ip6 = mtod(m, struct ip6_hdr *); - /* Check ip6_dst matches configured prefix */ - if (bcmp(&ip6->ip6_dst, &cfg->prefix6, cfg->plen6 / 8) != 0) - return (NAT64SKIP); - - if (cfg->flags & NAT64_LOG) { - logdata = &loghdr; - nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx); - } else - logdata = NULL; - return (nat64_do_handle_ip6(m, aaddr, 0, &cfg->stats, logdata)); -} - -static int -nat64stl_handle_icmp6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg, - struct mbuf *m) -{ - struct pfloghdr loghdr, *logdata; - nat64_stats_block *stats; - struct ip6_hdr *ip6i; - struct icmp6_hdr *icmp6; - uint32_t tablearg; - int hlen, proto; - - hlen = 0; - stats = &cfg->stats; - proto = nat64_getlasthdr(m, &hlen); - if (proto != IPPROTO_ICMPV6) { - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - icmp6 = mtodo(m, hlen); - switch (icmp6->icmp6_type) { - case ICMP6_DST_UNREACH: - case ICMP6_PACKET_TOO_BIG: - case ICMP6_TIME_EXCEED_TRANSIT: - case ICMP6_PARAM_PROB: - break; - default: - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - hlen += sizeof(struct icmp6_hdr); - if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { - NAT64STAT_INC(stats, dropped); - return (NAT64MFREE); - } - if (m->m_len < hlen + 
sizeof(struct ip6_hdr) + ICMP_MINLEN) - m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); - if (m == NULL) { - NAT64STAT_INC(stats, nomem); - return (NAT64RETURN); - } - /* - * Use destination address from inner IPv6 header to determine - * IPv4 mapped address. - */ - ip6i = mtodo(m, hlen); - if (ipfw_lookup_table(chain, cfg->map64, - sizeof(struct in6_addr), &ip6i->ip6_dst, &tablearg) == 0) { - m_freem(m); - return (NAT64RETURN); - } - if (cfg->flags & NAT64_LOG) { - logdata = &loghdr; - nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx); - } else - logdata = NULL; - return (nat64_handle_icmp6(m, 0, - htonl(TARG_VAL(chain, tablearg, nh4)), 0, stats, logdata)); -} - -int -ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args, - ipfw_insn *cmd, int *done) -{ - ipfw_insn *icmd; - struct nat64stl_cfg *cfg; - in_addr_t dst4; - uint32_t tablearg; - int ret; - - IPFW_RLOCK_ASSERT(chain); - - *done = 0; /* try next rule if not matched */ - icmd = cmd + 1; - if (cmd->opcode != O_EXTERNAL_ACTION || - cmd->arg1 != V_nat64stl_eid || - icmd->opcode != O_EXTERNAL_INSTANCE || - (cfg = NAT64_LOOKUP(chain, icmd)) == NULL) - return (0); - - switch (args->f_id.addr_type) { - case 4: - dst4 = htonl(args->f_id.dst_ip); - ret = ipfw_lookup_table(chain, cfg->map46, sizeof(in_addr_t), - &dst4, &tablearg); - break; - case 6: - ret = ipfw_lookup_table(chain, cfg->map64, - sizeof(struct in6_addr), &args->f_id.src_ip6, &tablearg); - break; - default: - return (0); - } - if (ret == 0) { - /* - * In case when packet is ICMPv6 message from an intermediate - * router, the source address of message will not match the - * addresses from our map64 table. 
- */ - if (args->f_id.proto != IPPROTO_ICMPV6) - return (0); - - ret = nat64stl_handle_icmp6(chain, cfg, args->m); - } else { - if (args->f_id.addr_type == 4) - ret = nat64stl_handle_ip4(chain, cfg, args->m, - tablearg); - else - ret = nat64stl_handle_ip6(chain, cfg, args->m, - tablearg); - } - if (ret == NAT64SKIP) - return (0); - - *done = 1; /* terminate the search */ - if (ret == NAT64MFREE) - m_freem(args->m); - args->m = NULL; - return (IP_FW_DENY); -} - - diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64stl.h b/freebsd/sys/netpfil/ipfw/nat64/nat64stl.h deleted file mode 100644 index 42ec20ea..00000000 --- a/freebsd/sys/netpfil/ipfw/nat64/nat64stl.h +++ /dev/null @@ -1,58 +0,0 @@ -/*- - * Copyright (c) 2015-2016 Yandex LLC - * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _IP_FW_NAT64STL_H_ -#define _IP_FW_NAT64STL_H_ - -struct nat64stl_cfg { - struct named_object no; - - uint16_t map64; /* table with 6to4 mapping */ - uint16_t map46; /* table with 4to6 mapping */ - - struct in6_addr prefix6;/* IPv6 prefix */ - uint8_t plen6; /* prefix length */ - uint8_t flags; /* flags for internal use */ -#define NAT64STL_KIDX 0x0100 -#define NAT64STL_46T 0x0200 -#define NAT64STL_64T 0x0400 -#define NAT64STL_FLAGSMASK (NAT64_LOG) /* flags to pass to userland */ - char name[64]; - nat64_stats_block stats; -}; - -VNET_DECLARE(uint16_t, nat64stl_eid); -#define V_nat64stl_eid VNET(nat64stl_eid) -#define IPFW_TLV_NAT64STL_NAME IPFW_TLV_EACTION_NAME(V_nat64stl_eid) - -int ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args, - ipfw_insn *cmd, int *done); - -#endif - diff --git a/freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c b/freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c deleted file mode 100644 index b5e6e032..00000000 --- a/freebsd/sys/netpfil/ipfw/nat64/nat64stl_control.c +++ /dev/null @@ -1,623 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2015-2016 Yandex LLC - * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org> - * Copyright (c) 2015 Alexander V. Chernikov <melifaro@FreeBSD.org> - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/counter.h> -#include <sys/errno.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <sys/rmlock.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/sockopt.h> -#include <sys/queue.h> -#include <sys/syslog.h> -#include <sys/sysctl.h> - -#include <net/if.h> -#include <net/if_var.h> -#include <net/pfil.h> -#include <net/route.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> -#include <netinet6/in6_var.h> -#include <netinet6/ip6_var.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/nat64/ip_fw_nat64.h> -#include <netpfil/ipfw/nat64/nat64stl.h> -#include <netinet6/ip_fw_nat64.h> - -VNET_DEFINE(uint16_t, nat64stl_eid) = 0; - -static struct nat64stl_cfg *nat64stl_alloc_config(const char *name, uint8_t set); -static void nat64stl_free_config(struct nat64stl_cfg *cfg); -static struct nat64stl_cfg *nat64stl_find(struct namedobj_instance *ni, - const char *name, uint8_t set); - -static struct nat64stl_cfg * -nat64stl_alloc_config(const char *name, uint8_t set) -{ - struct nat64stl_cfg *cfg; - - cfg = malloc(sizeof(struct nat64stl_cfg), M_IPFW, M_WAITOK | M_ZERO); - COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK); - cfg->no.name = cfg->name; - cfg->no.etlv = IPFW_TLV_NAT64STL_NAME; - cfg->no.set = set; - strlcpy(cfg->name, name, sizeof(cfg->name)); - return (cfg); -} - -static void -nat64stl_free_config(struct nat64stl_cfg *cfg) -{ - - COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS); - free(cfg, M_IPFW); -} - -static void -nat64stl_export_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, - ipfw_nat64stl_cfg *uc) -{ - struct named_object *no; - - uc->prefix6 = cfg->prefix6; - uc->plen6 = cfg->plen6; - uc->flags = cfg->flags & NAT64STL_FLAGSMASK; - uc->set = 
cfg->no.set; - strlcpy(uc->name, cfg->no.name, sizeof(uc->name)); - - no = ipfw_objhash_lookup_table_kidx(ch, cfg->map64); - ipfw_export_obj_ntlv(no, &uc->ntlv6); - no = ipfw_objhash_lookup_table_kidx(ch, cfg->map46); - ipfw_export_obj_ntlv(no, &uc->ntlv4); -} - -struct nat64stl_dump_arg { - struct ip_fw_chain *ch; - struct sockopt_data *sd; -}; - -static int -export_config_cb(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - struct nat64stl_dump_arg *da = (struct nat64stl_dump_arg *)arg; - ipfw_nat64stl_cfg *uc; - - uc = (ipfw_nat64stl_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc)); - nat64stl_export_config(da->ch, (struct nat64stl_cfg *)no, uc); - return (0); -} - -static struct nat64stl_cfg * -nat64stl_find(struct namedobj_instance *ni, const char *name, uint8_t set) -{ - struct nat64stl_cfg *cfg; - - cfg = (struct nat64stl_cfg *)ipfw_objhash_lookup_name_type(ni, set, - IPFW_TLV_NAT64STL_NAME, name); - - return (cfg); -} - - -static int -nat64stl_create_internal(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, - ipfw_nat64stl_cfg *i) -{ - - IPFW_UH_WLOCK_ASSERT(ch); - - if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) - return (ENOSPC); - cfg->flags |= NAT64STL_KIDX; - - if (ipfw_ref_table(ch, &i->ntlv4, &cfg->map46) != 0) - return (EINVAL); - cfg->flags |= NAT64STL_46T; - - if (ipfw_ref_table(ch, &i->ntlv6, &cfg->map64) != 0) - return (EINVAL); - cfg->flags |= NAT64STL_64T; - - ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no); - - return (0); -} - -/* - * Creates new nat64 instance. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ] - * - * Returns 0 on success - */ -static int -nat64stl_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_lheader *olh; - ipfw_nat64stl_cfg *uc; - struct namedobj_instance *ni; - struct nat64stl_cfg *cfg; - int error; - - if (sd->valsize != sizeof(*olh) + sizeof(*uc)) - return (EINVAL); - - olh = (ipfw_obj_lheader *)sd->kbuf; - uc = (ipfw_nat64stl_cfg *)(olh + 1); - - if (ipfw_check_object_name_generic(uc->name) != 0) - return (EINVAL); - if (!IN6_IS_ADDR_WKPFX(&uc->prefix6)) - return (EINVAL); - if (uc->plen6 != 96 || uc->set >= IPFW_MAX_SETS) - return (EINVAL); - - /* XXX: check types of tables */ - - ni = CHAIN_TO_SRV(ch); - error = 0; - - IPFW_UH_RLOCK(ch); - if (nat64stl_find(ni, uc->name, uc->set) != NULL) { - IPFW_UH_RUNLOCK(ch); - return (EEXIST); - } - IPFW_UH_RUNLOCK(ch); - - cfg = nat64stl_alloc_config(uc->name, uc->set); - cfg->prefix6 = uc->prefix6; - cfg->plen6 = uc->plen6; - cfg->flags = uc->flags & NAT64STL_FLAGSMASK; - - IPFW_UH_WLOCK(ch); - - if (nat64stl_find(ni, uc->name, uc->set) != NULL) { - IPFW_UH_WUNLOCK(ch); - nat64stl_free_config(cfg); - return (EEXIST); - } - error = nat64stl_create_internal(ch, cfg, uc); - if (error == 0) { - /* Okay, let's link data */ - IPFW_WLOCK(ch); - SRV_OBJECT(ch, cfg->no.kidx) = cfg; - IPFW_WUNLOCK(ch); - - IPFW_UH_WUNLOCK(ch); - return (0); - } - - if (cfg->flags & NAT64STL_KIDX) - ipfw_objhash_free_idx(ni, cfg->no.kidx); - if (cfg->flags & NAT64STL_46T) - ipfw_unref_table(ch, cfg->map46); - if (cfg->flags & NAT64STL_64T) - ipfw_unref_table(ch, cfg->map64); - - IPFW_UH_WUNLOCK(ch); - nat64stl_free_config(cfg); - return (error); -} - -/* - * Change existing nat64stl instance configuration. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ipfw_nat64stl_cfg ] - * Reply: [ ipfw_obj_header ipfw_nat64stl_cfg ] - * - * Returns 0 on success - */ -static int -nat64stl_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, - struct sockopt_data *sd) -{ - ipfw_obj_header *oh; - ipfw_nat64stl_cfg *uc; - struct nat64stl_cfg *cfg; - struct namedobj_instance *ni; - - if (sd->valsize != sizeof(*oh) + sizeof(*uc)) - return (EINVAL); - - oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, - sizeof(*oh) + sizeof(*uc)); - uc = (ipfw_nat64stl_cfg *)(oh + 1); - - if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || - oh->ntlv.set >= IPFW_MAX_SETS) - return (EINVAL); - - ni = CHAIN_TO_SRV(ch); - if (sd->sopt->sopt_dir == SOPT_GET) { - IPFW_UH_RLOCK(ch); - cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_RUNLOCK(ch); - return (EEXIST); - } - nat64stl_export_config(ch, cfg, uc); - IPFW_UH_RUNLOCK(ch); - return (0); - } - - IPFW_UH_WLOCK(ch); - cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_WUNLOCK(ch); - return (EEXIST); - } - - /* - * For now allow to change only following values: - * flags. - */ - - cfg->flags = uc->flags & NAT64STL_FLAGSMASK; - IPFW_UH_WUNLOCK(ch); - return (0); -} - -static void -nat64stl_detach_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg) -{ - - IPFW_UH_WLOCK_ASSERT(ch); - - ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); - ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); - ipfw_unref_table(ch, cfg->map46); - ipfw_unref_table(ch, cfg->map64); -} - -/* - * Destroys nat64 instance. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * - * Returns 0 on success - */ -static int -nat64stl_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_header *oh; - struct nat64stl_cfg *cfg; - - if (sd->valsize != sizeof(*oh)) - return (EINVAL); - - oh = (ipfw_obj_header *)sd->kbuf; - if (ipfw_check_object_name_generic(oh->ntlv.name) != 0) - return (EINVAL); - - IPFW_UH_WLOCK(ch); - cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - if (cfg->no.refcnt > 0) { - IPFW_UH_WUNLOCK(ch); - return (EBUSY); - } - - IPFW_WLOCK(ch); - SRV_OBJECT(ch, cfg->no.kidx) = NULL; - IPFW_WUNLOCK(ch); - - nat64stl_detach_config(ch, cfg); - IPFW_UH_WUNLOCK(ch); - - nat64stl_free_config(cfg); - return (0); -} - -/* - * Lists all nat64stl instances currently available in kernel. - * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ] - * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ] - * - * Returns 0 on success - */ -static int -nat64stl_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_lheader *olh; - struct nat64stl_dump_arg da; - - /* Check minimum header size */ - if (sd->valsize < sizeof(ipfw_obj_lheader)) - return (EINVAL); - - olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); - - IPFW_UH_RLOCK(ch); - olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), - IPFW_TLV_NAT64STL_NAME); - olh->objsize = sizeof(ipfw_nat64stl_cfg); - olh->size = sizeof(*olh) + olh->count * olh->objsize; - - if (sd->valsize < olh->size) { - IPFW_UH_RUNLOCK(ch); - return (ENOMEM); - } - memset(&da, 0, sizeof(da)); - da.ch = ch; - da.sd = sd; - ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, - &da, IPFW_TLV_NAT64STL_NAME); - IPFW_UH_RUNLOCK(ch); - - return (0); -} - -#define __COPY_STAT_FIELD(_cfg, _stats, _field) \ - (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field) 
-static void -export_stats(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, - struct ipfw_nat64stl_stats *stats) -{ - - __COPY_STAT_FIELD(cfg, stats, opcnt64); - __COPY_STAT_FIELD(cfg, stats, opcnt46); - __COPY_STAT_FIELD(cfg, stats, ofrags); - __COPY_STAT_FIELD(cfg, stats, ifrags); - __COPY_STAT_FIELD(cfg, stats, oerrors); - __COPY_STAT_FIELD(cfg, stats, noroute4); - __COPY_STAT_FIELD(cfg, stats, noroute6); - __COPY_STAT_FIELD(cfg, stats, noproto); - __COPY_STAT_FIELD(cfg, stats, nomem); - __COPY_STAT_FIELD(cfg, stats, dropped); -} - -/* - * Get nat64stl statistics. - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]] - * - * Returns 0 on success - */ -static int -nat64stl_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, - struct sockopt_data *sd) -{ - struct ipfw_nat64stl_stats stats; - struct nat64stl_cfg *cfg; - ipfw_obj_header *oh; - ipfw_obj_ctlv *ctlv; - size_t sz; - - sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); - if (sd->valsize % sizeof(uint64_t)) - return (EINVAL); - if (sd->valsize < sz) - return (ENOMEM); - oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); - if (oh == NULL) - return (EINVAL); - memset(&stats, 0, sizeof(stats)); - - IPFW_UH_RLOCK(ch); - cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_RUNLOCK(ch); - return (ESRCH); - } - export_stats(ch, cfg, &stats); - IPFW_UH_RUNLOCK(ch); - - ctlv = (ipfw_obj_ctlv *)(oh + 1); - memset(ctlv, 0, sizeof(*ctlv)); - ctlv->head.type = IPFW_TLV_COUNTERS; - ctlv->head.length = sz - sizeof(ipfw_obj_header); - ctlv->count = sizeof(stats) / sizeof(uint64_t); - ctlv->objsize = sizeof(uint64_t); - ctlv->version = IPFW_NAT64_VERSION; - memcpy(ctlv + 1, &stats, sizeof(stats)); - return (0); -} - -/* - * Reset nat64stl statistics. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * - * Returns 0 on success - */ -static int -nat64stl_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, - struct sockopt_data *sd) -{ - struct nat64stl_cfg *cfg; - ipfw_obj_header *oh; - - if (sd->valsize != sizeof(*oh)) - return (EINVAL); - oh = (ipfw_obj_header *)sd->kbuf; - if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || - oh->ntlv.set >= IPFW_MAX_SETS) - return (EINVAL); - - IPFW_UH_WLOCK(ch); - cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS); - IPFW_UH_WUNLOCK(ch); - return (0); -} - -static struct ipfw_sopt_handler scodes[] = { - - { IP_FW_NAT64STL_CREATE, 0, HDIR_SET, nat64stl_create }, - { IP_FW_NAT64STL_DESTROY,0, HDIR_SET, nat64stl_destroy }, - { IP_FW_NAT64STL_CONFIG, 0, HDIR_BOTH, nat64stl_config }, - { IP_FW_NAT64STL_LIST, 0, HDIR_GET, nat64stl_list }, - { IP_FW_NAT64STL_STATS, 0, HDIR_GET, nat64stl_stats }, - { IP_FW_NAT64STL_RESET_STATS,0, HDIR_SET, nat64stl_reset_stats }, -}; - -static int -nat64stl_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) -{ - ipfw_insn *icmd; - - icmd = cmd - 1; - if (icmd->opcode != O_EXTERNAL_ACTION || - icmd->arg1 != V_nat64stl_eid) - return (1); - - *puidx = cmd->arg1; - *ptype = 0; - return (0); -} - -static void -nat64stl_update_arg1(ipfw_insn *cmd, uint16_t idx) -{ - - cmd->arg1 = idx; -} - -static int -nat64stl_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, - struct named_object **pno) -{ - int err; - - err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, - IPFW_TLV_NAT64STL_NAME, pno); - return (err); -} - -static struct named_object * -nat64stl_findbykidx(struct ip_fw_chain *ch, uint16_t idx) -{ - struct namedobj_instance *ni; - struct named_object *no; - - IPFW_UH_WLOCK_ASSERT(ch); - ni = CHAIN_TO_SRV(ch); - no = ipfw_objhash_lookup_kidx(ni, idx); - KASSERT(no != NULL, ("NAT with 
index %d not found", idx)); - - return (no); -} - -static int -nat64stl_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, - enum ipfw_sets_cmd cmd) -{ - - return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64STL_NAME, - set, new_set, cmd)); -} - -static struct opcode_obj_rewrite opcodes[] = { - { - .opcode = O_EXTERNAL_INSTANCE, - .etlv = IPFW_TLV_EACTION /* just show it isn't table */, - .classifier = nat64stl_classify, - .update = nat64stl_update_arg1, - .find_byname = nat64stl_findbyname, - .find_bykidx = nat64stl_findbykidx, - .manage_sets = nat64stl_manage_sets, - }, -}; - -static int -destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - struct nat64stl_cfg *cfg; - struct ip_fw_chain *ch; - - ch = (struct ip_fw_chain *)arg; - cfg = (struct nat64stl_cfg *)SRV_OBJECT(ch, no->kidx); - SRV_OBJECT(ch, no->kidx) = NULL; - nat64stl_detach_config(ch, cfg); - nat64stl_free_config(cfg); - return (0); -} - -int -nat64stl_init(struct ip_fw_chain *ch, int first) -{ - - V_nat64stl_eid = ipfw_add_eaction(ch, ipfw_nat64stl, "nat64stl"); - if (V_nat64stl_eid == 0) - return (ENXIO); - IPFW_ADD_SOPT_HANDLER(first, scodes); - IPFW_ADD_OBJ_REWRITER(first, opcodes); - return (0); -} - -void -nat64stl_uninit(struct ip_fw_chain *ch, int last) -{ - - IPFW_DEL_OBJ_REWRITER(last, opcodes); - IPFW_DEL_SOPT_HANDLER(last, scodes); - ipfw_del_eaction(ch, V_nat64stl_eid); - /* - * Since we already have deregistered external action, - * our named objects become unaccessible via rules, because - * all rules were truncated by ipfw_del_eaction(). - * So, we can unlink and destroy our named objects without holding - * IPFW_WLOCK(). 
- */ - IPFW_UH_WLOCK(ch); - ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, - IPFW_TLV_NAT64STL_NAME); - V_nat64stl_eid = 0; - IPFW_UH_WUNLOCK(ch); -} - diff --git a/freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c b/freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c deleted file mode 100644 index 2746bed7..00000000 --- a/freebsd/sys/netpfil/ipfw/nptv6/ip_fw_nptv6.c +++ /dev/null @@ -1,101 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2016 Yandex LLC - * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/module.h> -#include <sys/rwlock.h> -#include <sys/socket.h> - -#include <net/if.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/nptv6/nptv6.h> - -static int -vnet_ipfw_nptv6_init(const void *arg __unused) -{ - - return (nptv6_init(&V_layer3_chain, IS_DEFAULT_VNET(curvnet))); -} - -static int -vnet_ipfw_nptv6_uninit(const void *arg __unused) -{ - - nptv6_uninit(&V_layer3_chain, IS_DEFAULT_VNET(curvnet)); - return (0); -} - -static int -ipfw_nptv6_modevent(module_t mod, int type, void *unused) -{ - - switch (type) { - case MOD_LOAD: - case MOD_UNLOAD: - break; - default: - return (EOPNOTSUPP); - } - return (0); -} - -static moduledata_t ipfw_nptv6_mod = { - "ipfw_nptv6", - ipfw_nptv6_modevent, - 0 -}; - -/* Define startup order. 
*/ -#define IPFW_NPTV6_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN -#define IPFW_NPTV6_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */ -#define IPFW_NPTV6_MODULE_ORDER (IPFW_NPTV6_MODEVENT_ORDER + 1) -#define IPFW_NPTV6_VNET_ORDER (IPFW_NPTV6_MODEVENT_ORDER + 2) - -DECLARE_MODULE(ipfw_nptv6, ipfw_nptv6_mod, IPFW_NPTV6_SI_SUB_FIREWALL, - IPFW_NPTV6_MODULE_ORDER); -MODULE_DEPEND(ipfw_nptv6, ipfw, 3, 3, 3); -MODULE_VERSION(ipfw_nptv6, 1); - -VNET_SYSINIT(vnet_ipfw_nptv6_init, IPFW_NPTV6_SI_SUB_FIREWALL, - IPFW_NPTV6_VNET_ORDER, vnet_ipfw_nptv6_init, NULL); -VNET_SYSUNINIT(vnet_ipfw_nptv6_uninit, IPFW_NPTV6_SI_SUB_FIREWALL, - IPFW_NPTV6_VNET_ORDER, vnet_ipfw_nptv6_uninit, NULL); diff --git a/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c deleted file mode 100644 index a41e00f3..00000000 --- a/freebsd/sys/netpfil/ipfw/nptv6/nptv6.c +++ /dev/null @@ -1,896 +0,0 @@ -#include <machine/rtems-bsd-kernel-space.h> - -/*- - * Copyright (c) 2016 Yandex LLC - * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/counter.h> -#include <sys/errno.h> -#include <sys/kernel.h> -#include <sys/lock.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/module.h> -#include <sys/rmlock.h> -#include <sys/rwlock.h> -#include <sys/socket.h> -#include <sys/queue.h> -#include <sys/syslog.h> -#include <sys/sysctl.h> - -#include <net/if.h> -#include <net/if_var.h> -#include <net/netisr.h> -#include <net/pfil.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/ip_var.h> -#include <netinet/ip_fw.h> -#include <netinet/ip6.h> -#include <netinet/icmp6.h> -#include <netinet6/in6_var.h> -#include <netinet6/ip6_var.h> - -#include <netpfil/ipfw/ip_fw_private.h> -#include <netpfil/ipfw/nptv6/nptv6.h> - -static VNET_DEFINE(uint16_t, nptv6_eid) = 0; -#define V_nptv6_eid VNET(nptv6_eid) -#define IPFW_TLV_NPTV6_NAME IPFW_TLV_EACTION_NAME(V_nptv6_eid) - -static struct nptv6_cfg *nptv6_alloc_config(const char *name, uint8_t set); -static void nptv6_free_config(struct nptv6_cfg *cfg); -static struct nptv6_cfg *nptv6_find(struct namedobj_instance *ni, - const char *name, uint8_t set); -static int nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp, - int offset); -static int nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp, - int offset); - -#define NPTV6_LOOKUP(chain, cmd) \ - (struct nptv6_cfg *)SRV_OBJECT((chain), 
(cmd)->arg1) - -#ifndef IN6_MASK_ADDR -#define IN6_MASK_ADDR(a, m) do { \ - (a)->s6_addr32[0] &= (m)->s6_addr32[0]; \ - (a)->s6_addr32[1] &= (m)->s6_addr32[1]; \ - (a)->s6_addr32[2] &= (m)->s6_addr32[2]; \ - (a)->s6_addr32[3] &= (m)->s6_addr32[3]; \ -} while (0) -#endif -#ifndef IN6_ARE_MASKED_ADDR_EQUAL -#define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ - (((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \ - (((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \ - (((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \ - (((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 ) -#endif - -#if 0 -#define NPTV6_DEBUG(fmt, ...) do { \ - printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \ -} while (0) -#define NPTV6_IPDEBUG(fmt, ...) do { \ - char _s[INET6_ADDRSTRLEN], _d[INET6_ADDRSTRLEN]; \ - printf("%s: " fmt "\n", __func__, ## __VA_ARGS__); \ -} while (0) -#else -#define NPTV6_DEBUG(fmt, ...) -#define NPTV6_IPDEBUG(fmt, ...) -#endif - -static int -nptv6_getlasthdr(struct nptv6_cfg *cfg, struct mbuf *m, int *offset) -{ - struct ip6_hdr *ip6; - struct ip6_hbh *hbh; - int proto, hlen; - - hlen = (offset == NULL) ? 
0: *offset; - if (m->m_len < hlen) - return (-1); - ip6 = mtodo(m, hlen); - hlen += sizeof(*ip6); - proto = ip6->ip6_nxt; - while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || - proto == IPPROTO_DSTOPTS) { - hbh = mtodo(m, hlen); - if (m->m_len < hlen) - return (-1); - proto = hbh->ip6h_nxt; - hlen += hbh->ip6h_len << 3; - } - if (offset != NULL) - *offset = hlen; - return (proto); -} - -static int -nptv6_translate_icmpv6(struct nptv6_cfg *cfg, struct mbuf **mp, int offset) -{ - struct icmp6_hdr *icmp6; - struct ip6_hdr *ip6; - struct mbuf *m; - - m = *mp; - if (offset > m->m_len) - return (-1); - icmp6 = mtodo(m, offset); - NPTV6_DEBUG("ICMPv6 type %d", icmp6->icmp6_type); - switch (icmp6->icmp6_type) { - case ICMP6_DST_UNREACH: - case ICMP6_PACKET_TOO_BIG: - case ICMP6_TIME_EXCEEDED: - case ICMP6_PARAM_PROB: - break; - case ICMP6_ECHO_REQUEST: - case ICMP6_ECHO_REPLY: - /* nothing to translate */ - return (0); - default: - /* - * XXX: We can add some checks to not translate NDP and MLD - * messages. Currently user must explicitly allow these message - * types, otherwise packets will be dropped. - */ - return (-1); - } - offset += sizeof(*icmp6); - if (offset + sizeof(*ip6) > m->m_pkthdr.len) - return (-1); - if (offset + sizeof(*ip6) > m->m_len) - *mp = m = m_pullup(m, offset + sizeof(*ip6)); - if (m == NULL) - return (-1); - ip6 = mtodo(m, offset); - NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset, - inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)), - inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)), - ip6->ip6_nxt); - if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src, - &cfg->external, &cfg->mask)) - return (nptv6_rewrite_external(cfg, mp, offset)); - else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst, - &cfg->internal, &cfg->mask)) - return (nptv6_rewrite_internal(cfg, mp, offset)); - /* - * Addresses in the inner IPv6 header doesn't matched to - * our prefixes. 
- */ - return (-1); -} - -static int -nptv6_search_index(struct nptv6_cfg *cfg, struct in6_addr *a) -{ - int idx; - - if (cfg->flags & NPTV6_48PLEN) - return (3); - - /* Search suitable word index for adjustment */ - for (idx = 4; idx < 8; idx++) - if (a->s6_addr16[idx] != 0xffff) - break; - /* - * RFC 6296 p3.7: If an NPTv6 Translator discovers a datagram with - * an IID of all-zeros while performing address mapping, that - * datagram MUST be dropped, and an ICMPv6 Parameter Problem error - * SHOULD be generated. - */ - if (idx == 8 || - (a->s6_addr32[2] == 0 && a->s6_addr32[3] == 0)) - return (-1); - return (idx); -} - -static void -nptv6_copy_addr(struct in6_addr *src, struct in6_addr *dst, - struct in6_addr *mask) -{ - int i; - - for (i = 0; i < 8 && mask->s6_addr8[i] != 0; i++) { - dst->s6_addr8[i] &= ~mask->s6_addr8[i]; - dst->s6_addr8[i] |= src->s6_addr8[i] & mask->s6_addr8[i]; - } -} - -static int -nptv6_rewrite_internal(struct nptv6_cfg *cfg, struct mbuf **mp, int offset) -{ - struct in6_addr *addr; - struct ip6_hdr *ip6; - int idx, proto; - uint16_t adj; - - ip6 = mtodo(*mp, offset); - NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset, - inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)), - inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)), - ip6->ip6_nxt); - if (offset == 0) - addr = &ip6->ip6_src; - else { - /* - * When we rewriting inner IPv6 header, we need to rewrite - * destination address back to external prefix. The datagram in - * the ICMPv6 payload should looks like it was send from - * external prefix. - */ - addr = &ip6->ip6_dst; - } - idx = nptv6_search_index(cfg, addr); - if (idx < 0) { - /* - * Do not send ICMPv6 error when offset isn't zero. - * This means we are rewriting inner IPv6 header in the - * ICMPv6 error message. 
- */ - if (offset == 0) { - icmp6_error2(*mp, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif); - *mp = NULL; - } - return (IP_FW_DENY); - } - adj = addr->s6_addr16[idx]; - nptv6_copy_addr(&cfg->external, addr, &cfg->mask); - adj = cksum_add(adj, cfg->adjustment); - if (adj == 0xffff) - adj = 0; - addr->s6_addr16[idx] = adj; - if (offset == 0) { - /* - * We may need to translate addresses in the inner IPv6 - * header for ICMPv6 error messages. - */ - proto = nptv6_getlasthdr(cfg, *mp, &offset); - if (proto < 0 || (proto == IPPROTO_ICMPV6 && - nptv6_translate_icmpv6(cfg, mp, offset) != 0)) - return (IP_FW_DENY); - NPTV6STAT_INC(cfg, in2ex); - } - return (0); -} - -static int -nptv6_rewrite_external(struct nptv6_cfg *cfg, struct mbuf **mp, int offset) -{ - struct in6_addr *addr; - struct ip6_hdr *ip6; - int idx, proto; - uint16_t adj; - - ip6 = mtodo(*mp, offset); - NPTV6_IPDEBUG("offset %d, %s -> %s %d", offset, - inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)), - inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)), - ip6->ip6_nxt); - if (offset == 0) - addr = &ip6->ip6_dst; - else { - /* - * When we rewriting inner IPv6 header, we need to rewrite - * source address back to internal prefix. The datagram in - * the ICMPv6 payload should looks like it was send from - * internal prefix. - */ - addr = &ip6->ip6_src; - } - idx = nptv6_search_index(cfg, addr); - if (idx < 0) { - /* - * Do not send ICMPv6 error when offset isn't zero. - * This means we are rewriting inner IPv6 header in the - * ICMPv6 error message. 
- */ - if (offset == 0) { - icmp6_error2(*mp, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_ADDR, 0, (*mp)->m_pkthdr.rcvif); - *mp = NULL; - } - return (IP_FW_DENY); - } - adj = addr->s6_addr16[idx]; - nptv6_copy_addr(&cfg->internal, addr, &cfg->mask); - adj = cksum_add(adj, ~cfg->adjustment); - if (adj == 0xffff) - adj = 0; - addr->s6_addr16[idx] = adj; - if (offset == 0) { - /* - * We may need to translate addresses in the inner IPv6 - * header for ICMPv6 error messages. - */ - proto = nptv6_getlasthdr(cfg, *mp, &offset); - if (proto < 0 || (proto == IPPROTO_ICMPV6 && - nptv6_translate_icmpv6(cfg, mp, offset) != 0)) - return (IP_FW_DENY); - NPTV6STAT_INC(cfg, ex2in); - } - return (0); -} - -/* - * ipfw external action handler. - */ -static int -ipfw_nptv6(struct ip_fw_chain *chain, struct ip_fw_args *args, - ipfw_insn *cmd, int *done) -{ - struct ip6_hdr *ip6; - struct nptv6_cfg *cfg; - ipfw_insn *icmd; - int ret; - - *done = 0; /* try next rule if not matched */ - ret = IP_FW_DENY; - icmd = cmd + 1; - if (cmd->opcode != O_EXTERNAL_ACTION || - cmd->arg1 != V_nptv6_eid || - icmd->opcode != O_EXTERNAL_INSTANCE || - (cfg = NPTV6_LOOKUP(chain, icmd)) == NULL) - return (ret); - /* - * We need act as router, so when forwarding is disabled - - * do nothing. - */ - if (V_ip6_forwarding == 0 || args->f_id.addr_type != 6) - return (ret); - /* - * NOTE: we expect ipfw_chk() did m_pullup() up to upper level - * protocol's headers. Also we skip some checks, that ip6_input(), - * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. - */ - ip6 = mtod(args->m, struct ip6_hdr *); - NPTV6_IPDEBUG("eid %u, oid %u, %s -> %s %d", - cmd->arg1, icmd->arg1, - inet_ntop(AF_INET6, &ip6->ip6_src, _s, sizeof(_s)), - inet_ntop(AF_INET6, &ip6->ip6_dst, _d, sizeof(_d)), - ip6->ip6_nxt); - if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_src, - &cfg->internal, &cfg->mask)) { - /* - * XXX: Do not translate packets when both src and dst - * are from internal prefix. 
- */ - if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst, - &cfg->internal, &cfg->mask)) - return (ret); - ret = nptv6_rewrite_internal(cfg, &args->m, 0); - } else if (IN6_ARE_MASKED_ADDR_EQUAL(&ip6->ip6_dst, - &cfg->external, &cfg->mask)) - ret = nptv6_rewrite_external(cfg, &args->m, 0); - else - return (ret); - /* - * If address wasn't rewrited - free mbuf and terminate the search. - */ - if (ret != 0) { - if (args->m != NULL) { - m_freem(args->m); - args->m = NULL; /* mark mbuf as consumed */ - } - NPTV6STAT_INC(cfg, dropped); - *done = 1; - } else { - /* Terminate the search if one_pass is set */ - *done = V_fw_one_pass; - /* Update args->f_id when one_pass is off */ - if (*done == 0) { - ip6 = mtod(args->m, struct ip6_hdr *); - args->f_id.src_ip6 = ip6->ip6_src; - args->f_id.dst_ip6 = ip6->ip6_dst; - } - } - return (ret); -} - -static struct nptv6_cfg * -nptv6_alloc_config(const char *name, uint8_t set) -{ - struct nptv6_cfg *cfg; - - cfg = malloc(sizeof(struct nptv6_cfg), M_IPFW, M_WAITOK | M_ZERO); - COUNTER_ARRAY_ALLOC(cfg->stats, NPTV6STATS, M_WAITOK); - cfg->no.name = cfg->name; - cfg->no.etlv = IPFW_TLV_NPTV6_NAME; - cfg->no.set = set; - strlcpy(cfg->name, name, sizeof(cfg->name)); - return (cfg); -} - -static void -nptv6_free_config(struct nptv6_cfg *cfg) -{ - - COUNTER_ARRAY_FREE(cfg->stats, NPTV6STATS); - free(cfg, M_IPFW); -} - -static void -nptv6_export_config(struct ip_fw_chain *ch, struct nptv6_cfg *cfg, - ipfw_nptv6_cfg *uc) -{ - - uc->internal = cfg->internal; - uc->external = cfg->external; - uc->plen = cfg->plen; - uc->flags = cfg->flags & NPTV6_FLAGSMASK; - uc->set = cfg->no.set; - strlcpy(uc->name, cfg->no.name, sizeof(uc->name)); -} - -struct nptv6_dump_arg { - struct ip_fw_chain *ch; - struct sockopt_data *sd; -}; - -static int -export_config_cb(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - struct nptv6_dump_arg *da = (struct nptv6_dump_arg *)arg; - ipfw_nptv6_cfg *uc; - - uc = (ipfw_nptv6_cfg 
*)ipfw_get_sopt_space(da->sd, sizeof(*uc)); - nptv6_export_config(da->ch, (struct nptv6_cfg *)no, uc); - return (0); -} - -static struct nptv6_cfg * -nptv6_find(struct namedobj_instance *ni, const char *name, uint8_t set) -{ - struct nptv6_cfg *cfg; - - cfg = (struct nptv6_cfg *)ipfw_objhash_lookup_name_type(ni, set, - IPFW_TLV_NPTV6_NAME, name); - - return (cfg); -} - -static void -nptv6_calculate_adjustment(struct nptv6_cfg *cfg) -{ - uint16_t i, e; - uint16_t *p; - - /* Calculate checksum of internal prefix */ - for (i = 0, p = (uint16_t *)&cfg->internal; - p < (uint16_t *)(&cfg->internal + 1); p++) - i = cksum_add(i, *p); - - /* Calculate checksum of external prefix */ - for (e = 0, p = (uint16_t *)&cfg->external; - p < (uint16_t *)(&cfg->external + 1); p++) - e = cksum_add(e, *p); - - /* Adjustment value for Int->Ext direction */ - cfg->adjustment = cksum_add(~e, i); -} - -/* - * Creates new NPTv6 instance. - * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ipfw_nptv6_cfg ] - * - * Returns 0 on success - */ -static int -nptv6_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - struct in6_addr mask; - ipfw_obj_lheader *olh; - ipfw_nptv6_cfg *uc; - struct namedobj_instance *ni; - struct nptv6_cfg *cfg; - - if (sd->valsize != sizeof(*olh) + sizeof(*uc)) - return (EINVAL); - - olh = (ipfw_obj_lheader *)sd->kbuf; - uc = (ipfw_nptv6_cfg *)(olh + 1); - if (ipfw_check_object_name_generic(uc->name) != 0) - return (EINVAL); - if (uc->plen < 8 || uc->plen > 64 || uc->set >= IPFW_MAX_SETS) - return (EINVAL); - if (IN6_IS_ADDR_MULTICAST(&uc->internal) || - IN6_IS_ADDR_MULTICAST(&uc->external) || - IN6_IS_ADDR_UNSPECIFIED(&uc->internal) || - IN6_IS_ADDR_UNSPECIFIED(&uc->external) || - IN6_IS_ADDR_LINKLOCAL(&uc->internal) || - IN6_IS_ADDR_LINKLOCAL(&uc->external)) - return (EINVAL); - in6_prefixlen2mask(&mask, uc->plen); - if (IN6_ARE_MASKED_ADDR_EQUAL(&uc->internal, &uc->external, &mask)) - return (EINVAL); - - ni = 
CHAIN_TO_SRV(ch); - IPFW_UH_RLOCK(ch); - if (nptv6_find(ni, uc->name, uc->set) != NULL) { - IPFW_UH_RUNLOCK(ch); - return (EEXIST); - } - IPFW_UH_RUNLOCK(ch); - - cfg = nptv6_alloc_config(uc->name, uc->set); - cfg->plen = uc->plen; - if (cfg->plen <= 48) - cfg->flags |= NPTV6_48PLEN; - cfg->internal = uc->internal; - cfg->external = uc->external; - cfg->mask = mask; - IN6_MASK_ADDR(&cfg->internal, &mask); - IN6_MASK_ADDR(&cfg->external, &mask); - nptv6_calculate_adjustment(cfg); - - IPFW_UH_WLOCK(ch); - if (ipfw_objhash_alloc_idx(ni, &cfg->no.kidx) != 0) { - IPFW_UH_WUNLOCK(ch); - nptv6_free_config(cfg); - return (ENOSPC); - } - ipfw_objhash_add(ni, &cfg->no); - IPFW_WLOCK(ch); - SRV_OBJECT(ch, cfg->no.kidx) = cfg; - IPFW_WUNLOCK(ch); - IPFW_UH_WUNLOCK(ch); - return (0); -} - -/* - * Destroys NPTv6 instance. - * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * - * Returns 0 on success - */ -static int -nptv6_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_header *oh; - struct nptv6_cfg *cfg; - - if (sd->valsize != sizeof(*oh)) - return (EINVAL); - - oh = (ipfw_obj_header *)sd->kbuf; - if (ipfw_check_object_name_generic(oh->ntlv.name) != 0) - return (EINVAL); - - IPFW_UH_WLOCK(ch); - cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - if (cfg->no.refcnt > 0) { - IPFW_UH_WUNLOCK(ch); - return (EBUSY); - } - - IPFW_WLOCK(ch); - SRV_OBJECT(ch, cfg->no.kidx) = NULL; - IPFW_WUNLOCK(ch); - - ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); - ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); - IPFW_UH_WUNLOCK(ch); - - nptv6_free_config(cfg); - return (0); -} - -/* - * Get or change nptv6 instance config. 
- * Request: [ ipfw_obj_header [ ipfw_nptv6_cfg ] ] - */ -static int -nptv6_config(struct ip_fw_chain *chain, ip_fw3_opheader *op, - struct sockopt_data *sd) -{ - - return (EOPNOTSUPP); -} - -/* - * Lists all NPTv6 instances currently available in kernel. - * Data layout (v0)(current): - * Request: [ ipfw_obj_lheader ] - * Reply: [ ipfw_obj_lheader ipfw_nptv6_cfg x N ] - * - * Returns 0 on success - */ -static int -nptv6_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, - struct sockopt_data *sd) -{ - ipfw_obj_lheader *olh; - struct nptv6_dump_arg da; - - /* Check minimum header size */ - if (sd->valsize < sizeof(ipfw_obj_lheader)) - return (EINVAL); - - olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); - - IPFW_UH_RLOCK(ch); - olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), - IPFW_TLV_NPTV6_NAME); - olh->objsize = sizeof(ipfw_nptv6_cfg); - olh->size = sizeof(*olh) + olh->count * olh->objsize; - - if (sd->valsize < olh->size) { - IPFW_UH_RUNLOCK(ch); - return (ENOMEM); - } - memset(&da, 0, sizeof(da)); - da.ch = ch; - da.sd = sd; - ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, - &da, IPFW_TLV_NPTV6_NAME); - IPFW_UH_RUNLOCK(ch); - - return (0); -} - -#define __COPY_STAT_FIELD(_cfg, _stats, _field) \ - (_stats)->_field = NPTV6STAT_FETCH(_cfg, _field) -static void -export_stats(struct ip_fw_chain *ch, struct nptv6_cfg *cfg, - struct ipfw_nptv6_stats *stats) -{ - - __COPY_STAT_FIELD(cfg, stats, in2ex); - __COPY_STAT_FIELD(cfg, stats, ex2in); - __COPY_STAT_FIELD(cfg, stats, dropped); -} - -/* - * Get NPTv6 statistics. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]] - * - * Returns 0 on success - */ -static int -nptv6_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, - struct sockopt_data *sd) -{ - struct ipfw_nptv6_stats stats; - struct nptv6_cfg *cfg; - ipfw_obj_header *oh; - ipfw_obj_ctlv *ctlv; - size_t sz; - - sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); - if (sd->valsize % sizeof(uint64_t)) - return (EINVAL); - if (sd->valsize < sz) - return (ENOMEM); - oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); - if (oh == NULL) - return (EINVAL); - if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || - oh->ntlv.set >= IPFW_MAX_SETS) - return (EINVAL); - memset(&stats, 0, sizeof(stats)); - - IPFW_UH_RLOCK(ch); - cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_RUNLOCK(ch); - return (ESRCH); - } - export_stats(ch, cfg, &stats); - IPFW_UH_RUNLOCK(ch); - - ctlv = (ipfw_obj_ctlv *)(oh + 1); - memset(ctlv, 0, sizeof(*ctlv)); - ctlv->head.type = IPFW_TLV_COUNTERS; - ctlv->head.length = sz - sizeof(ipfw_obj_header); - ctlv->count = sizeof(stats) / sizeof(uint64_t); - ctlv->objsize = sizeof(uint64_t); - ctlv->version = 1; - memcpy(ctlv + 1, &stats, sizeof(stats)); - return (0); -} - -/* - * Reset NPTv6 statistics. 
- * Data layout (v0)(current): - * Request: [ ipfw_obj_header ] - * - * Returns 0 on success - */ -static int -nptv6_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, - struct sockopt_data *sd) -{ - struct nptv6_cfg *cfg; - ipfw_obj_header *oh; - - if (sd->valsize != sizeof(*oh)) - return (EINVAL); - oh = (ipfw_obj_header *)sd->kbuf; - if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || - oh->ntlv.set >= IPFW_MAX_SETS) - return (EINVAL); - - IPFW_UH_WLOCK(ch); - cfg = nptv6_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); - if (cfg == NULL) { - IPFW_UH_WUNLOCK(ch); - return (ESRCH); - } - COUNTER_ARRAY_ZERO(cfg->stats, NPTV6STATS); - IPFW_UH_WUNLOCK(ch); - return (0); -} - -static struct ipfw_sopt_handler scodes[] = { - { IP_FW_NPTV6_CREATE, 0, HDIR_SET, nptv6_create }, - { IP_FW_NPTV6_DESTROY,0, HDIR_SET, nptv6_destroy }, - { IP_FW_NPTV6_CONFIG, 0, HDIR_BOTH, nptv6_config }, - { IP_FW_NPTV6_LIST, 0, HDIR_GET, nptv6_list }, - { IP_FW_NPTV6_STATS, 0, HDIR_GET, nptv6_stats }, - { IP_FW_NPTV6_RESET_STATS,0, HDIR_SET, nptv6_reset_stats }, -}; - -static int -nptv6_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) -{ - ipfw_insn *icmd; - - icmd = cmd - 1; - NPTV6_DEBUG("opcode %d, arg1 %d, opcode0 %d, arg1 %d", - cmd->opcode, cmd->arg1, icmd->opcode, icmd->arg1); - if (icmd->opcode != O_EXTERNAL_ACTION || - icmd->arg1 != V_nptv6_eid) - return (1); - - *puidx = cmd->arg1; - *ptype = 0; - return (0); -} - -static void -nptv6_update_arg1(ipfw_insn *cmd, uint16_t idx) -{ - - cmd->arg1 = idx; - NPTV6_DEBUG("opcode %d, arg1 -> %d", cmd->opcode, cmd->arg1); -} - -static int -nptv6_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, - struct named_object **pno) -{ - int err; - - err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, - IPFW_TLV_NPTV6_NAME, pno); - NPTV6_DEBUG("uidx %u, type %u, err %d", ti->uidx, ti->type, err); - return (err); -} - -static struct named_object * -nptv6_findbykidx(struct ip_fw_chain *ch, uint16_t idx) -{ - struct 
namedobj_instance *ni; - struct named_object *no; - - IPFW_UH_WLOCK_ASSERT(ch); - ni = CHAIN_TO_SRV(ch); - no = ipfw_objhash_lookup_kidx(ni, idx); - KASSERT(no != NULL, ("NPT with index %d not found", idx)); - - NPTV6_DEBUG("kidx %u -> %s", idx, no->name); - return (no); -} - -static int -nptv6_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, - enum ipfw_sets_cmd cmd) -{ - - return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NPTV6_NAME, - set, new_set, cmd)); -} - -static struct opcode_obj_rewrite opcodes[] = { - { - .opcode = O_EXTERNAL_INSTANCE, - .etlv = IPFW_TLV_EACTION /* just show it isn't table */, - .classifier = nptv6_classify, - .update = nptv6_update_arg1, - .find_byname = nptv6_findbyname, - .find_bykidx = nptv6_findbykidx, - .manage_sets = nptv6_manage_sets, - }, -}; - -static int -destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, - void *arg) -{ - struct nptv6_cfg *cfg; - struct ip_fw_chain *ch; - - ch = (struct ip_fw_chain *)arg; - IPFW_UH_WLOCK_ASSERT(ch); - - cfg = (struct nptv6_cfg *)SRV_OBJECT(ch, no->kidx); - SRV_OBJECT(ch, no->kidx) = NULL; - ipfw_objhash_del(ni, &cfg->no); - ipfw_objhash_free_idx(ni, cfg->no.kidx); - nptv6_free_config(cfg); - return (0); -} - -int -nptv6_init(struct ip_fw_chain *ch, int first) -{ - - V_nptv6_eid = ipfw_add_eaction(ch, ipfw_nptv6, "nptv6"); - if (V_nptv6_eid == 0) - return (ENXIO); - IPFW_ADD_SOPT_HANDLER(first, scodes); - IPFW_ADD_OBJ_REWRITER(first, opcodes); - return (0); -} - -void -nptv6_uninit(struct ip_fw_chain *ch, int last) -{ - - IPFW_DEL_OBJ_REWRITER(last, opcodes); - IPFW_DEL_SOPT_HANDLER(last, scodes); - ipfw_del_eaction(ch, V_nptv6_eid); - /* - * Since we already have deregistered external action, - * our named objects become unaccessible via rules, because - * all rules were truncated by ipfw_del_eaction(). - * So, we can unlink and destroy our named objects without holding - * IPFW_WLOCK(). 
- */ - IPFW_UH_WLOCK(ch); - ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, - IPFW_TLV_NPTV6_NAME); - V_nptv6_eid = 0; - IPFW_UH_WUNLOCK(ch); -} - diff --git a/freebsd/sys/netpfil/ipfw/nptv6/nptv6.h b/freebsd/sys/netpfil/ipfw/nptv6/nptv6.h deleted file mode 100644 index 95b04bfe..00000000 --- a/freebsd/sys/netpfil/ipfw/nptv6/nptv6.h +++ /dev/null @@ -1,65 +0,0 @@ -/*- - * Copyright (c) 2016 Yandex LLC - * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _IP_FW_NPTV6_H_ -#define _IP_FW_NPTV6_H_ - -#include <netinet6/ip_fw_nptv6.h> - -#ifdef _KERNEL -#define NPTV6STATS (sizeof(struct ipfw_nptv6_stats) / sizeof(uint64_t)) -#define NPTV6STAT_ADD(c, f, v) \ - counter_u64_add((c)->stats[ \ - offsetof(struct ipfw_nptv6_stats, f) / sizeof(uint64_t)], (v)) -#define NPTV6STAT_INC(c, f) NPTV6STAT_ADD(c, f, 1) -#define NPTV6STAT_FETCH(c, f) \ - counter_u64_fetch((c)->stats[ \ - offsetof(struct ipfw_nptv6_stats, f) / sizeof(uint64_t)]) - -struct nptv6_cfg { - struct named_object no; - - struct in6_addr internal; /* Internal IPv6 prefix */ - struct in6_addr external; /* External IPv6 prefix */ - struct in6_addr mask; /* IPv6 prefix mask */ - uint16_t adjustment; /* Checksum adjustment value */ - uint8_t plen; /* Prefix length */ - uint8_t flags; /* Flags for internal use */ -#define NPTV6_48PLEN 0x0001 - char name[64]; /* Instance name */ - counter_u64_t stats[NPTV6STATS]; /* Statistics counters */ -}; -#define NPTV6_FLAGSMASK 0 - -int nptv6_init(struct ip_fw_chain *ch, int first); -void nptv6_uninit(struct ip_fw_chain *ch, int last); -#endif /* _KERNEL */ - -#endif /* _IP_FW_NPTV6_H_ */ - |