diff options
Diffstat (limited to 'freebsd/sys/netinet/ip_id.c')
-rw-r--r-- | freebsd/sys/netinet/ip_id.c | 260 |
1 files changed, 175 insertions, 85 deletions
diff --git a/freebsd/sys/netinet/ip_id.c b/freebsd/sys/netinet/ip_id.c index a76c7b78..17352cfb 100644 --- a/freebsd/sys/netinet/ip_id.c +++ b/freebsd/sys/netinet/ip_id.c @@ -76,119 +76,149 @@ __FBSDID("$FreeBSD$"); * enabled. */ -#include <sys/types.h> -#include <sys/malloc.h> #include <rtems/bsd/sys/param.h> -#include <sys/time.h> +#include <sys/systm.h> +#include <sys/counter.h> #include <sys/kernel.h> -#include <sys/libkern.h> +#include <sys/malloc.h> #include <rtems/bsd/sys/lock.h> #include <sys/mutex.h> #include <sys/random.h> -#include <sys/systm.h> +#include <sys/smp.h> #include <sys/sysctl.h> +#include <sys/bitstring.h> + +#include <net/vnet.h> + #include <netinet/in.h> +#include <netinet/ip.h> #include <netinet/ip_var.h> -#include <sys/bitstring.h> +/* + * By default we generate IP ID only for non-atomic datagrams, as + * suggested by RFC6864. We use per-CPU counter for that, or if + * user wants to, we can turn on random ID generation. + */ +static VNET_DEFINE(int, ip_rfc6864) = 1; +static VNET_DEFINE(int, ip_do_randomid) = 0; +#define V_ip_rfc6864 VNET(ip_rfc6864) +#define V_ip_do_randomid VNET(ip_do_randomid) + +/* + * Random ID state engine. + */ static MALLOC_DEFINE(M_IPID, "ipid", "randomized ip id state"); +static VNET_DEFINE(uint16_t *, id_array); +static VNET_DEFINE(bitstr_t *, id_bits); +static VNET_DEFINE(int, array_ptr); +static VNET_DEFINE(int, array_size); +static VNET_DEFINE(int, random_id_collisions); +static VNET_DEFINE(int, random_id_total); +static VNET_DEFINE(struct mtx, ip_id_mtx); +#define V_id_array VNET(id_array) +#define V_id_bits VNET(id_bits) +#define V_array_ptr VNET(array_ptr) +#define V_array_size VNET(array_size) +#define V_random_id_collisions VNET(random_id_collisions) +#define V_random_id_total VNET(random_id_total) +#define V_ip_id_mtx VNET(ip_id_mtx) -static u_int16_t *id_array = NULL; -static bitstr_t *id_bits = NULL; -static int array_ptr = 0; -static int array_size = 8192; -static int random_id_collisions = 0; -static int random_id_total = 0; -static struct mtx ip_id_mtx; +/* + * Non-random ID state engine is simply a per-cpu counter. + */ +static VNET_DEFINE(counter_u64_t, ip_id); +#define V_ip_id VNET(ip_id) -static void ip_initid(void); +static int sysctl_ip_randomid(SYSCTL_HANDLER_ARGS); static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS); - -MTX_SYSINIT(ip_id_mtx, &ip_id_mtx, "ip_id_mtx", MTX_DEF); +static void ip_initid(int); +static uint16_t ip_randomid(void); +static void ipid_sysinit(void); +static void ipid_sysuninit(void); SYSCTL_DECL(_net_inet_ip); -SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period, CTLTYPE_INT|CTLFLAG_RW, - &array_size, 0, sysctl_ip_id_change, "IU", "IP ID Array size"); -SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions, CTLFLAG_RD, - &random_id_collisions, 0, "Count of IP ID collisions"); -SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD, - &random_id_total, 0, "Count of IP IDs created"); +SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id, + CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(ip_do_randomid), 0, sysctl_ip_randomid, "IU", + "Assign random ip_id values"); +SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc6864, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(ip_rfc6864), 0, + "Use constant IP ID for atomic datagrams"); +SYSCTL_PROC(_net_inet_ip, OID_AUTO, random_id_period, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(array_size), 0, sysctl_ip_id_change, "IU", "IP ID Array size"); +SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_collisions, + CTLFLAG_RD | CTLFLAG_VNET, + &VNET_NAME(random_id_collisions), 0, "Count of IP ID collisions"); +SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id_total, CTLFLAG_RD | CTLFLAG_VNET, + &VNET_NAME(random_id_total), 0, "Count of IP IDs created"); + +static int +sysctl_ip_randomid(SYSCTL_HANDLER_ARGS) +{ + int error, new; + + new = V_ip_do_randomid; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error || req->newptr == NULL) + return (error); + if (new != 0 && new != 1) + return (EINVAL); + if (new == V_ip_do_randomid) + return (0); + if (new == 1 && V_ip_do_randomid == 0) + ip_initid(8192); + /* We don't free memory when turning random ID off, due to race. */ + V_ip_do_randomid = new; + return (0); +} static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS) { int error, new; - new = array_size; + new = V_array_size; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { - if (new >= 512 && new <= 32768) { - mtx_lock(&ip_id_mtx); - array_size = new; - ip_initid(); - mtx_unlock(&ip_id_mtx); - } else + if (new >= 512 && new <= 32768) + ip_initid(new); + else error = EINVAL; } return (error); } -/* - * ip_initid() runs with a mutex held and may execute in a network context. - * As a result, it uses M_NOWAIT. Ideally, we would always do this - * allocation from the sysctl contact and have it be an invariant that if - * this random ID allocation mode is selected, the buffers are present. This - * would also avoid potential network context failures of IP ID generation. - */ static void -ip_initid(void) +ip_initid(int new_size) { + uint16_t *new_array; + bitstr_t *new_bits; - mtx_assert(&ip_id_mtx, MA_OWNED); + new_array = malloc(new_size * sizeof(uint16_t), M_IPID, + M_WAITOK | M_ZERO); + new_bits = malloc(bitstr_size(65536), M_IPID, M_WAITOK | M_ZERO); - if (id_array != NULL) { - free(id_array, M_IPID); - free(id_bits, M_IPID); - } - random_id_collisions = 0; - random_id_total = 0; - array_ptr = 0; - id_array = (u_int16_t *) malloc(array_size * sizeof(u_int16_t), - M_IPID, M_NOWAIT | M_ZERO); - id_bits = (bitstr_t *) malloc(bitstr_size(65536), M_IPID, - M_NOWAIT | M_ZERO); - if (id_array == NULL || id_bits == NULL) { - /* Neither or both. */ - if (id_array != NULL) { - free(id_array, M_IPID); - id_array = NULL; - } - if (id_bits != NULL) { - free(id_bits, M_IPID); - id_bits = NULL; - } + mtx_lock(&V_ip_id_mtx); + if (V_id_array != NULL) { + free(V_id_array, M_IPID); + free(V_id_bits, M_IPID); } + V_id_array = new_array; + V_id_bits = new_bits; + V_array_size = new_size; + V_array_ptr = 0; + V_random_id_collisions = 0; + V_random_id_total = 0; + mtx_unlock(&V_ip_id_mtx); } -u_int16_t +static uint16_t ip_randomid(void) { - u_int16_t new_id; - - mtx_lock(&ip_id_mtx); - if (id_array == NULL) - ip_initid(); - - /* - * Fail gracefully; return a fixed id if memory allocation failed; - * ideally we wouldn't do allocation in this context in order to - * avoid the possibility of this failure mode. - */ - if (id_array == NULL) { - mtx_unlock(&ip_id_mtx); - return (1); - } + uint16_t new_id; + mtx_lock(&V_ip_id_mtx); /* * To avoid a conflict with the zeros that the array is initially * filled with, we never hand out an id of zero. @@ -196,16 +226,76 @@ ip_randomid(void) new_id = 0; do { if (new_id != 0) - random_id_collisions++; + V_random_id_collisions++; arc4rand(&new_id, sizeof(new_id), 0); - } while (bit_test(id_bits, new_id) || new_id == 0); - bit_clear(id_bits, id_array[array_ptr]); - bit_set(id_bits, new_id); - id_array[array_ptr] = new_id; - array_ptr++; - if (array_ptr == array_size) - array_ptr = 0; - random_id_total++; - mtx_unlock(&ip_id_mtx); + } while (bit_test(V_id_bits, new_id) || new_id == 0); + bit_clear(V_id_bits, V_id_array[V_array_ptr]); + bit_set(V_id_bits, new_id); + V_id_array[V_array_ptr] = new_id; + V_array_ptr++; + if (V_array_ptr == V_array_size) + V_array_ptr = 0; + V_random_id_total++; + mtx_unlock(&V_ip_id_mtx); return (new_id); } + +void +ip_fillid(struct ip *ip) +{ + + /* + * Per RFC6864 Section 4 + * + * o Atomic datagrams: (DF==1) && (MF==0) && (frag_offset==0) + * o Non-atomic datagrams: (DF==0) || (MF==1) || (frag_offset>0) + */ + if (V_ip_rfc6864 && (ip->ip_off & htons(IP_DF)) == htons(IP_DF)) + ip->ip_id = 0; + else if (V_ip_do_randomid) + ip->ip_id = ip_randomid(); + else { + counter_u64_add(V_ip_id, 1); + /* + * There are two issues about this trick, to be kept in mind. + * 1) We can migrate between counter_u64_add() and next + * line, and grab counter from other CPU, resulting in too + * quick ID reuse. This is tolerable in our particular case, + * since probability of such event is much lower then reuse + * of ID due to legitimate overflow, that at modern Internet + * speeds happens all the time. + * 2) We are relying on the fact that counter(9) is based on + * UMA_ZONE_PCPU uma(9) zone. We also take only last + * sixteen bits of a counter, so we don't care about the + * fact that machines with 32-bit word update their counters + * not atomically. + */ + ip->ip_id = htons((*(uint64_t *)zpcpu_get(V_ip_id)) & 0xffff); + } +} + +static void +ipid_sysinit(void) +{ + int i; + + mtx_init(&V_ip_id_mtx, "ip_id_mtx", NULL, MTX_DEF); + V_ip_id = counter_u64_alloc(M_WAITOK); + + CPU_FOREACH(i) + arc4rand(zpcpu_get_cpu(V_ip_id, i), sizeof(uint64_t), 0); +} +VNET_SYSINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysinit, NULL); + +static void +ipid_sysuninit(void) +{ + + if (V_id_array != NULL) { + free(V_id_array, M_IPID); + free(V_id_bits, M_IPID); + } + counter_u64_free(V_ip_id); + mtx_destroy(&V_ip_id_mtx); +} +VNET_SYSUNINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ipid_sysuninit, NULL); |