diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-08-22 14:59:50 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-09-21 10:29:41 +0200 |
commit | 3489e3b6396ee9944a6a2e19e675ca54c36993b4 (patch) | |
tree | cd55cfac1c96ff4b888a9606fd6a0d8eb65bb446 /freebsd/sys/net | |
parent | ck: Define CK_MD_PPC32_LWSYNC if available (diff) | |
download | rtems-libbsd-3489e3b6396ee9944a6a2e19e675ca54c36993b4.tar.bz2 |
Update to FreeBSD head 2018-09-17
Git mirror commit 6c2192b1ef8c50788c751f878552526800b1e319.
Update #3472.
Diffstat (limited to 'freebsd/sys/net')
52 files changed, 1237 insertions, 1272 deletions
diff --git a/freebsd/sys/net/altq/altq.h b/freebsd/sys/net/altq/altq.h index 9cb97bc2..35024461 100644 --- a/freebsd/sys/net/altq/altq.h +++ b/freebsd/sys/net/altq/altq.h @@ -76,8 +76,8 @@ struct altqreq { /* simple token backet meter profile */ struct tb_profile { - u_int rate; /* rate in bit-per-sec */ - u_int depth; /* depth in bytes */ + u_int64_t rate; /* rate in bit-per-sec */ + u_int32_t depth; /* depth in bytes */ }; #ifdef ALTQ3_COMPAT @@ -203,4 +203,29 @@ struct pktcntr { #include <net/altq/altq_var.h> #endif +/* + * Can't put these versions in the scheduler-specific headers and include + * them all here as that will cause build failure due to cross-including + * each other scheduler's private bits into each scheduler's + * implementation. + */ +#define CBQ_STATS_VERSION 0 /* Latest version of class_stats_t */ +#define CODEL_STATS_VERSION 0 /* Latest version of codel_ifstats */ +#define FAIRQ_STATS_VERSION 0 /* Latest version of fairq_classstats */ +#define HFSC_STATS_VERSION 1 /* Latest version of hfsc_classstats */ +#define PRIQ_STATS_VERSION 0 /* Latest version of priq_classstats */ + +/* Return the latest stats version for the given scheduler. */ +static inline int altq_stats_version(int scheduler) +{ + switch (scheduler) { + case ALTQT_CBQ: return (CBQ_STATS_VERSION); + case ALTQT_CODEL: return (CODEL_STATS_VERSION); + case ALTQT_FAIRQ: return (FAIRQ_STATS_VERSION); + case ALTQT_HFSC: return (HFSC_STATS_VERSION); + case ALTQT_PRIQ: return (PRIQ_STATS_VERSION); + default: return (0); + } +} + #endif /* _ALTQ_ALTQ_H_ */ diff --git a/freebsd/sys/net/altq/altq_cbq.c b/freebsd/sys/net/altq/altq_cbq.c index 1631d145..ac108bd1 100644 --- a/freebsd/sys/net/altq/altq_cbq.c +++ b/freebsd/sys/net/altq/altq_cbq.c @@ -454,7 +454,7 @@ cbq_remove_queue(struct pf_altq *a) } int -cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { cbq_state_t *cbqp; struct rm_class *cl; diff --git a/freebsd/sys/net/altq/altq_cbq.h b/freebsd/sys/net/altq/altq_cbq.h index 51e7cf9a..04bcab1a 100644 --- a/freebsd/sys/net/altq/altq_cbq.h +++ b/freebsd/sys/net/altq/altq_cbq.h @@ -99,6 +99,12 @@ typedef struct _cbq_class_stats_ { struct codel_stats codel; } class_stats_t; +/* + * CBQ_STATS_VERSION is defined in altq.h to work around issues stemming + * from mixing of public-API and internal bits in each scheduler-specific + * header. + */ + #ifdef ALTQ3_COMPAT /* * Define structures associated with IOCTLS for cbq. diff --git a/freebsd/sys/net/altq/altq_codel.c b/freebsd/sys/net/altq/altq_codel.c index 37a44216..4a55cdbe 100644 --- a/freebsd/sys/net/altq/altq_codel.c +++ b/freebsd/sys/net/altq/altq_codel.c @@ -158,7 +158,7 @@ codel_remove_altq(struct pf_altq *a) } int -codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct codel_if *cif; struct codel_ifstats stats; diff --git a/freebsd/sys/net/altq/altq_codel.h b/freebsd/sys/net/altq/altq_codel.h index 8d7178b4..d7341a87 100644 --- a/freebsd/sys/net/altq/altq_codel.h +++ b/freebsd/sys/net/altq/altq_codel.h @@ -57,6 +57,12 @@ struct codel_ifstats { struct pktcntr cl_dropcnt; /* dropped packet counter */ }; +/* + * CBQ_STATS_VERSION is defined in altq.h to work around issues stemming + * from mixing of public-API and internal bits in each scheduler-specific + * header. + */ + #ifdef _KERNEL #include <net/altq/altq_classq.h> diff --git a/freebsd/sys/net/altq/altq_fairq.c b/freebsd/sys/net/altq/altq_fairq.c index 9979a1fa..a1bc3fdb 100644 --- a/freebsd/sys/net/altq/altq_fairq.c +++ b/freebsd/sys/net/altq/altq_fairq.c @@ -231,7 +231,7 @@ fairq_remove_queue(struct pf_altq *a) } int -fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct fairq_if *pif; struct fairq_class *cl; diff --git a/freebsd/sys/net/altq/altq_fairq.h b/freebsd/sys/net/altq/altq_fairq.h index 1a4b97dd..f1e3217c 100644 --- a/freebsd/sys/net/altq/altq_fairq.h +++ b/freebsd/sys/net/altq/altq_fairq.h @@ -82,6 +82,12 @@ struct fairq_classstats { struct codel_stats codel; }; +/* + * FAIRQ_STATS_VERSION is defined in altq.h to work around issues stemming + * from mixing of public-API and internal bits in each scheduler-specific + * header. + */ + #ifdef _KERNEL typedef struct fairq_bucket { diff --git a/freebsd/sys/net/altq/altq_hfsc.c b/freebsd/sys/net/altq/altq_hfsc.c index d31d55c3..8d8fdfdc 100644 --- a/freebsd/sys/net/altq/altq_hfsc.c +++ b/freebsd/sys/net/altq/altq_hfsc.c @@ -118,10 +118,10 @@ static struct hfsc_class *actlist_firstfit(struct hfsc_class *, static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t); static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t); -static __inline u_int64_t m2sm(u_int); -static __inline u_int64_t m2ism(u_int); +static __inline u_int64_t m2sm(u_int64_t); +static __inline u_int64_t m2ism(u_int64_t); static __inline u_int64_t d2dx(u_int); -static u_int sm2m(u_int64_t); +static u_int64_t sm2m(u_int64_t); static u_int dx2d(u_int64_t); static void sc2isc(struct service_curve *, struct internal_sc *); @@ -132,7 +132,9 @@ static u_int64_t rtsc_x2y(struct runtime_sc *, u_int64_t); static void rtsc_min(struct runtime_sc *, struct internal_sc *, u_int64_t, u_int64_t); -static void get_class_stats(struct hfsc_classstats *, +static void get_class_stats_v0(struct hfsc_classstats_v0 *, + struct hfsc_class *); +static void get_class_stats_v1(struct hfsc_classstats_v1 *, struct hfsc_class *); static struct hfsc_class *clh_to_clp(struct hfsc_if *, u_int32_t); @@ -160,7 +162,7 @@ altqdev_decl(hfsc); */ #define is_a_parent_class(cl) ((cl)->cl_children != NULL) -#define HT_INFINITY 0xffffffffffffffffLL /* infinite time value */ +#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ #ifdef ALTQ3_COMPAT /* hif_list keeps all hfsc_if's allocated. */ @@ -228,7 +230,7 @@ hfsc_add_queue(struct pf_altq *a) { struct hfsc_if *hif; struct hfsc_class *cl, *parent; - struct hfsc_opts *opts; + struct hfsc_opts_v1 *opts; struct service_curve rtsc, lssc, ulsc; if ((hif = a->altq_disc) == NULL) @@ -282,11 +284,15 @@ hfsc_remove_queue(struct pf_altq *a) } int -hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct hfsc_if *hif; struct hfsc_class *cl; - struct hfsc_classstats stats; + union { + struct hfsc_classstats_v0 v0; + struct hfsc_classstats_v1 v1; + } stats; + size_t stats_size; int error = 0; if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL) @@ -295,14 +301,27 @@ hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) if ((cl = clh_to_clp(hif, a->qid)) == NULL) return (EINVAL); - if (*nbytes < sizeof(stats)) + if (version > HFSC_STATS_VERSION) return (EINVAL); - get_class_stats(&stats, cl); + memset(&stats, 0, sizeof(stats)); + switch (version) { + case 0: + get_class_stats_v0(&stats.v0, cl); + stats_size = sizeof(struct hfsc_classstats_v0); + break; + case 1: + get_class_stats_v1(&stats.v1, cl); + stats_size = sizeof(struct hfsc_classstats_v1); + break; + } + + if (*nbytes < stats_size) + return (EINVAL); - if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) + if ((error = copyout((caddr_t)&stats, ubuf, stats_size)) != 0) return (error); - *nbytes = sizeof(stats); + *nbytes = stats_size; return (0); } @@ -1359,27 +1378,17 @@ actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time) * m: bits/sec * d: msec * internal service curve parameters - * sm: (bytes/tsc_interval) << SM_SHIFT - * ism: (tsc_count/byte) << ISM_SHIFT - * dx: tsc_count - * - * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits. - * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU - * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective - * digits in decimal using the following table. + * sm: (bytes/machclk tick) << SM_SHIFT + * ism: (machclk ticks/byte) << ISM_SHIFT + * dx: machclk ticks * - * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps - * ----------+------------------------------------------------------- - * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6 - * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6 - * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6 + * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits. we + * should be able to handle 100K-100Gbps linkspeed with 256 MHz machclk + * frequency and at least 3 effective digits in decimal. * - * nsec/byte 80000 8000 800 80 8 - * ism(500MHz) 40000 4000 400 40 4 - * ism(200MHz) 16000 1600 160 16 1.6 */ #define SM_SHIFT 24 -#define ISM_SHIFT 10 +#define ISM_SHIFT 14 #define SM_MASK ((1LL << SM_SHIFT) - 1) #define ISM_MASK ((1LL << ISM_SHIFT) - 1) @@ -1415,16 +1424,16 @@ seg_y2x(u_int64_t y, u_int64_t ism) } static __inline u_int64_t -m2sm(u_int m) +m2sm(u_int64_t m) { u_int64_t sm; - sm = ((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq; + sm = (m << SM_SHIFT) / 8 / machclk_freq; return (sm); } static __inline u_int64_t -m2ism(u_int m) +m2ism(u_int64_t m) { u_int64_t ism; @@ -1444,13 +1453,13 @@ d2dx(u_int d) return (dx); } -static u_int +static u_int64_t sm2m(u_int64_t sm) { u_int64_t m; m = (sm * 8 * machclk_freq) >> SM_SHIFT; - return ((u_int)m); + return (m); } static u_int @@ -1599,7 +1608,89 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x, } static void -get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl) +get_class_stats_v0(struct hfsc_classstats_v0 *sp, struct hfsc_class *cl) +{ + sp->class_id = cl->cl_id; + sp->class_handle = cl->cl_handle; + +#define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX) + + if (cl->cl_rsc != NULL) { + sp->rsc.m1 = SATU32(sm2m(cl->cl_rsc->sm1)); + sp->rsc.d = dx2d(cl->cl_rsc->dx); + sp->rsc.m2 = SATU32(sm2m(cl->cl_rsc->sm2)); + } else { + sp->rsc.m1 = 0; + sp->rsc.d = 0; + sp->rsc.m2 = 0; + } + if (cl->cl_fsc != NULL) { + sp->fsc.m1 = SATU32(sm2m(cl->cl_fsc->sm1)); + sp->fsc.d = dx2d(cl->cl_fsc->dx); + sp->fsc.m2 = SATU32(sm2m(cl->cl_fsc->sm2)); + } else { + sp->fsc.m1 = 0; + sp->fsc.d = 0; + sp->fsc.m2 = 0; + } + if (cl->cl_usc != NULL) { + sp->usc.m1 = SATU32(sm2m(cl->cl_usc->sm1)); + sp->usc.d = dx2d(cl->cl_usc->dx); + sp->usc.m2 = SATU32(sm2m(cl->cl_usc->sm2)); + } else { + sp->usc.m1 = 0; + sp->usc.d = 0; + sp->usc.m2 = 0; + } + +#undef SATU32 + + sp->total = cl->cl_total; + sp->cumul = cl->cl_cumul; + + sp->d = cl->cl_d; + sp->e = cl->cl_e; + sp->vt = cl->cl_vt; + sp->f = cl->cl_f; + + sp->initvt = cl->cl_initvt; + sp->vtperiod = cl->cl_vtperiod; + sp->parentperiod = cl->cl_parentperiod; + sp->nactive = cl->cl_nactive; + sp->vtoff = cl->cl_vtoff; + sp->cvtmax = cl->cl_cvtmax; + sp->myf = cl->cl_myf; + sp->cfmin = cl->cl_cfmin; + sp->cvtmin = cl->cl_cvtmin; + sp->myfadj = cl->cl_myfadj; + sp->vtadj = cl->cl_vtadj; + + sp->cur_time = read_machclk(); + sp->machclk_freq = machclk_freq; + + sp->qlength = qlen(cl->cl_q); + sp->qlimit = qlimit(cl->cl_q); + sp->xmit_cnt = cl->cl_stats.xmit_cnt; + sp->drop_cnt = cl->cl_stats.drop_cnt; + sp->period = cl->cl_stats.period; + + sp->qtype = qtype(cl->cl_q); +#ifdef ALTQ_RED + if (q_is_red(cl->cl_q)) + red_getstats(cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_RIO + if (q_is_rio(cl->cl_q)) + rio_getstats((rio_t *)cl->cl_red, &sp->red[0]); +#endif +#ifdef ALTQ_CODEL + if (q_is_codel(cl->cl_q)) + codel_getstats(cl->cl_codel, &sp->codel); +#endif +} + +static void +get_class_stats_v1(struct hfsc_classstats_v1 *sp, struct hfsc_class *cl) { sp->class_id = cl->cl_id; sp->class_handle = cl->cl_handle; diff --git a/freebsd/sys/net/altq/altq_hfsc.h b/freebsd/sys/net/altq/altq_hfsc.h index 0a9fcf95..67ec0036 100644 --- a/freebsd/sys/net/altq/altq_hfsc.h +++ b/freebsd/sys/net/altq/altq_hfsc.h @@ -43,12 +43,21 @@ extern "C" { #endif -struct service_curve { +struct service_curve_v0 { u_int m1; /* slope of the first segment in bits/sec */ u_int d; /* the x-projection of the first segment in msec */ u_int m2; /* slope of the second segment in bits/sec */ }; +struct service_curve_v1 { + u_int64_t m1; /* slope of the first segment in bits/sec */ + u_int d; /* the x-projection of the first segment in msec */ + u_int64_t m2; /* slope of the second segment in bits/sec */ +}; + +/* Latest version of struct service_curve_vX */ +#define HFSC_SERVICE_CURVE_VERSION 1 + /* special class handles */ #define HFSC_NULLCLASS_HANDLE 0 #define HFSC_MAX_CLASSES 64 @@ -67,12 +76,55 @@ struct service_curve { #define HFSC_UPPERLIMITSC 4 #define HFSC_DEFAULTSC (HFSC_REALTIMESC|HFSC_LINKSHARINGSC) -struct hfsc_classstats { +struct hfsc_classstats_v0 { + u_int class_id; + u_int32_t class_handle; + struct service_curve_v0 rsc; + struct service_curve_v0 fsc; + struct service_curve_v0 usc; /* upper limit service curve */ + + u_int64_t total; /* total work in bytes */ + u_int64_t cumul; /* cumulative work in bytes + done by real-time criteria */ + u_int64_t d; /* deadline */ + u_int64_t e; /* eligible time */ + u_int64_t vt; /* virtual time */ + u_int64_t f; /* fit time for upper-limit */ + + /* info helpful for debugging */ + u_int64_t initvt; /* init virtual time */ + u_int64_t vtoff; /* cl_vt_ipoff */ + u_int64_t cvtmax; /* cl_maxvt */ + u_int64_t myf; /* cl_myf */ + u_int64_t cfmin; /* cl_mincf */ + u_int64_t cvtmin; /* cl_mincvt */ + u_int64_t myfadj; /* cl_myfadj */ + u_int64_t vtadj; /* cl_vtadj */ + u_int64_t cur_time; + u_int32_t machclk_freq; + + u_int qlength; + u_int qlimit; + struct pktcntr xmit_cnt; + struct pktcntr drop_cnt; + u_int period; + + u_int vtperiod; /* vt period sequence no */ + u_int parentperiod; /* parent's vt period seqno */ + int nactive; /* number of active children */ + + /* codel, red and rio related info */ + int qtype; + struct redstats red[3]; + struct codel_stats codel; +}; + +struct hfsc_classstats_v1 { u_int class_id; u_int32_t class_handle; - struct service_curve rsc; - struct service_curve fsc; - struct service_curve usc; /* upper limit service curve */ + struct service_curve_v1 rsc; + struct service_curve_v1 fsc; + struct service_curve_v1 usc; /* upper limit service curve */ u_int64_t total; /* total work in bytes */ u_int64_t cumul; /* cumulative work in bytes @@ -110,6 +162,12 @@ struct hfsc_classstats { struct codel_stats codel; }; +/* + * HFSC_STATS_VERSION is defined in altq.h to work around issues stemming + * from mixing of public-API and internal bits in each scheduler-specific + * header. + */ + #ifdef ALTQ3_COMPAT struct hfsc_interface { char hfsc_ifname[IFNAMSIZ]; /* interface name (e.g., fxp0) */ @@ -310,6 +368,35 @@ struct hfsc_if { #endif }; +/* + * Kernel code always wants the latest version - avoid a bunch of renames in + * the code to the current latest versioned name. + */ +#define service_curve __CONCAT(service_curve_v, HFSC_SERVICE_CURVE_VERSION) + +#else /* _KERNEL */ + +#ifdef PFIOC_USE_LATEST +/* + * Maintaining in-tree consumers of the ioctl interface is easier when that + * code can be written in terms old names that refer to the latest interface + * version as that reduces the required changes in the consumers to those + * that are functionally necessary to accommodate a new interface version. + */ +#define hfsc_classstats __CONCAT(hfsc_classstats_v, HFSC_STATS_VERSION) +#define service_curve __CONCAT(service_curve_v, HFSC_SERVICE_CURVE_VERSION) + +#else +/* + * When building out-of-tree code that is written for the old interface, + * such as may exist in ports for example, resolve the old struct tags to + * the v0 versions. + */ +#define hfsc_classstats __CONCAT(hfsc_classstats_v, 0) +#define service_curve __CONCAT(service_curve_v, 0) + +#endif /* PFIOC_USE_LATEST */ + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/freebsd/sys/net/altq/altq_priq.c b/freebsd/sys/net/altq/altq_priq.c index 46a014ad..ce0830eb 100644 --- a/freebsd/sys/net/altq/altq_priq.c +++ b/freebsd/sys/net/altq/altq_priq.c @@ -201,7 +201,7 @@ priq_remove_queue(struct pf_altq *a) } int -priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { struct priq_if *pif; struct priq_class *cl; diff --git a/freebsd/sys/net/altq/altq_priq.h b/freebsd/sys/net/altq/altq_priq.h index fcbfee98..1a824d60 100644 --- a/freebsd/sys/net/altq/altq_priq.h +++ b/freebsd/sys/net/altq/altq_priq.h @@ -112,6 +112,12 @@ struct priq_classstats { struct codel_stats codel; }; +/* + * PRIQ_STATS_VERSION is defined in altq.h to work around issues stemming + * from mixing of public-API and internal bits in each scheduler-specific + * header. + */ + #ifdef ALTQ3_COMPAT struct priq_class_stats { struct priq_interface iface; diff --git a/freebsd/sys/net/altq/altq_subr.c b/freebsd/sys/net/altq/altq_subr.c index 47a353fc..6da36129 100644 --- a/freebsd/sys/net/altq/altq_subr.c +++ b/freebsd/sys/net/altq/altq_subr.c @@ -294,12 +294,12 @@ altq_assert(file, line, failedexpr) /* * internal representation of token bucket parameters - * rate: byte_per_unittime << 32 - * (((bits_per_sec) / 8) << 32) / machclk_freq - * depth: byte << 32 + * rate: (byte_per_unittime << TBR_SHIFT) / machclk_freq + * (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq + * depth: byte << TBR_SHIFT * */ -#define TBR_SHIFT 32 +#define TBR_SHIFT 29 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) @@ -396,7 +396,20 @@ tbr_set(ifq, profile) if (tbr->tbr_rate > 0) tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; else - tbr->tbr_filluptime = 0xffffffffffffffffLL; + tbr->tbr_filluptime = LLONG_MAX; + /* + * The longest time between tbr_dequeue() calls will be about 1 + * system tick, as the callout that drives it is scheduled once per + * tick. The refill-time detection logic in tbr_dequeue() can only + * properly detect the passage of up to LLONG_MAX machclk ticks. + * Therefore, in order for this logic to function properly in the + * extreme case, the maximum value of tbr_filluptime should be + * LLONG_MAX less one system tick's worth of machclk ticks less + * some additional slop factor (here one more system tick's worth + * of machclk ticks). + */ + if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick)) + tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick; tbr->tbr_token = tbr->tbr_depth; tbr->tbr_last = read_machclk(); tbr->tbr_lastop = ALTDQ_REMOVE; @@ -458,29 +471,6 @@ tbr_timeout(arg) } /* - * get token bucket regulator profile - */ -int -tbr_get(ifq, profile) - struct ifaltq *ifq; - struct tb_profile *profile; -{ - struct tb_regulator *tbr; - - IFQ_LOCK(ifq); - if ((tbr = ifq->altq_tbr) == NULL) { - profile->rate = 0; - profile->depth = 0; - } else { - profile->rate = - (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq); - profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth); - } - IFQ_UNLOCK(ifq); - return (0); -} - -/* * attach a discipline to the interface. if one already exists, it is * overridden. * Locking is done in the discipline specific attach functions. Basically @@ -735,34 +725,34 @@ altq_remove_queue(struct pf_altq *a) * copyout operations, also it is not yet clear which lock to use. */ int -altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) +altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) { int error = 0; switch (a->scheduler) { #ifdef ALTQ_CBQ case ALTQT_CBQ: - error = cbq_getqstats(a, ubuf, nbytes); + error = cbq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_PRIQ case ALTQT_PRIQ: - error = priq_getqstats(a, ubuf, nbytes); + error = priq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_HFSC case ALTQT_HFSC: - error = hfsc_getqstats(a, ubuf, nbytes); + error = hfsc_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_FAIRQ case ALTQT_FAIRQ: - error = fairq_getqstats(a, ubuf, nbytes); + error = fairq_getqstats(a, ubuf, nbytes, version); break; #endif #ifdef ALTQ_CODEL case ALTQT_CODEL: - error = codel_getqstats(a, ubuf, nbytes); + error = codel_getqstats(a, ubuf, nbytes, version); break; #endif default: diff --git a/freebsd/sys/net/altq/altq_var.h b/freebsd/sys/net/altq/altq_var.h index 1909599d..47326a03 100644 --- a/freebsd/sys/net/altq/altq_var.h +++ b/freebsd/sys/net/altq/altq_var.h @@ -196,7 +196,6 @@ u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *); void write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t); void altq_assert(const char *, int, const char *); int tbr_set(struct ifaltq *, struct tb_profile *); -int tbr_get(struct ifaltq *, struct tb_profile *); int altq_pfattach(struct pf_altq *); int altq_pfdetach(struct pf_altq *); @@ -204,40 +203,40 @@ int altq_add(struct pf_altq *); int altq_remove(struct pf_altq *); int altq_add_queue(struct pf_altq *); int altq_remove_queue(struct pf_altq *); -int altq_getqstats(struct pf_altq *, void *, int *); +int altq_getqstats(struct pf_altq *, void *, int *, int); int cbq_pfattach(struct pf_altq *); int cbq_add_altq(struct pf_altq *); int cbq_remove_altq(struct pf_altq *); int cbq_add_queue(struct pf_altq *); int cbq_remove_queue(struct pf_altq *); -int cbq_getqstats(struct pf_altq *, void *, int *); +int cbq_getqstats(struct pf_altq *, void *, int *, int); int codel_pfattach(struct pf_altq *); int codel_add_altq(struct pf_altq *); int codel_remove_altq(struct pf_altq *); -int codel_getqstats(struct pf_altq *, void *, int *); +int codel_getqstats(struct pf_altq *, void *, int *, int); int priq_pfattach(struct pf_altq *); int priq_add_altq(struct pf_altq *); int priq_remove_altq(struct pf_altq *); int priq_add_queue(struct pf_altq *); int priq_remove_queue(struct pf_altq *); -int priq_getqstats(struct pf_altq *, void *, int *); +int priq_getqstats(struct pf_altq *, void *, int *, int); int hfsc_pfattach(struct pf_altq *); int hfsc_add_altq(struct pf_altq *); int hfsc_remove_altq(struct pf_altq *); int hfsc_add_queue(struct pf_altq *); int hfsc_remove_queue(struct pf_altq *); -int hfsc_getqstats(struct pf_altq *, void *, int *); +int hfsc_getqstats(struct pf_altq *, void *, int *, int); int fairq_pfattach(struct pf_altq *); int fairq_add_altq(struct pf_altq *); int fairq_remove_altq(struct pf_altq *); int fairq_add_queue(struct pf_altq *); int fairq_remove_queue(struct pf_altq *); -int fairq_getqstats(struct pf_altq *, void *, int *); +int fairq_getqstats(struct pf_altq *, void *, int *, int); #endif /* _KERNEL */ #endif /* _ALTQ_ALTQ_VAR_H_ */ diff --git a/freebsd/sys/net/altq/if_altq.h b/freebsd/sys/net/altq/if_altq.h index 3dcc96c2..7a093500 100644 --- a/freebsd/sys/net/altq/if_altq.h +++ b/freebsd/sys/net/altq/if_altq.h @@ -143,7 +143,11 @@ struct tb_regulator { #define ALTRQ_PURGE 1 /* purge all packets */ #define ALTQ_IS_READY(ifq) ((ifq)->altq_flags & ALTQF_READY) +#ifdef ALTQ #define ALTQ_IS_ENABLED(ifq) ((ifq)->altq_flags & ALTQF_ENABLED) +#else +#define ALTQ_IS_ENABLED(ifq) 0 +#endif #define ALTQ_NEEDS_CLASSIFY(ifq) ((ifq)->altq_flags & ALTQF_CLASSIFY) #define ALTQ_IS_CNDTNING(ifq) ((ifq)->altq_flags & ALTQF_CNDTNING) diff --git a/freebsd/sys/net/bpf.c b/freebsd/sys/net/bpf.c index 57aff5b8..357fd1b1 100644 --- a/freebsd/sys/net/bpf.c +++ b/freebsd/sys/net/bpf.c @@ -124,6 +124,11 @@ struct bpf_if { CTASSERT(offsetof(struct bpf_if, bif_ext) == 0); +#define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock) +#define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock) +#define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock) +#define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock) + #if defined(DEV_BPF) || defined(NETGRAPH_BPF) #define PRINET 26 /* interruptible */ @@ -217,7 +222,7 @@ SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW, static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW, bpf_stats_sysctl, "bpf statistics portal"); -static VNET_DEFINE(int, bpf_optimize_writers) = 0; +VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0; #define V_bpf_optimize_writers VNET(bpf_optimize_writers) SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(bpf_optimize_writers), 0, @@ -1974,8 +1979,13 @@ bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd) return (EINVAL); } #ifdef BPF_JITTER - /* Filter is copied inside fcode and is perfectly valid. */ - jfunc = bpf_jitter(fcode, flen); + if (cmd != BIOCSETWF) { + /* + * Filter is copied inside fcode and is + * perfectly valid. + */ + jfunc = bpf_jitter(fcode, flen); + } #endif } diff --git a/freebsd/sys/net/bpf_jitter.c b/freebsd/sys/net/bpf_jitter.c index ac3a6ddd..85782597 100644 --- a/freebsd/sys/net/bpf_jitter.c +++ b/freebsd/sys/net/bpf_jitter.c @@ -103,11 +103,13 @@ void bpf_destroy_jit_filter(bpf_jit_filter *filter) { - if (filter->func != bpf_jit_accept_all) - bpf_jit_free(filter->func, filter->size); #ifdef _KERNEL + if (filter->func != bpf_jit_accept_all) + free(filter->func, M_BPFJIT); free(filter, M_BPFJIT); #else + if (filter->func != bpf_jit_accept_all) + munmap(filter->func, filter->size); free(filter); #endif } diff --git a/freebsd/sys/net/bpf_jitter.h b/freebsd/sys/net/bpf_jitter.h index a7c7cd9f..23049d14 100644 --- a/freebsd/sys/net/bpf_jitter.h +++ b/freebsd/sys/net/bpf_jitter.h @@ -88,6 +88,5 @@ void bpf_destroy_jit_filter(bpf_jit_filter *filter); struct bpf_insn; bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, size_t *); -void bpf_jit_free(void *, size_t); #endif /* _NET_BPF_JITTER_H_ */ diff --git a/freebsd/sys/net/bpfdesc.h b/freebsd/sys/net/bpfdesc.h index 95093cff..2ce9204b 100644 --- a/freebsd/sys/net/bpfdesc.h +++ b/freebsd/sys/net/bpfdesc.h @@ -161,11 +161,6 @@ struct xbpf_d { u_int64_t bd_spare[4]; }; -#define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock) -#define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock) -#define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock) -#define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock) - #define BPFIF_FLAG_DYING 1 /* Reject new bpf consumers */ #endif diff --git a/freebsd/sys/net/ieee8023ad_lacp.c b/freebsd/sys/net/ieee8023ad_lacp.c index 2a30f4a3..9a70d6a1 100644 --- a/freebsd/sys/net/ieee8023ad_lacp.c +++ b/freebsd/sys/net/ieee8023ad_lacp.c @@ -196,13 +196,13 @@ static const char *lacp_format_portid(const struct lacp_portid *, char *, static void lacp_dprintf(const struct lacp_port *, const char *, ...) __attribute__((__format__(__printf__, 2, 3))); -static VNET_DEFINE(int, lacp_debug); +VNET_DEFINE_STATIC(int, lacp_debug); #define V_lacp_debug VNET(lacp_debug) SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad"); SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)"); -static VNET_DEFINE(int, lacp_default_strict_mode) = 1; +VNET_DEFINE_STATIC(int, lacp_default_strict_mode) = 1; SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, default_strict_mode, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(lacp_default_strict_mode), 0, "LACP strict protocol compliance default"); @@ -713,6 +713,8 @@ lacp_disable_distributing(struct lacp_port *lp) } lp->lp_state &= ~LACP_STATE_DISTRIBUTING; + if_link_state_change(sc->sc_ifp, + sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN); } static void @@ -747,6 +749,9 @@ lacp_enable_distributing(struct lacp_port *lp) } else /* try to become the active aggregator */ lacp_select_active_aggregator(lsc); + + if_link_state_change(sc->sc_ifp, + sc->sc_active ? LINK_STATE_UP : LINK_STATE_DOWN); } static void @@ -1106,6 +1111,7 @@ lacp_compose_key(struct lacp_port *lp) case IFM_100_VG: case IFM_100_T2: case IFM_100_T: + case IFM_100_SGMII: key = IFM_100_TX; break; case IFM_1000_SX: @@ -1137,14 +1143,31 @@ lacp_compose_key(struct lacp_port *lp) break; case IFM_2500_KX: case IFM_2500_T: + case IFM_2500_X: key = IFM_2500_KX; break; case IFM_5000_T: + case IFM_5000_KR: + case IFM_5000_KR_S: + case IFM_5000_KR1: key = IFM_5000_T; break; case IFM_50G_PCIE: case IFM_50G_CR2: case IFM_50G_KR2: + case IFM_50G_SR2: + case IFM_50G_LR2: + case IFM_50G_LAUI2_AC: + case IFM_50G_LAUI2: + case IFM_50G_AUI2_AC: + case IFM_50G_AUI2: + case IFM_50G_CP: + case IFM_50G_SR: + case IFM_50G_LR: + case IFM_50G_FR: + case IFM_50G_KR_PAM4: + case IFM_50G_AUI1_AC: + case IFM_50G_AUI1: key = IFM_50G_PCIE; break; case IFM_56G_R4: @@ -1157,6 +1180,12 @@ lacp_compose_key(struct lacp_port *lp) case IFM_25G_LR: case IFM_25G_ACC: case IFM_25G_AOC: + case IFM_25G_T: + case IFM_25G_CR_S: + case IFM_25G_CR1: + case IFM_25G_KR_S: + case IFM_25G_AUI: + case IFM_25G_KR1: key = IFM_25G_PCIE; break; case IFM_40G_CR4: @@ -1164,14 +1193,50 @@ lacp_compose_key(struct lacp_port *lp) case IFM_40G_LR4: case IFM_40G_XLPPI: case IFM_40G_KR4: + case IFM_40G_XLAUI: + case IFM_40G_XLAUI_AC: + case IFM_40G_ER4: key = IFM_40G_CR4; break; case IFM_100G_CR4: case IFM_100G_SR4: case IFM_100G_KR4: case IFM_100G_LR4: + case IFM_100G_CAUI4_AC: + case IFM_100G_CAUI4: + case IFM_100G_AUI4_AC: + case IFM_100G_AUI4: + case IFM_100G_CR_PAM4: + case IFM_100G_KR_PAM4: + case IFM_100G_CP2: + case IFM_100G_SR2: + case IFM_100G_DR: + case IFM_100G_KR2_PAM4: + case IFM_100G_CAUI2_AC: + case IFM_100G_CAUI2: + case IFM_100G_AUI2_AC: + case IFM_100G_AUI2: key = IFM_100G_CR4; break; + case IFM_200G_CR4_PAM4: + case IFM_200G_SR4: + case IFM_200G_FR4: + case IFM_200G_LR4: + case IFM_200G_DR4: + case IFM_200G_KR4_PAM4: + case IFM_200G_AUI4_AC: + case IFM_200G_AUI4: + case IFM_200G_AUI8_AC: + case IFM_200G_AUI8: + key = IFM_200G_CR4_PAM4; + break; + case IFM_400G_FR8: + case IFM_400G_LR8: + case IFM_400G_DR4: + case IFM_400G_AUI8_AC: + case IFM_400G_AUI8: + key = IFM_400G_FR8; + break; default: key = subtype; break; diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c index d4c18b46..4d3c303c 100644 --- a/freebsd/sys/net/if.c +++ b/freebsd/sys/net/if.c @@ -300,7 +300,7 @@ int ifqmaxlen = IFQ_MAXLEN; VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); -static VNET_DEFINE(int, if_indexlim) = 8; +VNET_DEFINE_STATIC(int, if_indexlim) = 8; /* Table of ifnet by index. */ VNET_DEFINE(struct ifnet **, ifindex_table); @@ -1769,29 +1769,35 @@ if_data_copy(struct ifnet *ifp, struct if_data *ifd) void if_addr_rlock(struct ifnet *ifp) { - - IF_ADDR_RLOCK(ifp); + MPASS(*(uint64_t *)&ifp->if_addr_et == 0); + epoch_enter_preempt(net_epoch_preempt, &ifp->if_addr_et); } void if_addr_runlock(struct ifnet *ifp) { - - IF_ADDR_RUNLOCK(ifp); + epoch_exit_preempt(net_epoch_preempt, &ifp->if_addr_et); +#ifdef INVARIANTS + bzero(&ifp->if_addr_et, sizeof(struct epoch_tracker)); +#endif } void if_maddr_rlock(if_t ifp) { - IF_ADDR_RLOCK((struct ifnet *)ifp); + MPASS(*(uint64_t *)&ifp->if_maddr_et == 0); + epoch_enter_preempt(net_epoch_preempt, &ifp->if_maddr_et); } void if_maddr_runlock(if_t ifp) { - IF_ADDR_RUNLOCK((struct ifnet *)ifp); + epoch_exit_preempt(net_epoch_preempt, &ifp->if_maddr_et); +#ifdef INVARIANTS + bzero(&ifp->if_maddr_et, sizeof(struct epoch_tracker)); +#endif } /* @@ -1935,7 +1941,7 @@ ifa_ifwithaddr(const struct sockaddr *addr) struct ifnet *ifp; struct ifaddr *ifa; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) @@ -1978,7 +1984,7 @@ ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) struct ifnet *ifp; struct ifaddr *ifa; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; @@ -2008,7 +2014,7 @@ ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) struct ifnet *ifp; struct ifaddr *ifa; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; @@ -2041,7 +2047,7 @@ ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. @@ -2078,7 +2084,6 @@ next: continue; */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { - IF_ADDR_RUNLOCK(ifp); goto done; } } else { @@ -2137,7 +2142,8 @@ ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) if (af >= AF_MAX) return (NULL); - MPASS(in_epoch()); + + MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; @@ -2301,6 +2307,7 @@ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); int (*vlan_tag_p)(struct ifnet *, uint16_t *); +int (*vlan_pcp_p)(struct ifnet *, uint16_t *); int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); @@ -2988,8 +2995,8 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) #ifdef COMPAT_FREEBSD32 caddr_t saved_data = NULL; struct ifmediareq ifmr; -#endif struct ifmediareq *ifmrp; +#endif struct ifnet *ifp; struct ifreq *ifr; int error; @@ -3035,8 +3042,8 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) #endif } - ifmrp = NULL; #ifdef COMPAT_FREEBSD32 + ifmrp = NULL; switch (cmd) { case SIOCGIFMEDIA32: case SIOCGIFXMEDIA32: @@ -3564,6 +3571,7 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa, error = ENOMEM; goto free_llsa_out; } + ll_ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma, ifma_link); } else @@ -3576,6 +3584,7 @@ if_addmulti(struct ifnet *ifp, struct sockaddr *sa, * referenced link layer address. Add the primary address to the * ifnet address list. */ + ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); if (retifma != NULL) @@ -3776,9 +3785,10 @@ if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) if (--ifma->ifma_refcount > 0) return 0; - if (ifp != NULL && detaching == 0) + if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); - + ifma->ifma_flags &= ~IFMA_F_ENQUEUED; + } /* * If this ifma is a network-layer ifma, a link-layer ifma may * have been associated with it. Release it first if so. @@ -3791,8 +3801,11 @@ if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) ll_ifma->ifma_ifp = NULL; /* XXX */ if (--ll_ifma->ifma_refcount == 0) { if (ifp != NULL) { - CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, - ifma_link); + if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { + CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, + ifma_link); + ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; + } } if_freemulti(ll_ifma); } @@ -3922,6 +3935,44 @@ if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) } /* + * Tunnel interfaces can nest, also they may cause infinite recursion + * calls when misconfigured. We'll prevent this by detecting loops. + * High nesting level may cause stack exhaustion. We'll prevent this + * by introducing upper limit. + * + * Return 0, if tunnel nesting count is equal or less than limit. + */ +int +if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, + int limit) +{ + struct m_tag *mtag; + int count; + + count = 1; + mtag = NULL; + while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) { + if (*(struct ifnet **)(mtag + 1) == ifp) { + log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); + return (EIO); + } + count++; + } + if (count > limit) { + log(LOG_NOTICE, + "%s: if_output recursively called too many times(%d)\n", + if_name(ifp), count); + return (EIO); + } + mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + *(struct ifnet **)(mtag + 1) = ifp; + m_tag_prepend(m, mtag); + return (0); +} + +/* * Get the link layer address that was read from the hardware at attach. * * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c index 3e774934..aa56be48 100644 --- a/freebsd/sys/net/if_bridge.c +++ b/freebsd/sys/net/if_bridge.c @@ -231,7 +231,7 @@ struct bridge_softc { u_char sc_defaddr[6]; /* Default MAC address */ }; -static VNET_DEFINE(struct mtx, bridge_list_mtx); +VNET_DEFINE_STATIC(struct mtx, bridge_list_mtx); #define V_bridge_list_mtx VNET(bridge_list_mtx) static eventhandler_tag bridge_detach_cookie; @@ -356,59 +356,59 @@ SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW, 0, "Bridge"); /* only pass IP[46] packets when pfil is enabled */ -static VNET_DEFINE(int, pfil_onlyip) = 1; +VNET_DEFINE_STATIC(int, pfil_onlyip) = 1; #define V_pfil_onlyip VNET(pfil_onlyip) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_onlyip), 0, "Only pass IP packets when pfil is enabled"); /* run pfil hooks on the bridge interface */ -static VNET_DEFINE(int, pfil_bridge) = 1; +VNET_DEFINE_STATIC(int, pfil_bridge) = 1; #define V_pfil_bridge VNET(pfil_bridge) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_bridge), 0, "Packet filter on the bridge interface"); /* layer2 filter with ipfw */ -static VNET_DEFINE(int, pfil_ipfw); +VNET_DEFINE_STATIC(int, pfil_ipfw); #define V_pfil_ipfw VNET(pfil_ipfw) /* layer2 ARP filter with ipfw */ -static VNET_DEFINE(int, pfil_ipfw_arp); +VNET_DEFINE_STATIC(int, pfil_ipfw_arp); #define V_pfil_ipfw_arp VNET(pfil_ipfw_arp) SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_ipfw_arp), 0, "Filter ARP packets through IPFW layer2"); /* run pfil hooks on the member interface */ -static VNET_DEFINE(int, pfil_member) = 1; +VNET_DEFINE_STATIC(int, pfil_member) = 1; #define V_pfil_member VNET(pfil_member) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_member), 0, "Packet filter on the member interface"); /* run pfil hooks on the physical interface for locally destined packets */ -static VNET_DEFINE(int, pfil_local_phys); +VNET_DEFINE_STATIC(int, pfil_local_phys); #define V_pfil_local_phys VNET(pfil_local_phys) SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(pfil_local_phys), 0, "Packet filter on the physical interface for locally destined packets"); /* log STP state changes */ -static VNET_DEFINE(int, log_stp); +VNET_DEFINE_STATIC(int, log_stp); #define V_log_stp VNET(log_stp) SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(log_stp), 0, "Log STP state changes"); /* share MAC with first bridge member */ -static VNET_DEFINE(int, bridge_inherit_mac); +VNET_DEFINE_STATIC(int, bridge_inherit_mac); #define V_bridge_inherit_mac VNET(bridge_inherit_mac) SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, CTLFLAG_RWTUN | CTLFLAG_VNET, &VNET_NAME(bridge_inherit_mac), 0, "Inherit MAC address from the first bridge member"); -static VNET_DEFINE(int, allow_llz_overlap) = 0; +VNET_DEFINE_STATIC(int, allow_llz_overlap) = 0; #define V_allow_llz_overlap VNET(allow_llz_overlap) SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_llz_overlap, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(allow_llz_overlap), 0, @@ -512,7 +512,7 @@ const struct bridge_control bridge_control_table[] = { }; const int bridge_control_table_size = nitems(bridge_control_table); -static VNET_DEFINE(LIST_HEAD(, bridge_softc), bridge_list); +VNET_DEFINE_STATIC(LIST_HEAD(, bridge_softc), bridge_list); #define V_bridge_list VNET(bridge_list) #define BRIDGE_LIST_LOCK_INIT(x) mtx_init(&V_bridge_list_mtx, \ "if_bridge list", NULL, MTX_DEF) @@ -520,7 +520,7 @@ static VNET_DEFINE(LIST_HEAD(, bridge_softc), bridge_list); #define BRIDGE_LIST_LOCK(x) mtx_lock(&V_bridge_list_mtx) #define BRIDGE_LIST_UNLOCK(x) mtx_unlock(&V_bridge_list_mtx) -static VNET_DEFINE(struct if_clone *, bridge_cloner); +VNET_DEFINE_STATIC(struct if_clone *, bridge_cloner); #define V_bridge_cloner VNET(bridge_cloner) static const char bridge_name[] = "bridge"; diff --git a/freebsd/sys/net/if_clone.c b/freebsd/sys/net/if_clone.c index 5a9c20c2..1fa79766 100644 --- a/freebsd/sys/net/if_clone.c +++ b/freebsd/sys/net/if_clone.c @@ -110,7 +110,7 @@ static int ifc_simple_destroy(struct if_clone *, struct ifnet *); static struct mtx if_cloners_mtx; MTX_SYSINIT(if_cloners_lock, &if_cloners_mtx, "if_cloners lock", MTX_DEF); -static VNET_DEFINE(int, if_cloners_count); +VNET_DEFINE_STATIC(int, if_cloners_count); VNET_DEFINE(LIST_HEAD(, if_clone), if_cloners); #define V_if_cloners_count VNET(if_cloners_count) diff --git a/freebsd/sys/net/if_disc.c b/freebsd/sys/net/if_disc.c index b3ff7ff8..1c0bc166 100644 --- a/freebsd/sys/net/if_disc.c +++ b/freebsd/sys/net/if_disc.c @@ -78,7 +78,7 @@ static void disc_clone_destroy(struct ifnet *); static const char discname[] = "disc"; static MALLOC_DEFINE(M_DISC, discname, "Discard interface"); -static VNET_DEFINE(struct if_clone *, disc_cloner); +VNET_DEFINE_STATIC(struct if_clone *, disc_cloner); #define V_disc_cloner VNET(disc_cloner) static int diff --git a/freebsd/sys/net/if_edsc.c b/freebsd/sys/net/if_edsc.c index 6b5671c1..b12e0bb8 100644 --- a/freebsd/sys/net/if_edsc.c +++ b/freebsd/sys/net/if_edsc.c @@ -74,7 +74,7 @@ struct edsc_softc { /* * Attach to the interface cloning framework. */ -static VNET_DEFINE(struct if_clone *, edsc_cloner); +VNET_DEFINE_STATIC(struct if_clone *, edsc_cloner); #define V_edsc_cloner VNET(edsc_cloner) static int edsc_clone_create(struct if_clone *, int, caddr_t); static void edsc_clone_destroy(struct ifnet *); diff --git a/freebsd/sys/net/if_enc.c b/freebsd/sys/net/if_enc.c index 8ca8aa4d..ebfbf5cb 100644 --- a/freebsd/sys/net/if_enc.c +++ b/freebsd/sys/net/if_enc.c @@ -88,9 +88,9 @@ struct enchdr { struct enc_softc { struct ifnet *sc_ifp; }; -static VNET_DEFINE(struct enc_softc *, enc_sc); +VNET_DEFINE_STATIC(struct enc_softc *, enc_sc); #define V_enc_sc VNET(enc_sc) -static VNET_DEFINE(struct if_clone *, enc_cloner); +VNET_DEFINE_STATIC(struct if_clone *, enc_cloner); #define V_enc_cloner VNET(enc_cloner) static int enc_ioctl(struct ifnet *, u_long, caddr_t); @@ -113,10 +113,10 @@ static const char encname[] = "enc"; * some changes to the packet, e.g. address translation. If PFIL hook * consumes mbuf, nothing will be captured. */ -static VNET_DEFINE(int, filter_mask_in) = IPSEC_ENC_BEFORE; -static VNET_DEFINE(int, bpf_mask_in) = IPSEC_ENC_BEFORE; -static VNET_DEFINE(int, filter_mask_out) = IPSEC_ENC_BEFORE; -static VNET_DEFINE(int, bpf_mask_out) = IPSEC_ENC_BEFORE | IPSEC_ENC_AFTER; +VNET_DEFINE_STATIC(int, filter_mask_in) = IPSEC_ENC_BEFORE; +VNET_DEFINE_STATIC(int, bpf_mask_in) = IPSEC_ENC_BEFORE; +VNET_DEFINE_STATIC(int, filter_mask_out) = IPSEC_ENC_BEFORE; +VNET_DEFINE_STATIC(int, bpf_mask_out) = IPSEC_ENC_BEFORE | IPSEC_ENC_AFTER; #define V_filter_mask_in VNET(filter_mask_in) #define V_bpf_mask_in VNET(bpf_mask_in) #define V_filter_mask_out VNET(filter_mask_out) diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c index d727bc2c..69ff3efc 100644 --- a/freebsd/sys/net/if_epair.c +++ b/freebsd/sys/net/if_epair.c @@ -109,6 +109,7 @@ static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t); static int epair_clone_destroy(struct if_clone *, struct ifnet *); static const char epairname[] = "epair"; +static unsigned int next_index = 0; /* Netisr related definitions and sysctl. */ static struct netisr_handler epair_nh = { @@ -181,7 +182,7 @@ STAILQ_HEAD(eid_list, epair_ifp_drain); static MALLOC_DEFINE(M_EPAIR, epairname, "Pair of virtual cross-over connected Ethernet-like interfaces"); -static VNET_DEFINE(struct if_clone *, epair_cloner); +VNET_DEFINE_STATIC(struct if_clone *, epair_cloner); #define V_epair_cloner VNET(epair_cloner) /* @@ -845,12 +846,22 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) /* * Calculate the etheraddr hashing the hostid and the - * interface index. The result would be hopefully unique + * interface index. The result would be hopefully unique. + * Note that the "a" component of an epair instance may get moved + * to a different VNET after creation. In that case its index + * will be freed and the index can get reused by new epair instance. + * Make sure we do not create same etheraddr again. */ getcredhostid(curthread->td_ucred, (unsigned long *)&hostid); if (hostid == 0) arc4rand(&hostid, sizeof(hostid), 0); - key[0] = (uint32_t)ifp->if_index; + + if (ifp->if_index > next_index) + next_index = ifp->if_index; + else + next_index++; + + key[0] = (uint32_t)next_index; key[1] = (uint32_t)(hostid & 0xffffffff); key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff); hash = jenkins_hash32(key, 3, 0); diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c index 3893d331..01e757e5 100644 --- a/freebsd/sys/net/if_ethersubr.c +++ b/freebsd/sys/net/if_ethersubr.c @@ -463,7 +463,8 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m) uint8_t pcp; pcp = ifp->if_pcp; - if (pcp != IFNET_PCP_NONE && !ether_set_pcp(&m, ifp, pcp)) + if (pcp != IFNET_PCP_NONE && ifp->if_type != IFT_L2VLAN && + !ether_set_pcp(&m, ifp, pcp)) return (0); if (PFIL_HOOKED(&V_link_pfil_hook)) { @@ -515,7 +516,7 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m) } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); - random_harvest_queue_ether(m, sizeof(*m), 2); + random_harvest_queue_ether(m, sizeof(*m)); CURVNET_SET_QUIET(ifp->if_vnet); @@ -1293,7 +1294,7 @@ static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency"); -static VNET_DEFINE(int, soft_pad); +VNET_DEFINE_STATIC(int, soft_pad); #define V_soft_pad VNET(soft_pad) SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(soft_pad), 0, diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c index 6a90538a..5a67e7ff 100644 --- a/freebsd/sys/net/if_gif.c +++ b/freebsd/sys/net/if_gif.c @@ -4,6 +4,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> -#include <sys/jail.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> @@ -57,7 +57,6 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #include <sys/priv.h> #include <sys/proc.h> -#include <sys/protosw.h> #include <sys/conf.h> #include <machine/cpu.h> @@ -87,8 +86,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip6.h> #include <netinet6/ip6_ecn.h> #include <netinet6/ip6_var.h> -#include <netinet6/scope6_var.h> -#include <netinet6/ip6protosw.h> #endif /* INET6 */ #include <netinet/ip_encap.h> @@ -100,42 +97,24 @@ __FBSDID("$FreeBSD$"); static const char gifname[] = "gif"; -/* - * gif_mtx protects a per-vnet gif_softc_list. - */ -static VNET_DEFINE(struct mtx, gif_mtx); -#define V_gif_mtx VNET(gif_mtx) -static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); -static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list); -#define V_gif_softc_list VNET(gif_softc_list) +MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); static struct sx gif_ioctl_sx; SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl"); -#define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \ - NULL, MTX_DEF) -#define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx) -#define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx) -#define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx) - void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af); void (*ng_gif_attach_p)(struct ifnet *ifp); void (*ng_gif_detach_p)(struct ifnet *ifp); -static int gif_check_nesting(struct ifnet *, struct mbuf *); -static int gif_set_tunnel(struct ifnet *, struct sockaddr *, - struct sockaddr *); -static void gif_delete_tunnel(struct ifnet *); +static void gif_delete_tunnel(struct gif_softc *); static int gif_ioctl(struct ifnet *, u_long, caddr_t); static int gif_transmit(struct ifnet *, struct mbuf *); static void gif_qflush(struct ifnet *); static int gif_clone_create(struct if_clone *, int, caddr_t); static void gif_clone_destroy(struct ifnet *); -static VNET_DEFINE(struct if_clone *, gif_cloner); +VNET_DEFINE_STATIC(struct if_clone *, gif_cloner); #define V_gif_cloner VNET(gif_cloner) -static int gifmodevent(module_t, int, void *); - SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0, "Generic Tunnel Interface"); @@ -150,26 +129,11 @@ static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0, */ #define MAX_GIF_NEST 1 #endif -static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST; +VNET_DEFINE_STATIC(int, max_gif_nesting) = MAX_GIF_NEST; #define V_max_gif_nesting VNET(max_gif_nesting) SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels"); -/* - * By default, we disallow creation of multiple tunnels between the same - * pair of addresses. Some applications require this functionality so - * we allow control over this check here. - */ -#ifdef XBONEHACK -static VNET_DEFINE(int, parallel_tunnels) = 1; -#else -static VNET_DEFINE(int, parallel_tunnels) = 0; -#endif -#define V_parallel_tunnels VNET(parallel_tunnels) -SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0, - "Allow parallel tunnels?"); - static int gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) { @@ -182,20 +146,15 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) sc->gif_fibnum = BSD_DEFAULT_FIB; #endif /* __rtems__ */ GIF2IFP(sc) = if_alloc(IFT_GIF); - GIF_LOCK_INIT(sc); GIF2IFP(sc)->if_softc = sc; if_initname(GIF2IFP(sc), gifname, unit); GIF2IFP(sc)->if_addrlen = 0; GIF2IFP(sc)->if_mtu = GIF_MTU; GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; -#if 0 - /* turn off ingress filter */ - GIF2IFP(sc)->if_flags |= IFF_LINK2; -#endif GIF2IFP(sc)->if_ioctl = gif_ioctl; - GIF2IFP(sc)->if_transmit = gif_transmit; - GIF2IFP(sc)->if_qflush = gif_qflush; + GIF2IFP(sc)->if_transmit = gif_transmit; + GIF2IFP(sc)->if_qflush = gif_qflush; GIF2IFP(sc)->if_output = gif_output; GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; @@ -204,9 +163,6 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) if (ng_gif_attach_p != NULL) (*ng_gif_attach_p)(GIF2IFP(sc)); - GIF_LIST_LOCK(); - LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list); - GIF_LIST_UNLOCK(); return (0); } @@ -217,10 +173,7 @@ gif_clone_destroy(struct ifnet *ifp) sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; - gif_delete_tunnel(ifp); - GIF_LIST_LOCK(); - LIST_REMOVE(sc, gif_list); - GIF_LIST_UNLOCK(); + gif_delete_tunnel(sc); if (ng_gif_detach_p != NULL) (*ng_gif_detach_p)(ifp); bpfdetach(ifp); @@ -228,8 +181,8 @@ gif_clone_destroy(struct ifnet *ifp) ifp->if_softc = NULL; sx_xunlock(&gif_ioctl_sx); + GIF_WAIT(); if_free(ifp); - GIF_LOCK_DESTROY(sc); free(sc, M_GIF); } @@ -237,10 +190,14 @@ static void vnet_gif_init(const void *unused __unused) { - LIST_INIT(&V_gif_softc_list); - GIF_LIST_LOCK_INIT(); V_gif_cloner = if_clone_simple(gifname, gif_clone_create, gif_clone_destroy, 0); +#ifdef INET + in_gif_init(); +#endif +#ifdef INET6 + in6_gif_init(); +#endif } VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_init, NULL); @@ -250,7 +207,12 @@ vnet_gif_uninit(const void *unused __unused) { if_clone_detach(V_gif_cloner); - GIF_LIST_LOCK_DESTROY(); +#ifdef INET + in_gif_uninit(); +#endif +#ifdef INET6 + in6_gif_uninit(); +#endif } VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_uninit, NULL); @@ -278,67 +240,28 @@ static moduledata_t gif_mod = { DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gif, 1); -int -gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +struct gif_list * +gif_hashinit(void) { - GIF_RLOCK_TRACKER; - const struct ip *ip; - struct gif_softc *sc; - int ret; - - sc = (struct gif_softc *)arg; - if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0) - return (0); + struct gif_list *hash; + int i; - ret = 0; - GIF_RLOCK(sc); + hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE, + M_GIF, M_WAITOK); + for (i = 0; i < GIF_HASH_SIZE; i++) + CK_LIST_INIT(&hash[i]); - /* no physical address */ - if (sc->gif_family == 0) - goto done; - - switch (proto) { -#ifdef INET - case IPPROTO_IPV4: -#endif -#ifdef INET6 - case IPPROTO_IPV6: -#endif - case IPPROTO_ETHERIP: - break; - default: - goto done; - } + return (hash); +} - /* Bail on short packets */ - M_ASSERTPKTHDR(m); - if (m->m_pkthdr.len < sizeof(struct ip)) - goto done; +void +gif_hashdestroy(struct gif_list *hash) +{ - ip = mtod(m, const struct ip *); - switch (ip->ip_v) { -#ifdef INET - case 4: - if (sc->gif_family != AF_INET) - goto done; - ret = in_gif_encapcheck(m, off, proto, arg); - break; -#endif -#ifdef INET6 - case 6: - if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) - goto done; - if (sc->gif_family != AF_INET6) - goto done; - ret = in6_gif_encapcheck(m, off, proto, arg); - break; -#endif - } -done: - GIF_RUNLOCK(sc); - return (ret); + free(hash, M_GIF); } +#define MTAG_GIF 1080679712 static int gif_transmit(struct ifnet *ifp, struct mbuf *m) { @@ -363,11 +286,13 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) } #endif error = ENETDOWN; + GIF_RLOCK(); sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || sc->gif_family == 0 || - (error = gif_check_nesting(ifp, m)) != 0) { + (error = if_tunnel_check_nesting(ifp, m, MTAG_GIF, + V_max_gif_nesting)) != 0) { m_freem(m); goto err; } @@ -450,6 +375,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) err: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + GIF_RUNLOCK(); return (error); } @@ -459,42 +385,6 @@ gif_qflush(struct ifnet *ifp __unused) } -#define MTAG_GIF 1080679712 -static int -gif_check_nesting(struct ifnet *ifp, struct mbuf *m) -{ - struct m_tag *mtag; - int count; - - /* - * gif may cause infinite recursion calls when misconfigured. - * We'll prevent this by detecting loops. - * - * High nesting level may cause stack exhaustion. - * We'll prevent this by introducing upper limit. - */ - count = 1; - mtag = NULL; - while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) { - if (*(struct ifnet **)(mtag + 1) == ifp) { - log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); - return (EIO); - } - count++; - } - if (count > V_max_gif_nesting) { - log(LOG_NOTICE, - "%s: if_output recursively called too many times(%d)\n", - if_name(ifp), count); - return (EIO); - } - mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT); - if (mtag == NULL) - return (ENOMEM); - *(struct ifnet **)(mtag + 1) = ifp; - m_tag_prepend(m, mtag); - return (0); -} int gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, @@ -622,7 +512,8 @@ gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn) break; #endif case AF_LINK: - n = sizeof(struct etherip_header) + sizeof(struct ether_header); + n = sizeof(struct etherip_header) + + sizeof(struct ether_header); if (n > m->m_len) m = m_pullup(m, n); if (m == NULL) @@ -680,20 +571,11 @@ drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } -/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ -int +static int gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - GIF_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq*)data; - struct sockaddr *dst, *src; struct gif_softc *sc; -#ifdef INET - struct sockaddr_in *sin = NULL; -#endif -#ifdef INET6 - struct sockaddr_in6 *sin6 = NULL; -#endif u_int options; int error; @@ -721,176 +603,25 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } error = 0; switch (cmd) { - case SIOCSIFPHYADDR: -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: -#endif - error = EINVAL; - switch (cmd) { -#ifdef INET - case SIOCSIFPHYADDR: - src = (struct sockaddr *) - &(((struct in_aliasreq *)data)->ifra_addr); - dst = (struct sockaddr *) - &(((struct in_aliasreq *)data)->ifra_dstaddr); - break; -#endif -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: - src = (struct sockaddr *) - &(((struct in6_aliasreq *)data)->ifra_addr); - dst = (struct sockaddr *) - &(((struct in6_aliasreq *)data)->ifra_dstaddr); - break; -#endif - default: - goto bad; - } - /* sa_family must be equal */ - if (src->sa_family != dst->sa_family || - src->sa_len != dst->sa_len) - goto bad; - - /* validate sa_len */ - /* check sa_family looks sane for the cmd */ - switch (src->sa_family) { -#ifdef INET - case AF_INET: - if (src->sa_len != sizeof(struct sockaddr_in)) - goto bad; - if (cmd != SIOCSIFPHYADDR) { - error = EAFNOSUPPORT; - goto bad; - } - if (satosin(src)->sin_addr.s_addr == INADDR_ANY || - satosin(dst)->sin_addr.s_addr == INADDR_ANY) { - error = EADDRNOTAVAIL; - goto bad; - } - break; -#endif -#ifdef INET6 - case AF_INET6: - if (src->sa_len != sizeof(struct sockaddr_in6)) - goto bad; - if (cmd != SIOCSIFPHYADDR_IN6) { - error = EAFNOSUPPORT; - goto bad; - } - error = EADDRNOTAVAIL; - if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) - || - IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) - goto bad; - /* - * Check validity of the scope zone ID of the - * addresses, and convert it into the kernel - * internal form if necessary. - */ - error = sa6_embedscope(satosin6(src), 0); - if (error != 0) - goto bad; - error = sa6_embedscope(satosin6(dst), 0); - if (error != 0) - goto bad; - break; -#endif - default: - error = EAFNOSUPPORT; - goto bad; - } - error = gif_set_tunnel(ifp, src, dst); - break; case SIOCDIFPHYADDR: - gif_delete_tunnel(ifp); + if (sc->gif_family == 0) + break; + gif_delete_tunnel(sc); break; +#ifdef INET + case SIOCSIFPHYADDR: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: + error = in_gif_ioctl(sc, cmd, data); + break; +#endif #ifdef INET6 + case SIOCSIFPHYADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: -#endif - if (sc->gif_family == 0) { - error = EADDRNOTAVAIL; - break; - } - GIF_RLOCK(sc); - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - case SIOCGIFPDSTADDR: - if (sc->gif_family != AF_INET) { - error = EADDRNOTAVAIL; - break; - } - sin = (struct sockaddr_in *)&ifr->ifr_addr; - memset(sin, 0, sizeof(*sin)); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - case SIOCGIFPDSTADDR_IN6: - if (sc->gif_family != AF_INET6) { - error = EADDRNOTAVAIL; - break; - } - sin6 = (struct sockaddr_in6 *) - &(((struct in6_ifreq *)data)->ifr_addr); - memset(sin6, 0, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(*sin6); - break; -#endif - default: - error = EAFNOSUPPORT; - } - if (error == 0) { - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - sin->sin_addr = sc->gif_iphdr->ip_src; - break; - case SIOCGIFPDSTADDR: - sin->sin_addr = sc->gif_iphdr->ip_dst; - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - sin6->sin6_addr = sc->gif_ip6hdr->ip6_src; - break; - case SIOCGIFPDSTADDR_IN6: - sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst; - break; -#endif - } - } - GIF_RUNLOCK(sc); - if (error != 0) - break; - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - case SIOCGIFPDSTADDR: - error = prison_if(curthread->td_ucred, - (struct sockaddr *)sin); - if (error != 0) - memset(sin, 0, sizeof(*sin)); - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - case SIOCGIFPDSTADDR_IN6: - error = prison_if(curthread->td_ucred, - (struct sockaddr *)sin6); - if (error == 0) - error = sa6_recoverscope(sin6); - if (error != 0) - memset(sin6, 0, sizeof(*sin6)); -#endif - } + error = in6_gif_ioctl(sc, cmd, data); break; +#endif case SIOCGTUNFIB: ifr->ifr_fib = sc->gif_fibnum; break; @@ -914,159 +645,63 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sizeof(options)); if (error) break; - if (options & ~GIF_OPTMASK) + if (options & ~GIF_OPTMASK) { error = EINVAL; - else - sc->gif_options = options; - break; - default: - error = EINVAL; - break; - } -bad: - sx_xunlock(&gif_ioctl_sx); - return (error); -} - -static void -gif_detach(struct gif_softc *sc) -{ - - sx_assert(&gif_ioctl_sx, SA_XLOCKED); - if (sc->gif_ecookie != NULL) - encap_detach(sc->gif_ecookie); - sc->gif_ecookie = NULL; -} - -static int -gif_attach(struct gif_softc *sc, int af) -{ - - sx_assert(&gif_ioctl_sx, SA_XLOCKED); - switch (af) { -#ifdef INET - case AF_INET: - return (in_gif_attach(sc)); -#endif -#ifdef INET6 - case AF_INET6: - return (in6_gif_attach(sc)); -#endif - } - return (EAFNOSUPPORT); -} - -static int -gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) -{ - struct gif_softc *sc = ifp->if_softc; - struct gif_softc *tsc; + break; + } + if (sc->gif_options != options) { + switch (sc->gif_family) { #ifdef INET - struct ip *ip; + case AF_INET: + error = in_gif_setopts(sc, options); + break; #endif #ifdef INET6 - struct ip6_hdr *ip6; -#endif - void *hdr; - int error = 0; - - if (sc == NULL) - return (ENXIO); - /* Disallow parallel tunnels unless instructed otherwise. */ - if (V_parallel_tunnels == 0) { - GIF_LIST_LOCK(); - LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) { - if (tsc == sc || tsc->gif_family != src->sa_family) - continue; -#ifdef INET - if (tsc->gif_family == AF_INET && - tsc->gif_iphdr->ip_src.s_addr == - satosin(src)->sin_addr.s_addr && - tsc->gif_iphdr->ip_dst.s_addr == - satosin(dst)->sin_addr.s_addr) { - error = EADDRNOTAVAIL; - GIF_LIST_UNLOCK(); - goto bad; - } + case AF_INET6: + error = in6_gif_setopts(sc, options); + break; #endif -#ifdef INET6 - if (tsc->gif_family == AF_INET6 && - IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src, - &satosin6(src)->sin6_addr) && - IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst, - &satosin6(dst)->sin6_addr)) { - error = EADDRNOTAVAIL; - GIF_LIST_UNLOCK(); - goto bad; + default: + /* No need to invoke AF-handler */ + sc->gif_options = options; } -#endif } - GIF_LIST_UNLOCK(); + break; + default: + error = EINVAL; + break; } - switch (src->sa_family) { + if (error == 0 && sc->gif_family != 0) { + if ( #ifdef INET - case AF_INET: - hdr = ip = malloc(sizeof(struct ip), M_GIF, - M_WAITOK | M_ZERO); - ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr; - ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr; - break; + cmd == SIOCSIFPHYADDR || #endif #ifdef INET6 - case AF_INET6: - hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF, - M_WAITOK | M_ZERO); - ip6->ip6_src = satosin6(src)->sin6_addr; - ip6->ip6_dst = satosin6(dst)->sin6_addr; - ip6->ip6_vfc = IPV6_VERSION; - break; + cmd == SIOCSIFPHYADDR_IN6 || #endif - default: - return (EAFNOSUPPORT); + 0) { + ifp->if_drv_flags |= IFF_DRV_RUNNING; + if_link_state_change(ifp, LINK_STATE_UP); + } } - - if (sc->gif_family != src->sa_family) - gif_detach(sc); - if (sc->gif_family == 0 || - sc->gif_family != src->sa_family) - error = gif_attach(sc, src->sa_family); - - GIF_WLOCK(sc); - if (sc->gif_family != 0) - free(sc->gif_hdr, M_GIF); - sc->gif_family = src->sa_family; - sc->gif_hdr = hdr; - GIF_WUNLOCK(sc); -#if defined(INET) || defined(INET6) bad: -#endif - if (error == 0 && sc->gif_family != 0) { - ifp->if_drv_flags |= IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_UP); - } else { - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_DOWN); - } + sx_xunlock(&gif_ioctl_sx); return (error); } static void -gif_delete_tunnel(struct ifnet *ifp) +gif_delete_tunnel(struct gif_softc *sc) { - struct gif_softc *sc = ifp->if_softc; - int family; - if (sc == NULL) - return; - - GIF_WLOCK(sc); - family = sc->gif_family; - sc->gif_family = 0; - GIF_WUNLOCK(sc); - if (family != 0) { - gif_detach(sc); + sx_assert(&gif_ioctl_sx, SA_XLOCKED); + if (sc->gif_family != 0) { + CK_LIST_REMOVE(sc, chain); + /* Wait until it become safe to free gif_hdr */ + GIF_WAIT(); free(sc->gif_hdr, M_GIF); } - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_DOWN); + sc->gif_family = 0; + GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; + if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN); } + diff --git a/freebsd/sys/net/if_gif.h b/freebsd/sys/net/if_gif.h index 556c2acc..501a4e5d 100644 --- a/freebsd/sys/net/if_gif.h +++ b/freebsd/sys/net/if_gif.h @@ -5,6 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + * Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,14 +37,9 @@ #define _NET_IF_GIF_H_ #ifdef _KERNEL -#include <rtems/bsd/local/opt_inet.h> -#include <rtems/bsd/local/opt_inet6.h> - -#include <netinet/in.h> struct ip; struct ip6_hdr; -struct encaptab; extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); @@ -55,8 +51,6 @@ extern void (*ng_gif_detach_p)(struct ifnet *ifp); struct gif_softc { struct ifnet *gif_ifp; - struct rmlock gif_lock; - const struct encaptab *gif_ecookie; int gif_family; int gif_flags; u_int gif_fibnum; @@ -65,28 +59,22 @@ struct gif_softc { union { void *hdr; struct ip *iphdr; -#ifdef INET6 struct ip6_hdr *ip6hdr; -#endif } gif_uhdr; - LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */ + + CK_LIST_ENTRY(gif_softc) chain; }; -#define GIF2IFP(sc) ((sc)->gif_ifp) -#define GIF_LOCK_INIT(sc) rm_init(&(sc)->gif_lock, "gif softc") -#define GIF_LOCK_DESTROY(sc) rm_destroy(&(sc)->gif_lock) -#define GIF_RLOCK_TRACKER struct rm_priotracker gif_tracker -#define GIF_RLOCK(sc) rm_rlock(&(sc)->gif_lock, &gif_tracker) -#define GIF_RUNLOCK(sc) rm_runlock(&(sc)->gif_lock, &gif_tracker) -#define GIF_RLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_RLOCKED) -#define GIF_WLOCK(sc) rm_wlock(&(sc)->gif_lock) -#define GIF_WUNLOCK(sc) rm_wunlock(&(sc)->gif_lock) -#define GIF_WLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_WLOCKED) +CK_LIST_HEAD(gif_list, gif_softc); +MALLOC_DECLARE(M_GIF); +#ifndef GIF_HASH_SIZE +#define GIF_HASH_SIZE (1 << 4) +#endif + +#define GIF2IFP(sc) ((sc)->gif_ifp) #define gif_iphdr gif_uhdr.iphdr #define gif_hdr gif_uhdr.hdr -#ifdef INET6 #define gif_ip6hdr gif_uhdr.ip6hdr -#endif #define GIF_MTU (1280) /* Default MTU */ #define GIF_MTU_MIN (1280) /* Minimum MTU */ @@ -108,21 +96,29 @@ struct etherip_header { /* mbuf adjust factor to force 32-bit alignment of IP header */ #define ETHERIP_ALIGN 2 +#define GIF_RLOCK() struct epoch_tracker gif_et; epoch_enter_preempt(net_epoch_preempt, &gif_et) +#define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gif_et) +#define GIF_WAIT() epoch_wait_preempt(net_epoch_preempt) + /* Prototypes */ +struct gif_list *gif_hashinit(void); +void gif_hashdestroy(struct gif_list *); + void gif_input(struct mbuf *, struct ifnet *, int, uint8_t); int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); -int gif_encapcheck(const struct mbuf *, int, int, void *); -#ifdef INET + +void in_gif_init(void); +void in_gif_uninit(void); int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t); -int in_gif_encapcheck(const struct mbuf *, int, int, void *); -int in_gif_attach(struct gif_softc *); -#endif -#ifdef INET6 +int in_gif_ioctl(struct gif_softc *, u_long, caddr_t); +int in_gif_setopts(struct gif_softc *, u_int); + +void in6_gif_init(void); +void in6_gif_uninit(void); int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t); -int in6_gif_encapcheck(const struct mbuf *, int, int, void *); -int in6_gif_attach(struct gif_softc *); -#endif +int in6_gif_ioctl(struct gif_softc *, u_long, caddr_t); +int in6_gif_setopts(struct gif_softc *, u_int); #endif /* _KERNEL */ #define GIFGOPTS _IOWR('i', 150, struct ifreq) diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c index 0bff9bc9..5ff41259 100644 --- a/freebsd/sys/net/if_gre.c +++ b/freebsd/sys/net/if_gre.c @@ -4,7 +4,7 @@ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1998 The NetBSD Foundation, Inc. - * Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org> + * Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org> * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -43,17 +43,13 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_inet6.h> #include <sys/param.h> -#include <sys/jail.h> #include <sys/kernel.h> #include <sys/lock.h> -#include <sys/libkern.h> #include <sys/malloc.h> #include <sys/module.h> #include <sys/mbuf.h> #include <sys/priv.h> #include <sys/proc.h> -#include <sys/protosw.h> -#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/sx.h> @@ -72,7 +68,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #ifdef INET -#include <netinet/in_systm.h> #include <netinet/in_var.h> #include <netinet/ip.h> #include <netinet/ip_var.h> @@ -82,7 +77,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip6.h> #include <netinet6/in6_var.h> #include <netinet6/ip6_var.h> -#include <netinet6/scope6_var.h> #endif #include <netinet/ip_encap.h> @@ -93,24 +87,16 @@ __FBSDID("$FreeBSD$"); #include <security/mac/mac_framework.h> #define GREMTU 1476 + static const char grename[] = "gre"; -static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); -static VNET_DEFINE(struct mtx, gre_mtx); -#define V_gre_mtx VNET(gre_mtx) -#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \ - MTX_DEF) -#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx) -#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx) -#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx) - -static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list); -#define V_gre_softc_list VNET(gre_softc_list) +MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); + static struct sx gre_ioctl_sx; SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl"); static int gre_clone_create(struct if_clone *, int, caddr_t); static void gre_clone_destroy(struct ifnet *); -static VNET_DEFINE(struct if_clone *, gre_cloner); +VNET_DEFINE_STATIC(struct if_clone *, gre_cloner); #define V_gre_cloner VNET(gre_cloner) static void gre_qflush(struct ifnet *); @@ -118,11 +104,7 @@ static int gre_transmit(struct ifnet *, struct mbuf *); static int gre_ioctl(struct ifnet *, u_long, caddr_t); static int gre_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); - -static void gre_updatehdr(struct gre_softc *); -static int gre_set_tunnel(struct ifnet *, struct sockaddr *, - struct sockaddr *); -static void gre_delete_tunnel(struct ifnet *); +static void gre_delete_tunnel(struct gre_softc *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, @@ -139,7 +121,7 @@ static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, #define MAX_GRE_NEST 1 #endif -static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST; +VNET_DEFINE_STATIC(int, max_gre_nesting) = MAX_GRE_NEST; #define V_max_gre_nesting VNET(max_gre_nesting) SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels"); @@ -147,10 +129,15 @@ SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, static void vnet_gre_init(const void *unused __unused) { - LIST_INIT(&V_gre_softc_list); - GRE_LIST_LOCK_INIT(); + V_gre_cloner = if_clone_simple(grename, gre_clone_create, gre_clone_destroy, 0); +#ifdef INET + in_gre_init(); +#endif +#ifdef INET6 + in6_gre_init(); +#endif } VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gre_init, NULL); @@ -160,7 +147,12 @@ vnet_gre_uninit(const void *unused __unused) { if_clone_detach(V_gre_cloner); - GRE_LIST_LOCK_DESTROY(); +#ifdef INET + in_gre_uninit(); +#endif +#ifdef INET6 + in6_gre_uninit(); +#endif } VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gre_uninit, NULL); @@ -177,7 +169,6 @@ gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) sc->gre_fibnum = BSD_DEFAULT_FIB; #endif /* __rtems__ */ GRE2IFP(sc) = if_alloc(IFT_TUNNEL); - GRE_LOCK_INIT(sc); GRE2IFP(sc)->if_softc = sc; if_initname(GRE2IFP(sc), grename, unit); @@ -191,9 +182,6 @@ gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GRE2IFP(sc)); bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); - GRE_LIST_LOCK(); - LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list); - GRE_LIST_UNLOCK(); return (0); } @@ -204,33 +192,22 @@ gre_clone_destroy(struct ifnet *ifp) sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; - gre_delete_tunnel(ifp); - GRE_LIST_LOCK(); - LIST_REMOVE(sc, gre_list); - GRE_LIST_UNLOCK(); + gre_delete_tunnel(sc); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&gre_ioctl_sx); + GRE_WAIT(); if_free(ifp); - GRE_LOCK_DESTROY(sc); free(sc, M_GRE); } static int gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - GRE_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq *)data; - struct sockaddr *src, *dst; struct gre_softc *sc; -#ifdef INET - struct sockaddr_in *sin = NULL; -#endif -#ifdef INET6 - struct sockaddr_in6 *sin6 = NULL; -#endif uint32_t opt; int error; @@ -255,7 +232,6 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case GREGPROTO: return (EOPNOTSUPP); } - src = dst = NULL; sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { @@ -264,189 +240,25 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } error = 0; switch (cmd) { - case SIOCSIFPHYADDR: -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: -#endif - error = EINVAL; - switch (cmd) { -#ifdef INET - case SIOCSIFPHYADDR: - src = (struct sockaddr *) - &(((struct in_aliasreq *)data)->ifra_addr); - dst = (struct sockaddr *) - &(((struct in_aliasreq *)data)->ifra_dstaddr); - break; -#endif -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: - src = (struct sockaddr *) - &(((struct in6_aliasreq *)data)->ifra_addr); - dst = (struct sockaddr *) - &(((struct in6_aliasreq *)data)->ifra_dstaddr); - break; -#endif - default: - error = EAFNOSUPPORT; - goto end; - } - /* sa_family must be equal */ - if (src->sa_family != dst->sa_family || - src->sa_len != dst->sa_len) - goto end; - - /* validate sa_len */ - switch (src->sa_family) { -#ifdef INET - case AF_INET: - if (src->sa_len != sizeof(struct sockaddr_in)) - goto end; - break; -#endif -#ifdef INET6 - case AF_INET6: - if (src->sa_len != sizeof(struct sockaddr_in6)) - goto end; - break; -#endif - default: - error = EAFNOSUPPORT; - goto end; - } - /* check sa_family looks sane for the cmd */ - error = EAFNOSUPPORT; - switch (cmd) { -#ifdef INET - case SIOCSIFPHYADDR: - if (src->sa_family == AF_INET) - break; - goto end; -#endif -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: - if (src->sa_family == AF_INET6) - break; - goto end; -#endif - } - error = EADDRNOTAVAIL; - switch (src->sa_family) { -#ifdef INET - case AF_INET: - if (satosin(src)->sin_addr.s_addr == INADDR_ANY || - satosin(dst)->sin_addr.s_addr == INADDR_ANY) - goto end; - break; -#endif -#ifdef INET6 - case AF_INET6: - if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) - || - IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) - goto end; - /* - * Check validity of the scope zone ID of the - * addresses, and convert it into the kernel - * internal form if necessary. - */ - error = sa6_embedscope(satosin6(src), 0); - if (error != 0) - goto end; - error = sa6_embedscope(satosin6(dst), 0); - if (error != 0) - goto end; -#endif - } - error = gre_set_tunnel(ifp, src, dst); - break; case SIOCDIFPHYADDR: - gre_delete_tunnel(ifp); + if (sc->gre_family == 0) + break; + gre_delete_tunnel(sc); break; +#ifdef INET + case SIOCSIFPHYADDR: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: + error = in_gre_ioctl(sc, cmd, data); + break; +#endif #ifdef INET6 + case SIOCSIFPHYADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: -#endif - if (sc->gre_family == 0) { - error = EADDRNOTAVAIL; - break; - } - GRE_RLOCK(sc); - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - case SIOCGIFPDSTADDR: - if (sc->gre_family != AF_INET) { - error = EADDRNOTAVAIL; - break; - } - sin = (struct sockaddr_in *)&ifr->ifr_addr; - memset(sin, 0, sizeof(*sin)); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - case SIOCGIFPDSTADDR_IN6: - if (sc->gre_family != AF_INET6) { - error = EADDRNOTAVAIL; - break; - } - sin6 = (struct sockaddr_in6 *) - &(((struct in6_ifreq *)data)->ifr_addr); - memset(sin6, 0, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(*sin6); - break; -#endif - } - if (error == 0) { - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - sin->sin_addr = sc->gre_oip.ip_src; - break; - case SIOCGIFPDSTADDR: - sin->sin_addr = sc->gre_oip.ip_dst; - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - sin6->sin6_addr = sc->gre_oip6.ip6_src; - break; - case SIOCGIFPDSTADDR_IN6: - sin6->sin6_addr = sc->gre_oip6.ip6_dst; - break; -#endif - } - } - GRE_RUNLOCK(sc); - if (error != 0) - break; - switch (cmd) { -#ifdef INET - case SIOCGIFPSRCADDR: - case SIOCGIFPDSTADDR: - error = prison_if(curthread->td_ucred, - (struct sockaddr *)sin); - if (error != 0) - memset(sin, 0, sizeof(*sin)); - break; -#endif -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - case SIOCGIFPDSTADDR_IN6: - error = prison_if(curthread->td_ucred, - (struct sockaddr *)sin6); - if (error == 0) - error = sa6_recoverscope(sin6); - if (error != 0) - memset(sin6, 0, sizeof(*sin6)); -#endif - } + error = in6_gre_ioctl(sc, cmd, data); break; +#endif case SIOCGTUNFIB: ifr->ifr_fib = sc->gre_fibnum; break; @@ -459,40 +271,50 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sc->gre_fibnum = ifr->ifr_fib; break; case GRESKEY: + case GRESOPTS: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; if ((error = copyin(ifr_data_get_ptr(ifr), &opt, sizeof(opt))) != 0) break; - if (sc->gre_key != opt) { - GRE_WLOCK(sc); - sc->gre_key = opt; - gre_updatehdr(sc); - GRE_WUNLOCK(sc); + if (cmd == GRESKEY) { + if (sc->gre_key == opt) + break; + } else if (cmd == GRESOPTS) { + if (opt & ~GRE_OPTMASK) { + error = EINVAL; + break; + } + if (sc->gre_options == opt) + break; } - break; - case GREGKEY: - error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr), - sizeof(sc->gre_key)); - break; - case GRESOPTS: - if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) + switch (sc->gre_family) { +#ifdef INET + case AF_INET: + in_gre_setopts(sc, cmd, opt); break; - if ((error = copyin(ifr_data_get_ptr(ifr), &opt, - sizeof(opt))) != 0) +#endif +#ifdef INET6 + case AF_INET6: + in6_gre_setopts(sc, cmd, opt); break; - if (opt & ~GRE_OPTMASK) - error = EINVAL; - else { - if (sc->gre_options != opt) { - GRE_WLOCK(sc); +#endif + default: + if (cmd == GRESKEY) + sc->gre_key = opt; + else sc->gre_options = opt; - gre_updatehdr(sc); - GRE_WUNLOCK(sc); - } + break; } + /* + * XXX: Do we need to initiate change of interface + * state here? + */ + break; + case GREGKEY: + error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr), + sizeof(sc->gre_key)); break; - case GREGOPTS: error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr), sizeof(sc->gre_options)); @@ -501,40 +323,68 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EINVAL; break; } + if (error == 0 && sc->gre_family != 0) { + if ( +#ifdef INET + cmd == SIOCSIFPHYADDR || +#endif +#ifdef INET6 + cmd == SIOCSIFPHYADDR_IN6 || +#endif + 0) { + ifp->if_drv_flags |= IFF_DRV_RUNNING; + if_link_state_change(ifp, LINK_STATE_UP); + } + } end: sx_xunlock(&gre_ioctl_sx); return (error); } static void -gre_updatehdr(struct gre_softc *sc) +gre_delete_tunnel(struct gre_softc *sc) +{ + + sx_assert(&gre_ioctl_sx, SA_XLOCKED); + if (sc->gre_family != 0) { + CK_LIST_REMOVE(sc, chain); + GRE_WAIT(); + free(sc->gre_hdr, M_GRE); + sc->gre_family = 0; + } + GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; + if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN); +} + +struct gre_list * +gre_hashinit(void) +{ + struct gre_list *hash; + int i; + + hash = malloc(sizeof(struct gre_list) * GRE_HASH_SIZE, + M_GRE, M_WAITOK); + for (i = 0; i < GRE_HASH_SIZE; i++) + CK_LIST_INIT(&hash[i]); + + return (hash); +} + +void +gre_hashdestroy(struct gre_list *hash) +{ + + free(hash, M_GRE); +} + +void +gre_updatehdr(struct gre_softc *sc, struct grehdr *gh) { - struct grehdr *gh = NULL; uint32_t *opts; uint16_t flags; - GRE_WLOCK_ASSERT(sc); - switch (sc->gre_family) { -#ifdef INET - case AF_INET: - sc->gre_hlen = sizeof(struct greip); - sc->gre_oip.ip_v = IPPROTO_IPV4; - sc->gre_oip.ip_hl = sizeof(struct ip) >> 2; - sc->gre_oip.ip_p = IPPROTO_GRE; - gh = &sc->gre_gihdr->gi_gre; - break; -#endif -#ifdef INET6 - case AF_INET6: - sc->gre_hlen = sizeof(struct greip6); - sc->gre_oip6.ip6_vfc = IPV6_VERSION; - sc->gre_oip6.ip6_nxt = IPPROTO_GRE; - gh = &sc->gre_gi6hdr->gi6_gre; - break; -#endif - default: - return; - } + sx_assert(&gre_ioctl_sx, SA_XLOCKED); + flags = 0; opts = gh->gre_opts; if (sc->gre_options & GRE_ENABLE_CSUM) { @@ -556,136 +406,12 @@ gre_updatehdr(struct gre_softc *sc) gh->gre_flags = htons(flags); } -static void -gre_detach(struct gre_softc *sc) -{ - - sx_assert(&gre_ioctl_sx, SA_XLOCKED); - if (sc->gre_ecookie != NULL) - encap_detach(sc->gre_ecookie); - sc->gre_ecookie = NULL; -} - -static int -gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src, - struct sockaddr *dst) -{ - struct gre_softc *sc, *tsc; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif -#ifdef INET - struct ip *ip; -#endif - void *hdr; - int error; - - sx_assert(&gre_ioctl_sx, SA_XLOCKED); - GRE_LIST_LOCK(); - sc = ifp->if_softc; - LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) { - if (tsc == sc || tsc->gre_family != src->sa_family) - continue; -#ifdef INET - if (tsc->gre_family == AF_INET && - tsc->gre_oip.ip_src.s_addr == - satosin(src)->sin_addr.s_addr && - tsc->gre_oip.ip_dst.s_addr == - satosin(dst)->sin_addr.s_addr) { - GRE_LIST_UNLOCK(); - return (EADDRNOTAVAIL); - } -#endif -#ifdef INET6 - if (tsc->gre_family == AF_INET6 && - IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src, - &satosin6(src)->sin6_addr) && - IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst, - &satosin6(dst)->sin6_addr)) { - GRE_LIST_UNLOCK(); - return (EADDRNOTAVAIL); - } -#endif - } - GRE_LIST_UNLOCK(); - - switch (src->sa_family) { -#ifdef INET - case AF_INET: - hdr = ip = malloc(sizeof(struct greip) + - 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); - ip->ip_src = satosin(src)->sin_addr; - ip->ip_dst = satosin(dst)->sin_addr; - break; -#endif -#ifdef INET6 - case AF_INET6: - hdr = ip6 = malloc(sizeof(struct greip6) + - 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); - ip6->ip6_src = satosin6(src)->sin6_addr; - ip6->ip6_dst = satosin6(dst)->sin6_addr; - break; -#endif - default: - return (EAFNOSUPPORT); - } - if (sc->gre_family != 0) - gre_detach(sc); - GRE_WLOCK(sc); - if (sc->gre_family != 0) - free(sc->gre_hdr, M_GRE); - sc->gre_family = src->sa_family; - sc->gre_hdr = hdr; - sc->gre_oseq = 0; - sc->gre_iseq = UINT32_MAX; - gre_updatehdr(sc); - GRE_WUNLOCK(sc); - - error = 0; - switch (src->sa_family) { -#ifdef INET - case AF_INET: - error = in_gre_attach(sc); - break; -#endif -#ifdef INET6 - case AF_INET6: - error = in6_gre_attach(sc); - break; -#endif - } - if (error == 0) { - ifp->if_drv_flags |= IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_UP); - } - return (error); -} - -static void -gre_delete_tunnel(struct ifnet *ifp) -{ - struct gre_softc *sc = ifp->if_softc; - int family; - - GRE_WLOCK(sc); - family = sc->gre_family; - sc->gre_family = 0; - GRE_WUNLOCK(sc); - if (family != 0) { - gre_detach(sc); - free(sc->gre_hdr, M_GRE); - } - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - if_link_state_change(ifp, LINK_STATE_DOWN); -} - int -gre_input(struct mbuf **mp, int *offp, int proto) +gre_input(struct mbuf *m, int off, int proto, void *arg) { - struct gre_softc *sc; + struct gre_softc *sc = arg; struct grehdr *gh; struct ifnet *ifp; - struct mbuf *m; uint32_t *opts; #ifdef notyet uint32_t key; @@ -693,12 +419,8 @@ gre_input(struct mbuf **mp, int *offp, int proto) uint16_t flags; int hlen, isr, af; - m = *mp; - sc = encap_getarg(m); - KASSERT(sc != NULL, ("encap_getarg returned NULL")); - ifp = GRE2IFP(sc); - hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t); + hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t); if (m->m_pkthdr.len < hlen) goto drop; if (m->m_len < hlen) { @@ -706,7 +428,7 @@ gre_input(struct mbuf **mp, int *offp, int proto) if (m == NULL) goto drop; } - gh = (struct grehdr *)mtodo(m, *offp); + gh = (struct grehdr *)mtodo(m, off); flags = ntohs(gh->gre_flags); if (flags & ~GRE_FLAGS_MASK) goto drop; @@ -716,7 +438,7 @@ gre_input(struct mbuf **mp, int *offp, int proto) /* reserved1 field must be zero */ if (((uint16_t *)opts)[1] != 0) goto drop; - if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0) + if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0) goto drop; hlen += 2 * sizeof(uint16_t); opts++; @@ -766,7 +488,7 @@ gre_input(struct mbuf **mp, int *offp, int proto) default: goto drop; } - m_adj(m, *offp + hlen); + m_adj(m, off + hlen); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; M_SETFIB(m, ifp->if_fib); @@ -787,70 +509,23 @@ drop: return (IPPROTO_DONE); } -#define MTAG_GRE 1307983903 -static int -gre_check_nesting(struct ifnet *ifp, struct mbuf *m) -{ - struct m_tag *mtag; - int count; - - count = 1; - mtag = NULL; - while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) { - if (*(struct ifnet **)(mtag + 1) == ifp) { - log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname); - return (EIO); - } - count++; - } - if (count > V_max_gre_nesting) { - log(LOG_NOTICE, - "%s: if_output recursively called too many times(%d)\n", - ifp->if_xname, count); - return (EIO); - } - mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT); - if (mtag == NULL) - return (ENOMEM); - *(struct ifnet **)(mtag + 1) = ifp; - m_tag_prepend(m, mtag); - return (0); -} - static int gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; - int error; -#ifdef MAC - error = mac_ifnet_check_transmit(ifp, m); - if (error != 0) - goto drop; -#endif - if ((ifp->if_flags & IFF_MONITOR) != 0 || - (ifp->if_flags & IFF_UP) == 0) { - error = ENETDOWN; - goto drop; - } - - error = gre_check_nesting(ifp, m); - if (error != 0) - goto drop; - - m->m_flags &= ~(M_BCAST|M_MCAST); if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; - BPF_MTAP2(ifp, &af, sizeof(af), m); - m->m_pkthdr.csum_data = af; /* save af for if_transmit */ + /* + * Now save the af in the inbound pkt csum data, this is a cheat since + * we are using the inbound csum_data field to carry the af over to + * the gre_transmit() routine, avoiding using yet another mtag. + */ + m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); -drop: - m_freem(m); - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - return (error); } static void @@ -870,95 +545,95 @@ gre_setseqn(struct grehdr *gh, uint32_t seq) *opts = htonl(seq); } +#define MTAG_GRE 1307983903 static int gre_transmit(struct ifnet *ifp, struct mbuf *m) { - GRE_RLOCK_TRACKER; struct gre_softc *sc; struct grehdr *gh; - uint32_t iaf, oaf, oseq; - int error, hlen, olen, plen; - int want_seq, want_csum; + uint32_t af; + int error, len; + uint16_t proto; - plen = 0; - sc = ifp->if_softc; - if (sc == NULL) { - error = ENETDOWN; + len = 0; +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error) { m_freem(m); goto drop; } - GRE_RLOCK(sc); - if (sc->gre_family == 0) { - GRE_RUNLOCK(sc); - error = ENETDOWN; +#endif + error = ENETDOWN; + GRE_RLOCK(); + sc = ifp->if_softc; + if ((ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0 || + sc->gre_family == 0 || + (error = if_tunnel_check_nesting(ifp, m, MTAG_GRE, + V_max_gre_nesting)) != 0) { m_freem(m); goto drop; } - iaf = m->m_pkthdr.csum_data; - oaf = sc->gre_family; - hlen = sc->gre_hlen; - want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0; - if (want_seq) - oseq = sc->gre_oseq++; /* XXX */ - else - oseq = 0; /* Make compiler happy. */ - want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0; + af = m->m_pkthdr.csum_data; M_SETFIB(m, sc->gre_fibnum); - M_PREPEND(m, hlen, M_NOWAIT); + M_PREPEND(m, sc->gre_hlen, M_NOWAIT); if (m == NULL) { - GRE_RUNLOCK(sc); error = ENOBUFS; goto drop; } - bcopy(sc->gre_hdr, mtod(m, void *), hlen); - GRE_RUNLOCK(sc); - switch (oaf) { + bcopy(sc->gre_hdr, mtod(m, void *), sc->gre_hlen); + /* Determine GRE proto */ + switch (af) { #ifdef INET case AF_INET: - olen = sizeof(struct ip); + proto = htons(ETHERTYPE_IP); break; #endif #ifdef INET6 case AF_INET6: - olen = sizeof(struct ip6_hdr); + proto = htons(ETHERTYPE_IPV6); break; #endif default: + m_freem(m); error = ENETDOWN; goto drop; } - gh = (struct grehdr *)mtodo(m, olen); - switch (iaf) { + /* Determine offset of GRE header */ + switch (sc->gre_family) { #ifdef INET case AF_INET: - gh->gre_proto = htons(ETHERTYPE_IP); + len = sizeof(struct ip); break; #endif #ifdef INET6 case AF_INET6: - gh->gre_proto = htons(ETHERTYPE_IPV6); + len = sizeof(struct ip6_hdr); break; #endif default: + m_freem(m); error = ENETDOWN; goto drop; } - if (want_seq) - gre_setseqn(gh, oseq); - if (want_csum) { + gh = (struct grehdr *)mtodo(m, len); + gh->gre_proto = proto; + if (sc->gre_options & GRE_ENABLE_SEQ) + gre_setseqn(gh, sc->gre_oseq++); + if (sc->gre_options & GRE_ENABLE_CSUM) { *(uint16_t *)gh->gre_opts = in_cksum_skip(m, - m->m_pkthdr.len, olen); + m->m_pkthdr.len, len); } - plen = m->m_pkthdr.len - hlen; - switch (oaf) { + len = m->m_pkthdr.len - len; + switch (sc->gre_family) { #ifdef INET case AF_INET: - error = in_gre_output(m, iaf, hlen); + error = in_gre_output(m, af, sc->gre_hlen); break; #endif #ifdef INET6 case AF_INET6: - error = in6_gre_output(m, iaf, hlen); + error = in6_gre_output(m, af, sc->gre_hlen); break; #endif default: @@ -970,8 +645,9 @@ drop: if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); - if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); + if_inc_counter(ifp, IFCOUNTER_OBYTES, len); } + GRE_RUNLOCK(); return (error); } diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h index 0eac9e9f..cc8b08f9 100644 --- a/freebsd/sys/net/if_gre.h +++ b/freebsd/sys/net/if_gre.h @@ -64,8 +64,6 @@ struct greip6 { struct gre_softc { struct ifnet *gre_ifp; - LIST_ENTRY(gre_softc) gre_list; - struct rmlock gre_lock; int gre_family; /* AF of delivery header */ uint32_t gre_iseq; uint32_t gre_oseq; @@ -82,18 +80,20 @@ struct gre_softc { struct greip6 *gi6hdr; #endif } gre_uhdr; - const struct encaptab *gre_ecookie; + + CK_LIST_ENTRY(gre_softc) chain; }; +CK_LIST_HEAD(gre_list, gre_softc); +MALLOC_DECLARE(M_GRE); + +#ifndef GRE_HASH_SIZE +#define GRE_HASH_SIZE (1 << 4) +#endif + #define GRE2IFP(sc) ((sc)->gre_ifp) -#define GRE_LOCK_INIT(sc) rm_init(&(sc)->gre_lock, "gre softc") -#define GRE_LOCK_DESTROY(sc) rm_destroy(&(sc)->gre_lock) -#define GRE_RLOCK_TRACKER struct rm_priotracker gre_tracker -#define GRE_RLOCK(sc) rm_rlock(&(sc)->gre_lock, &gre_tracker) -#define GRE_RUNLOCK(sc) rm_runlock(&(sc)->gre_lock, &gre_tracker) -#define GRE_RLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_RLOCKED) -#define GRE_WLOCK(sc) rm_wlock(&(sc)->gre_lock) -#define GRE_WUNLOCK(sc) rm_wunlock(&(sc)->gre_lock) -#define GRE_WLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_WLOCKED) +#define GRE_RLOCK() struct epoch_tracker gre_et; epoch_enter_preempt(net_epoch_preempt, &gre_et) +#define GRE_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gre_et) +#define GRE_WAIT() epoch_wait_preempt(net_epoch_preempt) #define gre_hdr gre_uhdr.hdr #define gre_gihdr gre_uhdr.gihdr @@ -101,15 +101,23 @@ struct gre_softc { #define gre_oip gre_gihdr->gi_ip #define gre_oip6 gre_gi6hdr->gi6_ip6 -int gre_input(struct mbuf **, int *, int); -#ifdef INET -int in_gre_attach(struct gre_softc *); +struct gre_list *gre_hashinit(void); +void gre_hashdestroy(struct gre_list *); + +int gre_input(struct mbuf *, int, int, void *); +void gre_updatehdr(struct gre_softc *, struct grehdr *); + +void in_gre_init(void); +void in_gre_uninit(void); +void in_gre_setopts(struct gre_softc *, u_long, uint32_t); +int in_gre_ioctl(struct gre_softc *, u_long, caddr_t); int in_gre_output(struct mbuf *, int, int); -#endif -#ifdef INET6 -int in6_gre_attach(struct gre_softc *); + +void in6_gre_init(void); +void in6_gre_uninit(void); +void in6_gre_setopts(struct gre_softc *, u_long, uint32_t); +int in6_gre_ioctl(struct gre_softc *, u_long, caddr_t); int in6_gre_output(struct mbuf *, int, int); -#endif /* * CISCO uses special type for GRE tunnel created as part of WCCP * connection, while in fact those packets are just IPv4 encapsulated diff --git a/freebsd/sys/net/if_ipsec.c b/freebsd/sys/net/if_ipsec.c index eaeecd5a..5b1d5e82 100644 --- a/freebsd/sys/net/if_ipsec.c +++ b/freebsd/sys/net/if_ipsec.c @@ -122,9 +122,9 @@ RM_SYSINIT(ipsec_sc_lock, &ipsec_sc_lock, "if_ipsec softc list"); #define IPSEC_SC_WLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_WLOCKED) LIST_HEAD(ipsec_iflist, ipsec_softc); -static VNET_DEFINE(struct ipsec_iflist, ipsec_sc_list); -static VNET_DEFINE(struct ipsec_iflist *, ipsec_sc_htbl); -static VNET_DEFINE(u_long, ipsec_sc_hmask); +VNET_DEFINE_STATIC(struct ipsec_iflist, ipsec_sc_list); +VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec_sc_htbl); +VNET_DEFINE_STATIC(u_long, ipsec_sc_hmask); #define V_ipsec_sc_list VNET(ipsec_sc_list) #define V_ipsec_sc_htbl VNET(ipsec_sc_htbl) #define V_ipsec_sc_hmask VNET(ipsec_sc_hmask) @@ -164,7 +164,7 @@ static void ipsec_qflush(struct ifnet *); static int ipsec_clone_create(struct if_clone *, int, caddr_t); static void ipsec_clone_destroy(struct ifnet *); -static VNET_DEFINE(struct if_clone *, ipsec_cloner); +VNET_DEFINE_STATIC(struct if_clone *, ipsec_cloner); #define V_ipsec_cloner VNET(ipsec_cloner) static int diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c index 578078c2..4d5aaa29 100644 --- a/freebsd/sys/net/if_lagg.c +++ b/freebsd/sys/net/if_lagg.c @@ -75,10 +75,10 @@ __FBSDID("$FreeBSD$"); #include <net/if_lagg.h> #include <net/ieee8023ad_lacp.h> -#define LAGG_RLOCK() epoch_enter_preempt(net_epoch_preempt) -#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) -#define LAGG_RLOCK_ASSERT() MPASS(in_epoch()) -#define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch()) +#define LAGG_RLOCK() struct epoch_tracker lagg_et; epoch_enter_preempt(net_epoch_preempt, &lagg_et) +#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &lagg_et) +#define LAGG_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt)) +#define LAGG_UNLOCK_ASSERT() MPASS(!in_epoch(net_epoch_preempt)) #define LAGG_SX_INIT(_sc) sx_init(&(_sc)->sc_sx, "if_lagg sx") #define LAGG_SX_DESTROY(_sc) sx_destroy(&(_sc)->sc_sx) @@ -99,7 +99,7 @@ static struct { VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ #define V_lagg_list VNET(lagg_list) -static VNET_DEFINE(struct mtx, lagg_list_mtx); +VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx); #define V_lagg_list_mtx VNET(lagg_list_mtx) #define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \ "if_lagg list", NULL, MTX_DEF) @@ -110,7 +110,7 @@ eventhandler_tag lagg_detach_cookie = NULL; static int lagg_clone_create(struct if_clone *, int, caddr_t); static void lagg_clone_destroy(struct ifnet *); -static VNET_DEFINE(struct if_clone *, lagg_cloner); +VNET_DEFINE_STATIC(struct if_clone *, lagg_cloner); #define V_lagg_cloner VNET(lagg_cloner) static const char laggname[] = "lagg"; @@ -251,21 +251,21 @@ SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation"); /* Allow input on any failover links */ -static VNET_DEFINE(int, lagg_failover_rx_all); +VNET_DEFINE_STATIC(int, lagg_failover_rx_all); #define V_lagg_failover_rx_all VNET(lagg_failover_rx_all) SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(lagg_failover_rx_all), 0, "Accept input from any interface in a failover lagg"); /* Default value for using flowid */ -static VNET_DEFINE(int, def_use_flowid) = 0; +VNET_DEFINE_STATIC(int, def_use_flowid) = 0; #define V_def_use_flowid VNET(def_use_flowid) SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN, &VNET_NAME(def_use_flowid), 0, "Default setting for using flow id for load sharing"); /* Default value for flowid shift */ -static VNET_DEFINE(int, def_flowid_shift) = 16; +VNET_DEFINE_STATIC(int, def_flowid_shift) = 16; #define V_def_flowid_shift VNET(def_flowid_shift) SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN, &VNET_NAME(def_flowid_shift), 0, @@ -1739,6 +1739,10 @@ lagg_linkstate(struct lagg_softc *sc) LAGG_XLOCK_ASSERT(sc); + /* LACP handles link state itself */ + if (sc->sc_proto == LAGG_PROTO_LACP) + return; + /* Our link is considered up if at least one of our ports is active */ LAGG_RLOCK(); CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { @@ -1793,6 +1797,7 @@ struct lagg_port * lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp) { struct lagg_port *lp_next, *rval = NULL; + struct epoch_tracker net_et; /* * Search a port which reports an active link state. @@ -1811,15 +1816,14 @@ lagg_link_active(struct lagg_softc *sc, struct lagg_port *lp) } search: - LAGG_RLOCK(); + epoch_enter_preempt(net_epoch_preempt, &net_et); CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { if (LAGG_PORTACTIVE(lp_next)) { - LAGG_RUNLOCK(); - rval = lp_next; - goto found; + epoch_exit_preempt(net_epoch_preempt, &net_et); + return (lp_next); } } - LAGG_RUNLOCK(); + epoch_exit_preempt(net_epoch_preempt, &net_et); found: return (rval); } diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c index d6e9dbaf..b220d7aa 100644 --- a/freebsd/sys/net/if_llatbl.c +++ b/freebsd/sys/net/if_llatbl.c @@ -66,7 +66,7 @@ __FBSDID("$FreeBSD$"); MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables"); -static VNET_DEFINE(SLIST_HEAD(, lltable), lltables) = +VNET_DEFINE_STATIC(SLIST_HEAD(, lltable), lltables) = SLIST_HEAD_INITIALIZER(lltables); #define V_lltables VNET(lltables) @@ -438,6 +438,9 @@ llentry_free(struct llentry *lle) pkts_dropped = lltable_drop_entry_queue(lle); + /* cancel timer */ + if (callout_stop(&lle->lle_timer) > 0) + LLE_REMREF(lle); LLE_FREE_LOCKED(lle); return (pkts_dropped); @@ -524,8 +527,6 @@ lltable_free(struct lltable *llt) IF_AFDATA_WUNLOCK(llt->llt_ifp); CK_LIST_FOREACH_SAFE(lle, &dchain, lle_chain, next) { - if (callout_stop(&lle->lle_timer) > 0) - LLE_REMREF(lle); llentry_free(lle); } diff --git a/freebsd/sys/net/if_loop.c b/freebsd/sys/net/if_loop.c index 988b9f9d..a96144b8 100644 --- a/freebsd/sys/net/if_loop.c +++ b/freebsd/sys/net/if_loop.c @@ -101,7 +101,7 @@ static void lo_clone_destroy(struct ifnet *); VNET_DEFINE(struct ifnet *, loif); /* Used externally */ #ifdef VIMAGE -static VNET_DEFINE(struct if_clone *, lo_cloner); +VNET_DEFINE_STATIC(struct if_clone *, lo_cloner); #define V_lo_cloner VNET(lo_cloner) #endif @@ -382,6 +382,7 @@ loioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCSIFADDR: ifp->if_flags |= IFF_UP; ifp->if_drv_flags |= IFF_DRV_RUNNING; + if_link_state_change(ifp, LINK_STATE_UP); /* * Everything else is done at a higher level. */ diff --git a/freebsd/sys/net/if_media.h b/freebsd/sys/net/if_media.h index 97cd6552..8192a679 100644 --- a/freebsd/sys/net/if_media.h +++ b/freebsd/sys/net/if_media.h @@ -202,6 +202,62 @@ uint64_t ifmedia_baudrate(int); #define IFM_10G_AOC IFM_X(59) /* 10G active optical cable */ #define IFM_25G_ACC IFM_X(60) /* 25G active copper cable */ #define IFM_25G_AOC IFM_X(61) /* 25G active optical cable */ +#define IFM_100_SGMII IFM_X(62) /* 100M media interface */ +#define IFM_2500_X IFM_X(63) /* 2500BaseX */ +#define IFM_5000_KR IFM_X(64) /* 5GBase-KR backplane */ +#define IFM_25G_T IFM_X(65) /* 25GBase-T - RJ45 */ +#define IFM_25G_CR_S IFM_X(66) /* 25GBase-CR (short) */ +#define IFM_25G_CR1 IFM_X(67) /* 25GBase-CR1 DA cable */ +#define IFM_25G_KR_S IFM_X(68) /* 25GBase-KR (short) */ +#define IFM_5000_KR_S IFM_X(69) /* 5GBase-KR backplane (short) */ +#define IFM_5000_KR1 IFM_X(70) /* 5GBase-KR backplane */ +#define IFM_25G_AUI IFM_X(71) /* 25G-AUI-C2C (chip to chip) */ +#define IFM_40G_XLAUI IFM_X(72) /* 40G-XLAUI */ +#define IFM_40G_XLAUI_AC IFM_X(73) /* 40G active copper/optical */ +#define IFM_40G_ER4 IFM_X(74) /* 40GBase-ER4 */ +#define IFM_50G_SR2 IFM_X(75) /* 50GBase-SR2 */ +#define IFM_50G_LR2 IFM_X(76) /* 50GBase-LR2 */ +#define IFM_50G_LAUI2_AC IFM_X(77) /* 50G active copper/optical */ +#define IFM_50G_LAUI2 IFM_X(78) /* 50G-LAUI2 */ +#define IFM_50G_AUI2_AC IFM_X(79) /* 50G active copper/optical */ +#define IFM_50G_AUI2 IFM_X(80) /* 50G-AUI2 */ +#define IFM_50G_CP IFM_X(81) /* 50GBase-CP */ +#define IFM_50G_SR IFM_X(82) /* 50GBase-SR */ +#define IFM_50G_LR IFM_X(83) /* 50GBase-LR */ +#define IFM_50G_FR IFM_X(84) /* 50GBase-FR */ +#define IFM_50G_KR_PAM4 IFM_X(85) /* 50GBase-KR PAM4 */ +#define IFM_25G_KR1 IFM_X(86) /* 25GBase-KR1 */ +#define IFM_50G_AUI1_AC IFM_X(87) /* 50G active copper/optical */ +#define IFM_50G_AUI1 IFM_X(88) /* 50G-AUI1 */ +#define IFM_100G_CAUI4_AC IFM_X(89) /* 100G-CAUI4 active copper/optical */ +#define IFM_100G_CAUI4 IFM_X(90) /* 100G-CAUI4 */ +#define IFM_100G_AUI4_AC IFM_X(91) /* 100G-AUI4 active copper/optical */ +#define IFM_100G_AUI4 IFM_X(92) /* 100G-AUI4 */ +#define IFM_100G_CR_PAM4 IFM_X(93) /* 100GBase-CR PAM4 */ +#define IFM_100G_KR_PAM4 IFM_X(94) /* 100GBase-CR PAM4 */ +#define IFM_100G_CP2 IFM_X(95) /* 100GBase-CP2 */ +#define IFM_100G_SR2 IFM_X(96) /* 100GBase-SR2 */ +#define IFM_100G_DR IFM_X(97) /* 100GBase-DR */ +#define IFM_100G_KR2_PAM4 IFM_X(98) /* 100GBase-KR2 PAM4 */ +#define IFM_100G_CAUI2_AC IFM_X(99) /* 100G-CAUI2 active copper/optical */ +#define IFM_100G_CAUI2 IFM_X(100) /* 100G-CAUI2 */ +#define IFM_100G_AUI2_AC IFM_X(101) /* 100G-AUI2 active copper/optical */ +#define IFM_100G_AUI2 IFM_X(102) /* 100G-AUI2 */ +#define IFM_200G_CR4_PAM4 IFM_X(103) /* 200GBase-CR4 PAM4 */ +#define IFM_200G_SR4 IFM_X(104) /* 200GBase-SR4 */ +#define IFM_200G_FR4 IFM_X(105) /* 200GBase-FR4 */ +#define IFM_200G_LR4 IFM_X(106) /* 200GBase-LR4 */ +#define IFM_200G_DR4 IFM_X(107) /* 200GBase-DR4 */ +#define IFM_200G_KR4_PAM4 IFM_X(108) /* 200GBase-KR4 PAM4 */ +#define IFM_200G_AUI4_AC IFM_X(109) /* 200G-AUI4 active copper/optical */ +#define IFM_200G_AUI4 IFM_X(110) /* 200G-AUI4 */ +#define IFM_200G_AUI8_AC IFM_X(111) /* 200G-AUI8 active copper/optical */ +#define IFM_200G_AUI8 IFM_X(112) /* 200G-AUI8 */ +#define IFM_400G_FR8 IFM_X(113) /* 400GBase-FR8 */ +#define IFM_400G_LR8 IFM_X(114) /* 400GBase-LR8 */ +#define IFM_400G_DR4 IFM_X(115) /* 400GBase-DR4 */ +#define IFM_400G_AUI8_AC IFM_X(116) /* 400G-AUI8 active copper/optical */ +#define IFM_400G_AUI8 IFM_X(117) /* 400G-AUI8 */ /* * Please update ieee8023ad_lacp.c:lacp_compose_key() @@ -432,6 +488,62 @@ struct ifmedia_description { { IFM_10G_AOC, "10GBase-AOC" }, \ { IFM_25G_ACC, "25GBase-ACC" }, \ { IFM_25G_AOC, "25GBase-AOC" }, \ + { IFM_100_SGMII, "100M-SGMII" }, \ + { IFM_2500_X, "2500Base-X" }, \ + { IFM_5000_KR, "5000Base-KR" }, \ + { IFM_25G_T, "25GBase-T" }, \ + { IFM_25G_CR_S, "25GBase-CR-S" }, \ + { IFM_25G_CR1, "25GBase-CR1" }, \ + { IFM_25G_KR_S, "25GBase-KR-S" }, \ + { IFM_5000_KR_S, "5000Base-KR-S" }, \ + { IFM_5000_KR1, "5000Base-KR1" }, \ + { IFM_25G_AUI, "25G-AUI" }, \ + { IFM_40G_XLAUI, "40G-XLAUI" }, \ + { IFM_40G_XLAUI_AC, "40G-XLAUI-AC" }, \ + { IFM_40G_ER4, "40GBase-ER4" }, \ + { IFM_50G_SR2, "50GBase-SR2" }, \ + { IFM_50G_LR2, "50GBase-LR2" }, \ + { IFM_50G_LAUI2_AC, "50G-LAUI2-AC" }, \ + { IFM_50G_LAUI2, "50G-LAUI2" }, \ + { IFM_50G_AUI2_AC, "50G-AUI2-AC" }, \ + { IFM_50G_AUI2, "50G-AUI2" }, \ + { IFM_50G_CP, "50GBase-CP" }, \ + { IFM_50G_SR, "50GBase-SR" }, \ + { IFM_50G_LR, "50GBase-LR" }, \ + { IFM_50G_FR, "50GBase-FR" }, \ + { IFM_50G_KR_PAM4, "50GBase-KR-PAM4" }, \ + { IFM_25G_KR1, "25GBase-KR1" }, \ + { IFM_50G_AUI1_AC, "50G-AUI1-AC" }, \ + { IFM_50G_AUI1, "50G-AUI1" }, \ + { IFM_100G_CAUI4_AC, "100G-CAUI4-AC" }, \ + { IFM_100G_CAUI4, "100G-CAUI4" }, \ + { IFM_100G_AUI4_AC, "100G-AUI4-AC" }, \ + { IFM_100G_AUI4, "100G-AUI4" }, \ + { IFM_100G_CR_PAM4, "100GBase-CR-PAM4" }, \ + { IFM_100G_KR_PAM4, "100GBase-KR-PAM4" }, \ + { IFM_100G_CP2, "100GBase-CP2" }, \ + { IFM_100G_SR2, "100GBase-SR2" }, \ + { IFM_100G_DR, "100GBase-DR" }, \ + { IFM_100G_KR2_PAM4, "100GBase-KR2-PAM4" }, \ + { IFM_100G_CAUI2_AC, "100G-CAUI2-AC" }, \ + { IFM_100G_CAUI2, "100G-CAUI2" }, \ + { IFM_100G_AUI2_AC, "100G-AUI2-AC" }, \ + { IFM_100G_AUI2, "100G-AUI2" }, \ + { IFM_200G_CR4_PAM4, "200GBase-CR4-PAM4" }, \ + { IFM_200G_SR4, "200GBase-SR4" }, \ + { IFM_200G_FR4, "200GBase-FR4" }, \ + { IFM_200G_LR4, "200GBase-LR4" }, \ + { IFM_200G_DR4, "200GBase-DR4" }, \ + { IFM_200G_KR4_PAM4, "200GBase-KR4-PAM4" }, \ + { IFM_200G_AUI4_AC, "200G-AUI4-AC" }, \ + { IFM_200G_AUI4, "200G-AUI4" }, \ + { IFM_200G_AUI8_AC, "200G-AUI8-AC" }, \ + { IFM_200G_AUI8, "200G-AUI8" }, \ + { IFM_400G_FR8, "400GBase-FR8" }, \ + { IFM_400G_LR8, "400GBase-LR8" }, \ + { IFM_400G_DR4, "400GBase-DR4" }, \ + { IFM_400G_AUI8_AC, "400G-AUI8-AC" }, \ + { IFM_400G_AUI8, "400G-AUI8" }, \ { 0, NULL }, \ } @@ -719,6 +831,62 @@ struct ifmedia_baudrate { { IFM_ETHER | IFM_10G_AOC, IF_Gbps(10ULL) }, \ { IFM_ETHER | IFM_25G_ACC, IF_Gbps(25ULL) }, \ { IFM_ETHER | IFM_25G_AOC, IF_Gbps(25ULL) }, \ + { IFM_ETHER | IFM_100_SGMII, IF_Mbps(100) }, \ + { IFM_ETHER | IFM_2500_X, IF_Mbps(2500ULL) }, \ + { IFM_ETHER | IFM_5000_KR, IF_Mbps(5000ULL) }, \ + { IFM_ETHER | IFM_25G_T, IF_Gbps(25ULL) }, \ + { IFM_ETHER | IFM_25G_CR_S, IF_Gbps(25ULL) }, \ + { IFM_ETHER | IFM_25G_CR1, IF_Gbps(25ULL) }, \ + { IFM_ETHER | IFM_25G_KR_S, IF_Gbps(25ULL) }, \ + { IFM_ETHER | IFM_5000_KR_S, IF_Mbps(5000ULL) }, \ + { IFM_ETHER | IFM_5000_KR1, IF_Mbps(5000ULL) }, \ + { IFM_ETHER | IFM_25G_AUI, IF_Gbps(25ULL) }, \ + { IFM_ETHER | IFM_40G_XLAUI, IF_Gbps(40ULL) }, \ + { IFM_ETHER | IFM_40G_XLAUI_AC, IF_Gbps(40ULL) }, \ + { IFM_ETHER | IFM_40G_ER4, IF_Gbps(40ULL) }, \ + { IFM_ETHER | IFM_50G_SR2, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_LR2, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_LAUI2_AC, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_LAUI2, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_AUI2_AC, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_AUI2, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_CP, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_SR, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_LR, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_FR, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_KR_PAM4, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_25G_KR1, IF_Gbps(25ULL) }, \ + { IFM_ETHER | IFM_50G_AUI1_AC, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_50G_AUI1, IF_Gbps(50ULL) }, \ + { IFM_ETHER | IFM_100G_CAUI4_AC, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_CAUI4, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_AUI4_AC, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_AUI4, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_CR_PAM4, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_KR_PAM4, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_CP2, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_SR2, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_DR, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_KR2_PAM4, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_CAUI2_AC, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_CAUI2, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_AUI2_AC, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_100G_AUI2, IF_Gbps(100ULL) }, \ + { IFM_ETHER | IFM_200G_CR4_PAM4, IF_Gbps(200ULL) }, \ + { IFM_ETHER | IFM_200G_SR4, IF_Gbps(200ULL) }, \ + { IFM_ETHER | IFM_200G_FR4, IF_Gbps(200ULL) }, \ + { IFM_ETHER | IFM_200G_LR4, IF_Gbps(200ULL) }, \ + { IFM_ETHER | IFM_200G_DR4, IF_Gbps(200ULL) }, \ + { IFM_ETHER | IFM_200G_KR4_PAM4, IF_Gbps(200ULL) }, \ + { IFM_ETHER | IFM_200G_AUI4_AC, IF_Gbps(200ULL) }, \ + { IFM_ETHER | IFM_200G_AUI4, IF_Gbps(200ULL) }, \ + { IFM_ETHER | IFM_200G_AUI8_AC, IF_Gbps(200ULL) }, \ + { IFM_ETHER | IFM_200G_AUI8, IF_Gbps(200ULL) }, \ + { IFM_ETHER | IFM_400G_FR8, IF_Gbps(400ULL) }, \ + { IFM_ETHER | IFM_400G_LR8, IF_Gbps(400ULL) }, \ + { IFM_ETHER | IFM_400G_DR4, IF_Gbps(400ULL) }, \ + { IFM_ETHER | IFM_400G_AUI8_AC, IF_Gbps(400ULL) }, \ + { IFM_ETHER | IFM_400G_AUI8, IF_Gbps(400ULL) }, \ \ { IFM_IEEE80211 | IFM_IEEE80211_FH1, IF_Mbps(1) }, \ { IFM_IEEE80211 | IFM_IEEE80211_FH2, IF_Mbps(2) }, \ diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c index 1d16b2d7..1102a62d 100644 --- a/freebsd/sys/net/if_stf.c +++ b/freebsd/sys/net/if_stf.c @@ -87,7 +87,6 @@ #include <sys/kernel.h> #include <sys/lock.h> #include <sys/module.h> -#include <sys/protosw.h> #include <sys/proc.h> #include <sys/queue.h> #include <sys/rmlock.h> @@ -153,19 +152,7 @@ static const char stfname[] = "stf"; static MALLOC_DEFINE(M_STF, stfname, "6to4 Tunnel Interface"); static const int ip_stf_ttl = 40; -extern struct domain inetdomain; -static int in_stf_input(struct mbuf **, int *, int); -static struct protosw in_stf_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IPV6, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = in_stf_input, - .pr_output = rip_output, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; - +static int in_stf_input(struct mbuf *, int, int, void *); static char *stfnames[] = {"stf0", "stf", "6to4", NULL}; static int stfmodevent(module_t, int, void *); @@ -185,6 +172,14 @@ static int stf_clone_create(struct if_clone *, char *, size_t, caddr_t); static int stf_clone_destroy(struct if_clone *, struct ifnet *); static struct if_clone *stf_cloner; +static const struct encap_config ipv4_encap_cfg = { + .proto = IPPROTO_IPV6, + .min_length = sizeof(struct ip), + .exact_match = (sizeof(in_addr_t) << 3) + 8, + .check = stf_encapcheck, + .input = in_stf_input +}; + static int stf_clone_match(struct if_clone *ifc, const char *name) { @@ -256,8 +251,7 @@ stf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) ifp->if_dname = stfname; ifp->if_dunit = IF_DUNIT_NONE; - sc->encap_cookie = encap_attach_func(AF_INET, IPPROTO_IPV6, - stf_encapcheck, &in_stf_protosw, sc); + sc->encap_cookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK); if (sc->encap_cookie == NULL) { if_printf(ifp, "attach failed\n"); free(sc, M_STF); @@ -280,7 +274,7 @@ stf_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) struct stf_softc *sc = ifp->if_softc; int err __unused; - err = encap_detach(sc->encap_cookie); + err = ip_encap_detach(sc->encap_cookie); KASSERT(err == 0, ("Unexpected error detaching encap_cookie")); bpfdetach(ifp); if_detach(ifp); @@ -614,18 +608,13 @@ stf_checkaddr6(struct stf_softc *sc, struct in6_addr *in6, struct ifnet *inifp) } static int -in_stf_input(struct mbuf **mp, int *offp, int proto) +in_stf_input(struct mbuf *m, int off, int proto, void *arg) { - struct stf_softc *sc; + struct stf_softc *sc = arg; struct ip *ip; struct ip6_hdr *ip6; - struct mbuf *m; u_int8_t otos, itos; struct ifnet *ifp; - int off; - - m = *mp; - off = *offp; if (proto != IPPROTO_IPV6) { m_freem(m); @@ -633,9 +622,6 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) } ip = mtod(m, struct ip *); - - sc = (struct stf_softc *)encap_getarg(m); - if (sc == NULL || (STF2IFP(sc)->if_flags & IFF_UP) == 0) { m_freem(m); return (IPPROTO_DONE); @@ -686,7 +672,7 @@ in_stf_input(struct mbuf **mp, int *offp, int proto) ip6->ip6_flow |= htonl((u_int32_t)itos << 20); m->m_pkthdr.rcvif = ifp; - + if (bpf_peers_present(ifp->if_bpf)) { /* * We need to prepend the address family as diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c index e4a0b02f..14a75645 100644 --- a/freebsd/sys/net/if_tun.c +++ b/freebsd/sys/net/if_tun.c @@ -924,7 +924,7 @@ tunwrite(struct cdev *dev, struct uio *uio, int flag) m_freem(m); return (EAFNOSUPPORT); } - random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_TUN); + random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); CURVNET_SET(ifp->if_vnet); diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h index f8e18f7e..00fcbebd 100644 --- a/freebsd/sys/net/if_var.h +++ b/freebsd/sys/net/if_var.h @@ -390,6 +390,8 @@ struct ifnet { struct netdump_methods *if_netdump_methods; #endif /* __rtems__ */ struct epoch_context if_epoch_ctx; + struct epoch_tracker if_addr_et; + struct epoch_tracker if_maddr_et; #ifndef __rtems__ /* @@ -421,15 +423,17 @@ struct rtems_ifinputreq { */ #define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF) #define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock) -#define IF_ADDR_RLOCK(if) epoch_enter_preempt(net_epoch_preempt); -#define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt); +#define IF_ADDR_RLOCK(if) struct epoch_tracker if_addr_et; epoch_enter_preempt(net_epoch_preempt, &if_addr_et); +#define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt, &if_addr_et); #define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock) #define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock) -#define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch() || mtx_owned(&(if)->if_addr_lock)) +#define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock)) #define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED) -#define NET_EPOCH_ENTER() epoch_enter_preempt(net_epoch_preempt) -#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt) +#define NET_EPOCH_ENTER() struct epoch_tracker nep_et; epoch_enter_preempt(net_epoch_preempt, &nep_et) +#define NET_EPOCH_ENTER_ET(et) epoch_enter_preempt(net_epoch_preempt, &(et)) +#define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt, &nep_et) +#define NET_EPOCH_EXIT_ET(et) epoch_exit_preempt(net_epoch_preempt, &(et)) /* @@ -505,16 +509,16 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); mtx_init(&(ifp)->if_afdata_lock, "if_afdata", NULL, MTX_DEF) #define IF_AFDATA_WLOCK(ifp) mtx_lock(&(ifp)->if_afdata_lock) -#define IF_AFDATA_RLOCK(ifp) epoch_enter_preempt(net_epoch_preempt) +#define IF_AFDATA_RLOCK(ifp) struct epoch_tracker if_afdata_et; epoch_enter_preempt(net_epoch_preempt, &if_afdata_et) #define IF_AFDATA_WUNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_lock) -#define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt) +#define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt, &if_afdata_et) #define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp) #define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp) #define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_lock) #define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_lock) -#define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch() || mtx_owned(&(ifp)->if_afdata_lock)) -#define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch()); +#define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ifp)->if_afdata_lock)) +#define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt)); #define IF_AFDATA_WLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_OWNED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_NOTOWNED) @@ -567,12 +571,14 @@ void ifa_ref(struct ifaddr *ifa); * Multicast address structure. This is analogous to the ifaddr * structure except that it keeps track of multicast addresses. */ +#define IFMA_F_ENQUEUED 0x1 struct ifmultiaddr { CK_STAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */ struct sockaddr *ifma_addr; /* address this membership is for */ struct sockaddr *ifma_lladdr; /* link-layer translation, if any */ struct ifnet *ifma_ifp; /* back-pointer to interface */ u_int ifma_refcount; /* reference count */ + int ifma_flags; void *ifma_protospec; /* protocol-specific state, if any */ struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */ struct epoch_context ifma_epoch_ctx; @@ -596,16 +602,16 @@ extern struct sx ifnet_sxlock; * write, but also whether it was acquired with sleep support or not. */ #define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) -#define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch()) +#define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch(net_epoch_preempt)) #define IFNET_WLOCK_ASSERT() do { \ sx_assert(&ifnet_sxlock, SA_XLOCKED); \ rw_assert(&ifnet_rwlock, RA_WLOCKED); \ } while (0) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock) -#define IFNET_RLOCK_NOSLEEP() epoch_enter_preempt(net_epoch_preempt) +#define IFNET_RLOCK_NOSLEEP() struct epoch_tracker ifnet_rlock_et; epoch_enter_preempt(net_epoch_preempt, &ifnet_rlock_et) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) -#define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt) +#define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt, &ifnet_rlock_et) /* * Look up an ifnet given its index; the _ref variant also acquires a @@ -663,6 +669,7 @@ int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3); void if_ref(struct ifnet *); void if_rele(struct ifnet *); int if_setlladdr(struct ifnet *, const u_char *, int); +int if_tunnel_check_nesting(struct ifnet *, struct mbuf *, uint32_t, int); void if_up(struct ifnet *); int ifioctl(struct socket *, u_long, caddr_t, struct thread *); int ifpromisc(struct ifnet *, int); diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c index 26f6bbde..22061dc4 100644 --- a/freebsd/sys/net/if_vlan.c +++ b/freebsd/sys/net/if_vlan.c @@ -337,7 +337,7 @@ static void vlan_lladdr_fn(void *arg, int pending); static struct if_clone *vlan_cloner; #ifdef VIMAGE -static VNET_DEFINE(struct if_clone *, vlan_cloner); +VNET_DEFINE_STATIC(struct if_clone *, vlan_cloner); #define V_vlan_cloner VNET(vlan_cloner) #endif @@ -760,6 +760,18 @@ vlan_tag(struct ifnet *ifp, uint16_t *vidp) return (0); } +static int +vlan_pcp(struct ifnet *ifp, uint16_t *pcpp) +{ + struct ifvlan *ifv; + + if (ifp->if_type != IFT_L2VLAN) + return (EINVAL); + ifv = ifp->if_softc; + *pcpp = ifv->ifv_pcp; + return (0); +} + /* * Return a driver specific cookie for this interface. Synchronization * with setcookie must be provided by the driver. @@ -863,6 +875,7 @@ vlan_modevent(module_t mod, int type, void *data) vlan_cookie_p = vlan_cookie; vlan_setcookie_p = vlan_setcookie; vlan_tag_p = vlan_tag; + vlan_pcp_p = vlan_pcp; vlan_devat_p = vlan_devat; #ifndef VIMAGE vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match, @@ -1424,6 +1437,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) ifp->if_resolvemulti = p->if_resolvemulti; ifp->if_addrlen = p->if_addrlen; ifp->if_broadcastaddr = p->if_broadcastaddr; + ifp->if_pcp = ifv->ifv_pcp; /* * Copy only a selected subset of flags from the parent. @@ -1948,6 +1962,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; } ifv->ifv_pcp = ifr->ifr_vlan_pcp; + ifp->if_pcp = ifv->ifv_pcp; vlan_tag_recalculate(ifv); /* broadcast event about PCP change */ EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP); diff --git a/freebsd/sys/net/if_vlan_var.h b/freebsd/sys/net/if_vlan_var.h index b926df80..0b66ec0a 100644 --- a/freebsd/sys/net/if_vlan_var.h +++ b/freebsd/sys/net/if_vlan_var.h @@ -132,6 +132,8 @@ struct vlanreq { ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_trunkdev_p)((_ifp)) : NULL) #define VLAN_TAG(_ifp, _vid) \ ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_tag_p)((_ifp), (_vid)) : EINVAL) +#define VLAN_PCP(_ifp, _pcp) \ + ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_pcp_p)((_ifp), (_pcp)) : EINVAL) #define VLAN_COOKIE(_ifp) \ ((_ifp)->if_type == IFT_L2VLAN ? (*vlan_cookie_p)((_ifp)) : NULL) #define VLAN_SETCOOKIE(_ifp, _cookie) \ @@ -144,6 +146,7 @@ extern void (*vlan_trunk_cap_p)(struct ifnet *); extern struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); extern struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); extern int (*vlan_tag_p)(struct ifnet *, uint16_t *); +extern int (*vlan_pcp_p)(struct ifnet *, uint16_t *); extern int (*vlan_setcookie_p)(struct ifnet *, void *); extern void *(*vlan_cookie_p)(struct ifnet *); diff --git a/freebsd/sys/net/iflib.h b/freebsd/sys/net/iflib.h index 140c488b..6e1eee63 100644 --- a/freebsd/sys/net/iflib.h +++ b/freebsd/sys/net/iflib.h @@ -45,13 +45,6 @@ struct if_clone; */ typedef uint16_t qidx_t; #define QIDX_INVALID 0xFFFF -/* - * Most cards can handle much larger TSO requests - * but the FreeBSD TCP stack will break on larger - * values - */ -#define FREEBSD_TSO_SIZE_MAX 65518 - struct iflib_ctx; typedef struct iflib_ctx *if_ctx_t; @@ -216,6 +209,7 @@ typedef struct if_softc_ctx { int isc_tx_tso_size_max; int isc_tx_tso_segsize_max; int isc_tx_csum_flags; + int isc_capabilities; int isc_capenable; int isc_rss_table_size; int isc_rss_table_mask; @@ -242,6 +236,8 @@ struct if_shared_ctx { bus_size_t isc_q_align; bus_size_t isc_tx_maxsize; bus_size_t isc_tx_maxsegsize; + bus_size_t isc_tso_maxsize; + bus_size_t isc_tso_maxsegsize; bus_size_t isc_rx_maxsize; bus_size_t isc_rx_maxsegsize; int isc_rx_nsegments; diff --git a/freebsd/sys/net/netisr.c b/freebsd/sys/net/netisr.c index 8f2430eb..a3da964b 100644 --- a/freebsd/sys/net/netisr.c +++ b/freebsd/sys/net/netisr.c @@ -227,7 +227,7 @@ static struct netisr_proto netisr_proto[NETISR_MAXPROT]; * mechanism to stop netisr processing for vnet teardown. * Apart from that we expect a VNET to always be enabled. */ -static VNET_DEFINE(u_int, netisr_enable[NETISR_MAXPROT]); +VNET_DEFINE_STATIC(u_int, netisr_enable[NETISR_MAXPROT]); #define V_netisr_enable VNET(netisr_enable) #endif diff --git a/freebsd/sys/net/pfvar.h b/freebsd/sys/net/pfvar.h index 824b8ec3..5e80b665 100644 --- a/freebsd/sys/net/pfvar.h +++ b/freebsd/sys/net/pfvar.h @@ -621,9 +621,9 @@ struct pf_rule { #define PFRULE_IFBOUND 0x00010000 /* if-bound */ #define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */ -#define PFSTATE_HIWAT 10000 /* default state table size */ -#define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ -#define PFSTATE_ADAPT_END 12000 /* default adaptive timeout end */ +#define PFSTATE_HIWAT 100000 /* default state table size */ +#define PFSTATE_ADAPT_START 60000 /* default adaptive timeout start */ +#define PFSTATE_ADAPT_END 120000 /* default adaptive timeout end */ struct pf_threshold { @@ -1300,21 +1300,56 @@ struct pfioc_limit { unsigned limit; }; -struct pfioc_altq { +struct pfioc_altq_v0 { u_int32_t action; u_int32_t ticket; u_int32_t nr; - struct pf_altq altq; + struct pf_altq_v0 altq; }; -struct pfioc_qstats { +struct pfioc_altq_v1 { + u_int32_t action; + u_int32_t ticket; + u_int32_t nr; + /* + * Placed here so code that only uses the above parameters can be + * written entirely in terms of the v0 or v1 type. + */ + u_int32_t version; + struct pf_altq_v1 altq; +}; + +/* + * Latest version of struct pfioc_altq_vX. This must move in lock-step with + * the latest version of struct pf_altq_vX as it has that struct as a + * member. + */ +#define PFIOC_ALTQ_VERSION PF_ALTQ_VERSION + +struct pfioc_qstats_v0 { + u_int32_t ticket; + u_int32_t nr; + void *buf; + int nbytes; + u_int8_t scheduler; +}; + +struct pfioc_qstats_v1 { u_int32_t ticket; u_int32_t nr; void *buf; int nbytes; u_int8_t scheduler; + /* + * Placed here so code that only uses the above parameters can be + * written entirely in terms of the v0 or v1 type. + */ + u_int32_t version; /* Requested version of stats struct */ }; +/* Latest version of struct pfioc_qstats_vX */ +#define PFIOC_QSTATS_VERSION 1 + struct pfioc_ruleset { u_int32_t nr; char path[MAXPATHLEN]; @@ -1403,11 +1438,16 @@ struct pfioc_iface { #define DIOCKILLSTATES _IOWR('D', 41, struct pfioc_state_kill) #define DIOCSTARTALTQ _IO ('D', 42) #define DIOCSTOPALTQ _IO ('D', 43) -#define DIOCADDALTQ _IOWR('D', 45, struct pfioc_altq) -#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq) -#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq) -#define DIOCCHANGEALTQ _IOWR('D', 49, struct pfioc_altq) -#define DIOCGETQSTATS _IOWR('D', 50, struct pfioc_qstats) +#define DIOCADDALTQV0 _IOWR('D', 45, struct pfioc_altq_v0) +#define DIOCADDALTQV1 _IOWR('D', 45, struct pfioc_altq_v1) +#define DIOCGETALTQSV0 _IOWR('D', 47, struct pfioc_altq_v0) +#define DIOCGETALTQSV1 _IOWR('D', 47, struct pfioc_altq_v1) +#define DIOCGETALTQV0 _IOWR('D', 48, struct pfioc_altq_v0) +#define DIOCGETALTQV1 _IOWR('D', 48, struct pfioc_altq_v1) +#define DIOCCHANGEALTQV0 _IOWR('D', 49, struct pfioc_altq_v0) +#define DIOCCHANGEALTQV1 _IOWR('D', 49, struct pfioc_altq_v1) +#define DIOCGETQSTATSV0 _IOWR('D', 50, struct pfioc_qstats_v0) +#define DIOCGETQSTATSV1 _IOWR('D', 50, struct pfioc_qstats_v1) #define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr) #define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr) #define DIOCGETADDRS _IOWR('D', 53, struct pfioc_pooladdr) @@ -1445,11 +1485,63 @@ struct pfioc_iface { #define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) #define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) #define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) -struct pf_ifspeed { +struct pf_ifspeed_v0 { char ifname[IFNAMSIZ]; u_int32_t baudrate; }; -#define DIOCGIFSPEED _IOWR('D', 92, struct pf_ifspeed) + +struct pf_ifspeed_v1 { + char ifname[IFNAMSIZ]; + u_int32_t baudrate32; + /* layout identical to struct pf_ifspeed_v0 up to this point */ + u_int64_t baudrate; +}; + +/* Latest version of struct pf_ifspeed_vX */ +#define PF_IFSPEED_VERSION 1 + +#define DIOCGIFSPEEDV0 _IOWR('D', 92, struct pf_ifspeed_v0) +#define DIOCGIFSPEEDV1 _IOWR('D', 92, struct pf_ifspeed_v1) + +/* + * Compatibility and convenience macros + */ +#ifndef _KERNEL +#ifdef PFIOC_USE_LATEST +/* + * Maintaining in-tree consumers of the ioctl interface is easier when that + * code can be written in terms old names that refer to the latest interface + * version as that reduces the required changes in the consumers to those + * that are functionally necessary to accommodate a new interface version. + */ +#define pfioc_altq __CONCAT(pfioc_altq_v, PFIOC_ALTQ_VERSION) +#define pfioc_qstats __CONCAT(pfioc_qstats_v, PFIOC_QSTATS_VERSION) +#define pf_ifspeed __CONCAT(pf_ifspeed_v, PF_IFSPEED_VERSION) + +#define DIOCADDALTQ __CONCAT(DIOCADDALTQV, PFIOC_ALTQ_VERSION) +#define DIOCGETALTQS __CONCAT(DIOCGETALTQSV, PFIOC_ALTQ_VERSION) +#define DIOCGETALTQ __CONCAT(DIOCGETALTQV, PFIOC_ALTQ_VERSION) +#define DIOCCHANGEALTQ __CONCAT(DIOCCHANGEALTQV, PFIOC_ALTQ_VERSION) +#define DIOCGETQSTATS __CONCAT(DIOCGETQSTATSV, PFIOC_QSTATS_VERSION) +#define DIOCGIFSPEED __CONCAT(DIOCGIFSPEEDV, PF_IFSPEED_VERSION) +#else +/* + * When building out-of-tree code that is written for the old interface, + * such as may exist in ports for example, resolve the old struct tags and + * ioctl command names to the v0 versions. + */ +#define pfioc_altq __CONCAT(pfioc_altq_v, 0) +#define pfioc_qstats __CONCAT(pfioc_qstats_v, 0) +#define pf_ifspeed __CONCAT(pf_ifspeed_v, 0) + +#define DIOCADDALTQ __CONCAT(DIOCADDALTQV, 0) +#define DIOCGETALTQS __CONCAT(DIOCGETALTQSV, 0) +#define DIOCGETALTQ __CONCAT(DIOCGETALTQV, 0) +#define DIOCCHANGEALTQ __CONCAT(DIOCCHANGEALTQV, 0) +#define DIOCGETQSTATS __CONCAT(DIOCGETQSTATSV, 0) +#define DIOCGIFSPEED __CONCAT(DIOCGIFSPEEDV, 0) +#endif /* PFIOC_USE_LATEST */ +#endif /* _KERNEL */ #ifdef _KERNEL LIST_HEAD(pf_src_node_list, pf_src_node); @@ -1470,7 +1562,7 @@ struct pf_idhash { extern u_long pf_hashmask; extern u_long pf_srchashmask; -#define PF_HASHSIZ (32768) +#define PF_HASHSIZ (131072) #define PF_SRCHASHSIZ (PF_HASHSIZ/4) VNET_DECLARE(struct pf_keyhash *, pf_keyhash); VNET_DECLARE(struct pf_idhash *, pf_idhash); diff --git a/freebsd/sys/net/radix.c b/freebsd/sys/net/radix.c index bbfd5f65..9fbfb298 100644 --- a/freebsd/sys/net/radix.c +++ b/freebsd/sys/net/radix.c @@ -41,7 +41,7 @@ #ifdef _KERNEL #include <sys/lock.h> #include <sys/mutex.h> -#include <sys/rwlock.h> +#include <sys/rmlock.h> #include <sys/systm.h> #include <sys/malloc.h> #include <sys/syslog.h> diff --git a/freebsd/sys/net/radix.h b/freebsd/sys/net/radix.h index 05f0f490..a0e5e5c5 100644 --- a/freebsd/sys/net/radix.h +++ b/freebsd/sys/net/radix.h @@ -38,7 +38,7 @@ #ifdef _KERNEL #include <sys/_lock.h> #include <sys/_mutex.h> -#include <sys/_rwlock.h> +#include <sys/_rmlock.h> #endif #ifdef MALLOC_DECLARE @@ -138,7 +138,7 @@ struct radix_node_head { rn_close_t *rnh_close; /*do something when the last ref drops*/ struct radix_node rnh_nodes[3]; /* empty tree for common case */ #ifdef _KERNEL - struct rwlock rnh_lock; /* locks entire radix tree */ + struct rmlock rnh_lock; /* locks entire radix tree */ #endif }; @@ -159,18 +159,18 @@ void rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes, #define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO)) #define R_Free(p) free((caddr_t)p, M_RTABLE); +#define RADIX_NODE_HEAD_RLOCK_TRACKER struct rm_priotracker _rnh_tracker #define RADIX_NODE_HEAD_LOCK_INIT(rnh) \ - rw_init_flags(&(rnh)->rnh_lock, "radix node head", 0) -#define RADIX_NODE_HEAD_LOCK(rnh) rw_wlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_UNLOCK(rnh) rw_wunlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_RLOCK(rnh) rw_rlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_RUNLOCK(rnh) rw_runlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_LOCK_TRY_UPGRADE(rnh) rw_try_upgrade(&(rnh)->rnh_lock) - - -#define RADIX_NODE_HEAD_DESTROY(rnh) rw_destroy(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_LOCKED) -#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED) + rm_init(&(rnh)->rnh_lock, "radix node head") +#define RADIX_NODE_HEAD_LOCK(rnh) rm_wlock(&(rnh)->rnh_lock) +#define RADIX_NODE_HEAD_UNLOCK(rnh) rm_wunlock(&(rnh)->rnh_lock) +#define RADIX_NODE_HEAD_RLOCK(rnh) rm_rlock(&(rnh)->rnh_lock,\ + &_rnh_tracker) +#define RADIX_NODE_HEAD_RUNLOCK(rnh) rm_runlock(&(rnh)->rnh_lock,\ + &_rnh_tracker) +#define RADIX_NODE_HEAD_DESTROY(rnh) rm_destroy(&(rnh)->rnh_lock) +#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rm_assert(&(rnh)->rnh_lock, RA_LOCKED) +#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rm_assert(&(rnh)->rnh_lock, RA_WLOCKED) #endif /* _KERNEL */ int rn_inithead(void **, int); diff --git a/freebsd/sys/net/radix_mpath.c b/freebsd/sys/net/radix_mpath.c index 7eccd8f1..3f90a9da 100644 --- a/freebsd/sys/net/radix_mpath.c +++ b/freebsd/sys/net/radix_mpath.c @@ -45,12 +45,15 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/lock.h> #include <sys/malloc.h> +#include <sys/mutex.h> #include <sys/socket.h> #include <sys/domain.h> #include <sys/syslog.h> #include <net/radix.h> #include <net/radix_mpath.h> +#include <sys/rmlock.h> #include <net/route.h> #include <net/route_var.h> #include <net/if.h> diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c index c2348e31..3cd909c1 100644 --- a/freebsd/sys/net/route.c +++ b/freebsd/sys/net/route.c @@ -56,6 +56,8 @@ #include <sys/proc.h> #include <sys/domain.h> #include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/rmlock.h> #include <net/if.h> #include <net/if_var.h> @@ -141,7 +143,7 @@ VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ */ #define RNTORT(p) ((struct rtentry *)(p)) -static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */ +VNET_DEFINE_STATIC(uma_zone_t, rtzone); /* Routing table UMA zone. */ #define V_rtzone VNET(rtzone) static int rtrequest1_fib_change(struct rib_head *, struct rt_addrinfo *, @@ -472,6 +474,7 @@ struct rtentry * rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *newrt; @@ -762,7 +765,7 @@ ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway, struct ifaddr *ifa; int not_found = 0; - MPASS(in_epoch()); + MPASS(in_epoch(net_epoch_preempt)); if ((flags & RTF_GATEWAY) == 0) { /* * If we are adding a route to an interface, @@ -955,6 +958,7 @@ int rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, uint32_t flowid, struct rt_addrinfo *info) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; @@ -1976,6 +1980,7 @@ rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netma static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { + RIB_RLOCK_TRACKER; struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; diff --git a/freebsd/sys/net/route.h b/freebsd/sys/net/route.h index 28f1db01..15ec1b3e 100644 --- a/freebsd/sys/net/route.h +++ b/freebsd/sys/net/route.h @@ -410,10 +410,8 @@ struct rt_addrinfo { } while (0) #define RO_RTFREE(_ro) do { \ - if ((_ro)->ro_rt) { \ - RT_LOCK((_ro)->ro_rt); \ - RTFREE_LOCKED((_ro)->ro_rt); \ - } \ + if ((_ro)->ro_rt) \ + RTFREE((_ro)->ro_rt); \ } while (0) #define RO_INVALIDATE_CACHE(ro) do { \ diff --git a/freebsd/sys/net/route_var.h b/freebsd/sys/net/route_var.h index f32dbc21..9d0d1931 100644 --- a/freebsd/sys/net/route_var.h +++ b/freebsd/sys/net/route_var.h @@ -44,18 +44,19 @@ struct rib_head { rt_gen_t rnh_gen; /* generation counter */ int rnh_multipath; /* multipath capable ? */ struct radix_node rnh_nodes[3]; /* empty tree for common case */ - struct rwlock rib_lock; /* config/data path lock */ + struct rmlock rib_lock; /* config/data path lock */ struct radix_mask_head rmhead; /* masks radix head */ }; -#define RIB_LOCK_INIT(rh) rw_init(&(rh)->rib_lock, "rib head lock") -#define RIB_LOCK_DESTROY(rh) rw_destroy(&(rh)->rib_lock) -#define RIB_RLOCK(rh) rw_rlock(&(rh)->rib_lock) -#define RIB_RUNLOCK(rh) rw_runlock(&(rh)->rib_lock) -#define RIB_WLOCK(rh) rw_wlock(&(rh)->rib_lock) -#define RIB_WUNLOCK(rh) rw_wunlock(&(rh)->rib_lock) -#define RIB_LOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_LOCKED) -#define RIB_WLOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_WLOCKED) +#define RIB_RLOCK_TRACKER struct rm_priotracker _rib_tracker +#define RIB_LOCK_INIT(rh) rm_init(&(rh)->rib_lock, "rib head lock") +#define RIB_LOCK_DESTROY(rh) rm_destroy(&(rh)->rib_lock) +#define RIB_RLOCK(rh) rm_rlock(&(rh)->rib_lock, &_rib_tracker) +#define RIB_RUNLOCK(rh) rm_runlock(&(rh)->rib_lock, &_rib_tracker) +#define RIB_WLOCK(rh) rm_wlock(&(rh)->rib_lock) +#define RIB_WUNLOCK(rh) rm_wunlock(&(rh)->rib_lock) +#define RIB_LOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_LOCKED) +#define RIB_WLOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_WLOCKED) struct rib_head *rt_tables_get_rnh(int fib, int family); diff --git a/freebsd/sys/net/rtsock.c b/freebsd/sys/net/rtsock.c index c0c5c5c2..84afd627 100644 --- a/freebsd/sys/net/rtsock.c +++ b/freebsd/sys/net/rtsock.c @@ -47,6 +47,7 @@ #include <sys/priv.h> #include <sys/proc.h> #include <sys/protosw.h> +#include <sys/rmlock.h> #include <sys/rwlock.h> #include <sys/signalvar.h> #include <sys/socket.h> @@ -141,7 +142,7 @@ typedef struct { int ip6_count; /* attached w/ AF_INET6 */ int any_count; /* total attached */ } route_cb_t; -static VNET_DEFINE(route_cb_t, route_cb); +VNET_DEFINE_STATIC(route_cb_t, route_cb); #define V_route_cb VNET(route_cb) struct mtx rtsock_mtx; @@ -550,6 +551,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, static int route_output(struct mbuf *m, struct socket *so, ...) { + RIB_RLOCK_TRACKER; struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; struct rib_head *rnh; @@ -1746,15 +1748,15 @@ sysctl_iflist(int af, struct walkarg *w) struct rt_addrinfo info; int len, error = 0; struct sockaddr_storage ss; + struct epoch_tracker et; bzero((caddr_t)&info, sizeof(info)); bzero(&ifd, sizeof(ifd)); - IFNET_RLOCK_NOSLEEP(); + NET_EPOCH_ENTER_ET(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (w->w_arg && w->w_arg != ifp->if_index) continue; if_data_copy(ifp, &ifd); - IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; info.rti_info[RTAX_IFP] = ifa->ifa_addr; error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len); @@ -1795,15 +1797,12 @@ sysctl_iflist(int af, struct walkarg *w) goto done; } } - IF_ADDR_RUNLOCK(ifp); info.rti_info[RTAX_IFA] = NULL; info.rti_info[RTAX_NETMASK] = NULL; info.rti_info[RTAX_BRD] = NULL; } done: - if (ifp != NULL) - IF_ADDR_RUNLOCK(ifp); - IFNET_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT_ET(et); return (error); } @@ -1862,6 +1861,7 @@ sysctl_ifmalist(int af, struct walkarg *w) static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { + RIB_RLOCK_TRACKER; int *name = (int *)arg1; u_int namelen = arg2; struct rib_head *rnh = NULL; /* silence compiler. */ diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h index 0ec00aad..b4168750 100644 --- a/freebsd/sys/net/vnet.h +++ b/freebsd/sys/net/vnet.h @@ -93,6 +93,8 @@ struct vnet { #define VNET_PCPUSTAT_DEFINE(type, name) \ VNET_DEFINE(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)]) +#define VNET_PCPUSTAT_DEFINE_STATIC(type, name) \ + VNET_DEFINE_STATIC(counter_u64_t, name[sizeof(type) / sizeof(uint64_t)]) #define VNET_PCPUSTAT_ALLOC(name, wait) \ COUNTER_ARRAY_ALLOC(VNET(name), \ @@ -268,7 +270,20 @@ extern struct sx vnet_sxlock; */ #define VNET_NAME(n) vnet_entry_##n #define VNET_DECLARE(t, n) extern t VNET_NAME(n) -#define VNET_DEFINE(t, n) t VNET_NAME(n) __section(VNET_SETNAME) __used +/* struct _hack is to stop this from being used with static data */ +#define VNET_DEFINE(t, n) \ + struct _hack; t VNET_NAME(n) __section(VNET_SETNAME) __used +#if defined(KLD_MODULE) && (defined(__aarch64__) || defined(__riscv)) +/* + * As with DPCPU_DEFINE_STATIC we are unable to mark this data as static + * in modules on some architectures. + */ +#define VNET_DEFINE_STATIC(t, n) \ + t VNET_NAME(n) __section(VNET_SETNAME) __used +#else +#define VNET_DEFINE_STATIC(t, n) \ + static t VNET_NAME(n) __section(VNET_SETNAME) __used +#endif #define _VNET_PTR(b, n) (__typeof(VNET_NAME(n))*) \ ((b) + (uintptr_t)&VNET_NAME(n)) @@ -400,7 +415,8 @@ do { \ */ #define VNET_NAME(n) n #define VNET_DECLARE(t, n) extern t n -#define VNET_DEFINE(t, n) t n +#define VNET_DEFINE(t, n) struct _hack; t n +#define VNET_DEFINE_STATIC(t, n) static t n #define _VNET_PTR(b, n) &VNET_NAME(n) /* |