diff options
Diffstat (limited to '')
99 files changed, 1913 insertions, 1002 deletions
diff --git a/freebsd/sys/contrib/ck/src/ck_epoch.c b/freebsd/sys/contrib/ck/src/ck_epoch.c index be0f201d..53b1f7dd 100644 --- a/freebsd/sys/contrib/ck/src/ck_epoch.c +++ b/freebsd/sys/contrib/ck/src/ck_epoch.c @@ -129,6 +129,14 @@ */ #define CK_EPOCH_GRACE 3U +/* + * CK_EPOCH_LENGTH must be a power-of-2 (because (CK_EPOCH_LENGTH - 1) is used + * as a mask, and it must be at least 3 (see comments above). + */ +#if (CK_EPOCH_LENGTH < 3 || (CK_EPOCH_LENGTH & (CK_EPOCH_LENGTH - 1)) != 0) +#error "CK_EPOCH_LENGTH must be a power of 2 and >= 3" +#endif + enum { CK_EPOCH_STATE_USED = 0, CK_EPOCH_STATE_FREE = 1 @@ -350,7 +358,7 @@ ck_epoch_scan(struct ck_epoch *global, return NULL; } -static void +static unsigned int ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e, ck_stack_t *deferred) { unsigned int epoch = e & (CK_EPOCH_LENGTH - 1); @@ -368,6 +376,7 @@ ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e, ck_stack_t *de ck_stack_push_spnc(deferred, &entry->stack_entry); else entry->function(entry); + i++; } @@ -383,7 +392,7 @@ ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e, ck_stack_t *de ck_pr_sub_uint(&record->n_pending, i); } - return; + return i; } /* @@ -562,16 +571,28 @@ ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred) unsigned int epoch; struct ck_epoch_record *cr = NULL; struct ck_epoch *global = record->global; + unsigned int n_dispatch; epoch = ck_pr_load_uint(&global->epoch); /* Serialize epoch snapshots with respect to global epoch. */ ck_pr_fence_memory(); + + /* + * At this point, epoch is the current global epoch value. + * There may or may not be active threads which observed epoch - 1. + * (ck_epoch_scan() will tell us that). However, there should be + * no active threads which observed epoch - 2. + * + * Note that checking epoch - 2 is necessary, as race conditions can + * allow another thread to increment the global epoch before this + * thread runs. + */ + n_dispatch = ck_epoch_dispatch(record, epoch - 2, deferred); + cr = ck_epoch_scan(global, cr, epoch, &active); - if (cr != NULL) { - record->epoch = epoch; - return false; - } + if (cr != NULL) + return (n_dispatch > 0); /* We are at a grace period if all threads are inactive. */ if (active == false) { @@ -582,10 +603,17 @@ ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred) return true; } - /* If an active thread exists, rely on epoch observation. */ + /* + * If an active thread exists, rely on epoch observation. + * + * All the active threads entered the epoch section during + * the current epoch. Therefore, we can now run the handlers + * for the immediately preceding epoch and attempt to + * advance the epoch if it hasn't been already. + */ (void)ck_pr_cas_uint(&global->epoch, epoch, epoch + 1); - ck_epoch_dispatch(record, epoch + 1, deferred); + ck_epoch_dispatch(record, epoch - 1, deferred); return true; } diff --git a/freebsd/sys/crypto/chacha20/_chacha.h b/freebsd/sys/crypto/chacha20/_chacha.h new file mode 100644 index 00000000..20f8bb9a --- /dev/null +++ b/freebsd/sys/crypto/chacha20/_chacha.h @@ -0,0 +1,15 @@ +/* $FreeBSD$ */ +/* + * Public domain. + */ + +#ifndef _CHACHA_H +#define _CHACHA_H + +#include <sys/types.h> + +struct chacha_ctx { + u_int input[16]; +}; + +#endif diff --git a/freebsd/sys/crypto/chacha20/chacha.c b/freebsd/sys/crypto/chacha20/chacha.c index 3ce5310a..154726c2 100644 --- a/freebsd/sys/crypto/chacha20/chacha.c +++ b/freebsd/sys/crypto/chacha20/chacha.c @@ -130,8 +130,10 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u_int bytes) for (;;) { if (bytes < 64) { +#ifndef KEYSTREAM_ONLY for (i = 0;i < bytes;++i) tmp[i] = m[i]; m = tmp; +#endif ctarget = c; c = tmp; } diff --git a/freebsd/sys/crypto/chacha20/chacha.h b/freebsd/sys/crypto/chacha20/chacha.h index e67cc812..73548331 100644 --- a/freebsd/sys/crypto/chacha20/chacha.h +++ b/freebsd/sys/crypto/chacha20/chacha.h @@ -12,10 +12,7 @@ Public domain. #define CHACHA_H #include <sys/types.h> - -struct chacha_ctx { - u_int input[16]; -}; +#include <crypto/chacha20/_chacha.h> #define CHACHA_MINKEYLEN 16 #define CHACHA_NONCELEN 8 @@ -23,10 +20,10 @@ struct chacha_ctx { #define CHACHA_STATELEN (CHACHA_NONCELEN+CHACHA_CTRLEN) #define CHACHA_BLOCKLEN 64 -#ifdef _KERNEL -#define LOCAL -#else +#ifdef CHACHA_EMBED #define LOCAL static +#else +#define LOCAL #endif LOCAL void chacha_keysetup(struct chacha_ctx *x, const u_char *k, u_int kbits); diff --git a/freebsd/sys/dev/bce/if_bce.c b/freebsd/sys/dev/bce/if_bce.c index 61c9708d..765b6877 100644 --- a/freebsd/sys/dev/bce/if_bce.c +++ b/freebsd/sys/dev/bce/if_bce.c @@ -532,7 +532,7 @@ MODULE_DEPEND(bce, miibus, 1, 1, 1); DRIVER_MODULE(bce, pci, bce_driver, bce_devclass, NULL, NULL); DRIVER_MODULE(miibus, bce, miibus_driver, miibus_devclass, NULL, NULL); MODULE_PNP_INFO("U16:vendor;U16:device;U16:#;U16:#;D:#", pci, bce, - bce_devs, sizeof(bce_devs[0]), nitems(bce_devs) - 1); + bce_devs, nitems(bce_devs) - 1); /****************************************************************************/ /* Tunable device values */ diff --git a/freebsd/sys/dev/bfe/if_bfe.c b/freebsd/sys/dev/bfe/if_bfe.c index f1586d59..c02f4ac1 100644 --- a/freebsd/sys/dev/bfe/if_bfe.c +++ b/freebsd/sys/dev/bfe/if_bfe.c @@ -160,7 +160,7 @@ static devclass_t bfe_devclass; DRIVER_MODULE(bfe, pci, bfe_driver, bfe_devclass, 0, 0); MODULE_PNP_INFO("U16:vendor;U16:device;D:#", pci, bfe, bfe_devs, - sizeof(bfe_devs[0]), nitems(bfe_devs) - 1); + nitems(bfe_devs) - 1); DRIVER_MODULE(miibus, bfe, miibus_driver, miibus_devclass, 0, 0); /* diff --git a/freebsd/sys/dev/bge/if_bge.c b/freebsd/sys/dev/bge/if_bge.c index 260c6c23..a4a937b2 100644 --- a/freebsd/sys/dev/bge/if_bge.c +++ b/freebsd/sys/dev/bge/if_bge.c @@ -550,7 +550,7 @@ static devclass_t bge_devclass; DRIVER_MODULE(bge, pci, bge_driver, bge_devclass, 0, 0); MODULE_PNP_INFO("U16:vendor;U16:device", pci, bge, bge_devs, - sizeof(bge_devs[0]), nitems(bge_devs) - 1); + nitems(bge_devs) - 1); DRIVER_MODULE(miibus, bge, miibus_driver, miibus_devclass, 0, 0); static int bge_allow_asf = 1; diff --git a/freebsd/sys/dev/dc/if_dc.c b/freebsd/sys/dev/dc/if_dc.c index 14023e54..7fc0ef54 100644 --- a/freebsd/sys/dev/dc/if_dc.c +++ b/freebsd/sys/dev/dc/if_dc.c @@ -362,7 +362,7 @@ static devclass_t dc_devclass; DRIVER_MODULE_ORDERED(dc, pci, dc_driver, dc_devclass, NULL, NULL, SI_ORDER_ANY); MODULE_PNP_INFO("W32:vendor/device;U8:revision;D:#", pci, dc, dc_devs, - sizeof(dc_devs[0]), nitems(dc_devs) - 1); + nitems(dc_devs) - 1); DRIVER_MODULE(miibus, dc, miibus_driver, miibus_devclass, NULL, NULL); #define DC_SETBIT(sc, reg, x) \ diff --git a/freebsd/sys/dev/e1000/em_txrx.c b/freebsd/sys/dev/e1000/em_txrx.c index 8157c9ce..3ceada31 100644 --- a/freebsd/sys/dev/e1000/em_txrx.c +++ b/freebsd/sys/dev/e1000/em_txrx.c @@ -448,8 +448,13 @@ em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) status = txr->tx_base[cur].upper.fields.status; updated = !!(status & E1000_TXD_STAT_DD); - if (clear == false || updated == 0) - return (updated); + if (!updated) + return (0); + + /* If clear is false just let caller know that there + * are descriptors to reclaim */ + if (!clear) + return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; @@ -555,22 +560,14 @@ lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) u32 staterr = 0; int cnt, i; - if (budget == 1) { - rxd = (struct e1000_rx_desc *)&rxr->rx_base[idx]; - staterr = rxd->status; - return (staterr & E1000_RXD_STAT_DD); - } - for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[i]; staterr = rxd->status; if ((staterr & E1000_RXD_STAT_DD) == 0) break; - if (++i == scctx->isc_nrxd[0]) i = 0; - if (staterr & E1000_RXD_STAT_EOP) cnt++; } @@ -588,26 +585,16 @@ em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) u32 staterr = 0; int cnt, i; - if (budget == 1) { - rxd = &rxr->rx_base[idx]; - staterr = le32toh(rxd->wb.upper.status_error); - return (staterr & E1000_RXD_STAT_DD); - } - for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = &rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; - - if (++i == scctx->isc_nrxd[0]) { + if (++i == scctx->isc_nrxd[0]) i = 0; - } - if (staterr & E1000_RXD_STAT_EOP) cnt++; - } return (cnt); } diff --git a/freebsd/sys/dev/e1000/if_em.c b/freebsd/sys/dev/e1000/if_em.c index 4fe66dfc..803b68ec 100644 --- a/freebsd/sys/dev/e1000/if_em.c +++ b/freebsd/sys/dev/e1000/if_em.c @@ -711,7 +711,8 @@ em_set_num_queues(if_ctx_t ctx) #define IGB_CAPS \ IFCAP_HWCSUM | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_HWCSUM | IFCAP_WOL | IFCAP_VLAN_HWFILTER | IFCAP_TSO4 | \ - IFCAP_LRO | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU | IFCAP_HWCSUM_IPV6; + IFCAP_LRO | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU | IFCAP_HWCSUM_IPV6 |\ + IFCAP_TSO6 /********************************************************************* * Device initialization routine @@ -1805,13 +1806,11 @@ em_if_update_admin_status(if_ctx_t ctx) } iflib_link_state_change(ctx, LINK_STATE_UP, IF_Mbps(adapter->link_speed)); - printf("Link state changed to up\n"); } else if (!link_check && (adapter->link_active == 1)) { adapter->link_speed = 0; adapter->link_duplex = 0; adapter->link_active = 0; iflib_link_state_change(ctx, LINK_STATE_DOWN, 0); - printf("Link state changed to down\n"); } em_update_stats_counters(adapter); diff --git a/freebsd/sys/dev/e1000/if_em.h b/freebsd/sys/dev/e1000/if_em.h index 26bdcb2e..d573107b 100644 --- a/freebsd/sys/dev/e1000/if_em.h +++ b/freebsd/sys/dev/e1000/if_em.h @@ -341,8 +341,11 @@ #define EM_MSIX_LINK 0x01000000 /* For 82574 use */ #define ETH_ZLEN 60 #define ETH_ADDR_LEN 6 -#define EM_CSUM_OFFLOAD 7 /* Offload bits in mbuf flag */ -#define IGB_CSUM_OFFLOAD 0x0E0F /* Offload bits in mbuf flag */ +#define EM_CSUM_OFFLOAD (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP) /* Offload bits in mbuf flag */ +#define IGB_CSUM_OFFLOAD (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP | \ + CSUM_IP_SCTP | CSUM_IP6_UDP | CSUM_IP6_TCP | \ + CSUM_IP6_SCTP) /* Offload bits in mbuf flag */ + #define IGB_PKTTYPE_MASK 0x0000FFF0 #define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coalesce Flush */ diff --git a/freebsd/sys/dev/e1000/igb_txrx.c b/freebsd/sys/dev/e1000/igb_txrx.c index 3a56a496..32bab4bf 100644 --- a/freebsd/sys/dev/e1000/igb_txrx.c +++ b/freebsd/sys/dev/e1000/igb_txrx.c @@ -154,7 +154,6 @@ igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 * u32 vlan_macip_lens, type_tucmd_mlhl; u32 mss_l4len_idx; mss_l4len_idx = vlan_macip_lens = type_tucmd_mlhl = 0; - int offload = TRUE; /* First check if TSO is to be used */ if (pi->ipi_csum_flags & CSUM_TSO) @@ -188,7 +187,6 @@ igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 * type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; default: - offload = FALSE; break; } @@ -197,25 +195,27 @@ igb_tx_ctx_setup(struct tx_ring *txr, if_pkt_info_t pi, u32 *cmd_type_len, u32 * switch (pi->ipi_ipproto) { case IPPROTO_TCP: - if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) + if (pi->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) { type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + } break; case IPPROTO_UDP: - if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) + if (pi->ipi_csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) { type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + } break; case IPPROTO_SCTP: - if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) + if (pi->ipi_csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) { type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; + *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + } break; default: - offload = FALSE; break; } - if (offload) /* For the TX descriptor setup */ - *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; - /* 82575 needs the queue index added */ if (adapter->hw.mac.type == e1000_82575) mss_l4len_idx = txr->me << 4; @@ -323,8 +323,13 @@ igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) status = ((union e1000_adv_tx_desc *)&txr->tx_base[cur])->wb.status; updated = !!(status & E1000_TXD_STAT_DD); - if (!clear || !updated) - return (updated); + if (!updated) + return (0); + + /* If clear is false just let caller know that there + * are descriptors to reclaim */ + if (!clear) + return (1); prev = txr->tx_cidx_processed; ntxd = scctx->isc_ntxd[0]; @@ -394,28 +399,18 @@ igb_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) struct rx_ring *rxr = &que->rxr; union e1000_adv_rx_desc *rxd; u32 staterr = 0; - int cnt, i, iter; + int cnt, i; - if (budget == 1) { - rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[idx]; - staterr = le32toh(rxd->wb.upper.status_error); - return (staterr & E1000_RXD_STAT_DD); - } - - for (iter = cnt = 0, i = idx; iter < scctx->isc_nrxd[0] && iter <= budget;) { + for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = (union e1000_adv_rx_desc *)&rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); if ((staterr & E1000_RXD_STAT_DD) == 0) break; - - if (++i == scctx->isc_nrxd[0]) { + if (++i == scctx->isc_nrxd[0]) i = 0; - } - if (staterr & E1000_RXD_STAT_EOP) cnt++; - iter++; } return (cnt); } diff --git a/freebsd/sys/dev/ffec/if_ffec.c b/freebsd/sys/dev/ffec/if_ffec.c index 03dca1a9..aee2aa64 100644 --- a/freebsd/sys/dev/ffec/if_ffec.c +++ b/freebsd/sys/dev/ffec/if_ffec.c @@ -887,12 +887,14 @@ ffec_alloc_mbufcl(struct ffec_softc *sc) struct mbuf *m; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); - if (m == NULL) - return (m); - - m->m_pkthdr.len = m->m_len = m->m_ext.ext_size; + if (m != NULL) #ifdef __rtems__ - rtems_cache_invalidate_multiple_data_lines(m->m_data, m->m_len); + { +#endif /* __rtems__ */ + m->m_pkthdr.len = m->m_len = m->m_ext.ext_size; +#ifdef __rtems__ + rtems_cache_invalidate_multiple_data_lines(m->m_data, m->m_len); + } #endif /* __rtems__ */ return (m); diff --git a/freebsd/sys/dev/fxp/if_fxp.c b/freebsd/sys/dev/fxp/if_fxp.c index 032246d8..e9c8721e 100644 --- a/freebsd/sys/dev/fxp/if_fxp.c +++ b/freebsd/sys/dev/fxp/if_fxp.c @@ -310,7 +310,7 @@ static devclass_t fxp_devclass; DRIVER_MODULE_ORDERED(fxp, pci, fxp_driver, fxp_devclass, NULL, NULL, SI_ORDER_ANY); MODULE_PNP_INFO("U16:vendor;U16:device", pci, fxp, fxp_ident_table, - sizeof(fxp_ident_table[0]), nitems(fxp_ident_table) - 1); + nitems(fxp_ident_table) - 1); DRIVER_MODULE(miibus, fxp, miibus_driver, miibus_devclass, NULL, NULL); static struct resource_spec fxp_res_spec_mem[] = { diff --git a/freebsd/sys/dev/ofw/ofw_bus_subr.h b/freebsd/sys/dev/ofw/ofw_bus_subr.h index 7bf66a10..468fdc39 100644 --- a/freebsd/sys/dev/ofw/ofw_bus_subr.h +++ b/freebsd/sys/dev/ofw/ofw_bus_subr.h @@ -67,7 +67,7 @@ struct intr_map_data_fdt { #define SIMPLEBUS_PNP_DESCR "Z:compat;P:#;" #define SIMPLEBUS_PNP_INFO(t) \ - MODULE_PNP_INFO(SIMPLEBUS_PNP_DESCR, simplebus, t, t, sizeof(t[0]), sizeof(t) / sizeof(t[0])); + MODULE_PNP_INFO(SIMPLEBUS_PNP_DESCR, simplebus, t, t, sizeof(t) / sizeof(t[0])); /* Generic implementation of ofw_bus_if.m methods and helper routines */ int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *, phandle_t); diff --git a/freebsd/sys/dev/pci/pci_user.c b/freebsd/sys/dev/pci/pci_user.c index b3a2e9e2..27fb3475 100644 --- a/freebsd/sys/dev/pci/pci_user.c +++ b/freebsd/sys/dev/pci/pci_user.c @@ -68,6 +68,49 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/pcib_if.h> #include <rtems/bsd/local/pci_if.h> +#ifdef COMPAT_FREEBSD32 +struct pci_conf32 { + struct pcisel pc_sel; /* domain+bus+slot+function */ + u_int8_t pc_hdr; /* PCI header type */ + u_int16_t pc_subvendor; /* card vendor ID */ + u_int16_t pc_subdevice; /* card device ID, assigned by + card vendor */ + u_int16_t pc_vendor; /* chip vendor ID */ + u_int16_t pc_device; /* chip device ID, assigned by + chip vendor */ + u_int8_t pc_class; /* chip PCI class */ + u_int8_t pc_subclass; /* chip PCI subclass */ + u_int8_t pc_progif; /* chip PCI programming interface */ + u_int8_t pc_revid; /* chip revision ID */ + char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ + u_int32_t pd_unit; /* device unit number */ +}; + +struct pci_match_conf32 { + struct pcisel pc_sel; /* domain+bus+slot+function */ + char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ + u_int32_t pd_unit; /* Unit number */ + u_int16_t pc_vendor; /* PCI Vendor ID */ + u_int16_t pc_device; /* PCI Device ID */ + u_int8_t pc_class; /* PCI class */ + u_int32_t flags; /* Matching expression */ +}; + +struct pci_conf_io32 { + u_int32_t pat_buf_len; /* pattern buffer length */ + u_int32_t num_patterns; /* number of patterns */ + u_int32_t patterns; /* struct pci_match_conf ptr */ + u_int32_t match_buf_len; /* match buffer length */ + u_int32_t num_matches; /* number of matches returned */ + u_int32_t matches; /* struct pci_conf ptr */ + u_int32_t offset; /* offset into device list */ + u_int32_t generation; /* device list generation */ + u_int32_t status; /* request status */ +}; + +#define PCIOCGETCONF32 _IOC_NEWTYPE(PCIOCGETCONF, struct pci_conf_io32) +#endif + /* * This is the user interface to PCI configuration space. */ @@ -177,6 +220,73 @@ pci_conf_match_native(struct pci_match_conf *matches, int num_matches, return(1); } +#ifdef COMPAT_FREEBSD32 +static int +pci_conf_match32(struct pci_match_conf32 *matches, int num_matches, + struct pci_conf *match_buf) +{ + int i; + + if ((matches == NULL) || (match_buf == NULL) || (num_matches <= 0)) + return(1); + + for (i = 0; i < num_matches; i++) { + /* + * I'm not sure why someone would do this...but... + */ + if (matches[i].flags == PCI_GETCONF_NO_MATCH) + continue; + + /* + * Look at each of the match flags. If it's set, do the + * comparison. If the comparison fails, we don't have a + * match, go on to the next item if there is one. + */ + if (((matches[i].flags & PCI_GETCONF_MATCH_DOMAIN) != 0) + && (match_buf->pc_sel.pc_domain != + matches[i].pc_sel.pc_domain)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_BUS) != 0) + && (match_buf->pc_sel.pc_bus != matches[i].pc_sel.pc_bus)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_DEV) != 0) + && (match_buf->pc_sel.pc_dev != matches[i].pc_sel.pc_dev)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_FUNC) != 0) + && (match_buf->pc_sel.pc_func != matches[i].pc_sel.pc_func)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_VENDOR) != 0) + && (match_buf->pc_vendor != matches[i].pc_vendor)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_DEVICE) != 0) + && (match_buf->pc_device != matches[i].pc_device)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_CLASS) != 0) + && (match_buf->pc_class != matches[i].pc_class)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_UNIT) != 0) + && (match_buf->pd_unit != matches[i].pd_unit)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_NAME) != 0) + && (strncmp(matches[i].pd_name, match_buf->pd_name, + sizeof(match_buf->pd_name)) != 0)) + continue; + + return(0); + } + + return(1); +} +#endif /* COMPAT_FREEBSD32 */ + #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) #define PRE7_COMPAT @@ -261,20 +371,6 @@ struct pci_match_conf_old32 { pci_getconf_flags_old flags; /* Matching expression */ }; -struct pci_conf_io32 { - uint32_t pat_buf_len; /* pattern buffer length */ - uint32_t num_patterns; /* number of patterns */ - uint32_t patterns; /* pattern buffer - (struct pci_match_conf_old32 *) */ - uint32_t match_buf_len; /* match buffer length */ - uint32_t num_matches; /* number of matches returned */ - uint32_t matches; /* match buffer - (struct pci_conf_old32 *) */ - uint32_t offset; /* offset into device list */ - uint32_t generation; /* device list generation */ - pci_getconf_status status; /* request status */ -}; - #define PCIOCGETCONF_OLD32 _IOWR('p', 1, struct pci_conf_io32) #endif /* COMPAT_FREEBSD32 */ @@ -413,6 +509,9 @@ pci_conf_match_old32(struct pci_match_conf_old32 *matches, int num_matches, union pci_conf_union { struct pci_conf pc; +#ifdef COMPAT_FREEBSD32 + struct pci_conf32 pc32; +#endif #ifdef PRE7_COMPAT struct pci_conf_old pco; #ifdef COMPAT_FREEBSD32 @@ -430,6 +529,11 @@ pci_conf_match(u_long cmd, struct pci_match_conf *matches, int num_matches, case PCIOCGETCONF: return (pci_conf_match_native( (struct pci_match_conf *)matches, num_matches, match_buf)); +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: + return (pci_conf_match32((struct pci_match_conf32 *)matches, + num_matches, match_buf)); +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_OLD: return (pci_conf_match_old( @@ -546,6 +650,10 @@ pci_match_conf_size(u_long cmd) switch (cmd) { case PCIOCGETCONF: return (sizeof(struct pci_match_conf)); +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: + return (sizeof(struct pci_match_conf32)); +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_OLD: return (sizeof(struct pci_match_conf_old)); @@ -567,6 +675,10 @@ pci_conf_size(u_long cmd) switch (cmd) { case PCIOCGETCONF: return (sizeof(struct pci_conf)); +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: + return (sizeof(struct pci_conf32)); +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_OLD: return (sizeof(struct pci_conf_old)); @@ -584,7 +696,7 @@ pci_conf_size(u_long cmd) static void pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd) { -#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32) +#if defined(COMPAT_FREEBSD32) struct pci_conf_io32 *cio32; #endif @@ -596,8 +708,11 @@ pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd) *cio = *(struct pci_conf_io *)data; return; -#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32) +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: +#ifdef PRE7_COMPAT case PCIOCGETCONF_OLD32: +#endif cio32 = (struct pci_conf_io32 *)data; cio->pat_buf_len = cio32->pat_buf_len; cio->num_patterns = cio32->num_patterns; @@ -622,7 +737,7 @@ pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data, u_long cmd) { struct pci_conf_io *d_cio; -#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32) +#if defined(COMPAT_FREEBSD32) struct pci_conf_io32 *cio32; #endif @@ -638,8 +753,11 @@ pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data, d_cio->num_matches = cio->num_matches; return; -#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32) +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: +#ifdef PRE7_COMPAT case PCIOCGETCONF_OLD32: +#endif cio32 = (struct pci_conf_io32 *)data; cio32->status = cio->status; @@ -667,6 +785,24 @@ pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup, pcup->pc = *pcp; return; +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: + pcup->pc32.pc_sel = pcp->pc_sel; + pcup->pc32.pc_hdr = pcp->pc_hdr; + pcup->pc32.pc_subvendor = pcp->pc_subvendor; + pcup->pc32.pc_subdevice = pcp->pc_subdevice; + pcup->pc32.pc_vendor = pcp->pc_vendor; + pcup->pc32.pc_device = pcp->pc_device; + pcup->pc32.pc_class = pcp->pc_class; + pcup->pc32.pc_subclass = pcp->pc_subclass; + pcup->pc32.pc_progif = pcp->pc_progif; + pcup->pc32.pc_revid = pcp->pc_revid; + strlcpy(pcup->pc32.pd_name, pcp->pd_name, + sizeof(pcup->pc32.pd_name)); + pcup->pc32.pd_unit = (uint32_t)pcp->pd_unit; + return; +#endif + #ifdef PRE7_COMPAT #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF_OLD32: diff --git a/freebsd/sys/dev/pci/pcireg.h b/freebsd/sys/dev/pci/pcireg.h index 00589c4b..edec95c8 100644 --- a/freebsd/sys/dev/pci/pcireg.h +++ b/freebsd/sys/dev/pci/pcireg.h @@ -122,6 +122,9 @@ #define PCIM_MFDEV 0x80 #define PCIR_BIST 0x0f +/* PCI Spec rev 2.2: 0FFFFh is an invalid value for Vendor ID. */ +#define PCIV_INVALID 0xffff + /* Capability Register Offsets */ #define PCICAP_ID 0x0 diff --git a/freebsd/sys/dev/pci/pcivar.h b/freebsd/sys/dev/pci/pcivar.h index 21d9bd03..8fd0d9f7 100644 --- a/freebsd/sys/dev/pci/pcivar.h +++ b/freebsd/sys/dev/pci/pcivar.h @@ -311,7 +311,7 @@ struct pci_device_table { "M16:mask;U16:vendor;U16:device;U16:subvendor;U16:subdevice;" \ "U16:class;U16:subclass;U16:revid;" #define PCI_PNP_INFO(table) \ - MODULE_PNP_INFO(PCI_PNP_STR, pci, table, table, sizeof(table[0]), \ + MODULE_PNP_INFO(PCI_PNP_STR, pci, table, table, \ sizeof(table) / sizeof(table[0])) const struct pci_device_table *pci_match_device(device_t child, diff --git a/freebsd/sys/dev/usb/controller/dwc_otg.c b/freebsd/sys/dev/usb/controller/dwc_otg.c index abc23ac6..1332b485 100644 --- a/freebsd/sys/dev/usb/controller/dwc_otg.c +++ b/freebsd/sys/dev/usb/controller/dwc_otg.c @@ -1460,6 +1460,8 @@ dwc_otg_host_data_rx(struct dwc_otg_softc *sc, struct dwc_otg_td *td) /* check if we are complete */ if (td->tt_xactpos == HCSPLT_XACTPOS_BEGIN) { goto complete; + } else if (td->hcsplt != 0) { + goto receive_pkt; } else { /* get more packets */ goto busy; @@ -1518,8 +1520,10 @@ receive_pkt: if (td->hcsplt != 0) { delta = td->tt_complete_slot - sc->sc_last_frame_num - 1; if (td->tt_scheduled == 0 || delta < DWC_OTG_TT_SLOT_MAX) { - td->state = DWC_CHAN_ST_WAIT_C_PKT; - goto busy; + if (td->ep_type != UE_ISOCHRONOUS) { + td->state = DWC_CHAN_ST_WAIT_C_PKT; + goto busy; + } } delta = sc->sc_last_frame_num - td->tt_start_slot; if (delta > DWC_OTG_TT_SLOT_MAX) { @@ -1565,12 +1569,23 @@ receive_pkt: hcchar = td->hcchar; hcchar |= HCCHAR_EPDIR_IN; - /* receive complete split ASAP */ - if ((sc->sc_last_frame_num & 1) != 0 && - td->ep_type == UE_ISOCHRONOUS) - hcchar |= HCCHAR_ODDFRM; - else + if (td->ep_type == UE_ISOCHRONOUS) { + if (td->hcsplt != 0) { + /* continously buffer */ + if (sc->sc_last_frame_num & 1) + hcchar &= ~HCCHAR_ODDFRM; + else + hcchar |= HCCHAR_ODDFRM; + } else { + /* multi buffer, if any */ + if (sc->sc_last_frame_num & 1) + hcchar |= HCCHAR_ODDFRM; + else + hcchar &= ~HCCHAR_ODDFRM; + } + } else { hcchar &= ~HCCHAR_ODDFRM; + } /* must enable channel before data can be received */ DWC_OTG_WRITE_4(sc, DOTG_HCCHAR(channel), hcchar); diff --git a/freebsd/sys/dev/usb/net/if_ure.c b/freebsd/sys/dev/usb/net/if_ure.c index bcae02cb..136b61f9 100644 --- a/freebsd/sys/dev/usb/net/if_ure.c +++ b/freebsd/sys/dev/usb/net/if_ure.c @@ -70,6 +70,8 @@ SYSCTL_INT(_hw_usb_ure, OID_AUTO, debug, CTLFLAG_RWTUN, &ure_debug, 0, static const STRUCT_USB_HOST_ID ure_devs[] = { #define URE_DEV(v,p,i) { USB_VPI(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, i) } URE_DEV(LENOVO, RTL8153, 0), + URE_DEV(LENOVO, TBT3LAN, 0), + URE_DEV(LENOVO, USBCLAN, 0), URE_DEV(NVIDIA, RTL8153, 0), URE_DEV(REALTEK, RTL8152, URE_FLAG_8152), URE_DEV(REALTEK, RTL8153, 0), @@ -171,6 +173,7 @@ MODULE_DEPEND(ure, usb, 1, 1, 1); MODULE_DEPEND(ure, ether, 1, 1, 1); MODULE_DEPEND(ure, miibus, 1, 1, 1); MODULE_VERSION(ure, 1); +USB_PNP_HOST_INFO(ure_devs); static const struct usb_ether_methods ure_ue_methods = { .ue_attach_post = ure_attach_post, diff --git a/freebsd/sys/dev/usb/serial/uplcom.c b/freebsd/sys/dev/usb/serial/uplcom.c index 2b90e4d6..a58ba3ce 100644 --- a/freebsd/sys/dev/usb/serial/uplcom.c +++ b/freebsd/sys/dev/usb/serial/uplcom.c @@ -136,11 +136,19 @@ SYSCTL_INT(_hw_usb_uplcom, OID_AUTO, debug, CTLFLAG_RWTUN, #define UPLCOM_SET_CRTSCTS 0x41 #define UPLCOM_SET_CRTSCTS_PL2303X 0x61 #define RSAQ_STATUS_CTS 0x80 +#define RSAQ_STATUS_OVERRUN_ERROR 0x40 +#define RSAQ_STATUS_PARITY_ERROR 0x20 +#define RSAQ_STATUS_FRAME_ERROR 0x10 +#define RSAQ_STATUS_RING 0x08 +#define RSAQ_STATUS_BREAK_ERROR 0x04 #define RSAQ_STATUS_DSR 0x02 #define RSAQ_STATUS_DCD 0x01 #define TYPE_PL2303 0 #define TYPE_PL2303HX 1 +#define TYPE_PL2303HXD 2 + +#define UPLCOM_STATE_INDEX 8 enum { UPLCOM_BULK_DT_WR, @@ -371,18 +379,49 @@ uplcom_attach(device_t dev) sc->sc_udev = uaa->device; - /* Determine the chip type. This algorithm is taken from Linux. */ dd = usbd_get_device_descriptor(sc->sc_udev); - if (dd->bDeviceClass == 0x02) - sc->sc_chiptype = TYPE_PL2303; - else if (dd->bMaxPacketSize == 0x40) + + switch (UGETW(dd->bcdDevice)) { + case 0x0300: sc->sc_chiptype = TYPE_PL2303HX; - else - sc->sc_chiptype = TYPE_PL2303; + /* or TA, that is HX with external crystal */ + break; + case 0x0400: + sc->sc_chiptype = TYPE_PL2303HXD; + /* or EA, that is HXD with ESD protection */ + /* or RA, that has internal voltage level converter that works only up to 1Mbaud (!) */ + break; + case 0x0500: + sc->sc_chiptype = TYPE_PL2303HXD; + /* in fact it's TB, that is HXD with external crystal */ + break; + default: + /* NOTE: I have no info about the bcdDevice for the base PL2303 (up to 1.2Mbaud, + only fixed rates) and for PL2303SA (8-pin chip, up to 115200 baud */ + /* Determine the chip type. This algorithm is taken from Linux. */ + if (dd->bDeviceClass == 0x02) + sc->sc_chiptype = TYPE_PL2303; + else if (dd->bMaxPacketSize == 0x40) + sc->sc_chiptype = TYPE_PL2303HX; + else + sc->sc_chiptype = TYPE_PL2303; + break; + } - DPRINTF("chiptype: %s\n", - (sc->sc_chiptype == TYPE_PL2303HX) ? - "2303X" : "2303"); + switch (sc->sc_chiptype) { + case TYPE_PL2303: + DPRINTF("chiptype: 2303\n"); + break; + case TYPE_PL2303HX: + DPRINTF("chiptype: 2303HX/TA\n"); + break; + case TYPE_PL2303HXD: + DPRINTF("chiptype: 2303HXD/TB/RA/EA\n"); + break; + default: + DPRINTF("chiptype: unknown %d\n", sc->sc_chiptype); + break; + } /* * USB-RSAQ1 has two interface @@ -431,13 +470,14 @@ uplcom_attach(device_t dev) goto detach; } - if (sc->sc_chiptype != TYPE_PL2303HX) { + if (sc->sc_chiptype == TYPE_PL2303) { /* HX variants seem to lock up after a clear stall request. */ mtx_lock(&sc->sc_mtx); usbd_xfer_set_stall(sc->sc_xfer[UPLCOM_BULK_DT_WR]); usbd_xfer_set_stall(sc->sc_xfer[UPLCOM_BULK_DT_RD]); mtx_unlock(&sc->sc_mtx); } else { + /* reset upstream data pipes */ if (uplcom_pl2303_do(sc->sc_udev, UT_WRITE_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 8, 0, 0) || uplcom_pl2303_do(sc->sc_udev, UT_WRITE_VENDOR_DEVICE, @@ -556,7 +596,7 @@ uplcom_pl2303_init(struct usb_device *udev, uint8_t chiptype) || uplcom_pl2303_do(udev, UT_WRITE_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 1, 0, 0)) return (EIO); - if (chiptype == TYPE_PL2303HX) + if (chiptype != TYPE_PL2303) err = uplcom_pl2303_do(udev, UT_WRITE_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 2, 0x44, 0); else err = uplcom_pl2303_do(udev, UT_WRITE_VENDOR_DEVICE, UPLCOM_SET_REQUEST, 2, 0x24, 0); @@ -636,23 +676,52 @@ uplcom_cfg_set_break(struct ucom_softc *ucom, uint8_t onoff) &req, NULL, 0, 1000); } +/* + * NOTE: These baud rates are officially supported, they can be written + * directly into dwDTERate register. + * + * Free baudrate setting is not supported by the base PL2303, and on + * other models it requires writing a divisor value to dwDTERate instead + * of the raw baudrate. The formula for divisor calculation is not published + * by the vendor, so it is speculative, though the official product homepage + * refers to the Linux module source as a reference implementation. + */ static const uint32_t uplcom_rates[] = { - 75, 150, 300, 600, 1200, 1800, 2400, 3600, 4800, 7200, 9600, 14400, - 19200, 28800, 38400, 57600, 115200, /* - * Higher speeds are probably possible. PL2303X supports up to - * 6Mb and can set any rate + * Basic 'standard' speed rates, supported by all models + * NOTE: 900 and 56000 actually works as well */ - 230400, 460800, 614400, 921600, 1228800 + 75, 150, 300, 600, 900, 1200, 1800, 2400, 3600, 4800, 7200, 9600, 14400, + 19200, 28800, 38400, 56000, 57600, 115200, + /* + * Advanced speed rates up to 6Mbs, supported by HX/TA and HXD/TB/EA/RA + * NOTE: regardless of the spec, 256000 does not work + */ + 128000, 134400, 161280, 201600, 230400, 268800, 403200, 460800, 614400, + 806400, 921600, 1228800, 2457600, 3000000, 6000000, + /* + * Advanced speed rates up to 12, supported by HXD/TB/EA/RA + */ + 12000000 }; #define N_UPLCOM_RATES nitems(uplcom_rates) static int +uplcom_baud_supported(unsigned int speed) +{ + int i; + for (i = 0; i < N_UPLCOM_RATES; i++) { + if (uplcom_rates[i] == speed) + return 1; + } + return 0; +} + +static int uplcom_pre_param(struct ucom_softc *ucom, struct termios *t) { struct uplcom_softc *sc = ucom->sc_parent; - uint8_t i; DPRINTF("\n"); @@ -660,26 +729,75 @@ uplcom_pre_param(struct ucom_softc *ucom, struct termios *t) * Check requested baud rate. * * The PL2303 can only set specific baud rates, up to 1228800 baud. - * The PL2303X can set any baud rate up to 6Mb. + * The PL2303HX can set any baud rate up to 6Mb. * The PL2303HX rev. D can set any baud rate up to 12Mb. * - * XXX: We currently cannot identify the PL2303HX rev. D, so treat - * it the same as the PL2303X. */ - if (sc->sc_chiptype != TYPE_PL2303HX) { - for (i = 0; i < N_UPLCOM_RATES; i++) { - if (uplcom_rates[i] == t->c_ospeed) + + /* accept raw divisor data, if someone wants to do the math in user domain */ + if (t->c_ospeed & 0x80000000) + return 0; + switch (sc->sc_chiptype) { + case TYPE_PL2303HXD: + if (t->c_ospeed <= 12000000) return (0); - } - } else { - if (t->c_ospeed <= 6000000) - return (0); + break; + case TYPE_PL2303HX: + if (t->c_ospeed <= 6000000) + return (0); + break; + default: + if (uplcom_baud_supported(t->c_ospeed)) + return (0); + break; } DPRINTF("uplcom_param: bad baud rate (%d)\n", t->c_ospeed); return (EIO); } +static unsigned int +uplcom_encode_baud_rate_divisor(uint8_t *buf, unsigned int baud) +{ + unsigned int baseline, mantissa, exponent; + + /* Determine the baud rate divisor. This algorithm is taken from Linux. */ + /* + * Apparently the formula is: + * baudrate = baseline / (mantissa * 4^exponent) + * where + * mantissa = buf[8:0] + * exponent = buf[11:9] + */ + if (baud == 0) + baud = 1; + baseline = 383385600; + mantissa = baseline / baud; + if (mantissa == 0) + mantissa = 1; + exponent = 0; + while (mantissa >= 512) { + if (exponent < 7) { + mantissa >>= 2; /* divide by 4 */ + exponent++; + } else { + /* Exponent is maxed. Trim mantissa and leave. This gives approx. 45.8 baud */ + mantissa = 511; + break; + } + } + + buf[3] = 0x80; + buf[2] = 0; + buf[1] = exponent << 1 | mantissa >> 8; + buf[0] = mantissa & 0xff; + + /* Calculate and return the exact baud rate. */ + baud = (baseline / mantissa) >> (exponent << 1); + DPRINTF("real baud rate will be %u\n", baud); + + return baud; +} static void uplcom_cfg_param(struct ucom_softc *ucom, struct termios *t) { @@ -691,10 +809,24 @@ uplcom_cfg_param(struct ucom_softc *ucom, struct termios *t) memset(&ls, 0, sizeof(ls)); - USETDW(ls.dwDTERate, t->c_ospeed); + /* + * NOTE: If unsupported baud rates are set directly, the PL2303* uses 9600 baud. + */ + if ((t->c_ospeed & 0x80000000) || uplcom_baud_supported(t->c_ospeed)) + USETDW(ls.dwDTERate, t->c_ospeed); + else + t->c_ospeed = uplcom_encode_baud_rate_divisor((uint8_t*)&ls.dwDTERate, t->c_ospeed); if (t->c_cflag & CSTOPB) { - ls.bCharFormat = UCDC_STOP_BIT_2; + if ((t->c_cflag & CSIZE) == CS5) { + /* + * NOTE: Comply with "real" UARTs / RS232: + * use 1.5 instead of 2 stop bits with 5 data bits + */ + ls.bCharFormat = UCDC_STOP_BIT_1_5; + } else { + ls.bCharFormat = UCDC_STOP_BIT_2; + } } else { ls.bCharFormat = UCDC_STOP_BIT_1; } @@ -724,7 +856,7 @@ uplcom_cfg_param(struct ucom_softc *ucom, struct termios *t) break; } - DPRINTF("rate=%d fmt=%d parity=%d bits=%d\n", + DPRINTF("rate=0x%08x fmt=%d parity=%d bits=%d\n", UGETDW(ls.dwDTERate), ls.bCharFormat, ls.bParityType, ls.bDataBits); @@ -745,7 +877,7 @@ uplcom_cfg_param(struct ucom_softc *ucom, struct termios *t) req.bmRequestType = UT_WRITE_VENDOR_DEVICE; req.bRequest = UPLCOM_SET_REQUEST; USETW(req.wValue, 0); - if (sc->sc_chiptype == TYPE_PL2303HX) + if (sc->sc_chiptype != TYPE_PL2303) USETW(req.wIndex, UPLCOM_SET_CRTSCTS_PL2303X); else USETW(req.wIndex, UPLCOM_SET_CRTSCTS); @@ -811,7 +943,6 @@ uplcom_cfg_get_status(struct ucom_softc *ucom, uint8_t *lsr, uint8_t *msr) DPRINTF("\n"); - /* XXX Note: sc_lsr is always zero */ *lsr = sc->sc_lsr; *msr = sc->sc_msr; } @@ -836,18 +967,33 @@ uplcom_intr_callback(struct usb_xfer *xfer, usb_error_t error) pc = usbd_xfer_get_frame(xfer, 0); usbd_copy_out(pc, 0, buf, sizeof(buf)); - DPRINTF("status = 0x%02x\n", buf[8]); + DPRINTF("status = 0x%02x\n", buf[UPLCOM_STATE_INDEX]); sc->sc_lsr = 0; sc->sc_msr = 0; - if (buf[8] & RSAQ_STATUS_CTS) { + if (buf[UPLCOM_STATE_INDEX] & RSAQ_STATUS_CTS) { sc->sc_msr |= SER_CTS; } - if (buf[8] & RSAQ_STATUS_DSR) { + if (buf[UPLCOM_STATE_INDEX] & RSAQ_STATUS_OVERRUN_ERROR) { + sc->sc_lsr |= ULSR_OE; + } + if (buf[UPLCOM_STATE_INDEX] & RSAQ_STATUS_PARITY_ERROR) { + sc->sc_lsr |= ULSR_PE; + } + if (buf[UPLCOM_STATE_INDEX] & RSAQ_STATUS_FRAME_ERROR) { + sc->sc_lsr |= ULSR_FE; + } + if (buf[UPLCOM_STATE_INDEX] & RSAQ_STATUS_RING) { + sc->sc_msr |= SER_RI; + } + if (buf[UPLCOM_STATE_INDEX] & RSAQ_STATUS_BREAK_ERROR) { + sc->sc_lsr |= ULSR_BI; + } + if (buf[UPLCOM_STATE_INDEX] & RSAQ_STATUS_DSR) { sc->sc_msr |= SER_DSR; } - if (buf[8] & RSAQ_STATUS_DCD) { + if (buf[UPLCOM_STATE_INDEX] & RSAQ_STATUS_DCD) { sc->sc_msr |= SER_DCD; } ucom_status_change(&sc->sc_ucom); diff --git a/freebsd/sys/dev/usb/usbdi.h b/freebsd/sys/dev/usb/usbdi.h index 147b5d5e..d5648c03 100644 --- a/freebsd/sys/dev/usb/usbdi.h +++ b/freebsd/sys/dev/usb/usbdi.h @@ -342,13 +342,13 @@ struct usb_device_id { #define USB_STD_PNP_HOST_INFO USB_STD_PNP_INFO "T:mode=host;" #define USB_STD_PNP_DEVICE_INFO USB_STD_PNP_INFO "T:mode=device;" #define USB_PNP_HOST_INFO(table) \ - MODULE_PNP_INFO(USB_STD_PNP_HOST_INFO, uhub, table, table, sizeof(table[0]), \ + MODULE_PNP_INFO(USB_STD_PNP_HOST_INFO, uhub, table, table, \ sizeof(table) / sizeof(table[0])) #define USB_PNP_DEVICE_INFO(table) \ - MODULE_PNP_INFO(USB_STD_PNP_DEVICE_INFO, uhub, table, table, sizeof(table[0]), \ + MODULE_PNP_INFO(USB_STD_PNP_DEVICE_INFO, uhub, table, table, \ sizeof(table) / sizeof(table[0])) #define USB_PNP_DUAL_INFO(table) \ - MODULE_PNP_INFO(USB_STD_PNP_INFO, uhub, table, table, sizeof(table[0]), \ + MODULE_PNP_INFO(USB_STD_PNP_INFO, uhub, table, table, \ sizeof(table) / sizeof(table[0])) /* check that the size of the structure above is correct */ diff --git a/freebsd/sys/isa/isavar.h b/freebsd/sys/isa/isavar.h index 30fd1f8d..55e33ef2 100644 --- a/freebsd/sys/isa/isavar.h +++ b/freebsd/sys/isa/isavar.h @@ -144,7 +144,7 @@ enum isa_device_ivars { #define ISA_PNP_DESCR "E:pnpid;D:#" #define ISA_PNP_INFO(t) \ - MODULE_PNP_INFO(ISA_PNP_DESCR, isa, t, t, sizeof(t[0]), nitems(t) - 1); \ + MODULE_PNP_INFO(ISA_PNP_DESCR, isa, t, t, nitems(t) - 1); \ /* * Simplified accessors for isa devices diff --git a/freebsd/sys/kern/kern_conf.c b/freebsd/sys/kern/kern_conf.c index 8605cc43..92237d9d 100644 --- a/freebsd/sys/kern/kern_conf.c +++ b/freebsd/sys/kern/kern_conf.c @@ -210,7 +210,8 @@ dev_refthread(struct cdev *dev, int *ref) csw = NULL; } dev_unlock(); - *ref = 1; + if (csw != NULL) + *ref = 1; return (csw); } diff --git a/freebsd/sys/kern/kern_linker.c b/freebsd/sys/kern/kern_linker.c index 197ee5bb..49aee591 100644 --- a/freebsd/sys/kern/kern_linker.c +++ b/freebsd/sys/kern/kern_linker.c @@ -1037,17 +1037,35 @@ linker_ddb_search_symbol_name(caddr_t value, char *buf, u_int buflen, * obey locking protocols, and offer a significantly less complex interface. */ int -linker_search_symbol_name(caddr_t value, char *buf, u_int buflen, - long *offset) +linker_search_symbol_name_flags(caddr_t value, char *buf, u_int buflen, + long *offset, int flags) { int error; - sx_slock(&kld_sx); + KASSERT((flags & (M_NOWAIT | M_WAITOK)) != 0 && + (flags & (M_NOWAIT | M_WAITOK)) != (M_NOWAIT | M_WAITOK), + ("%s: bad flags: 0x%x", __func__, flags)); + + if (flags & M_NOWAIT) { + if (!sx_try_slock(&kld_sx)) + return (EWOULDBLOCK); + } else + sx_slock(&kld_sx); + error = linker_debug_search_symbol_name(value, buf, buflen, offset); sx_sunlock(&kld_sx); return (error); } +int +linker_search_symbol_name(caddr_t value, char *buf, u_int buflen, + long *offset) +{ + + return (linker_search_symbol_name_flags(value, buf, buflen, offset, + M_WAITOK)); +} + /* * Syscalls. */ @@ -2139,7 +2157,7 @@ linker_load_dependencies(linker_file_t lf) const struct mod_depend *verinfo; modlist_t mod; const char *modname, *nmodname; - int ver, error = 0, count; + int ver, error = 0; /* * All files are dependent on /kernel. @@ -2152,7 +2170,7 @@ linker_load_dependencies(linker_file_t lf) return (error); } if (linker_file_lookup_set(lf, MDT_SETNAME, &start, &stop, - &count) != 0) + NULL) != 0) return (0); for (mdp = start; mdp < stop; mdp++) { mp = *mdp; diff --git a/freebsd/sys/kern/kern_mbuf.c b/freebsd/sys/kern/kern_mbuf.c index 13467dfe..09cae9b2 100644 --- a/freebsd/sys/kern/kern_mbuf.c +++ b/freebsd/sys/kern/kern_mbuf.c @@ -35,8 +35,8 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_param.h> #include <sys/param.h> +#include <sys/conf.h> #include <sys/malloc.h> -#include <sys/types.h> #include <sys/systm.h> #include <sys/mbuf.h> #include <sys/domain.h> @@ -423,6 +423,8 @@ nd_buf_import(void *arg, void **store, int count, int domain __unused, struct mbuf *m; int i; + KASSERT(!dumping, ("%s: ran out of pre-allocated mbufs", __func__)); + q = arg; for (i = 0; i < count; i++) { @@ -458,6 +460,8 @@ nd_pack_import(void *arg __unused, void **store, int count, int domain __unused, void *clust; int i; + KASSERT(!dumping, ("%s: ran out of pre-allocated mbufs", __func__)); + for (i = 0; i < count; i++) { m = m_get(MT_DATA, M_NOWAIT); if (m == NULL) diff --git a/freebsd/sys/kern/subr_prf.c b/freebsd/sys/kern/subr_prf.c index 6e719897..2b45c13e 100644 --- a/freebsd/sys/kern/subr_prf.c +++ b/freebsd/sys/kern/subr_prf.c @@ -273,27 +273,6 @@ vtprintf(struct proc *p, int pri, const char *fmt, va_list ap) msgbuftrigger = 1; } -/* - * Ttyprintf displays a message on a tty; it should be used only by - * the tty driver, or anything that knows the underlying tty will not - * be revoke(2)'d away. Other callers should use tprintf. - */ -int -ttyprintf(struct tty *tp, const char *fmt, ...) -{ - va_list ap; - struct putchar_arg pca; - int retval; - - va_start(ap, fmt); - pca.tty = tp; - pca.flags = TOTTY; - pca.p_bufr = NULL; - retval = kvprintf(fmt, putchar, &pca, 10, ap); - va_end(ap); - return (retval); -} - static int _vprintf(int level, int flags, const char *fmt, va_list ap) { @@ -1089,11 +1068,6 @@ msgbufinit(void *ptr, int size) oldp = msgbufp; } -static int unprivileged_read_msgbuf = 1; -SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_read_msgbuf, - CTLFLAG_RW, &unprivileged_read_msgbuf, 0, - "Unprivileged processes may read the kernel message buffer"); - /* Sysctls for accessing/clearing the msgbuf */ static int sysctl_kern_msgbuf(SYSCTL_HANDLER_ARGS) @@ -1102,11 +1076,9 @@ sysctl_kern_msgbuf(SYSCTL_HANDLER_ARGS) u_int seq; int error, len; - if (!unprivileged_read_msgbuf) { - error = priv_check(req->td, PRIV_MSGBUF); - if (error) - return (error); - } + error = priv_check(req->td, PRIV_MSGBUF); + if (error) + return (error); /* Read the whole buffer, one chunk at a time. */ mtx_lock(&msgbuf_lock); diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c index 9e41f0f6..273cb730 100644 --- a/freebsd/sys/kern/sys_generic.c +++ b/freebsd/sys/kern/sys_generic.c @@ -1053,8 +1053,9 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, error = copyin(name, ibits[x], ncpubytes); \ if (error != 0) \ goto done; \ - bzero((char *)ibits[x] + ncpubytes, \ - ncpbytes - ncpubytes); \ + if (ncpbytes != ncpubytes) \ + bzero((char *)ibits[x] + ncpubytes, \ + ncpbytes - ncpubytes); \ } \ } while (0) getbits(fd_in, 0); diff --git a/freebsd/sys/kern/tty_ttydisc.c b/freebsd/sys/kern/tty_ttydisc.c index b94080fb..36d541e3 100644 --- a/freebsd/sys/kern/tty_ttydisc.c +++ b/freebsd/sys/kern/tty_ttydisc.c @@ -1255,17 +1255,27 @@ ttydisc_getc_poll(struct tty *tp) */ int -tty_putchar(struct tty *tp, char c) +tty_putstrn(struct tty *tp, const char *p, size_t n) { + size_t i; + tty_lock_assert(tp, MA_OWNED); if (tty_gone(tp)) return (-1); - ttydisc_echo_force(tp, c, 0); + for (i = 0; i < n; i++) + ttydisc_echo_force(tp, p[i], 0); + tp->t_writepos = tp->t_column; ttyinq_reprintpos_set(&tp->t_inq); ttydevsw_outwakeup(tp); return (0); } + +int +tty_putchar(struct tty *tp, char c) +{ + return (tty_putstrn(tp, &c, 1)); +} diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c index 3143a392..aa045cd9 100644 --- a/freebsd/sys/kern/uipc_socket.c +++ b/freebsd/sys/kern/uipc_socket.c @@ -942,12 +942,13 @@ solisten_dequeue(struct socket *head, struct socket **ret, int flags) if (head->so_error) { error = head->so_error; head->so_error = 0; + } else if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) + error = EWOULDBLOCK; + else + error = 0; + if (error) { SOLISTEN_UNLOCK(head); return (error); - } - if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) { - SOLISTEN_UNLOCK(head); - return (EWOULDBLOCK); } so = TAILQ_FIRST(&head->sol_comp); SOCK_LOCK(so); @@ -1050,6 +1051,9 @@ sofree(struct socket *so) so->so_error = ECONNABORTED; SOCK_UNLOCK(so); + if (so->so_dtor != NULL) + so->so_dtor(so); + VNET_SO_ASSERT(so); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(so); @@ -1126,8 +1130,6 @@ soclose(struct socket *so) drop: if (so->so_proto->pr_usrreqs->pru_close != NULL) (*so->so_proto->pr_usrreqs->pru_close)(so); - if (so->so_dtor != NULL) - so->so_dtor(so); SOCK_LOCK(so); if ((listening = (so->so_options & SO_ACCEPTCONN))) { @@ -2620,11 +2622,20 @@ soshutdown(struct socket *so, int how) * both backward-compatibility and POSIX requirements by forcing * ENOTCONN but still asking protocol to perform pru_shutdown(). */ - if (so->so_type != SOCK_DGRAM) + if (so->so_type != SOCK_DGRAM && !SOLISTENING(so)) return (ENOTCONN); soerror_enotconn = 1; } + if (SOLISTENING(so)) { + if (how != SHUT_WR) { + SOLISTEN_LOCK(so); + so->so_error = ECONNABORTED; + solisten_wakeup(so); /* unlocks so */ + } + goto done; + } + CURVNET_SET(so->so_vnet); if (pr->pr_usrreqs->pru_flush != NULL) (*pr->pr_usrreqs->pru_flush)(so, how); @@ -2639,6 +2650,7 @@ soshutdown(struct socket *so, int how) wakeup(&so->so_timeo); CURVNET_RESTORE(); +done: return (soerror_enotconn ? ENOTCONN : 0); } @@ -3324,6 +3336,8 @@ sopoll_generic(struct socket *so, int events, struct ucred *active_cred, revents = 0; else if (!TAILQ_EMPTY(&so->sol_comp)) revents = events & (POLLIN | POLLRDNORM); + else if ((events & POLLINIGNEOF) == 0 && so->so_error) + revents = (events & (POLLIN | POLLRDNORM)) | POLLHUP; else { selrecord(td, &so->so_rdsel); revents = 0; @@ -3609,6 +3623,11 @@ filt_soread(struct knote *kn, long hint) if (SOLISTENING(so)) { SOCK_LOCK_ASSERT(so); kn->kn_data = so->sol_qlen; + if (so->so_error) { + kn->kn_flags |= EV_EOF; + kn->kn_fflags = so->so_error; + return (1); + } return (!TAILQ_EMPTY(&so->sol_comp)); } diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c index 4d3c303c..7721af11 100644 --- a/freebsd/sys/net/if.c +++ b/freebsd/sys/net/if.c @@ -271,7 +271,6 @@ static int if_setflag(struct ifnet *, int, int, int *, int); static int if_transmit(struct ifnet *ifp, struct mbuf *m); static void if_unroute(struct ifnet *, int flag, int fam); static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *); -static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); static void do_link_state_change(void *, int); static int if_getgroup(struct ifgroupreq *, struct ifnet *); @@ -974,12 +973,18 @@ if_attachdomain1(struct ifnet *ifp) void if_purgeaddrs(struct ifnet *ifp) { - struct ifaddr *ifa, *next; + struct ifaddr *ifa; - NET_EPOCH_ENTER(); - CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { - if (ifa->ifa_addr->sa_family == AF_LINK) - continue; + while (1) { + NET_EPOCH_ENTER(); + CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_LINK) + break; + } + NET_EPOCH_EXIT(); + + if (ifa == NULL) + break; #ifdef INET /* XXX: Ugly!! ad hoc just for INET */ if (ifa->ifa_addr->sa_family == AF_INET) { @@ -1006,7 +1011,6 @@ if_purgeaddrs(struct ifnet *ifp) IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } - NET_EPOCH_EXIT(); } /* @@ -2521,7 +2525,7 @@ ifr_data_get_ptr(void *ifrp) /* * Hardware specific interface ioctls. */ -static int +int ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) { struct ifreq *ifr; diff --git a/freebsd/sys/net/if_gif.c b/freebsd/sys/net/if_gif.c index 5a67e7ff..3a50316f 100644 --- a/freebsd/sys/net/if_gif.c +++ b/freebsd/sys/net/if_gif.c @@ -278,6 +278,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) uint8_t proto, ecn; int error; + GIF_RLOCK(); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { @@ -286,10 +287,10 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m) } #endif error = ENETDOWN; - GIF_RLOCK(); sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || + (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->gif_family == 0 || (error = if_tunnel_check_nesting(ifp, m, MTAG_GIF, V_max_gif_nesting)) != 0) { @@ -680,7 +681,6 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) cmd == SIOCSIFPHYADDR_IN6 || #endif 0) { - ifp->if_drv_flags |= IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_UP); } } @@ -695,6 +695,7 @@ gif_delete_tunnel(struct gif_softc *sc) sx_assert(&gif_ioctl_sx, SA_XLOCKED); if (sc->gif_family != 0) { + CK_LIST_REMOVE(sc, srchash); CK_LIST_REMOVE(sc, chain); /* Wait until it become safe to free gif_hdr */ GIF_WAIT(); diff --git a/freebsd/sys/net/if_gif.h b/freebsd/sys/net/if_gif.h index 501a4e5d..264fe7b3 100644 --- a/freebsd/sys/net/if_gif.h +++ b/freebsd/sys/net/if_gif.h @@ -63,6 +63,7 @@ struct gif_softc { } gif_uhdr; CK_LIST_ENTRY(gif_softc) chain; + CK_LIST_ENTRY(gif_softc) srchash; }; CK_LIST_HEAD(gif_list, gif_softc); MALLOC_DECLARE(M_GIF); diff --git a/freebsd/sys/net/if_gre.c b/freebsd/sys/net/if_gre.c index 5ff41259..4fbc105e 100644 --- a/freebsd/sys/net/if_gre.c +++ b/freebsd/sys/net/if_gre.c @@ -332,7 +332,6 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) cmd == SIOCSIFPHYADDR_IN6 || #endif 0) { - ifp->if_drv_flags |= IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_UP); } } @@ -348,6 +347,7 @@ gre_delete_tunnel(struct gre_softc *sc) sx_assert(&gre_ioctl_sx, SA_XLOCKED); if (sc->gre_family != 0) { CK_LIST_REMOVE(sc, chain); + CK_LIST_REMOVE(sc, srchash); GRE_WAIT(); free(sc->gre_hdr, M_GRE); sc->gre_family = 0; @@ -549,6 +549,7 @@ gre_setseqn(struct grehdr *gh, uint32_t seq) static int gre_transmit(struct ifnet *ifp, struct mbuf *m) { + GRE_RLOCK_TRACKER; struct gre_softc *sc; struct grehdr *gh; uint32_t af; @@ -556,6 +557,7 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m) uint16_t proto; len = 0; + GRE_RLOCK(); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { @@ -564,10 +566,10 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m) } #endif error = ENETDOWN; - GRE_RLOCK(); sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || + (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->gre_family == 0 || (error = if_tunnel_check_nesting(ifp, m, MTAG_GRE, V_max_gre_nesting)) != 0) { @@ -575,6 +577,8 @@ gre_transmit(struct ifnet *ifp, struct mbuf *m) goto drop; } af = m->m_pkthdr.csum_data; + BPF_MTAP2(ifp, &af, sizeof(af), m); + m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->gre_fibnum); M_PREPEND(m, sc->gre_hlen, M_NOWAIT); if (m == NULL) { diff --git a/freebsd/sys/net/if_gre.h b/freebsd/sys/net/if_gre.h index cc8b08f9..4b93321a 100644 --- a/freebsd/sys/net/if_gre.h +++ b/freebsd/sys/net/if_gre.h @@ -82,6 +82,7 @@ struct gre_softc { } gre_uhdr; CK_LIST_ENTRY(gre_softc) chain; + CK_LIST_ENTRY(gre_softc) srchash; }; CK_LIST_HEAD(gre_list, gre_softc); MALLOC_DECLARE(M_GRE); @@ -91,7 +92,8 @@ MALLOC_DECLARE(M_GRE); #endif #define GRE2IFP(sc) ((sc)->gre_ifp) -#define GRE_RLOCK() struct epoch_tracker gre_et; epoch_enter_preempt(net_epoch_preempt, &gre_et) +#define GRE_RLOCK_TRACKER struct epoch_tracker gre_et +#define GRE_RLOCK() epoch_enter_preempt(net_epoch_preempt, &gre_et) #define GRE_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &gre_et) #define GRE_WAIT() epoch_wait_preempt(net_epoch_preempt) diff --git a/freebsd/sys/net/if_ipsec.c b/freebsd/sys/net/if_ipsec.c index 5b1d5e82..08465911 100644 --- a/freebsd/sys/net/if_ipsec.c +++ b/freebsd/sys/net/if_ipsec.c @@ -1,8 +1,8 @@ #include <machine/rtems-bsd-kernel-space.h> /*- - * Copyright (c) 2016 Yandex LLC - * Copyright (c) 2016 Andrey V. Elsukov <ae@FreeBSD.org> + * Copyright (c) 2016-2018 Yandex LLC + * Copyright (c) 2016-2018 Andrey V. Elsukov <ae@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/module.h> -#include <sys/rmlock.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/sx.h> @@ -63,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_var.h> #include <netinet/ip.h> +#include <netinet/ip_encap.h> #include <netinet/ip6.h> #include <netinet6/in6_var.h> @@ -89,57 +89,71 @@ static const char ipsecname[] = "ipsec"; struct ipsec_softc { struct ifnet *ifp; - - struct rmlock lock; struct secpolicy *sp[IPSEC_SPCOUNT]; - uint32_t reqid; u_int family; u_int fibnum; - LIST_ENTRY(ipsec_softc) chain; - LIST_ENTRY(ipsec_softc) hash; + + CK_LIST_ENTRY(ipsec_softc) idhash; + CK_LIST_ENTRY(ipsec_softc) srchash; }; -#define IPSEC_LOCK_INIT(sc) rm_init(&(sc)->lock, "if_ipsec softc") -#define IPSEC_LOCK_DESTROY(sc) rm_destroy(&(sc)->lock) -#define IPSEC_RLOCK_TRACKER struct rm_priotracker ipsec_tracker -#define IPSEC_RLOCK(sc) rm_rlock(&(sc)->lock, &ipsec_tracker) -#define IPSEC_RUNLOCK(sc) rm_runlock(&(sc)->lock, &ipsec_tracker) -#define IPSEC_RLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_RLOCKED) -#define IPSEC_WLOCK(sc) rm_wlock(&(sc)->lock) -#define IPSEC_WUNLOCK(sc) rm_wunlock(&(sc)->lock) -#define IPSEC_WLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_WLOCKED) - -static struct rmlock ipsec_sc_lock; -RM_SYSINIT(ipsec_sc_lock, &ipsec_sc_lock, "if_ipsec softc list"); - -#define IPSEC_SC_RLOCK_TRACKER struct rm_priotracker ipsec_sc_tracker -#define IPSEC_SC_RLOCK() rm_rlock(&ipsec_sc_lock, &ipsec_sc_tracker) -#define IPSEC_SC_RUNLOCK() rm_runlock(&ipsec_sc_lock, &ipsec_sc_tracker) -#define IPSEC_SC_RLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_RLOCKED) -#define IPSEC_SC_WLOCK() rm_wlock(&ipsec_sc_lock) -#define IPSEC_SC_WUNLOCK() rm_wunlock(&ipsec_sc_lock) -#define IPSEC_SC_WLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_WLOCKED) - -LIST_HEAD(ipsec_iflist, ipsec_softc); -VNET_DEFINE_STATIC(struct ipsec_iflist, ipsec_sc_list); -VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec_sc_htbl); -VNET_DEFINE_STATIC(u_long, ipsec_sc_hmask); -#define V_ipsec_sc_list VNET(ipsec_sc_list) -#define V_ipsec_sc_htbl VNET(ipsec_sc_htbl) -#define V_ipsec_sc_hmask VNET(ipsec_sc_hmask) - -static uint32_t -ipsec_hash(uint32_t id) +#define IPSEC_RLOCK_TRACKER struct epoch_tracker ipsec_et +#define IPSEC_RLOCK() epoch_enter_preempt(net_epoch_preempt, &ipsec_et) +#define IPSEC_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &ipsec_et) +#define IPSEC_WAIT() epoch_wait_preempt(net_epoch_preempt) + +#ifndef IPSEC_HASH_SIZE +#define IPSEC_HASH_SIZE (1 << 5) +#endif + +CK_LIST_HEAD(ipsec_iflist, ipsec_softc); +VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec_idhtbl) = NULL; +#define V_ipsec_idhtbl VNET(ipsec_idhtbl) + +#ifdef INET +VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec4_srchtbl) = NULL; +#define V_ipsec4_srchtbl VNET(ipsec4_srchtbl) +static const struct srcaddrtab *ipsec4_srctab = NULL; +#endif + +#ifdef INET6 +VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec6_srchtbl) = NULL; +#define V_ipsec6_srchtbl VNET(ipsec6_srchtbl) +static const struct srcaddrtab *ipsec6_srctab = NULL; +#endif + +static struct ipsec_iflist * +ipsec_idhash(uint32_t id) { - return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT)); + return (&V_ipsec_idhtbl[fnv_32_buf(&id, sizeof(id), + FNV1_32_INIT) & (IPSEC_HASH_SIZE - 1)]); } -#define SCHASH_NHASH_LOG2 5 -#define SCHASH_NHASH (1 << SCHASH_NHASH_LOG2) -#define SCHASH_HASHVAL(id) (ipsec_hash((id)) & V_ipsec_sc_hmask) -#define SCHASH_HASH(id) &V_ipsec_sc_htbl[SCHASH_HASHVAL(id)] +static struct ipsec_iflist * +ipsec_srchash(const struct sockaddr *sa) +{ + uint32_t hval; + + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + hval = fnv_32_buf( + &((const struct sockaddr_in *)sa)->sin_addr.s_addr, + sizeof(in_addr_t), FNV1_32_INIT); + return (&V_ipsec4_srchtbl[hval & (IPSEC_HASH_SIZE - 1)]); +#endif +#ifdef INET6 + case AF_INET6: + hval = fnv_32_buf( + &((const struct sockaddr_in6 *)sa)->sin6_addr, + sizeof(struct in6_addr), FNV1_32_INIT); + return (&V_ipsec6_srchtbl[hval & (IPSEC_HASH_SIZE - 1)]); +#endif + } + return (NULL); +} /* * ipsec_ioctl_sx protects from concurrent ioctls. @@ -150,12 +164,14 @@ SX_SYSINIT(ipsec_ioctl_sx, &ipsec_ioctl_sx, "ipsec_ioctl"); static int ipsec_init_reqid(struct ipsec_softc *); static int ipsec_set_tunnel(struct ipsec_softc *, struct sockaddr *, struct sockaddr *, uint32_t); -static void ipsec_delete_tunnel(struct ifnet *, int); +static void ipsec_delete_tunnel(struct ipsec_softc *); static int ipsec_set_addresses(struct ifnet *, struct sockaddr *, struct sockaddr *); -static int ipsec_set_reqid(struct ifnet *, uint32_t); +static int ipsec_set_reqid(struct ipsec_softc *, uint32_t); +static void ipsec_set_running(struct ipsec_softc *); +static void ipsec_srcaddr(void *, const struct sockaddr *, int); static int ipsec_ioctl(struct ifnet *, u_long, caddr_t); static int ipsec_transmit(struct ifnet *, struct mbuf *); static int ipsec_output(struct ifnet *, struct mbuf *, @@ -180,7 +196,6 @@ ipsec_clone_create(struct if_clone *ifc, int unit, caddr_t params) sc->fibnum = BSD_DEFAULT_FIB; #endif /* __rtems__ */ sc->ifp = ifp = if_alloc(IFT_TUNNEL); - IPSEC_LOCK_INIT(sc); ifp->if_softc = sc; if_initname(ifp, ipsecname, unit); @@ -194,9 +209,6 @@ ipsec_clone_create(struct if_clone *ifc, int unit, caddr_t params) if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(uint32_t)); - IPSEC_SC_WLOCK(); - LIST_INSERT_HEAD(&V_ipsec_sc_list, sc, chain); - IPSEC_SC_WUNLOCK(); return (0); } @@ -207,28 +219,48 @@ ipsec_clone_destroy(struct ifnet *ifp) sx_xlock(&ipsec_ioctl_sx); sc = ifp->if_softc; - - IPSEC_SC_WLOCK(); - ipsec_delete_tunnel(ifp, 1); - LIST_REMOVE(sc, chain); - IPSEC_SC_WUNLOCK(); - + ipsec_delete_tunnel(sc); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&ipsec_ioctl_sx); + IPSEC_WAIT(); if_free(ifp); - IPSEC_LOCK_DESTROY(sc); free(sc, M_IPSEC); } +static struct ipsec_iflist * +ipsec_hashinit(void) +{ + struct ipsec_iflist *hash; + int i; + + hash = malloc(sizeof(struct ipsec_iflist) * IPSEC_HASH_SIZE, + M_IPSEC, M_WAITOK); + for (i = 0; i < IPSEC_HASH_SIZE; i++) + CK_LIST_INIT(&hash[i]); + + return (hash); +} + static void vnet_ipsec_init(const void *unused __unused) { - LIST_INIT(&V_ipsec_sc_list); - V_ipsec_sc_htbl = hashinit(SCHASH_NHASH, M_IPSEC, &V_ipsec_sc_hmask); + V_ipsec_idhtbl = ipsec_hashinit(); +#ifdef INET + V_ipsec4_srchtbl = ipsec_hashinit(); + if (IS_DEFAULT_VNET(curvnet)) + ipsec4_srctab = ip_encap_register_srcaddr(ipsec_srcaddr, + NULL, M_WAITOK); +#endif +#ifdef INET6 + V_ipsec6_srchtbl = ipsec_hashinit(); + if (IS_DEFAULT_VNET(curvnet)) + ipsec6_srctab = ip6_encap_register_srcaddr(ipsec_srcaddr, + NULL, M_WAITOK); +#endif V_ipsec_cloner = if_clone_simple(ipsecname, ipsec_clone_create, ipsec_clone_destroy, 0); } @@ -240,7 +272,17 @@ vnet_ipsec_uninit(const void *unused __unused) { if_clone_detach(V_ipsec_cloner); - hashdestroy(V_ipsec_sc_htbl, M_IPSEC, V_ipsec_sc_hmask); + free(V_ipsec_idhtbl, M_IPSEC); +#ifdef INET + if (IS_DEFAULT_VNET(curvnet)) + ip_encap_unregister_srcaddr(ipsec4_srctab); + free(V_ipsec4_srchtbl, M_IPSEC); +#endif +#ifdef INET6 + if (IS_DEFAULT_VNET(curvnet)) + ip6_encap_unregister_srcaddr(ipsec6_srctab); + free(V_ipsec6_srchtbl, M_IPSEC); +#endif } VNET_SYSUNINIT(vnet_ipsec_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_ipsec_uninit, NULL); @@ -287,6 +329,7 @@ ipsec_transmit(struct ifnet *ifp, struct mbuf *m) uint32_t af; int error; + IPSEC_RLOCK(); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { @@ -298,7 +341,7 @@ ipsec_transmit(struct ifnet *ifp, struct mbuf *m) sc = ifp->if_softc; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ifp->if_flags & IFF_MONITOR) != 0 || - (ifp->if_flags & IFF_UP) == 0) { + (ifp->if_flags & IFF_UP) == 0 || sc->family == 0) { m_freem(m); goto err; } @@ -333,16 +376,9 @@ ipsec_transmit(struct ifnet *ifp, struct mbuf *m) goto err; } - IPSEC_RLOCK(sc); - if (sc->family == 0) { - IPSEC_RUNLOCK(sc); - m_freem(m); - goto err; - } sp = ipsec_getpolicy(sc, IPSEC_DIR_OUTBOUND, af); key_addref(sp); M_SETFIB(m, sc->fibnum); - IPSEC_RUNLOCK(sc); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); @@ -365,6 +401,7 @@ ipsec_transmit(struct ifnet *ifp, struct mbuf *m) err: if (error != 0) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + IPSEC_RUNLOCK(); return (error); } @@ -385,7 +422,7 @@ ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, int ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af) { - IPSEC_SC_RLOCK_TRACKER; + IPSEC_RLOCK_TRACKER; struct secasindex *saidx; struct ipsec_softc *sc; struct ifnet *ifp; @@ -400,13 +437,10 @@ ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af) sav->sah->saidx.proto != IPPROTO_ESP) return (0); - IPSEC_SC_RLOCK(); - /* - * We only acquire SC_RLOCK() while we are doing search in - * ipsec_sc_htbl. It is safe, because removing softc or changing - * of reqid/addresses requires removing from hash table. - */ - LIST_FOREACH(sc, SCHASH_HASH(sav->sah->saidx.reqid), hash) { + IPSEC_RLOCK(); + CK_LIST_FOREACH(sc, ipsec_idhash(sav->sah->saidx.reqid), idhash) { + if (sc->family == 0) + continue; saidx = ipsec_getsaidx(sc, IPSEC_DIR_INBOUND, sav->sah->saidx.src.sa.sa_family); /* SA's reqid should match reqid in SP */ @@ -422,14 +456,14 @@ ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af) break; } if (sc == NULL) { - IPSEC_SC_RUNLOCK(); + IPSEC_RUNLOCK(); /* Tunnel was not found. Nothing to do. */ return (0); } ifp = sc->ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ifp->if_flags & IFF_UP) == 0) { - IPSEC_SC_RUNLOCK(); + IPSEC_RUNLOCK(); m_freem(m); return (ENETDOWN); } @@ -438,7 +472,6 @@ ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af) * Set its ifnet as receiving interface. */ m->m_pkthdr.rcvif = ifp; - IPSEC_SC_RUNLOCK(); m_clrprotoflags(m); M_SETFIB(m, ifp->if_fib); @@ -446,17 +479,17 @@ ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af) if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((ifp->if_flags & IFF_MONITOR) != 0) { + IPSEC_RUNLOCK(); m_freem(m); return (ENETDOWN); } + IPSEC_RUNLOCK(); return (0); } -/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ -int +static int ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - IPSEC_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq*)data; struct sockaddr *dst, *src; struct ipsec_softc *sc; @@ -570,9 +603,10 @@ ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) #endif #ifdef INET6 case AF_INET6: - if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) - || - IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) + if (IN6_IS_ADDR_UNSPECIFIED( + &satosin6(src)->sin6_addr) || + IN6_IS_ADDR_UNSPECIFIED( + &satosin6(dst)->sin6_addr)) goto bad; /* * Check validity of the scope zone ID of the @@ -590,7 +624,7 @@ ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = ipsec_set_addresses(ifp, src, dst); break; case SIOCDIFPHYADDR: - ipsec_delete_tunnel(ifp, 0); + ipsec_delete_tunnel(sc); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: @@ -598,9 +632,7 @@ ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: #endif - IPSEC_RLOCK(sc); if (sc->family == 0) { - IPSEC_RUNLOCK(sc); error = EADDRNOTAVAIL; break; } @@ -656,7 +688,6 @@ ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) #endif } } - IPSEC_RUNLOCK(sc); if (error != 0) break; switch (cmd) { @@ -702,7 +733,7 @@ ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = copyin(ifr_data_get_ptr(ifr), &reqid, sizeof(reqid)); if (error != 0) break; - error = ipsec_set_reqid(ifp, reqid); + error = ipsec_set_reqid(sc, reqid); break; default: error = EINVAL; @@ -714,6 +745,59 @@ bad: } /* + * Check that ingress address belongs to local host. + */ +static void +ipsec_set_running(struct ipsec_softc *sc) +{ + struct secasindex *saidx; + int localip; + + saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); + localip = 0; + switch (sc->family) { +#ifdef INET + case AF_INET: + localip = in_localip(saidx->src.sin.sin_addr); + break; +#endif +#ifdef INET6 + case AF_INET6: + localip = in6_localip(&saidx->src.sin6.sin6_addr); + break; +#endif + } + if (localip != 0) + sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; + else + sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; +} + +/* + * ifaddr_event handler. + * Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent + * source address spoofing. + */ +static void +ipsec_srcaddr(void *arg __unused, const struct sockaddr *sa, + int event __unused) +{ + struct ipsec_softc *sc; + struct secasindex *saidx; + + MPASS(in_epoch(net_epoch_preempt)); + CK_LIST_FOREACH(sc, ipsec_srchash(sa), srchash) { + if (sc->family == 0) + continue; + saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sa->sa_family); + if (saidx == NULL || + key_sockaddrcmp(&saidx->src.sa, sa, 0) != 0) + continue; + ipsec_set_running(sc); + } +} + +/* * Allocate new private security policies for tunneling interface. * Each tunneling interface has following security policies for * both AF: @@ -785,8 +869,8 @@ ipsec_check_reqid(uint32_t reqid) { struct ipsec_softc *sc; - IPSEC_SC_RLOCK_ASSERT(); - LIST_FOREACH(sc, &V_ipsec_sc_list, chain) { + sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); + CK_LIST_FOREACH(sc, ipsec_idhash(reqid), idhash) { if (sc->reqid == reqid) return (EEXIST); } @@ -806,8 +890,7 @@ ipsec_init_reqid(struct ipsec_softc *sc) uint32_t reqid; int trycount; - IPSEC_SC_RLOCK_ASSERT(); - + sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); if (sc->reqid != 0) /* already initialized */ return (0); @@ -820,6 +903,7 @@ ipsec_init_reqid(struct ipsec_softc *sc) if (trycount == 0) return (EEXIST); sc->reqid = reqid; + CK_LIST_INSERT_HEAD(ipsec_idhash(reqid), sc, idhash); return (0); } @@ -830,34 +914,30 @@ ipsec_init_reqid(struct ipsec_softc *sc) * Also softc would not disappear while we hold ioctl_sx lock. */ static int -ipsec_set_reqid(struct ifnet *ifp, uint32_t reqid) +ipsec_set_reqid(struct ipsec_softc *sc, uint32_t reqid) { - IPSEC_SC_RLOCK_TRACKER; - struct ipsec_softc *sc; struct secasindex *saidx; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); - sc = ifp->if_softc; if (sc->reqid == reqid && reqid != 0) return (0); - IPSEC_SC_RLOCK(); if (reqid != 0) { /* Check that specified reqid doesn't exist */ - if (ipsec_check_reqid(reqid) != 0) { - IPSEC_SC_RUNLOCK(); + if (ipsec_check_reqid(reqid) != 0) return (EEXIST); + if (sc->reqid != 0) { + CK_LIST_REMOVE(sc, idhash); + IPSEC_WAIT(); } sc->reqid = reqid; + CK_LIST_INSERT_HEAD(ipsec_idhash(reqid), sc, idhash); } else { /* Generate new reqid */ - if (ipsec_init_reqid(sc) != 0) { - IPSEC_SC_RUNLOCK(); + if (ipsec_init_reqid(sc) != 0) return (EEXIST); - } } - IPSEC_SC_RUNLOCK(); /* Tunnel isn't fully configured, just return. */ if (sc->family == 0) @@ -877,7 +957,6 @@ static int ipsec_set_addresses(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) { - IPSEC_SC_RLOCK_TRACKER; struct ipsec_softc *sc, *tsc; struct secasindex *saidx; @@ -893,43 +972,21 @@ ipsec_set_addresses(struct ifnet *ifp, struct sockaddr *src, return (0); /* Nothing has been changed. */ } - /* - * We cannot service IPsec tunnel when source address is - * not our own. - */ -#ifdef INET - if (src->sa_family == AF_INET && - in_localip(satosin(src)->sin_addr) == 0) - return (EADDRNOTAVAIL); -#endif -#ifdef INET6 - /* - * NOTE: IPv6 addresses are in kernel internal form with - * embedded scope zone id. - */ - if (src->sa_family == AF_INET6 && - in6_localip(&satosin6(src)->sin6_addr) == 0) - return (EADDRNOTAVAIL); -#endif /* Check that given addresses aren't already configured */ - IPSEC_SC_RLOCK(); - LIST_FOREACH(tsc, &V_ipsec_sc_list, chain) { - if (tsc == sc || tsc->family != src->sa_family) + CK_LIST_FOREACH(tsc, ipsec_srchash(src), srchash) { + if (tsc == sc) continue; + MPASS(tsc->family == src->sa_family); saidx = ipsec_getsaidx(tsc, IPSEC_DIR_OUTBOUND, tsc->family); if (key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 && key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0) { /* We already have tunnel with such addresses */ - IPSEC_SC_RUNLOCK(); return (EADDRNOTAVAIL); } } /* If reqid is not set, generate new one. */ - if (ipsec_init_reqid(sc) != 0) { - IPSEC_SC_RUNLOCK(); + if (ipsec_init_reqid(sc) != 0) return (EEXIST); - } - IPSEC_SC_RUNLOCK(); return (ipsec_set_tunnel(sc, src, dst, sc->reqid)); } @@ -938,8 +995,7 @@ ipsec_set_tunnel(struct ipsec_softc *sc, struct sockaddr *src, struct sockaddr *dst, uint32_t reqid) { struct secpolicy *sp[IPSEC_SPCOUNT]; - struct secpolicy *oldsp[IPSEC_SPCOUNT]; - int i, f; + int i; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); @@ -951,58 +1007,41 @@ ipsec_set_tunnel(struct ipsec_softc *sc, struct sockaddr *src, key_freesp(&sp[i]); return (EAGAIN); } - IPSEC_SC_WLOCK(); - if ((f = sc->family) != 0) - LIST_REMOVE(sc, hash); - IPSEC_WLOCK(sc); - for (i = 0; i < IPSEC_SPCOUNT; i++) { - oldsp[i] = sc->sp[i]; + if (sc->family != 0) + ipsec_delete_tunnel(sc); + for (i = 0; i < IPSEC_SPCOUNT; i++) sc->sp[i] = sp[i]; - } sc->family = src->sa_family; - IPSEC_WUNLOCK(sc); - LIST_INSERT_HEAD(SCHASH_HASH(sc->reqid), sc, hash); - IPSEC_SC_WUNLOCK(); + CK_LIST_INSERT_HEAD(ipsec_srchash(src), sc, srchash); } else { sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; return (ENOMEM); } - - sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; - if (f != 0) { - key_unregister_ifnet(oldsp, IPSEC_SPCOUNT); - for (i = 0; i < IPSEC_SPCOUNT; i++) - key_freesp(&oldsp[i]); - } + ipsec_set_running(sc); return (0); } static void -ipsec_delete_tunnel(struct ifnet *ifp, int locked) +ipsec_delete_tunnel(struct ipsec_softc *sc) { - struct ipsec_softc *sc = ifp->if_softc; - struct secpolicy *oldsp[IPSEC_SPCOUNT]; int i; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if (sc->family != 0) { - if (!locked) - IPSEC_SC_WLOCK(); - /* Remove from hash table */ - LIST_REMOVE(sc, hash); - IPSEC_WLOCK(sc); - for (i = 0; i < IPSEC_SPCOUNT; i++) { - oldsp[i] = sc->sp[i]; - sc->sp[i] = NULL; - } + CK_LIST_REMOVE(sc, srchash); + IPSEC_WAIT(); + + /* + * Make sure that ipsec_if_input() will not do access + * to softc's policies. + */ sc->family = 0; - IPSEC_WUNLOCK(sc); - if (!locked) - IPSEC_SC_WUNLOCK(); - key_unregister_ifnet(oldsp, IPSEC_SPCOUNT); + IPSEC_WAIT(); + + key_unregister_ifnet(sc->sp, IPSEC_SPCOUNT); for (i = 0; i < IPSEC_SPCOUNT; i++) - key_freesp(&oldsp[i]); + key_freesp(&sc->sp[i]); } } diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c index 4d5aaa29..632ea744 100644 --- a/freebsd/sys/net/if_lagg.c +++ b/freebsd/sys/net/if_lagg.c @@ -2035,15 +2035,18 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp) { struct lagg_lb *lb = (struct lagg_lb *)sc->sc_psc; struct lagg_port *lp_next; - int i = 0; + int i = 0, rv; + rv = 0; bzero(&lb->lb_ports, sizeof(lb->lb_ports)); LAGG_RLOCK(); CK_SLIST_FOREACH(lp_next, &sc->sc_ports, lp_entries) { if (lp_next == lp) continue; - if (i >= LAGG_MAX_PORTS) - return (EINVAL); + if (i >= LAGG_MAX_PORTS) { + rv = EINVAL; + break; + } if (sc->sc_ifflags & IFF_DEBUG) printf("%s: port %s at index %d\n", sc->sc_ifname, lp_next->lp_ifp->if_xname, i); @@ -2051,7 +2054,7 @@ lagg_lb_porttable(struct lagg_softc *sc, struct lagg_port *lp) } LAGG_RUNLOCK(); - return (0); + return (rv); } static int diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c index c918a14e..a540a59a 100644 --- a/freebsd/sys/net/if_tap.c +++ b/freebsd/sys/net/if_tap.c @@ -729,10 +729,12 @@ tapifstart(struct ifnet *ifp) static int tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) { + struct ifreq ifr; struct tap_softc *tp = dev->si_drv1; struct ifnet *ifp = tp->tap_ifp; struct tapinfo *tapp = NULL; int f; + int error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) int ival; @@ -744,7 +746,18 @@ tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td if (ifp->if_type != tapp->type) return (EPROTOTYPE); mtx_lock(&tp->tap_mtx); - ifp->if_mtu = tapp->mtu; + if (ifp->if_mtu != tapp->mtu) { + strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ); + ifr.ifr_mtu = tapp->mtu; + CURVNET_SET(ifp->if_vnet); + error = ifhwioctl(SIOCSIFMTU, ifp, + (caddr_t)&ifr, td); + CURVNET_RESTORE(); + if (error) { + mtx_unlock(&tp->tap_mtx); + return (error); + } + } ifp->if_baudrate = tapp->baudrate; mtx_unlock(&tp->tap_mtx); break; diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c index 14a75645..328b1963 100644 --- a/freebsd/sys/net/if_tun.c +++ b/freebsd/sys/net/if_tun.c @@ -672,24 +672,29 @@ static int tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) { - int error; + struct ifreq ifr; struct tun_softc *tp = dev->si_drv1; struct tuninfo *tunp; + int error; switch (cmd) { case TUNSIFINFO: tunp = (struct tuninfo *)data; - if (tunp->mtu < IF_MINMTU) - return (EINVAL); - if (TUN2IFP(tp)->if_mtu != tunp->mtu) { - error = priv_check(td, PRIV_NET_SETIFMTU); - if (error) - return (error); - } if (TUN2IFP(tp)->if_type != tunp->type) return (EPROTOTYPE); mtx_lock(&tp->tun_mtx); - TUN2IFP(tp)->if_mtu = tunp->mtu; + if (TUN2IFP(tp)->if_mtu != tunp->mtu) { + strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ); + ifr.ifr_mtu = tunp->mtu; + CURVNET_SET(TUN2IFP(tp)->if_vnet); + error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp), + (caddr_t)&ifr, td); + CURVNET_RESTORE(); + if (error) { + mtx_unlock(&tp->tun_mtx); + return (error); + } + } TUN2IFP(tp)->if_baudrate = tunp->baudrate; mtx_unlock(&tp->tun_mtx); break; diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h index 00fcbebd..6504837b 100644 --- a/freebsd/sys/net/if_var.h +++ b/freebsd/sys/net/if_var.h @@ -434,6 +434,7 @@ struct rtems_ifinputreq { #define NET_EPOCH_ENTER_ET(et) epoch_enter_preempt(net_epoch_preempt, &(et)) #define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt, &nep_et) #define NET_EPOCH_EXIT_ET(et) epoch_exit_preempt(net_epoch_preempt, &(et)) +#define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt) /* @@ -454,6 +455,11 @@ EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t); /* interface address change event */ typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t); +typedef void (*ifaddr_event_ext_handler_t)(void *, struct ifnet *, + struct ifaddr *, int); +EVENTHANDLER_DECLARE(ifaddr_event_ext, ifaddr_event_ext_handler_t); +#define IFADDR_EVENT_ADD 0 +#define IFADDR_EVENT_DEL 1 /* new interface arrival event */ typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t); @@ -782,6 +788,8 @@ int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); /* accessors for struct ifreq */ void *ifr_data_get_ptr(void *ifrp); +int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); + #ifdef DEVICE_POLLING enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS }; diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c index 22061dc4..6f07b4b4 100644 --- a/freebsd/sys/net/if_vlan.c +++ b/freebsd/sys/net/if_vlan.c @@ -89,11 +89,11 @@ __FBSDID("$FreeBSD$"); #define UP_AND_RUNNING(ifp) \ ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING) -LIST_HEAD(ifvlanhead, ifvlan); +CK_SLIST_HEAD(ifvlanhead, ifvlan); struct ifvlantrunk { struct ifnet *parent; /* parent interface of this trunk */ - struct rmlock lock; + struct mtx lock; #ifdef VLAN_ARRAY #define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1) struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */ @@ -119,7 +119,7 @@ struct ifvlantrunk { struct ifvlan *_next; \ size_t _i; \ for (_i = 0; _i < (1 << (_trunk)->hwidth); _i++) \ - LIST_FOREACH_SAFE((_ifv), &(_trunk)->hash[_i], ifv_list, _next) + CK_SLIST_FOREACH_SAFE((_ifv), &(_trunk)->hash[_i], ifv_list, _next) #endif /* VLAN_ARRAY */ /* @@ -148,13 +148,14 @@ struct ifvlantrunk { for (_i = 0; \ !(_cond) && _i < (1 << (_trunk)->hwidth); \ _i = (_touch && ((_trunk) != NULL) ? 0 : _i + 1), _touch = false) \ - if (((_ifv) = LIST_FIRST(&(_trunk)->hash[_i])) != NULL && \ + if (((_ifv) = CK_SLIST_FIRST(&(_trunk)->hash[_i])) != NULL && \ (_touch = true)) #endif /* VLAN_ARRAY */ struct vlan_mc_entry { struct sockaddr_dl mc_addr; - SLIST_ENTRY(vlan_mc_entry) mc_entries; + CK_SLIST_ENTRY(vlan_mc_entry) mc_entries; + struct epoch_context mc_epoch_ctx; }; struct ifvlan { @@ -175,9 +176,9 @@ struct ifvlan { uint8_t ifvm_pcp; /* Priority Code Point (PCP). */ } ifv_mib; struct task lladdr_task; - SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; + CK_SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; #ifndef VLAN_ARRAY - LIST_ENTRY(ifvlan) ifv_list; + CK_SLIST_ENTRY(ifvlan) ifv_list; #endif }; #define ifv_proto ifv_mib.ifvm_proto @@ -207,55 +208,36 @@ static eventhandler_tag ifdetach_tag; static eventhandler_tag iflladdr_tag; /* - * if_vlan uses two module-level locks to allow concurrent modification of vlan - * interfaces and (mostly) allow for vlans to be destroyed while they are being - * used for tx/rx. To accomplish this in a way that has acceptable performance - * and cooperation with other parts of the network stack there is a - * non-sleepable rmlock(9) and an sx(9). Both locks are exclusively acquired - * when destroying a vlan interface, i.e. when the if_vlantrunk field of struct - * ifnet is de-allocated and NULL'd. Thus a reader holding either lock has a - * guarantee that the struct ifvlantrunk references a valid vlan trunk. + * if_vlan uses two module-level synchronizations primitives to allow concurrent + * modification of vlan interfaces and (mostly) allow for vlans to be destroyed + * while they are being used for tx/rx. To accomplish this in a way that has + * acceptable performance and cooperation with other parts of the network stack + * there is a non-sleepable epoch(9) and an sx(9). * - * The performance-sensitive paths that warrant using the rmlock(9) are + * The performance-sensitive paths that warrant using the epoch(9) are * vlan_transmit and vlan_input. Both have to check for the vlan interface's * existence using if_vlantrunk, and being in the network tx/rx paths the use - * of an rmlock(9) gives a measureable improvement in performance. + * of an epoch(9) gives a measureable improvement in performance. * * The reason for having an sx(9) is mostly because there are still areas that * must be sleepable and also have safe concurrent access to a vlan interface. * Since the sx(9) exists, it is used by default in most paths unless sleeping * is not permitted, or if it is not clear whether sleeping is permitted. * - * Note that despite these protections, there is still an inherent race in the - * destruction of vlans since there's no guarantee that the ifnet hasn't been - * freed/reused when the tx/rx functions are called by the stack. This can only - * be fixed by addressing ifnet's lifetime issues. */ -#define _VLAN_RM_ID ifv_rm_lock #define _VLAN_SX_ID ifv_sx -static struct rmlock _VLAN_RM_ID; static struct sx _VLAN_SX_ID; #define VLAN_LOCKING_INIT() \ - rm_init(&_VLAN_RM_ID, "vlan_rm"); \ sx_init(&_VLAN_SX_ID, "vlan_sx") #define VLAN_LOCKING_DESTROY() \ - rm_destroy(&_VLAN_RM_ID); \ sx_destroy(&_VLAN_SX_ID) -#define _VLAN_RM_TRACKER _vlan_rm_tracker -#define VLAN_RLOCK() rm_rlock(&_VLAN_RM_ID, \ - &_VLAN_RM_TRACKER) -#define VLAN_RUNLOCK() rm_runlock(&_VLAN_RM_ID, \ - &_VLAN_RM_TRACKER) -#define VLAN_WLOCK() rm_wlock(&_VLAN_RM_ID) -#define VLAN_WUNLOCK() rm_wunlock(&_VLAN_RM_ID) -#define VLAN_RLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_RLOCKED) -#define VLAN_WLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_WLOCKED) -#define VLAN_RWLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_LOCKED) -#define VLAN_LOCK_READER struct rm_priotracker _VLAN_RM_TRACKER +#define VLAN_RLOCK() NET_EPOCH_ENTER(); +#define VLAN_RUNLOCK() NET_EPOCH_EXIT(); +#define VLAN_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt)) #define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID) #define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID) @@ -267,25 +249,18 @@ static struct sx _VLAN_SX_ID; /* - * We also have a per-trunk rmlock(9), that is locked shared on packet - * processing and exclusive when configuration is changed. Note: This should - * only be acquired while there is a shared lock on either of the global locks - * via VLAN_SLOCK or VLAN_RLOCK. Thus, an exclusive lock on the global locks - * makes a call to TRUNK_RLOCK/TRUNK_WLOCK technically superfluous. + * We also have a per-trunk mutex that should be acquired when changing + * its state. */ -#define _TRUNK_RM_TRACKER _trunk_rm_tracker -#define TRUNK_LOCK_INIT(trunk) rm_init(&(trunk)->lock, vlanname) -#define TRUNK_LOCK_DESTROY(trunk) rm_destroy(&(trunk)->lock) -#define TRUNK_RLOCK(trunk) rm_rlock(&(trunk)->lock, \ - &_TRUNK_RM_TRACKER) -#define TRUNK_WLOCK(trunk) rm_wlock(&(trunk)->lock) -#define TRUNK_RUNLOCK(trunk) rm_runlock(&(trunk)->lock, \ - &_TRUNK_RM_TRACKER) -#define TRUNK_WUNLOCK(trunk) rm_wunlock(&(trunk)->lock) -#define TRUNK_RLOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_RLOCKED) -#define TRUNK_LOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_LOCKED) -#define TRUNK_WLOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_WLOCKED) -#define TRUNK_LOCK_READER struct rm_priotracker _TRUNK_RM_TRACKER +#define TRUNK_LOCK_INIT(trunk) mtx_init(&(trunk)->lock, vlanname, NULL, MTX_DEF) +#define TRUNK_LOCK_DESTROY(trunk) mtx_destroy(&(trunk)->lock) +#define TRUNK_RLOCK(trunk) NET_EPOCH_ENTER() +#define TRUNK_WLOCK(trunk) mtx_lock(&(trunk)->lock) +#define TRUNK_RUNLOCK(trunk) NET_EPOCH_EXIT(); +#define TRUNK_WUNLOCK(trunk) mtx_unlock(&(trunk)->lock) +#define TRUNK_RLOCK_ASSERT(trunk) MPASS(in_epoch(net_epoch_preempt)) +#define TRUNK_LOCK_ASSERT(trunk) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(trunk)->lock)) +#define TRUNK_WLOCK_ASSERT(trunk) mtx_assert(&(trunk)->lock, MA_OWNED); /* * The VLAN_ARRAY substitutes the dynamic hash with a static array @@ -345,6 +320,13 @@ VNET_DEFINE_STATIC(struct if_clone *, vlan_cloner); #define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m)) static void +vlan_mc_free(struct epoch_context *ctx) +{ + struct vlan_mc_entry *mc = __containerof(ctx, struct vlan_mc_entry, mc_epoch_ctx); + free(mc, M_VLAN); +} + +static void vlan_inithash(struct ifvlantrunk *trunk) { int i, n; @@ -363,7 +345,7 @@ vlan_inithash(struct ifvlantrunk *trunk) trunk->hmask = n - 1; trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK); for (i = 0; i < n; i++) - LIST_INIT(&trunk->hash[i]); + CK_SLIST_INIT(&trunk->hash[i]); } static void @@ -374,7 +356,7 @@ vlan_freehash(struct ifvlantrunk *trunk) KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); for (i = 0; i < (1 << trunk->hwidth); i++) - KASSERT(LIST_EMPTY(&trunk->hash[i]), + KASSERT(CK_SLIST_EMPTY(&trunk->hash[i]), ("%s: hash table not empty", __func__)); #endif free(trunk->hash, M_VLAN); @@ -388,12 +370,12 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) int i, b; struct ifvlan *ifv2; - TRUNK_WLOCK_ASSERT(trunk); + VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_vid, trunk->hmask); - LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) + CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv->ifv_vid == ifv2->ifv_vid) return (EEXIST); @@ -406,7 +388,7 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) vlan_growhash(trunk, 1); i = HASH(ifv->ifv_vid, trunk->hmask); } - LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); + CK_SLIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); trunk->refcnt++; return (0); @@ -418,15 +400,15 @@ vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) int i, b; struct ifvlan *ifv2; - TRUNK_WLOCK_ASSERT(trunk); + VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_vid, trunk->hmask); - LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) + CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv2 == ifv) { trunk->refcnt--; - LIST_REMOVE(ifv2, ifv_list); + CK_SLIST_REMOVE(&trunk->hash[i], ifv2, ifvlan, ifv_list); if (trunk->refcnt < (b * b) / 2) vlan_growhash(trunk, -1); return (0); @@ -446,7 +428,7 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch) struct ifvlanhead *hash2; int hwidth2, i, j, n, n2; - TRUNK_WLOCK_ASSERT(trunk); + VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); if (howmuch == 0) { @@ -462,21 +444,21 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch) if (hwidth2 < VLAN_DEF_HWIDTH) return; - /* M_NOWAIT because we're called with trunk mutex held */ - hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT); + hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_WAITOK); if (hash2 == NULL) { printf("%s: out of memory -- hash size not changed\n", __func__); return; /* We can live with the old hash table */ } for (j = 0; j < n2; j++) - LIST_INIT(&hash2[j]); + CK_SLIST_INIT(&hash2[j]); for (i = 0; i < n; i++) - while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) { - LIST_REMOVE(ifv, ifv_list); + while ((ifv = CK_SLIST_FIRST(&trunk->hash[i])) != NULL) { + CK_SLIST_REMOVE(&trunk->hash[i], ifv, ifvlan, ifv_list); j = HASH(ifv->ifv_vid, n2 - 1); - LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); + CK_SLIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); } + NET_EPOCH_WAIT(); free(trunk->hash, M_VLAN); trunk->hash = hash2; trunk->hwidth = hwidth2; @@ -494,7 +476,7 @@ vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) TRUNK_RLOCK_ASSERT(trunk); - LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list) + CK_SLIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list) if (ifv->ifv_vid == vid) return (ifv); return (NULL); @@ -510,7 +492,7 @@ vlan_dumphash(struct ifvlantrunk *trunk) for (i = 0; i < (1 << trunk->hwidth); i++) { printf("%d: ", i); - LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) + CK_SLIST_FOREACH(ifv, &trunk->hash[i], ifv_list) printf("%s ", ifv->ifv_ifp->if_xname); printf("\n"); } @@ -563,7 +545,6 @@ static void trunk_destroy(struct ifvlantrunk *trunk) { VLAN_XLOCK_ASSERT(); - VLAN_WLOCK_ASSERT(); vlan_freehash(trunk); trunk->parent->if_vlantrunk = NULL; @@ -589,24 +570,19 @@ vlan_setmulti(struct ifnet *ifp) struct vlan_mc_entry *mc; int error; - /* - * XXX This stupidly needs the rmlock to avoid sleeping while holding - * the in6_multi_mtx (see in6_mc_join_locked). - */ - VLAN_RWLOCK_ASSERT(); + VLAN_XLOCK_ASSERT(); /* Find the parent. */ sc = ifp->if_softc; - TRUNK_WLOCK_ASSERT(TRUNK(sc)); ifp_p = PARENT(sc); CURVNET_SET_QUIET(ifp_p->if_vnet); /* First, remove any existing filter entries. */ - while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { - SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); + while ((mc = CK_SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { + CK_SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr); - free(mc, M_VLAN); + epoch_call(net_epoch_preempt, &mc->mc_epoch_ctx, vlan_mc_free); } /* Now program new ones. */ @@ -621,10 +597,10 @@ vlan_setmulti(struct ifnet *ifp) } bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len); mc->mc_addr.sdl_index = ifp_p->if_index; - SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); + CK_SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); } IF_ADDR_WUNLOCK(ifp); - SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) { + CK_SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) { error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr, NULL); if (error) @@ -647,7 +623,6 @@ vlan_iflladdr(void *arg __unused, struct ifnet *ifp) struct ifnet *ifv_ifp; struct ifvlantrunk *trunk; struct sockaddr_dl *sdl; - VLAN_LOCK_READER; /* Need the rmlock since this is run on taskqueue_swi. */ VLAN_RLOCK(); @@ -726,12 +701,10 @@ static struct ifnet * vlan_trunkdev(struct ifnet *ifp) { struct ifvlan *ifv; - VLAN_LOCK_READER; if (ifp->if_type != IFT_L2VLAN) return (NULL); - /* Not clear if callers are sleepable, so acquire the rmlock. */ VLAN_RLOCK(); ifv = ifp->if_softc; ifp = NULL; @@ -811,10 +784,7 @@ vlan_devat(struct ifnet *ifp, uint16_t vid) { struct ifvlantrunk *trunk; struct ifvlan *ifv; - VLAN_LOCK_READER; - TRUNK_LOCK_READER; - /* Not clear if callers are sleepable, so acquire the rmlock. */ VLAN_RLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { @@ -822,11 +792,9 @@ vlan_devat(struct ifnet *ifp, uint16_t vid) return (NULL); } ifp = NULL; - TRUNK_RLOCK(trunk); ifv = vlan_gethash(trunk, vid); if (ifv) ifp = ifv->ifv_ifp; - TRUNK_RUNLOCK(trunk); VLAN_RUNLOCK(); return (ifp); } @@ -1078,7 +1046,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) if_rele(p); return (ENOSPC); } - SLIST_INIT(&ifv->vlan_mc_listhead); + CK_SLIST_INIT(&ifv->vlan_mc_listhead); ifp->if_softc = ifv; /* * Set the name manually rather than using if_initname because @@ -1145,6 +1113,7 @@ vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) * ifvlan. */ taskqueue_drain(taskqueue_thread, &ifv->lladdr_task); + NET_EPOCH_WAIT(); if_free(ifp); free(ifv, M_VLAN); ifc_free_unit(ifc, unit); @@ -1169,7 +1138,6 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m) struct ifvlan *ifv; struct ifnet *p; int error, len, mcast; - VLAN_LOCK_READER; VLAN_RLOCK(); ifv = ifp->if_softc; @@ -1229,8 +1197,6 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) { struct ifvlantrunk *trunk; struct ifvlan *ifv; - VLAN_LOCK_READER; - TRUNK_LOCK_READER; struct m_tag *mtag; uint16_t vid, tag; @@ -1291,16 +1257,13 @@ vlan_input(struct ifnet *ifp, struct mbuf *m) vid = EVL_VLANOFTAG(tag); - TRUNK_RLOCK(trunk); ifv = vlan_gethash(trunk, vid); if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { - TRUNK_RUNLOCK(trunk); - if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); VLAN_RUNLOCK(); + if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } - TRUNK_RUNLOCK(trunk); if (vlan_mtag_pcp) { /* @@ -1341,8 +1304,13 @@ vlan_lladdr_fn(void *arg, int pending __unused) ifv = (struct ifvlan *)arg; ifp = ifv->ifv_ifp; + + CURVNET_SET(ifp->if_vnet); + /* The ifv_ifp already has the lladdr copied in. */ if_setlladdr(ifp, IF_LLADDR(ifp), ifp->if_addrlen); + + CURVNET_RESTORE(); } static int @@ -1371,22 +1339,19 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) if (ifv->ifv_trunk) return (EBUSY); - /* Acquire rmlock after the branch so we can M_WAITOK. */ VLAN_XLOCK(); if (p->if_vlantrunk == NULL) { trunk = malloc(sizeof(struct ifvlantrunk), M_VLAN, M_WAITOK | M_ZERO); vlan_inithash(trunk); TRUNK_LOCK_INIT(trunk); - VLAN_WLOCK(); TRUNK_WLOCK(trunk); p->if_vlantrunk = trunk; trunk->parent = p; if_ref(trunk->parent); + TRUNK_WUNLOCK(trunk); } else { - VLAN_WLOCK(); trunk = p->if_vlantrunk; - TRUNK_WLOCK(trunk); } ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ @@ -1450,7 +1415,9 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) ifp->if_link_state = p->if_link_state; + TRUNK_RLOCK(TRUNK(ifv)); vlan_capabilities(ifv); + TRUNK_RUNLOCK(TRUNK(ifv)); /* * Set up our interface address to reflect the underlying @@ -1460,12 +1427,6 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) ((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen = p->if_addrlen; - /* - * Configure multicast addresses that may already be - * joined on the vlan device. - */ - (void)vlan_setmulti(ifp); - TASK_INIT(&ifv->lladdr_task, 0, vlan_lladdr_fn, ifv); /* We are ready for operation now. */ @@ -1473,13 +1434,14 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) /* Update flags on the parent, if necessary. */ vlan_setflags(ifp, 1); -done: + /* - * We need to drop the non-sleepable rmlock so that the underlying - * devices can sleep in their vlan_config hooks. + * Configure multicast addresses that may already be + * joined on the vlan device. */ - TRUNK_WUNLOCK(trunk); - VLAN_WUNLOCK(); + (void)vlan_setmulti(ifp); + +done: if (error == 0) EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); VLAN_XUNLOCK(); @@ -1512,13 +1474,6 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) parent = NULL; if (trunk != NULL) { - /* - * Both vlan_transmit and vlan_input rely on the trunk fields - * being NULL to determine whether to bail, so we need to get - * an exclusive lock here to prevent them from using bad - * ifvlans. - */ - VLAN_WLOCK(); parent = trunk->parent; /* @@ -1526,7 +1481,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) * empty the list of multicast groups that we may have joined * while we were alive from the parent's list. */ - while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { + while ((mc = CK_SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { /* * If the parent interface is being detached, * all its multicast addresses have already @@ -1543,19 +1498,13 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) "Failed to delete multicast address from parent: %d\n", error); } - SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); - free(mc, M_VLAN); + CK_SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); + epoch_call(net_epoch_preempt, &mc->mc_epoch_ctx, vlan_mc_free); } vlan_setflags(ifp, 0); /* clear special flags on parent */ - /* - * The trunk lock isn't actually required here, but - * vlan_remhash expects it. - */ - TRUNK_WLOCK(trunk); vlan_remhash(trunk, ifv); - TRUNK_WUNLOCK(trunk); ifv->ifv_trunk = NULL; /* @@ -1563,9 +1512,9 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) */ if (trunk->refcnt == 0) { parent->if_vlantrunk = NULL; + NET_EPOCH_WAIT(); trunk_destroy(trunk); } - VLAN_WUNLOCK(); } /* Disconnect from parent. */ @@ -1642,7 +1591,6 @@ vlan_link_state(struct ifnet *ifp) { struct ifvlantrunk *trunk; struct ifvlan *ifv; - VLAN_LOCK_READER; /* Called from a taskqueue_swi task, so we cannot sleep. */ VLAN_RLOCK(); @@ -1672,7 +1620,7 @@ vlan_capabilities(struct ifvlan *ifv) u_long hwa = 0; VLAN_SXLOCK_ASSERT(); - TRUNK_WLOCK_ASSERT(TRUNK(ifv)); + TRUNK_RLOCK_ASSERT(TRUNK(ifv)); p = PARENT(ifv); ifp = ifv->ifv_ifp; @@ -1773,11 +1721,11 @@ vlan_trunk_capabilities(struct ifnet *ifp) VLAN_SUNLOCK(); return; } - TRUNK_WLOCK(trunk); + TRUNK_RLOCK(trunk); VLAN_FOREACH(ifv, trunk) { vlan_capabilities(ifv); } - TRUNK_WUNLOCK(trunk); + TRUNK_RUNLOCK(trunk); VLAN_SUNLOCK(); } @@ -1791,7 +1739,6 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct ifvlantrunk *trunk; struct vlanreq vlr; int error = 0; - VLAN_LOCK_READER; ifr = (struct ifreq *)data; ifa = (struct ifaddr *) data; @@ -1927,16 +1874,13 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) * XXX We need the rmlock here to avoid sleeping while * holding in6_multi_mtx. */ - VLAN_RLOCK(); + VLAN_XLOCK(); trunk = TRUNK(ifv); - if (trunk != NULL) { - TRUNK_WLOCK(trunk); + if (trunk != NULL) error = vlan_setmulti(ifp); - TRUNK_WUNLOCK(trunk); - } - VLAN_RUNLOCK(); - break; + VLAN_XUNLOCK(); + break; case SIOCGVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { @@ -1973,9 +1917,9 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ifv->ifv_capenable = ifr->ifr_reqcap; trunk = TRUNK(ifv); if (trunk != NULL) { - TRUNK_WLOCK(trunk); + TRUNK_RLOCK(trunk); vlan_capabilities(ifv); - TRUNK_WUNLOCK(trunk); + TRUNK_RUNLOCK(trunk); } VLAN_SUNLOCK(); break; diff --git a/freebsd/sys/net/iflib.h b/freebsd/sys/net/iflib.h index 6e1eee63..8c2be41b 100644 --- a/freebsd/sys/net/iflib.h +++ b/freebsd/sys/net/iflib.h @@ -173,7 +173,7 @@ typedef struct pci_vendor_info { #define IFLIB_PNP_DESCR "U32:vendor;U32:device;U32:subvendor;U32:subdevice;" \ "U32:revision;U32:class;D:#" #define IFLIB_PNP_INFO(b, u, t) \ - MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, sizeof(t[0]), nitems(t) - 1) + MODULE_PNP_INFO(IFLIB_PNP_DESCR, b, u, t, nitems(t) - 1) typedef struct if_txrx { int (*ift_txd_encap) (void *, if_pkt_info_t); @@ -246,7 +246,7 @@ struct if_shared_ctx { /* fields necessary for probe */ pci_vendor_info_t *isc_vendor_info; char *isc_driver_version; -/* optional function to transform the read values to match the table*/ + /* optional function to transform the read values to match the table*/ void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, uint16_t *subdevice_id, uint16_t *rev_id); int isc_nrxd_min[8]; @@ -375,6 +375,8 @@ if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx); if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx); void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]); +void iflib_request_reset(if_ctx_t ctx); +uint8_t iflib_in_detach(if_ctx_t ctx); /* * If the driver can plug cleanly in to newbus use these diff --git a/freebsd/sys/netinet/in.c b/freebsd/sys/netinet/in.c index 78fd00c0..db1ebda0 100644 --- a/freebsd/sys/netinet/in.c +++ b/freebsd/sys/netinet/in.c @@ -524,7 +524,12 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) &ii->ii_allhosts); } - EVENTHANDLER_INVOKE(ifaddr_event, ifp); + /* + * Note: we don't need extra reference for ifa, since we called + * with sx lock held, and ifaddr can not be deleted in concurrent + * thread. + */ + EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, ifa, IFADDR_EVENT_ADD); return (error); @@ -647,7 +652,8 @@ in_difaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) } IF_ADDR_WUNLOCK(ifp); - EVENTHANDLER_INVOKE(ifaddr_event, ifp); + EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, &ia->ia_ifa, + IFADDR_EVENT_DEL); ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ return (0); diff --git a/freebsd/sys/netinet/in_gif.c b/freebsd/sys/netinet/in_gif.c index 03aaaf08..b8732a33 100644 --- a/freebsd/sys/netinet/in_gif.c +++ b/freebsd/sys/netinet/in_gif.c @@ -84,12 +84,16 @@ SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW, * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list. */ VNET_DEFINE_STATIC(struct gif_list *, ipv4_hashtbl) = NULL; +VNET_DEFINE_STATIC(struct gif_list *, ipv4_srchashtbl) = NULL; VNET_DEFINE_STATIC(struct gif_list, ipv4_list) = CK_LIST_HEAD_INITIALIZER(); #define V_ipv4_hashtbl VNET(ipv4_hashtbl) +#define V_ipv4_srchashtbl VNET(ipv4_srchashtbl) #define V_ipv4_list VNET(ipv4_list) #define GIF_HASH(src, dst) (V_ipv4_hashtbl[\ in_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)]) +#define GIF_SRCHASH(src) (V_ipv4_srchashtbl[\ + fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GIF_HASH_SIZE - 1)]) #define GIF_HASH_SC(sc) GIF_HASH((sc)->gif_iphdr->ip_src.s_addr,\ (sc)->gif_iphdr->ip_dst.s_addr) static uint32_t @@ -121,6 +125,43 @@ in_gif_checkdup(const struct gif_softc *sc, in_addr_t src, in_addr_t dst) return (0); } +/* + * Check that ingress address belongs to local host. + */ +static void +in_gif_set_running(struct gif_softc *sc) +{ + + if (in_localip(sc->gif_iphdr->ip_src)) + GIF2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; + else + GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; +} + +/* + * ifaddr_event handler. + * Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent + * source address spoofing. + */ +static void +in_gif_srcaddr(void *arg __unused, const struct sockaddr *sa, + int event __unused) +{ + const struct sockaddr_in *sin; + struct gif_softc *sc; + + if (V_ipv4_srchashtbl == NULL) + return; + + MPASS(in_epoch(net_epoch_preempt)); + sin = (const struct sockaddr_in *)sa; + CK_LIST_FOREACH(sc, &GIF_SRCHASH(sin->sin_addr.s_addr), srchash) { + if (sc->gif_iphdr->ip_src.s_addr != sin->sin_addr.s_addr) + continue; + in_gif_set_running(sc); + } +} + static void in_gif_attach(struct gif_softc *sc) { @@ -129,6 +170,9 @@ in_gif_attach(struct gif_softc *sc) CK_LIST_INSERT_HEAD(&V_ipv4_list, sc, chain); else CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain); + + CK_LIST_INSERT_HEAD(&GIF_SRCHASH(sc->gif_iphdr->ip_src.s_addr), + sc, srchash); } int @@ -141,6 +185,7 @@ in_gif_setopts(struct gif_softc *sc, u_int options) if ((options & GIF_IGNORE_SOURCE) != (sc->gif_options & GIF_IGNORE_SOURCE)) { + CK_LIST_REMOVE(sc, srchash); CK_LIST_REMOVE(sc, chain); sc->gif_options = options; in_gif_attach(sc); @@ -174,8 +219,10 @@ in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) error = EADDRNOTAVAIL; break; } - if (V_ipv4_hashtbl == NULL) + if (V_ipv4_hashtbl == NULL) { V_ipv4_hashtbl = gif_hashinit(); + V_ipv4_srchashtbl = gif_hashinit(); + } error = in_gif_checkdup(sc, src->sin_addr.s_addr, dst->sin_addr.s_addr); if (error == EADDRNOTAVAIL) @@ -190,6 +237,7 @@ in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) ip->ip_dst.s_addr = dst->sin_addr.s_addr; if (sc->gif_family != 0) { /* Detach existing tunnel first */ + CK_LIST_REMOVE(sc, srchash); CK_LIST_REMOVE(sc, chain); GIF_WAIT(); free(sc->gif_hdr, M_GIF); @@ -198,6 +246,7 @@ in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) sc->gif_family = AF_INET; sc->gif_iphdr = ip; in_gif_attach(sc); + in_gif_set_running(sc); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: @@ -344,6 +393,7 @@ done: return (ret); } +static const struct srcaddrtab *ipv4_srcaddrtab; static struct { const struct encap_config encap; const struct encaptab *cookie; @@ -389,6 +439,9 @@ in_gif_init(void) if (!IS_DEFAULT_VNET(curvnet)) return; + + ipv4_srcaddrtab = ip_encap_register_srcaddr(in_gif_srcaddr, + NULL, M_WAITOK); for (i = 0; i < nitems(ipv4_encap_cfg); i++) ipv4_encap_cfg[i].cookie = ip_encap_attach( &ipv4_encap_cfg[i].encap, NULL, M_WAITOK); @@ -402,8 +455,11 @@ in_gif_uninit(void) if (IS_DEFAULT_VNET(curvnet)) { for (i = 0; i < nitems(ipv4_encap_cfg); i++) ip_encap_detach(ipv4_encap_cfg[i].cookie); + ip_encap_unregister_srcaddr(ipv4_srcaddrtab); } - if (V_ipv4_hashtbl != NULL) + if (V_ipv4_hashtbl != NULL) { gif_hashdestroy(V_ipv4_hashtbl); + gif_hashdestroy(V_ipv4_srchashtbl); + } } diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h index 86c9705c..6d2c86d5 100644 --- a/freebsd/sys/netinet/in_pcb.h +++ b/freebsd/sys/netinet/in_pcb.h @@ -642,6 +642,8 @@ int inp_so_options(const struct inpcb *inp); #define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock)) #define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt)) #define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED) +#define INP_INFO_WUNLOCK_ASSERT(ipi) \ + mtx_assert(&(ipi)->ipi_lock, MA_NOTOWNED) #define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch(net_epoch_preempt) && !mtx_owned(&(ipi)->ipi_lock)) #define INP_LIST_LOCK_INIT(ipi, d) \ diff --git a/freebsd/sys/netinet/ip_encap.c b/freebsd/sys/netinet/ip_encap.c index 1e794f73..adc5a41d 100644 --- a/freebsd/sys/netinet/ip_encap.c +++ b/freebsd/sys/netinet/ip_encap.c @@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/eventhandler.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> @@ -102,22 +103,139 @@ struct encaptab { encap_input_t input; }; +struct srcaddrtab { + CK_LIST_ENTRY(srcaddrtab) chain; + + encap_srcaddr_t srcaddr; + void *arg; +}; + CK_LIST_HEAD(encaptab_head, encaptab); +CK_LIST_HEAD(srcaddrtab_head, srcaddrtab); #ifdef INET static struct encaptab_head ipv4_encaptab = CK_LIST_HEAD_INITIALIZER(); +static struct srcaddrtab_head ipv4_srcaddrtab = CK_LIST_HEAD_INITIALIZER(); #endif #ifdef INET6 static struct encaptab_head ipv6_encaptab = CK_LIST_HEAD_INITIALIZER(); +static struct srcaddrtab_head ipv6_srcaddrtab = CK_LIST_HEAD_INITIALIZER(); #endif -static struct mtx encapmtx; +static struct mtx encapmtx, srcaddrmtx; MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF); +MTX_SYSINIT(srcaddrmtx, &srcaddrmtx, "srcaddrmtx", MTX_DEF); #define ENCAP_WLOCK() mtx_lock(&encapmtx) #define ENCAP_WUNLOCK() mtx_unlock(&encapmtx) -#define ENCAP_RLOCK() struct epoch_tracker encap_et; epoch_enter_preempt(net_epoch_preempt, &encap_et) -#define ENCAP_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &encap_et) +#define ENCAP_RLOCK_TRACKER struct epoch_tracker encap_et +#define ENCAP_RLOCK() \ + epoch_enter_preempt(net_epoch_preempt, &encap_et) +#define ENCAP_RUNLOCK() \ + epoch_exit_preempt(net_epoch_preempt, &encap_et) #define ENCAP_WAIT() epoch_wait_preempt(net_epoch_preempt) +#define SRCADDR_WLOCK() mtx_lock(&srcaddrmtx) +#define SRCADDR_WUNLOCK() mtx_unlock(&srcaddrmtx) +#define SRCADDR_RLOCK_TRACKER struct epoch_tracker srcaddr_et +#define SRCADDR_RLOCK() \ + epoch_enter_preempt(net_epoch_preempt, &srcaddr_et) +#define SRCADDR_RUNLOCK() \ + epoch_exit_preempt(net_epoch_preempt, &srcaddr_et) +#define SRCADDR_WAIT() epoch_wait_preempt(net_epoch_preempt) + +/* + * ifaddr_event_ext handler. + * + * Tunnelling interfaces may request the kernel to notify when + * some interface addresses appears or disappears. Usually tunnelling + * interface must use an address configured on the local machine as + * ingress address to be able receive datagramms and do not send + * spoofed packets. + */ +static void +srcaddr_change_event(void *arg __unused, struct ifnet *ifp, + struct ifaddr *ifa, int event) +{ + SRCADDR_RLOCK_TRACKER; + struct srcaddrtab_head *head; + struct srcaddrtab *p; + + /* Support for old ifaddr_event. */ + EVENTHANDLER_INVOKE(ifaddr_event, ifp); + + switch (ifa->ifa_addr->sa_family) { +#ifdef INET + case AF_INET: + head = &ipv4_srcaddrtab; + break; +#endif +#ifdef INET6 + case AF_INET6: + head = &ipv6_srcaddrtab; + break; +#endif + default: + /* ignore event */ + return; + } + + SRCADDR_RLOCK(); + CK_LIST_FOREACH(p, head, chain) { + (*p->srcaddr)(p->arg, ifa->ifa_addr, event); + } + SRCADDR_RUNLOCK(); +} +EVENTHANDLER_DEFINE(ifaddr_event_ext, srcaddr_change_event, NULL, 0); + +static struct srcaddrtab * +encap_register_srcaddr(struct srcaddrtab_head *head, encap_srcaddr_t func, + void *arg, int mflags) +{ + struct srcaddrtab *p, *tmp; + + if (func == NULL) + return (NULL); + p = malloc(sizeof(*p), M_NETADDR, mflags); + if (p == NULL) + return (NULL); + p->srcaddr = func; + p->arg = arg; + + SRCADDR_WLOCK(); + CK_LIST_FOREACH(tmp, head, chain) { + if (func == tmp->srcaddr && arg == tmp->arg) + break; + } + if (tmp == NULL) + CK_LIST_INSERT_HEAD(head, p, chain); + SRCADDR_WUNLOCK(); + + if (tmp != NULL) { + free(p, M_NETADDR); + p = tmp; + } + return (p); +} + +static int +encap_unregister_srcaddr(struct srcaddrtab_head *head, + const struct srcaddrtab *cookie) +{ + struct srcaddrtab *p; + + SRCADDR_WLOCK(); + CK_LIST_FOREACH(p, head, chain) { + if (p == cookie) { + CK_LIST_REMOVE(p, chain); + SRCADDR_WUNLOCK(); + SRCADDR_WAIT(); + free(p, M_NETADDR); + return (0); + } + } + SRCADDR_WUNLOCK(); + return (EINVAL); +} + static struct encaptab * encap_attach(struct encaptab_head *head, const struct encap_config *cfg, void *arg, int mflags) @@ -177,6 +295,7 @@ encap_detach(struct encaptab_head *head, const struct encaptab *cookie) static int encap_input(struct encaptab_head *head, struct mbuf *m, int off, int proto) { + ENCAP_RLOCK_TRACKER; struct encaptab *ep, *match; void *arg; int matchprio, ret; @@ -222,6 +341,20 @@ encap_input(struct encaptab_head *head, struct mbuf *m, int off, int proto) } #ifdef INET +const struct srcaddrtab * +ip_encap_register_srcaddr(encap_srcaddr_t func, void *arg, int mflags) +{ + + return (encap_register_srcaddr(&ipv4_srcaddrtab, func, arg, mflags)); +} + +int +ip_encap_unregister_srcaddr(const struct srcaddrtab *cookie) +{ + + return (encap_unregister_srcaddr(&ipv4_srcaddrtab, cookie)); +} + const struct encaptab * ip_encap_attach(const struct encap_config *cfg, void *arg, int mflags) { @@ -247,6 +380,20 @@ encap4_input(struct mbuf **mp, int *offp, int proto) #endif /* INET */ #ifdef INET6 +const struct srcaddrtab * +ip6_encap_register_srcaddr(encap_srcaddr_t func, void *arg, int mflags) +{ + + return (encap_register_srcaddr(&ipv6_srcaddrtab, func, arg, mflags)); +} + +int +ip6_encap_unregister_srcaddr(const struct srcaddrtab *cookie) +{ + + return (encap_unregister_srcaddr(&ipv6_srcaddrtab, cookie)); +} + const struct encaptab * ip6_encap_attach(const struct encap_config *cfg, void *arg, int mflags) { diff --git a/freebsd/sys/netinet/ip_encap.h b/freebsd/sys/netinet/ip_encap.h index f3d1d3af..898114c2 100644 --- a/freebsd/sys/netinet/ip_encap.h +++ b/freebsd/sys/netinet/ip_encap.h @@ -43,26 +43,38 @@ int encap6_input(struct mbuf **, int *, int); typedef int (*encap_lookup_t)(const struct mbuf *, int, int, void **); typedef int (*encap_check_t)(const struct mbuf *, int, int, void *); -typedef int (*encap_input_t)(struct mbuf *, int , int, void *); +typedef int (*encap_input_t)(struct mbuf *, int, int, void *); +typedef void (*encap_srcaddr_t)(void *, const struct sockaddr *, int); struct encap_config { int proto; /* protocol */ int min_length; /* minimum packet length */ + int max_hdrsize; /* maximum header size */ int exact_match; /* a packet is exactly matched */ #define ENCAP_DRV_LOOKUP 0x7fffffff encap_lookup_t lookup; encap_check_t check; encap_input_t input; + + void *pad[3]; }; struct encaptab; +struct srcaddrtab; const struct encaptab *ip_encap_attach(const struct encap_config *, void *arg, int mflags); const struct encaptab *ip6_encap_attach(const struct encap_config *, void *arg, int mflags); +const struct srcaddrtab *ip_encap_register_srcaddr(encap_srcaddr_t, + void *arg, int mflags); +const struct srcaddrtab *ip6_encap_register_srcaddr(encap_srcaddr_t, + void *arg, int mflags); + +int ip_encap_unregister_srcaddr(const struct srcaddrtab *); +int ip6_encap_unregister_srcaddr(const struct srcaddrtab *); int ip_encap_detach(const struct encaptab *); int ip6_encap_detach(const struct encaptab *); #endif diff --git a/freebsd/sys/netinet/ip_fw.h b/freebsd/sys/netinet/ip_fw.h index a7bf5b4d..cfcdaa29 100644 --- a/freebsd/sys/netinet/ip_fw.h +++ b/freebsd/sys/netinet/ip_fw.h @@ -615,6 +615,7 @@ struct ip_fw_rule { ipfw_insn cmd[1]; /* storage for commands */ }; #define IPFW_RULE_NOOPT 0x01 /* Has no options in body */ +#define IPFW_RULE_JUSTOPTS 0x02 /* new format of rule body */ /* Unaligned version */ diff --git a/freebsd/sys/netinet/ip_gre.c b/freebsd/sys/netinet/ip_gre.c index 65ab0ab9..6a88f37f 100644 --- a/freebsd/sys/netinet/ip_gre.c +++ b/freebsd/sys/netinet/ip_gre.c @@ -77,9 +77,13 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_gre_ttl), 0, "Default TTL value for encapsulated packets"); VNET_DEFINE_STATIC(struct gre_list *, ipv4_hashtbl) = NULL; +VNET_DEFINE_STATIC(struct gre_list *, ipv4_srchashtbl) = NULL; #define V_ipv4_hashtbl VNET(ipv4_hashtbl) +#define V_ipv4_srchashtbl VNET(ipv4_srchashtbl) #define GRE_HASH(src, dst) (V_ipv4_hashtbl[\ in_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)]) +#define GRE_SRCHASH(src) (V_ipv4_srchashtbl[\ + fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (GRE_HASH_SIZE - 1)]) #define GRE_HASH_SC(sc) GRE_HASH((sc)->gre_oip.ip_src.s_addr,\ (sc)->gre_oip.ip_dst.s_addr) @@ -140,6 +144,43 @@ in_gre_lookup(const struct mbuf *m, int off, int proto, void **arg) return (0); } +/* + * Check that ingress address belongs to local host. + */ +static void +in_gre_set_running(struct gre_softc *sc) +{ + + if (in_localip(sc->gre_oip.ip_src)) + GRE2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; + else + GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; +} + +/* + * ifaddr_event handler. + * Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent + * source address spoofing. + */ +static void +in_gre_srcaddr(void *arg __unused, const struct sockaddr *sa, + int event __unused) +{ + const struct sockaddr_in *sin; + struct gre_softc *sc; + + if (V_ipv4_srchashtbl == NULL) + return; + + MPASS(in_epoch(net_epoch_preempt)); + sin = (const struct sockaddr_in *)sa; + CK_LIST_FOREACH(sc, &GRE_SRCHASH(sin->sin_addr.s_addr), srchash) { + if (sc->gre_oip.ip_src.s_addr != sin->sin_addr.s_addr) + continue; + in_gre_set_running(sc); + } +} + static void in_gre_attach(struct gre_softc *sc) { @@ -150,6 +191,8 @@ in_gre_attach(struct gre_softc *sc) sc->gre_oip.ip_p = IPPROTO_GRE; gre_updatehdr(sc, &sc->gre_gihdr->gi_gre); CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain); + CK_LIST_INSERT_HEAD(&GRE_SRCHASH(sc->gre_oip.ip_src.s_addr), + sc, srchash); } void @@ -161,6 +204,7 @@ in_gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t value) /* NOTE: we are protected with gre_ioctl_sx lock */ MPASS(sc->gre_family == AF_INET); CK_LIST_REMOVE(sc, chain); + CK_LIST_REMOVE(sc, srchash); GRE_WAIT(); if (cmd == GRESKEY) sc->gre_key = value; @@ -195,8 +239,10 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data) error = EADDRNOTAVAIL; break; } - if (V_ipv4_hashtbl == NULL) + if (V_ipv4_hashtbl == NULL) { V_ipv4_hashtbl = gre_hashinit(); + V_ipv4_srchashtbl = gre_hashinit(); + } error = in_gre_checkdup(sc, src->sin_addr.s_addr, dst->sin_addr.s_addr); if (error == EADDRNOTAVAIL) @@ -213,6 +259,7 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data) if (sc->gre_family != 0) { /* Detach existing tunnel first */ CK_LIST_REMOVE(sc, chain); + CK_LIST_REMOVE(sc, srchash); GRE_WAIT(); free(sc->gre_hdr, M_GRE); /* XXX: should we notify about link state change? */ @@ -222,6 +269,7 @@ in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data) sc->gre_oseq = 0; sc->gre_iseq = UINT32_MAX; in_gre_attach(sc); + in_gre_set_running(sc); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: @@ -273,6 +321,7 @@ in_gre_output(struct mbuf *m, int af, int hlen) return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL)); } +static const struct srcaddrtab *ipv4_srcaddrtab = NULL; static const struct encaptab *ecookie = NULL; static const struct encap_config ipv4_encap_cfg = { .proto = IPPROTO_GRE, @@ -288,6 +337,8 @@ in_gre_init(void) if (!IS_DEFAULT_VNET(curvnet)) return; + ipv4_srcaddrtab = ip_encap_register_srcaddr(in_gre_srcaddr, + NULL, M_WAITOK); ecookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK); } @@ -295,8 +346,12 @@ void in_gre_uninit(void) { - if (IS_DEFAULT_VNET(curvnet)) + if (IS_DEFAULT_VNET(curvnet)) { ip_encap_detach(ecookie); - if (V_ipv4_hashtbl != NULL) + ip_encap_unregister_srcaddr(ipv4_srcaddrtab); + } + if (V_ipv4_hashtbl != NULL) { gre_hashdestroy(V_ipv4_hashtbl); + gre_hashdestroy(V_ipv4_srchashtbl); + } } diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c index 414e3812..2f8b6fd1 100644 --- a/freebsd/sys/netinet/ip_icmp.c +++ b/freebsd/sys/netinet/ip_icmp.c @@ -160,6 +160,12 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, tstamprepl, CTLFLAG_RW, &VNET_NAME(icmptstamprepl), 0, "Respond to ICMP Timestamp packets"); +VNET_DEFINE_STATIC(int, error_keeptags) = 0; +#define V_error_keeptags VNET(error_keeptags) +SYSCTL_INT(_net_inet_icmp, OID_AUTO, error_keeptags, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(error_keeptags), 0, + "ICMP error response keeps copy of mbuf_tags of original packet"); + #ifdef ICMPPRINTFS int icmpprintfs = 0; #endif @@ -369,6 +375,10 @@ stdreply: icmpelen = max(8, min(V_icmp_quotelen, ntohs(oip->ip_len) - nip->ip_p = IPPROTO_ICMP; nip->ip_tos = 0; nip->ip_off = 0; + + if (V_error_keeptags) + m_tag_copy_chain(m, n, M_NOWAIT); + icmp_reflect(m); freeit: diff --git a/freebsd/sys/netinet/ip_options.c b/freebsd/sys/netinet/ip_options.c index 7c189bdb..c90077ea 100644 --- a/freebsd/sys/netinet/ip_options.c +++ b/freebsd/sys/netinet/ip_options.c @@ -112,16 +112,16 @@ ip_dooptions(struct mbuf *m, int pass) struct nhop4_extended nh_ext; struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; - NET_EPOCH_ENTER(); /* Ignore or reject packets with IP options. */ if (V_ip_doopts == 0) return 0; else if (V_ip_doopts == 2) { type = ICMP_UNREACH; code = ICMP_UNREACH_FILTER_PROHIB; - goto bad; + goto bad_unlocked; } + NET_EPOCH_ENTER(); dst = ip->ip_dst; cp = (u_char *)(ip + 1); cnt = (ip->ip_hl << 2) - sizeof (struct ip); @@ -390,6 +390,7 @@ dropit: return (0); bad: NET_EPOCH_EXIT(); +bad_unlocked: icmp_error(m, type, code, 0, 0); IPSTAT_INC(ips_badoptions); return (1); diff --git a/freebsd/sys/netinet/ip_output.c b/freebsd/sys/netinet/ip_output.c index 5f643746..477388d7 100644 --- a/freebsd/sys/netinet/ip_output.c +++ b/freebsd/sys/netinet/ip_output.c @@ -264,11 +264,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, ip->ip_v = IPVERSION; ip->ip_hl = hlen >> 2; ip_fillid(ip); - IPSTAT_INC(ips_localout); } else { /* Header already set, fetch hlen from there */ hlen = ip->ip_hl << 2; } + if ((flags & IP_FORWARDING) == 0) + IPSTAT_INC(ips_localout); /* * dst/gw handling: @@ -934,10 +935,11 @@ in_delayed_cksum(struct mbuf *m) if (m->m_pkthdr.csum_flags & CSUM_UDP) { /* if udp header is not in the first mbuf copy udplen */ - if (offset + sizeof(struct udphdr) > m->m_len) + if (offset + sizeof(struct udphdr) > m->m_len) { m_copydata(m, offset + offsetof(struct udphdr, uh_ulen), sizeof(cklen), (caddr_t)&cklen); - else { + cklen = ntohs(cklen); + } else { uh = (struct udphdr *)mtodo(m, offset); cklen = ntohs(uh->uh_ulen); } diff --git a/freebsd/sys/netinet/sctp_asconf.c b/freebsd/sys/netinet/sctp_asconf.c index c21e3251..2c66f65c 100644 --- a/freebsd/sys/netinet/sctp_asconf.c +++ b/freebsd/sys/netinet/sctp_asconf.c @@ -672,6 +672,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset, SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: couldn't get lookup addr!\n"); /* respond with a missing/invalid mandatory parameter error */ + sctp_m_freem(m_ack); return; } /* param_length is already validated in process_control... */ diff --git a/freebsd/sys/netinet/sctp_auth.c b/freebsd/sys/netinet/sctp_auth.c index 0fc076e1..8301a98f 100644 --- a/freebsd/sys/netinet/sctp_auth.c +++ b/freebsd/sys/netinet/sctp_auth.c @@ -1062,40 +1062,6 @@ sctp_hmac_m(uint16_t hmac_algo, uint8_t *key, uint32_t keylen, return (digestlen); } -/*- - * verify the HMAC digest using the desired hash key, text, and HMAC - * algorithm. - * Returns -1 on error, 0 on success. - */ -int -sctp_verify_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen, - uint8_t *text, uint32_t textlen, - uint8_t *digest, uint32_t digestlen) -{ - uint32_t len; - uint8_t temp[SCTP_AUTH_DIGEST_LEN_MAX]; - - /* sanity check the material and length */ - if ((key == NULL) || (keylen == 0) || - (text == NULL) || (textlen == 0) || (digest == NULL)) { - /* can't do HMAC with empty key or text or digest */ - return (-1); - } - len = sctp_get_hmac_digest_len(hmac_algo); - if ((len == 0) || (digestlen != len)) - return (-1); - - /* compute the expected hash */ - if (sctp_hmac(hmac_algo, key, keylen, text, textlen, temp) != len) - return (-1); - - if (memcmp(digest, temp, digestlen) != 0) - return (-1); - else - return (0); -} - - /* * computes the requested HMAC using a key struct (which may be modified if * the keylen exceeds the HMAC block len). @@ -1742,7 +1708,7 @@ sctp_handle_auth(struct sctp_tcb *stcb, struct sctp_auth_chunk *auth, m, offset, computed_digest); /* compare the computed digest with the one in the AUTH chunk */ - if (memcmp(digest, computed_digest, digestlen) != 0) { + if (timingsafe_bcmp(digest, computed_digest, digestlen) != 0) { SCTP_STAT_INCR(sctps_recvauthfailed); SCTPDBG(SCTP_DEBUG_AUTH1, "SCTP Auth: HMAC digest check failed\n"); diff --git a/freebsd/sys/netinet/sctp_auth.h b/freebsd/sys/netinet/sctp_auth.h index 44126e3e..5c22cc74 100644 --- a/freebsd/sys/netinet/sctp_auth.h +++ b/freebsd/sys/netinet/sctp_auth.h @@ -178,9 +178,6 @@ extern uint32_t sctp_get_hmac_digest_len(uint16_t hmac_algo); extern uint32_t sctp_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen, uint8_t *text, uint32_t textlen, uint8_t *digest); -extern int -sctp_verify_hmac(uint16_t hmac_algo, uint8_t *key, uint32_t keylen, - uint8_t *text, uint32_t textlen, uint8_t *digest, uint32_t digestlen); extern uint32_t sctp_compute_hmac(uint16_t hmac_algo, sctp_key_t *key, uint8_t *text, uint32_t textlen, uint8_t *digest); diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c index c7e86e78..5386aae4 100644 --- a/freebsd/sys/netinet/sctp_input.c +++ b/freebsd/sys/netinet/sctp_input.c @@ -2556,7 +2556,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, return (NULL); } /* compare the received digest with the computed digest */ - if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) != 0) { + if (timingsafe_bcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) != 0) { /* try the old cookie? */ if ((cookie->time_entered.tv_sec == (long)ep->time_of_secret_change) && (ep->current_secret_number != ep->last_secret_number)) { @@ -2565,7 +2565,7 @@ sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset, (uint8_t *)ep->secret_key[(int)ep->last_secret_number], SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0); /* compare */ - if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) == 0) + if (timingsafe_bcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) == 0) cookie_ok = 1; } } else { @@ -5671,7 +5671,6 @@ sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int lengt vrf_id, port); goto out; } - } if (IS_SCTP_CONTROL(ch)) { /* process the control portion of the SCTP packet */ diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c index 8f0c8aa4..9493882c 100644 --- a/freebsd/sys/netinet/sctp_output.c +++ b/freebsd/sys/netinet/sctp_output.c @@ -3574,7 +3574,6 @@ static int sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *error) { struct cmsghdr cmh; - int tlen, at; struct sctp_initmsg initmsg; #ifdef INET struct sockaddr_in sin; @@ -3582,34 +3581,37 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er #ifdef INET6 struct sockaddr_in6 sin6; #endif + int tot_len, rem_len, cmsg_data_len, cmsg_data_off, off; - tlen = SCTP_BUF_LEN(control); - at = 0; - while (at < tlen) { - if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) { + tot_len = SCTP_BUF_LEN(control); + for (off = 0; off < tot_len; off += CMSG_ALIGN(cmh.cmsg_len)) { + rem_len = tot_len - off; + if (rem_len < (int)CMSG_ALIGN(sizeof(cmh))) { /* There is not enough room for one more. */ *error = EINVAL; return (1); } - m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh); + m_copydata(control, off, sizeof(cmh), (caddr_t)&cmh); if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) { /* We dont't have a complete CMSG header. */ *error = EINVAL; return (1); } - if (((int)cmh.cmsg_len + at) > tlen) { + if ((cmh.cmsg_len > INT_MAX) || ((int)cmh.cmsg_len > rem_len)) { /* We don't have the complete CMSG. */ *error = EINVAL; return (1); } + cmsg_data_len = (int)cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh)); + cmsg_data_off = off + CMSG_ALIGN(sizeof(cmh)); if (cmh.cmsg_level == IPPROTO_SCTP) { switch (cmh.cmsg_type) { case SCTP_INIT: - if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_initmsg)) { + if (cmsg_data_len < (int)sizeof(struct sctp_initmsg)) { *error = EINVAL; return (1); } - m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg); + m_copydata(control, cmsg_data_off, sizeof(struct sctp_initmsg), (caddr_t)&initmsg); if (initmsg.sinit_max_attempts) stcb->asoc.max_init_times = initmsg.sinit_max_attempts; if (initmsg.sinit_num_ostreams) @@ -3664,7 +3666,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er break; #ifdef INET case SCTP_DSTADDRV4: - if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) { + if (cmsg_data_len < (int)sizeof(struct in_addr)) { *error = EINVAL; return (1); } @@ -3672,7 +3674,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_port = stcb->rport; - m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr); + m_copydata(control, cmsg_data_off, sizeof(struct in_addr), (caddr_t)&sin.sin_addr); if ((sin.sin_addr.s_addr == INADDR_ANY) || (sin.sin_addr.s_addr == INADDR_BROADCAST) || IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { @@ -3688,7 +3690,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er #endif #ifdef INET6 case SCTP_DSTADDRV6: - if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) { + if (cmsg_data_len < (int)sizeof(struct in6_addr)) { *error = EINVAL; return (1); } @@ -3696,7 +3698,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_port = stcb->rport; - m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr); + m_copydata(control, cmsg_data_off, sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr); if (IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) || IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) { *error = EINVAL; @@ -3729,7 +3731,6 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er break; } } - at += CMSG_ALIGN(cmh.cmsg_len); } return (0); } @@ -3742,7 +3743,6 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p, int *error) { struct cmsghdr cmh; - int tlen, at; struct sctp_tcb *stcb; struct sockaddr *addr; #ifdef INET @@ -3751,31 +3751,34 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p, #ifdef INET6 struct sockaddr_in6 sin6; #endif + int tot_len, rem_len, cmsg_data_len, cmsg_data_off, off; - tlen = SCTP_BUF_LEN(control); - at = 0; - while (at < tlen) { - if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) { + tot_len = SCTP_BUF_LEN(control); + for (off = 0; off < tot_len; off += CMSG_ALIGN(cmh.cmsg_len)) { + rem_len = tot_len - off; + if (rem_len < (int)CMSG_ALIGN(sizeof(cmh))) { /* There is not enough room for one more. */ *error = EINVAL; return (NULL); } - m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh); + m_copydata(control, off, sizeof(cmh), (caddr_t)&cmh); if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) { /* We dont't have a complete CMSG header. */ *error = EINVAL; return (NULL); } - if (((int)cmh.cmsg_len + at) > tlen) { + if ((cmh.cmsg_len > INT_MAX) || ((int)cmh.cmsg_len > rem_len)) { /* We don't have the complete CMSG. */ *error = EINVAL; return (NULL); } + cmsg_data_len = (int)cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh)); + cmsg_data_off = off + CMSG_ALIGN(sizeof(cmh)); if (cmh.cmsg_level == IPPROTO_SCTP) { switch (cmh.cmsg_type) { #ifdef INET case SCTP_DSTADDRV4: - if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) { + if (cmsg_data_len < (int)sizeof(struct in_addr)) { *error = EINVAL; return (NULL); } @@ -3783,13 +3786,13 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p, sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_port = port; - m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr); + m_copydata(control, cmsg_data_off, sizeof(struct in_addr), (caddr_t)&sin.sin_addr); addr = (struct sockaddr *)&sin; break; #endif #ifdef INET6 case SCTP_DSTADDRV6: - if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) { + if (cmsg_data_len < (int)sizeof(struct in6_addr)) { *error = EINVAL; return (NULL); } @@ -3797,7 +3800,7 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p, sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_port = port; - m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr); + m_copydata(control, cmsg_data_off, sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr); #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) { in6_sin6_2_sin(&sin, &sin6); @@ -3818,7 +3821,6 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p, } } } - at += CMSG_ALIGN(cmh.cmsg_len); } return (NULL); } @@ -4265,6 +4267,9 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, atomic_subtract_int(&stcb->asoc.refcnt, 1); } #endif + if (port) { + UDPSTAT_INC(udps_opackets); + } SCTP_STAT_INCR(sctps_sendpackets); SCTP_STAT_INCR_COUNTER64(sctps_outpackets); if (ret) @@ -4364,6 +4369,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, /* KAME hack: embed scopeid */ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); + sctp_m_freem(m); return (EINVAL); } if (net == NULL) { @@ -4428,6 +4434,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, /* KAME hack: embed scopeid */ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); + sctp_m_freem(m); return (EINVAL); } /* Cache the source address */ @@ -4454,6 +4461,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, /* KAME hack: embed scopeid */ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); + sctp_m_freem(m); return (EINVAL); } if (over_addr == NULL) { @@ -4605,6 +4613,9 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, sin6->sin6_port = prev_port; } SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret); + if (port) { + UDPSTAT_INC(udps_opackets); + } SCTP_STAT_INCR(sctps_sendpackets); SCTP_STAT_INCR_COUNTER64(sctps_outpackets); if (ret) { @@ -4974,7 +4985,6 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt, struct sctp_paramhdr *phdr, params; struct mbuf *mat, *op_err; - char tempbuf[SCTP_PARAM_BUFFER_SIZE]; int at, limit, pad_needed; uint16_t ptype, plen, padded_size; int err_at; @@ -5114,15 +5124,13 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt, l_len = SCTP_MIN_V4_OVERHEAD; #endif l_len += sizeof(struct sctp_chunkhdr); - l_len += plen; - l_len += sizeof(struct sctp_paramhdr); + l_len += sizeof(struct sctp_gen_error_cause); op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA); if (op_err) { SCTP_BUF_LEN(op_err) = 0; /* - * pre-reserve space for ip - * and sctp header and - * chunk hdr + * Pre-reserve space for IP, + * SCTP, and chunk header. */ #ifdef INET6 SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr)); @@ -5135,7 +5143,7 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt, } if (op_err) { /* If we have space */ - struct sctp_paramhdr s; + struct sctp_gen_error_cause cause; if (err_at % 4) { uint32_t cpthis = 0; @@ -5144,26 +5152,15 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt, m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis); err_at += pad_needed; } - s.param_type = htons(SCTP_CAUSE_UNRESOLVABLE_ADDR); - s.param_length = htons(sizeof(s) + plen); - m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s); - err_at += sizeof(s); - if (plen > sizeof(tempbuf)) { - plen = sizeof(tempbuf); - } - phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, plen); - if (phdr == NULL) { + cause.code = htons(SCTP_CAUSE_UNRESOLVABLE_ADDR); + cause.length = htons((uint16_t)(sizeof(struct sctp_gen_error_cause) + plen)); + m_copyback(op_err, err_at, sizeof(struct sctp_gen_error_cause), (caddr_t)&cause); + err_at += sizeof(struct sctp_gen_error_cause); + SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(mat, at, plen, M_NOWAIT); + if (SCTP_BUF_NEXT(op_err) == NULL) { sctp_m_freem(op_err); - /* - * we are out of memory but - * we still need to have a - * look at what to do (the - * system is in trouble - * though). - */ return (NULL); } - m_copyback(op_err, err_at, plen, (caddr_t)phdr); } return (op_err); break; @@ -5187,7 +5184,6 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt, l_len = SCTP_MIN_V4_OVERHEAD; #endif l_len += sizeof(struct sctp_chunkhdr); - l_len += plen; l_len += sizeof(struct sctp_paramhdr); op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA); if (op_err) { @@ -5213,14 +5209,11 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt, err_at += pad_needed; } s.param_type = htons(SCTP_UNRECOG_PARAM); - s.param_length = htons(sizeof(s) + plen); - m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s); - err_at += sizeof(s); - if (plen > sizeof(tempbuf)) { - plen = sizeof(tempbuf); - } - phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, plen); - if (phdr == NULL) { + s.param_length = htons((uint16_t)sizeof(struct sctp_paramhdr) + plen); + m_copyback(op_err, err_at, sizeof(struct sctp_paramhdr), (caddr_t)&s); + err_at += sizeof(struct sctp_paramhdr); + SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(mat, at, plen, M_NOWAIT); + if (SCTP_BUF_NEXT(op_err) == NULL) { sctp_m_freem(op_err); /* * we are out of memory but @@ -5232,7 +5225,6 @@ sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt, op_err = NULL; goto more_processing; } - m_copyback(op_err, err_at, plen, (caddr_t)phdr); err_at += plen; } } @@ -7212,7 +7204,7 @@ one_more_time: if ((sp->msg_is_complete) && (sp->length == 0)) { if (sp->sender_all_done) { /* - * We are doing differed cleanup. Last time through + * We are doing deferred cleanup. Last time through * when we took all the data the sender_all_done was * not set. */ @@ -8966,14 +8958,15 @@ sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err) return; } chk->copy_by_ref = 0; + chk->rec.chunk_id.id = SCTP_OPERATION_ERROR; + chk->rec.chunk_id.can_take_data = 0; + chk->flags = 0; chk->send_size = (uint16_t)chunk_length; chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->asoc = &stcb->asoc; chk->data = op_err; chk->whoTo = NULL; - chk->rec.chunk_id.id = SCTP_OPERATION_ERROR; - chk->rec.chunk_id.can_take_data = 0; hdr = mtod(op_err, struct sctp_chunkhdr *); hdr->chunk_type = SCTP_OPERATION_ERROR; hdr->chunk_flags = 0; @@ -9195,7 +9188,6 @@ sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net) chk->send_size = sizeof(struct sctp_chunkhdr); chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; - chk->flags = 0; chk->asoc = &stcb->asoc; chk->data = m_shutdown_ack; chk->whoTo = net; @@ -9250,7 +9242,6 @@ sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net) chk->send_size = sizeof(struct sctp_shutdown_chunk); chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; - chk->flags = 0; chk->asoc = &stcb->asoc; chk->data = m_shutdown; chk->whoTo = net; @@ -11292,6 +11283,9 @@ sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst, return; } SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret); + if (port) { + UDPSTAT_INC(udps_opackets); + } SCTP_STAT_INCR(sctps_sendpackets); SCTP_STAT_INCR_COUNTER64(sctps_outpackets); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); @@ -12156,7 +12150,6 @@ sctp_send_str_reset_req(struct sctp_tcb *stcb, chk->book_size = sizeof(struct sctp_chunkhdr); chk->send_size = SCTP_SIZE32(chk->book_size); chk->book_size_scale = 0; - chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA); if (chk->data == NULL) { sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED); diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c index c3cb115e..ddf136ef 100644 --- a/freebsd/sys/netinet/sctputil.c +++ b/freebsd/sys/netinet/sctputil.c @@ -3702,7 +3702,7 @@ sctp_ulp_notify(uint32_t notification, struct sctp_tcb *stcb, return; } if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) || - (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) { + (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) { if ((notification == SCTP_NOTIFY_INTERFACE_DOWN) || (notification == SCTP_NOTIFY_INTERFACE_UP) || (notification == SCTP_NOTIFY_INTERFACE_CONFIRMED)) { @@ -7393,8 +7393,8 @@ sctp_set_state(struct sctp_tcb *stcb, int new_state) #endif KASSERT((new_state & ~SCTP_STATE_MASK) == 0, - ("sctp_set_state: Can't set substate (new_state = %x)", - new_state)); + ("sctp_set_state: Can't set substate (new_state = %x)", + new_state)); stcb->asoc.state = (stcb->asoc.state & ~SCTP_STATE_MASK) | new_state; if ((new_state == SCTP_STATE_SHUTDOWN_RECEIVED) || (new_state == SCTP_STATE_SHUTDOWN_SENT) || @@ -7404,7 +7404,7 @@ sctp_set_state(struct sctp_tcb *stcb, int new_state) #if defined(KDTRACE_HOOKS) if (((old_state & SCTP_STATE_MASK) != new_state) && !(((old_state & SCTP_STATE_MASK) == SCTP_STATE_EMPTY) && - (new_state == SCTP_STATE_INUSE))) { + (new_state == SCTP_STATE_INUSE))) { SCTP_PROBE6(state__change, NULL, stcb, NULL, stcb, NULL, old_state); } #endif @@ -7418,14 +7418,14 @@ sctp_add_substate(struct sctp_tcb *stcb, int substate) #endif KASSERT((substate & SCTP_STATE_MASK) == 0, - ("sctp_add_substate: Can't set state (substate = %x)", - substate)); + ("sctp_add_substate: Can't set state (substate = %x)", + substate)); stcb->asoc.state |= substate; #if defined(KDTRACE_HOOKS) if (((substate & SCTP_STATE_ABOUT_TO_BE_FREED) && - ((old_state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) || + ((old_state & SCTP_STATE_ABOUT_TO_BE_FREED) == 0)) || ((substate & SCTP_STATE_SHUTDOWN_PENDING) && - ((old_state & SCTP_STATE_SHUTDOWN_PENDING) == 0))) { + ((old_state & SCTP_STATE_SHUTDOWN_PENDING) == 0))) { SCTP_PROBE6(state__change, NULL, stcb, NULL, stcb, NULL, old_state); } #endif diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c index 2c6c3048..d00504dc 100644 --- a/freebsd/sys/netinet/tcp_input.c +++ b/freebsd/sys/netinet/tcp_input.c @@ -802,7 +802,7 @@ findpcb: if (ti_locked == TI_RLOCKED) { INP_INFO_RLOCK_ASSERT(&V_tcbinfo); } else { - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); } #endif #ifdef INET6 @@ -1167,7 +1167,7 @@ tfo_socket_result: * causes. */ if (thflags & TH_RST) { - syncache_chkrst(&inc, th); + syncache_chkrst(&inc, th, m); goto dropunlock; } /* @@ -1360,7 +1360,7 @@ tfo_socket_result: INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); ti_locked = TI_UNLOCKED; } - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); return (IPPROTO_DONE); } else if (tp->t_state == TCPS_LISTEN) { /* @@ -1407,7 +1407,7 @@ dropwithreset: else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset " "ti_locked: %d", __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); } #endif @@ -1431,7 +1431,7 @@ dropunlock: else { KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock " "ti_locked: %d", __func__, ti_locked)); - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); } #endif @@ -1439,7 +1439,7 @@ dropunlock: INP_WUNLOCK(inp); drop: - INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); + INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); if (s != NULL) free(s, M_TCPLOG); if (m != NULL) diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c index 8f83440d..60276348 100644 --- a/freebsd/sys/netinet/tcp_output.c +++ b/freebsd/sys/netinet/tcp_output.c @@ -897,6 +897,7 @@ send: len = max_len; } } + /* * Prevent the last segment from being * fractional unless the send sockbuf can be diff --git a/freebsd/sys/netinet/tcp_reass.c b/freebsd/sys/netinet/tcp_reass.c index 4776a808..fea3b716 100644 --- a/freebsd/sys/netinet/tcp_reass.c +++ b/freebsd/sys/netinet/tcp_reass.c @@ -530,7 +530,6 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start, struct tseg_qent *p = NULL; struct tseg_qent *nq = NULL; struct tseg_qent *te = NULL; - struct tseg_qent tqs; struct mbuf *mlast = NULL; struct sockbuf *sb; struct socket *so = tp->t_inpcb->inp_socket; @@ -1055,8 +1054,7 @@ present: KASSERT(tp->t_segqmbuflen >= q->tqe_mbuf_cnt, ("tp:%p seg queue goes negative", tp)); tp->t_segqmbuflen -= q->tqe_mbuf_cnt; - if (q != &tqs) - uma_zfree(tcp_reass_zone, q); + uma_zfree(tcp_reass_zone, q); tp->t_segqlen--; q = nq; } while (q && q->tqe_start == tp->rcv_nxt); diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c index 6fdd859d..a913f5b6 100644 --- a/freebsd/sys/netinet/tcp_syncache.c +++ b/freebsd/sys/netinet/tcp_syncache.c @@ -132,8 +132,8 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, functions_inherit_listen_socket_stack, static void syncache_drop(struct syncache *, struct syncache_head *); static void syncache_free(struct syncache *); static void syncache_insert(struct syncache *, struct syncache_head *); -static int syncache_respond(struct syncache *, struct syncache_head *, int, - const struct mbuf *); +static int syncache_respond(struct syncache *, struct syncache_head *, + const struct mbuf *, int); static struct socket *syncache_socket(struct syncache *, struct socket *, struct mbuf *m); static void syncache_timeout(struct syncache *sc, struct syncache_head *sch, @@ -491,7 +491,7 @@ syncache_timer(void *xsch) free(s, M_TCPLOG); } - syncache_respond(sc, sch, 1, NULL); + syncache_respond(sc, sch, NULL, TH_SYN|TH_ACK); TCPSTAT_INC(tcps_sc_retransmitted); syncache_timeout(sc, sch, 0); } @@ -539,9 +539,10 @@ syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp) * This function is called when we get a RST for a * non-existent connection, so that we can see if the * connection is in the syn cache. If it is, zap it. + * If required send a challenge ACK. */ void -syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th) +syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m) { struct syncache *sc; struct syncache_head *sch; @@ -592,19 +593,36 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th) * send a reset with the sequence number at the rightmost edge * of our receive window, and we have to handle this case. */ - if (SEQ_GEQ(th->th_seq, sc->sc_irs) && - SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) { - syncache_drop(sc, sch); - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) - log(LOG_DEBUG, "%s; %s: Our SYN|ACK was rejected, " - "connection attempt aborted by remote endpoint\n", - s, __func__); - TCPSTAT_INC(tcps_sc_reset); + if ((SEQ_GEQ(th->th_seq, sc->sc_irs + 1) && + SEQ_LT(th->th_seq, sc->sc_irs + 1 + sc->sc_wnd)) || + (sc->sc_wnd == 0 && th->th_seq == sc->sc_irs + 1)) { + if (V_tcp_insecure_rst || + th->th_seq == sc->sc_irs + 1) { + syncache_drop(sc, sch); + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + log(LOG_DEBUG, + "%s; %s: Our SYN|ACK was rejected, " + "connection attempt aborted by remote " + "endpoint\n", + s, __func__); + TCPSTAT_INC(tcps_sc_reset); + } else { + TCPSTAT_INC(tcps_badrst); + /* Send challenge ACK. */ + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: RST with invalid " + " SEQ %u != NXT %u (+WND %u), " + "sending challenge ACK\n", + s, __func__, + th->th_seq, sc->sc_irs + 1, sc->sc_wnd); + syncache_respond(sc, sch, m, TH_ACK); + } } else { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: RST with invalid SEQ %u != " - "IRS %u (+WND %u), segment ignored\n", - s, __func__, th->th_seq, sc->sc_irs, sc->sc_wnd); + "NXT %u (+WND %u), segment ignored\n", + s, __func__, + th->th_seq, sc->sc_irs + 1, sc->sc_wnd); TCPSTAT_INC(tcps_badrst); } @@ -1423,7 +1441,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, s, __func__); free(s, M_TCPLOG); } - if (syncache_respond(sc, sch, 1, m) == 0) { + if (syncache_respond(sc, sch, m, TH_SYN|TH_ACK) == 0) { sc->sc_rxmits = 0; syncache_timeout(sc, sch, 1); TCPSTAT_INC(tcps_sndacks); @@ -1587,7 +1605,7 @@ skip_alloc: /* * Do a standard 3-way handshake. */ - if (syncache_respond(sc, sch, 0, m) == 0) { + if (syncache_respond(sc, sch, m, TH_SYN|TH_ACK) == 0) { if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs) syncache_free(sc); else if (sc != &scs) @@ -1628,12 +1646,12 @@ tfo_expanded: } /* - * Send SYN|ACK to the peer. Either in response to the peer's SYN, + * Send SYN|ACK or ACK to the peer. Either in response to a peer's segment, * i.e. m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL. */ static int -syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked, - const struct mbuf *m0) +syncache_respond(struct syncache *sc, struct syncache_head *sch, + const struct mbuf *m0, int flags) { struct ip *ip = NULL; struct mbuf *m; @@ -1719,15 +1737,18 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked, th->th_sport = sc->sc_inc.inc_lport; th->th_dport = sc->sc_inc.inc_fport; - th->th_seq = htonl(sc->sc_iss); + if (flags & TH_SYN) + th->th_seq = htonl(sc->sc_iss); + else + th->th_seq = htonl(sc->sc_iss + 1); th->th_ack = htonl(sc->sc_irs + 1); th->th_off = sizeof(struct tcphdr) >> 2; th->th_x2 = 0; - th->th_flags = TH_SYN|TH_ACK; + th->th_flags = flags; th->th_win = htons(sc->sc_wnd); th->th_urp = 0; - if (sc->sc_flags & SCF_ECN) { + if ((flags & TH_SYN) && (sc->sc_flags & SCF_ECN)) { th->th_flags |= TH_ECE; TCPSTAT_INC(tcps_ecn_shs); } @@ -1736,30 +1757,32 @@ syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked, if ((sc->sc_flags & SCF_NOOPT) == 0) { to.to_flags = 0; - to.to_mss = mssopt; - to.to_flags = TOF_MSS; - if (sc->sc_flags & SCF_WINSCALE) { - to.to_wscale = sc->sc_requested_r_scale; - to.to_flags |= TOF_SCALE; + if (flags & TH_SYN) { + to.to_mss = mssopt; + to.to_flags = TOF_MSS; + if (sc->sc_flags & SCF_WINSCALE) { + to.to_wscale = sc->sc_requested_r_scale; + to.to_flags |= TOF_SCALE; + } + if (sc->sc_flags & SCF_SACK) + to.to_flags |= TOF_SACKPERM; +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (sc->sc_flags & SCF_SIGNATURE) + to.to_flags |= TOF_SIGNATURE; +#endif + if (sc->sc_tfo_cookie) { + to.to_flags |= TOF_FASTOPEN; + to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; + to.to_tfo_cookie = sc->sc_tfo_cookie; + /* don't send cookie again when retransmitting response */ + sc->sc_tfo_cookie = NULL; + } } if (sc->sc_flags & SCF_TIMESTAMP) { to.to_tsval = sc->sc_tsoff + tcp_ts_getticks(); to.to_tsecr = sc->sc_tsreflect; to.to_flags |= TOF_TS; } - if (sc->sc_flags & SCF_SACK) - to.to_flags |= TOF_SACKPERM; -#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) - if (sc->sc_flags & SCF_SIGNATURE) - to.to_flags |= TOF_SIGNATURE; -#endif - if (sc->sc_tfo_cookie) { - to.to_flags |= TOF_FASTOPEN; - to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; - to.to_tfo_cookie = sc->sc_tfo_cookie; - /* don't send cookie again when retransmitting response */ - sc->sc_tfo_cookie = NULL; - } optlen = tcp_addoptions(&to, (u_char *)(th + 1)); /* Adjust headers by option size. */ diff --git a/freebsd/sys/netinet/tcp_syncache.h b/freebsd/sys/netinet/tcp_syncache.h index 0104e528..3067b7ef 100644 --- a/freebsd/sys/netinet/tcp_syncache.h +++ b/freebsd/sys/netinet/tcp_syncache.h @@ -46,7 +46,7 @@ int syncache_expand(struct in_conninfo *, struct tcpopt *, int syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *, void *, void *); -void syncache_chkrst(struct in_conninfo *, struct tcphdr *); +void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *); void syncache_badack(struct in_conninfo *); int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported); diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c index 9557c154..ca4fecc0 100644 --- a/freebsd/sys/netinet/udp_usrreq.c +++ b/freebsd/sys/netinet/udp_usrreq.c @@ -534,7 +534,6 @@ udp_input(struct mbuf **mp, int *offp, int proto) in_broadcast(ip->ip_dst, ifp)) { struct inpcb *last; struct inpcbhead *pcblist; - struct ip_moptions *imo; INP_INFO_RLOCK_ET(pcbinfo, et); pcblist = udp_get_pcblist(proto); @@ -558,6 +557,11 @@ udp_input(struct mbuf **mp, int *offp, int proto) INP_RLOCK(inp); + if (__predict_false(inp->inp_flags2 & INP_FREED)) { + INP_RUNLOCK(inp); + continue; + } + /* * XXXRW: Because we weren't holding either the inpcb * or the hash lock when we checked for a match @@ -569,10 +573,12 @@ udp_input(struct mbuf **mp, int *offp, int proto) * Handle socket delivery policy for any-source * and source-specific multicast. [RFC3678] */ - imo = inp->inp_moptions; if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { + struct ip_moptions *imo; struct sockaddr_in group; int blocked; + + imo = inp->inp_moptions; if (imo == NULL) { INP_RUNLOCK(inp); continue; @@ -762,13 +768,7 @@ struct inpcb * udp_notify(struct inpcb *inp, int errno) { - /* - * While udp_ctlinput() always calls udp_notify() with a read lock - * when invoking it directly, in_pcbnotifyall() currently uses write - * locks due to sharing code with TCP. For now, accept either a read - * or a write lock, but a read lock is sufficient. - */ - INP_LOCK_ASSERT(inp); + INP_WLOCK_ASSERT(inp); if ((errno == EHOSTUNREACH || errno == ENETUNREACH || errno == EHOSTDOWN) && inp->inp_route.ro_rt) { RTFREE(inp->inp_route.ro_rt); @@ -814,13 +814,13 @@ udp_common_ctlinput(int cmd, struct sockaddr *sa, void *vip, if (ip != NULL) { uh = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport, - ip->ip_src, uh->uh_sport, INPLOOKUP_RLOCKPCB, NULL); + ip->ip_src, uh->uh_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL) { - INP_RLOCK_ASSERT(inp); + INP_WLOCK_ASSERT(inp); if (inp->inp_socket != NULL) { udp_notify(inp, inetctlerrmap[cmd]); } - INP_RUNLOCK(inp); + INP_WUNLOCK(inp); } else { inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport, ip->ip_src, uh->uh_sport, diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c index 2b080169..6dd25e98 100644 --- a/freebsd/sys/netinet6/icmp6.c +++ b/freebsd/sys/netinet6/icmp6.c @@ -1950,6 +1950,10 @@ icmp6_rip6_input(struct mbuf **mp, int off) !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; INP_RLOCK(in6p); + if (__predict_false(in6p->inp_flags2 & INP_FREED)) { + INP_RUNLOCK(in6p); + continue; + } if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) { INP_RUNLOCK(in6p); diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c index c415cf78..ef59203e 100644 --- a/freebsd/sys/netinet6/in6.c +++ b/freebsd/sys/netinet6/in6.c @@ -716,7 +716,8 @@ aifaddr_out: ND6_WUNLOCK(); nd6_prefix_del(pr); } - EVENTHANDLER_INVOKE(ifaddr_event, ifp); + EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, &ia->ia_ifa, + IFADDR_EVENT_DEL); break; } @@ -1460,7 +1461,10 @@ done: WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Invoking IPv6 network device address event may sleep"); - EVENTHANDLER_INVOKE(ifaddr_event, ifp); + ifa_ref(&ia->ia_ifa); + EVENTHANDLER_INVOKE(ifaddr_event_ext, ifp, &ia->ia_ifa, + IFADDR_EVENT_ADD); + ifa_free(&ia->ia_ifa); return (error); } diff --git a/freebsd/sys/netinet6/in6_gif.c b/freebsd/sys/netinet6/in6_gif.c index 66b4c63a..66c2cfb4 100644 --- a/freebsd/sys/netinet6/in6_gif.c +++ b/freebsd/sys/netinet6/in6_gif.c @@ -89,12 +89,16 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list. */ VNET_DEFINE_STATIC(struct gif_list *, ipv6_hashtbl) = NULL; +VNET_DEFINE_STATIC(struct gif_list *, ipv6_srchashtbl) = NULL; VNET_DEFINE_STATIC(struct gif_list, ipv6_list) = CK_LIST_HEAD_INITIALIZER(); #define V_ipv6_hashtbl VNET(ipv6_hashtbl) +#define V_ipv6_srchashtbl VNET(ipv6_srchashtbl) #define V_ipv6_list VNET(ipv6_list) #define GIF_HASH(src, dst) (V_ipv6_hashtbl[\ in6_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)]) +#define GIF_SRCHASH(src) (V_ipv6_srchashtbl[\ + fnv_32_buf((src), sizeof(*src), FNV1_32_INIT) & (GIF_HASH_SIZE - 1)]) #define GIF_HASH_SC(sc) GIF_HASH(&(sc)->gif_ip6hdr->ip6_src,\ &(sc)->gif_ip6hdr->ip6_dst) static uint32_t @@ -127,6 +131,43 @@ in6_gif_checkdup(const struct gif_softc *sc, const struct in6_addr *src, return (0); } +/* + * Check that ingress address belongs to local host. + */ +static void +in6_gif_set_running(struct gif_softc *sc) +{ + + if (in6_localip(&sc->gif_ip6hdr->ip6_src)) + GIF2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; + else + GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; +} + +/* + * ifaddr_event handler. + * Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent + * source address spoofing. + */ +static void +in6_gif_srcaddr(void *arg __unused, const struct sockaddr *sa, int event) +{ + const struct sockaddr_in6 *sin; + struct gif_softc *sc; + + if (V_ipv6_srchashtbl == NULL) + return; + + MPASS(in_epoch(net_epoch_preempt)); + sin = (const struct sockaddr_in6 *)sa; + CK_LIST_FOREACH(sc, &GIF_SRCHASH(&sin->sin6_addr), srchash) { + if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, + &sin->sin6_addr) == 0) + continue; + in6_gif_set_running(sc); + } +} + static void in6_gif_attach(struct gif_softc *sc) { @@ -135,6 +176,9 @@ in6_gif_attach(struct gif_softc *sc) CK_LIST_INSERT_HEAD(&V_ipv6_list, sc, chain); else CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain); + + CK_LIST_INSERT_HEAD(&GIF_SRCHASH(&sc->gif_ip6hdr->ip6_src), + sc, srchash); } int @@ -147,6 +191,7 @@ in6_gif_setopts(struct gif_softc *sc, u_int options) if ((options & GIF_IGNORE_SOURCE) != (sc->gif_options & GIF_IGNORE_SOURCE)) { + CK_LIST_REMOVE(sc, srchash); CK_LIST_REMOVE(sc, chain); sc->gif_options = options; in6_gif_attach(sc); @@ -189,8 +234,10 @@ in6_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) (error = sa6_embedscope(dst, 0)) != 0) break; - if (V_ipv6_hashtbl == NULL) + if (V_ipv6_hashtbl == NULL) { V_ipv6_hashtbl = gif_hashinit(); + V_ipv6_srchashtbl = gif_hashinit(); + } error = in6_gif_checkdup(sc, &src->sin6_addr, &dst->sin6_addr); if (error == EADDRNOTAVAIL) @@ -206,6 +253,7 @@ in6_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) ip6->ip6_vfc = IPV6_VERSION; if (sc->gif_family != 0) { /* Detach existing tunnel first */ + CK_LIST_REMOVE(sc, srchash); CK_LIST_REMOVE(sc, chain); GIF_WAIT(); free(sc->gif_hdr, M_GIF); @@ -214,6 +262,7 @@ in6_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) sc->gif_family = AF_INET6; sc->gif_ip6hdr = ip6; in6_gif_attach(sc); + in6_gif_set_running(sc); break; case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: @@ -367,6 +416,7 @@ done: return (ret); } +static const struct srcaddrtab *ipv6_srcaddrtab; static struct { const struct encap_config encap; const struct encaptab *cookie; @@ -412,6 +462,9 @@ in6_gif_init(void) if (!IS_DEFAULT_VNET(curvnet)) return; + + ipv6_srcaddrtab = ip6_encap_register_srcaddr(in6_gif_srcaddr, + NULL, M_WAITOK); for (i = 0; i < nitems(ipv6_encap_cfg); i++) ipv6_encap_cfg[i].cookie = ip6_encap_attach( &ipv6_encap_cfg[i].encap, NULL, M_WAITOK); @@ -425,7 +478,10 @@ in6_gif_uninit(void) if (IS_DEFAULT_VNET(curvnet)) { for (i = 0; i < nitems(ipv6_encap_cfg); i++) ip6_encap_detach(ipv6_encap_cfg[i].cookie); + ip6_encap_unregister_srcaddr(ipv6_srcaddrtab); } - if (V_ipv6_hashtbl != NULL) + if (V_ipv6_hashtbl != NULL) { gif_hashdestroy(V_ipv6_hashtbl); + gif_hashdestroy(V_ipv6_srchashtbl); + } } diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c index a30cb98b..53102764 100644 --- a/freebsd/sys/netinet6/in6_pcb.c +++ b/freebsd/sys/netinet6/in6_pcb.c @@ -823,6 +823,10 @@ in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) INP_INFO_WLOCK(pcbinfo); CK_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { INP_WLOCK(in6p); + if (__predict_false(in6p->inp_flags2 & INP_FREED)) { + INP_WUNLOCK(in6p); + continue; + } im6o = in6p->in6p_moptions; if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { /* @@ -883,10 +887,9 @@ in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, const struct in6_addr *laddr, uint16_t lport, const struct in6_addr *faddr, uint16_t fport, int lookupflags) { - struct inpcb *local_wild = NULL; + struct inpcb *local_wild; const struct inpcblbgrouphead *hdr; struct inpcblbgroup *grp; - struct inpcblbgroup *grp_local_wild; uint32_t idx; INP_HASH_LOCK_ASSERT(pcbinfo); @@ -903,33 +906,24 @@ in6_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, * - Load balanced group does not contain jailed sockets. * - Load balanced does not contain IPv4 mapped INET6 wild sockets. */ + local_wild = NULL; CK_LIST_FOREACH(grp, hdr, il_list) { #ifdef INET if (!(grp->il_vflag & INP_IPV6)) continue; #endif - if (grp->il_lport == lport) { - idx = 0; - int pkt_hash = INP_PCBLBGROUP_PKTHASH( - INP6_PCBHASHKEY(faddr), lport, fport); - - idx = pkt_hash % grp->il_inpcnt; + if (grp->il_lport != lport) + continue; - if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) { - return (grp->il_inp[idx]); - } else { - if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) && - (lookupflags & INPLOOKUP_WILDCARD)) { - local_wild = grp->il_inp[idx]; - grp_local_wild = grp; - } - } - } + idx = INP_PCBLBGROUP_PKTHASH(INP6_PCBHASHKEY(faddr), lport, + fport) % grp->il_inpcnt; + if (IN6_ARE_ADDR_EQUAL(&grp->il6_laddr, laddr)) + return (grp->il_inp[idx]); + if (IN6_IS_ADDR_UNSPECIFIED(&grp->il6_laddr) && + (lookupflags & INPLOOKUP_WILDCARD) != 0) + local_wild = grp->il_inp[idx]; } - if (local_wild != NULL) { - return (local_wild); - } - return (NULL); + return (local_wild); } #ifdef PCBGROUP diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c index 67ed0e35..e0fcd06d 100644 --- a/freebsd/sys/netinet6/udp6_usrreq.c +++ b/freebsd/sys/netinet6/udp6_usrreq.c @@ -436,8 +436,8 @@ udp6_input(struct mbuf **mp, int *offp, int proto) INP_RUNLOCK(last); } else INP_RUNLOCK(last); - INP_INFO_RUNLOCK_ET(pcbinfo, et); inp_lost: + INP_INFO_RUNLOCK_ET(pcbinfo, et); return (IPPROTO_DONE); } /* @@ -700,7 +700,7 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m, u_int32_t ulen, plen; uint16_t cscov; u_short fport; - uint8_t nxt, unlock_udbinfo; + uint8_t nxt, unlock_inp, unlock_udbinfo; /* addr6 has been validated in udp6_send(). */ sin6 = (struct sockaddr_in6 *)addr6; @@ -736,7 +736,22 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m, inp = sotoinpcb(so); KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); - INP_RLOCK(inp); + /* + * In the following cases we want a write lock on the inp for either + * local operations or for possible route cache updates in the IPv6 + * output path: + * - on connected sockets (sin6 is NULL) for route cache updates, + * - when we are not bound to an address and source port (it is + * in6_pcbsetport() which will require the write lock). + */ + if (sin6 == NULL || (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && + inp->inp_lport == 0)) { + INP_WLOCK(inp); + unlock_inp = UH_WLOCKED; + } else { + INP_RLOCK(inp); + unlock_inp = UH_RLOCKED; + } nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; @@ -760,7 +775,10 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m, * potential race in which the factors causing us to * select the UDPv4 output routine are invalidated? */ - INP_RUNLOCK(inp); + if (unlock_inp == UH_WLOCKED) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); if (sin6) in6_sin6_2_sin_in_sock((struct sockaddr *)sin6); pru = inetsw[ip_protox[nxt]].pr_usrreqs; @@ -768,13 +786,28 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m, return ((*pru->pru_send)(so, flags_arg, m, (struct sockaddr *)sin6, control, td)); } - } + } else #endif + if (sin6 && IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { + /* + * Given this is either an IPv6-only socket or no INET is + * supported we will fail the send if the given destination + * address is a v4mapped address. + */ + if (unlock_inp == UH_WLOCKED) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); + return (EINVAL); + } if (control) { if ((error = ip6_setpktopts(control, &opt, inp->in6p_outputopts, td->td_ucred, nxt)) != 0) { - INP_RUNLOCK(inp); + if (unlock_inp == UH_WLOCKED) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); ip6_clearpktopts(&opt, -1); if (control) m_freem(control); @@ -788,12 +821,6 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m, pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); if (sin6 != NULL && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0) { - INP_RUNLOCK(inp); - /* - * XXX there is a short window here which could lead to a race; - * should we re-check that what got us here is still valid? - */ - INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); unlock_udbinfo = UH_WLOCKED; } else if (sin6 != NULL && @@ -974,9 +1001,10 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m, UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6); else UDP_PROBE(send, NULL, inp, ip6, inp, udp6); - error = ip6_output(m, optp, &inp->inp_route6, flags, + error = ip6_output(m, optp, + (unlock_inp == UH_WLOCKED) ? &inp->inp_route6 : NULL, flags, inp->in6p_moptions, NULL, inp); - if (unlock_udbinfo == UH_WLOCKED) + if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); @@ -989,12 +1017,20 @@ udp6_output(struct socket *so, int flags_arg, struct mbuf *m, release: if (unlock_udbinfo == UH_WLOCKED) { + KASSERT(unlock_inp == UH_WLOCKED, ("%s: excl udbinfo lock, " + "non-excl inp lock: pcbinfo %p %#x inp %p %#x", + __func__, pcbinfo, unlock_udbinfo, inp, unlock_inp)); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); } else if (unlock_udbinfo == UH_RLOCKED) { + KASSERT(unlock_inp == UH_RLOCKED, ("%s: non-excl udbinfo lock, " + "excl inp lock: pcbinfo %p %#x inp %p %#x", + __func__, pcbinfo, unlock_udbinfo, inp, unlock_inp)); INP_HASH_RUNLOCK_ET(pcbinfo, et); INP_RUNLOCK(inp); - } else + } else if (unlock_inp == UH_WLOCKED) + INP_WUNLOCK(inp); + else INP_RUNLOCK(inp); if (control) { ip6_clearpktopts(&opt, -1); diff --git a/freebsd/sys/netipsec/key.c b/freebsd/sys/netipsec/key.c index 9bd3f234..4b79f881 100644 --- a/freebsd/sys/netipsec/key.c +++ b/freebsd/sys/netipsec/key.c @@ -536,14 +536,6 @@ MALLOC_DEFINE(M_IPSEC_SPDCACHE, "ipsec-spdcache", "ipsec SPD cache"); VNET_DEFINE_STATIC(uma_zone_t, key_lft_zone); #define V_key_lft_zone VNET(key_lft_zone) -static LIST_HEAD(xforms_list, xformsw) xforms = LIST_HEAD_INITIALIZER(); -static struct mtx xforms_lock; -#define XFORMS_LOCK_INIT() \ - mtx_init(&xforms_lock, "xforms_list", "IPsec transforms list", MTX_DEF) -#define XFORMS_LOCK_DESTROY() mtx_destroy(&xforms_lock) -#define XFORMS_LOCK() mtx_lock(&xforms_lock) -#define XFORMS_UNLOCK() mtx_unlock(&xforms_lock) - /* * set parameters into secpolicyindex buffer. * Must allocate secpolicyindex buffer passed to this function. @@ -719,7 +711,6 @@ static int key_delete(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_delete_all(struct socket *, struct mbuf *, const struct sadb_msghdr *, struct secasindex *); -static void key_delete_xform(const struct xformsw *); static int key_get(struct socket *, struct mbuf *, const struct sadb_msghdr *); @@ -752,7 +743,6 @@ static int key_validate_ext(const struct sadb_ext *, int); static int key_align(struct mbuf *, struct sadb_msghdr *); static struct mbuf *key_setlifetime(struct seclifetime *, uint16_t); static struct mbuf *key_setkey(struct seckey *, uint16_t); -static int xform_init(struct secasvar *, u_short); static void spdcache_init(void); static void spdcache_clear(void); @@ -6169,7 +6159,7 @@ key_delete_all(struct socket *so, struct mbuf *m, * Larval SAs have not initialized tdb_xform, so it is safe to leave them * here when xform disappears. */ -static void +void key_delete_xform(const struct xformsw *xsp) { struct secasvar_queue drainq; @@ -6697,7 +6687,9 @@ key_acquire(const struct secasindex *saidx, struct secpolicy *sp) /* XXX proxy address (optional) */ - /* set sadb_x_policy */ + /* + * Set sadb_x_policy. This is KAME extension to RFC2367. + */ if (sp != NULL) { m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id, sp->priority); @@ -6708,6 +6700,18 @@ key_acquire(const struct secasindex *saidx, struct secpolicy *sp) m_cat(result, m); } + /* + * Set sadb_x_sa2 extension if saidx->reqid is not zero. + * This is FreeBSD extension to RFC2367. + */ + if (saidx->reqid != 0) { + m = key_setsadbxsa2(saidx->mode, 0, saidx->reqid); + if (m == NULL) { + error = ENOBUFS; + goto fail; + } + m_cat(result, m); + } /* XXX identity (optional) */ #if 0 if (idexttype && fqdn) { @@ -8337,7 +8341,6 @@ key_init(void) if (!IS_DEFAULT_VNET(curvnet)) return; - XFORMS_LOCK_INIT(); SPTREE_LOCK_INIT(); REGTREE_LOCK_INIT(); SAHTREE_LOCK_INIT(); @@ -8460,7 +8463,6 @@ key_destroy(void) #ifndef IPSEC_DEBUG2 callout_drain(&key_timer); #endif - XFORMS_LOCK_DESTROY(); SPTREE_LOCK_DESTROY(); REGTREE_LOCK_DESTROY(); SAHTREE_LOCK_DESTROY(); @@ -8619,70 +8621,3 @@ comp_algorithm_lookup(int alg) return (NULL); } -/* - * Register a transform. - */ -static int -xform_register(struct xformsw* xsp) -{ - struct xformsw *entry; - - XFORMS_LOCK(); - LIST_FOREACH(entry, &xforms, chain) { - if (entry->xf_type == xsp->xf_type) { - XFORMS_UNLOCK(); - return (EEXIST); - } - } - LIST_INSERT_HEAD(&xforms, xsp, chain); - XFORMS_UNLOCK(); - return (0); -} - -void -xform_attach(void *data) -{ - struct xformsw *xsp = (struct xformsw *)data; - - if (xform_register(xsp) != 0) - printf("%s: failed to register %s xform\n", __func__, - xsp->xf_name); -} - -void -xform_detach(void *data) -{ - struct xformsw *xsp = (struct xformsw *)data; - - XFORMS_LOCK(); - LIST_REMOVE(xsp, chain); - XFORMS_UNLOCK(); - - /* Delete all SAs related to this xform. */ - key_delete_xform(xsp); -} - -/* - * Initialize transform support in an sav. - */ -static int -xform_init(struct secasvar *sav, u_short xftype) -{ - struct xformsw *entry; - int ret; - - IPSEC_ASSERT(sav->tdb_xform == NULL, - ("tdb_xform is already initialized")); - - ret = EINVAL; - XFORMS_LOCK(); - LIST_FOREACH(entry, &xforms, chain) { - if (entry->xf_type == xftype) { - ret = (*entry->xf_init)(sav, entry); - break; - } - } - XFORMS_UNLOCK(); - return (ret); -} - diff --git a/freebsd/sys/netipsec/key.h b/freebsd/sys/netipsec/key.h index 6c3e05c0..7d7ae69f 100644 --- a/freebsd/sys/netipsec/key.h +++ b/freebsd/sys/netipsec/key.h @@ -46,6 +46,7 @@ struct sadb_msg; struct sadb_x_policy; struct secasindex; union sockaddr_union; +struct xformsw; struct secpolicy *key_newsp(void); struct secpolicy *key_allocsp(struct secpolicyindex *, u_int); @@ -74,6 +75,8 @@ int key_sockaddrcmp_withmask(const struct sockaddr *, const struct sockaddr *, int key_register_ifnet(struct secpolicy **, u_int); void key_unregister_ifnet(struct secpolicy **, u_int); +void key_delete_xform(const struct xformsw *); + extern u_long key_random(void); extern void key_randomfill(void *, size_t); extern void key_freereg(struct socket *); diff --git a/freebsd/sys/netipsec/subr_ipsec.c b/freebsd/sys/netipsec/subr_ipsec.c index 1ddae59b..58c98e32 100644 --- a/freebsd/sys/netipsec/subr_ipsec.c +++ b/freebsd/sys/netipsec/subr_ipsec.c @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include <netipsec/ipsec6.h> #include <netipsec/key.h> #include <netipsec/key_debug.h> +#include <netipsec/xform.h> #include <machine/atomic.h> /* @@ -126,14 +127,6 @@ ipsec6_setsockaddrs(const struct mbuf *m, union sockaddr_union *src, } #endif -#ifdef IPSEC_SUPPORT -/* - * IPSEC_SUPPORT - loading of ipsec.ko and tcpmd5.ko is supported. - * IPSEC + IPSEC_SUPPORT - loading tcpmd5.ko is supported. - * IPSEC + TCP_SIGNATURE - all is build in the kernel, do not build - * IPSEC_SUPPORT. - */ -#if !defined(IPSEC) || !defined(TCP_SIGNATURE) #define IPSEC_MODULE_INCR 2 static int ipsec_kmod_enter(volatile u_int *cntr) @@ -173,6 +166,83 @@ ipsec_kmod_drain(volatile u_int *cntr) pause("ipsecd", hz/2); } +static LIST_HEAD(xforms_list, xformsw) xforms = LIST_HEAD_INITIALIZER(); +static struct mtx xforms_lock; +MTX_SYSINIT(xfroms_list, &xforms_lock, "IPsec transforms list", MTX_DEF); +#define XFORMS_LOCK() mtx_lock(&xforms_lock) +#define XFORMS_UNLOCK() mtx_unlock(&xforms_lock) + +void +xform_attach(void *data) +{ + struct xformsw *xsp, *entry; + + xsp = (struct xformsw *)data; + XFORMS_LOCK(); + LIST_FOREACH(entry, &xforms, chain) { + if (entry->xf_type == xsp->xf_type) { + XFORMS_UNLOCK(); + printf("%s: failed to register %s xform\n", + __func__, xsp->xf_name); + return; + } + } + LIST_INSERT_HEAD(&xforms, xsp, chain); + xsp->xf_cntr = IPSEC_MODULE_ENABLED; + XFORMS_UNLOCK(); +} + +void +xform_detach(void *data) +{ + struct xformsw *xsp = (struct xformsw *)data; + + XFORMS_LOCK(); + LIST_REMOVE(xsp, chain); + XFORMS_UNLOCK(); + + /* Delete all SAs related to this xform. */ + key_delete_xform(xsp); + if (xsp->xf_cntr & IPSEC_MODULE_ENABLED) + ipsec_kmod_drain(&xsp->xf_cntr); +} + +/* + * Initialize transform support in an sav. + */ +int +xform_init(struct secasvar *sav, u_short xftype) +{ + struct xformsw *entry; + int ret; + + IPSEC_ASSERT(sav->tdb_xform == NULL, + ("tdb_xform is already initialized")); + + XFORMS_LOCK(); + LIST_FOREACH(entry, &xforms, chain) { + if (entry->xf_type == xftype) { + ret = ipsec_kmod_enter(&entry->xf_cntr); + XFORMS_UNLOCK(); + if (ret != 0) + return (ret); + ret = (*entry->xf_init)(sav, entry); + ipsec_kmod_exit(&entry->xf_cntr); + return (ret); + } + } + XFORMS_UNLOCK(); + return (EINVAL); +} + +#ifdef IPSEC_SUPPORT +/* + * IPSEC_SUPPORT - loading of ipsec.ko and tcpmd5.ko is supported. + * IPSEC + IPSEC_SUPPORT - loading tcpmd5.ko is supported. + * IPSEC + TCP_SIGNATURE - all is build in the kernel, do not build + * IPSEC_SUPPORT. + */ +#if !defined(IPSEC) || !defined(TCP_SIGNATURE) #define METHOD_DECL(...) __VA_ARGS__ #define METHOD_ARGS(...) __VA_ARGS__ #define IPSEC_KMOD_METHOD(type, name, sc, method, decl, args) \ diff --git a/freebsd/sys/netipsec/xform.h b/freebsd/sys/netipsec/xform.h index 389d0b66..910a88a7 100644 --- a/freebsd/sys/netipsec/xform.h +++ b/freebsd/sys/netipsec/xform.h @@ -86,14 +86,16 @@ struct xform_data { #define XF_IPCOMP 6 /* IPCOMP */ struct xformsw { - u_short xf_type; /* xform ID */ - char *xf_name; /* human-readable name */ + u_short xf_type; /* xform ID */ + const char *xf_name; /* human-readable name */ int (*xf_init)(struct secasvar*, struct xformsw*); /* setup */ int (*xf_zeroize)(struct secasvar*); /* cleanup */ int (*xf_input)(struct mbuf*, struct secasvar*, /* input */ int, int); int (*xf_output)(struct mbuf*, /* output */ struct secpolicy *, struct secasvar *, u_int, int, int); + + volatile u_int xf_cntr; LIST_ENTRY(xformsw) chain; }; @@ -103,6 +105,7 @@ const struct comp_algo * comp_algorithm_lookup(int); void xform_attach(void *); void xform_detach(void *); +int xform_init(struct secasvar *, u_short); struct cryptoini; /* XF_AH */ diff --git a/freebsd/sys/netpfil/pf/pf.c b/freebsd/sys/netpfil/pf/pf.c index f765350d..5fa7a8fe 100644 --- a/freebsd/sys/netpfil/pf/pf.c +++ b/freebsd/sys/netpfil/pf/pf.c @@ -4413,7 +4413,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL); REASON_SET(reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); - } else if (!(th->th_flags & TH_ACK) || + } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { REASON_SET(reason, PFRES_SYNPROXY); diff --git a/freebsd/sys/opencrypto/cryptosoft.c b/freebsd/sys/opencrypto/cryptosoft.c index 8dff61c1..81c5bde8 100644 --- a/freebsd/sys/opencrypto/cryptosoft.c +++ b/freebsd/sys/opencrypto/cryptosoft.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include <sys/rwlock.h> #include <sys/endian.h> #include <sys/limits.h> +#include <sys/mutex.h> #include <crypto/blowfish/blowfish.h> #include <crypto/sha1.h> @@ -767,6 +768,7 @@ swcr_newsession(device_t dev, crypto_session_t cses, struct cryptoini *cri) return EINVAL; ses = crypto_get_driver_session(cses); + mtx_init(&ses->swcr_lock, "swcr session lock", NULL, MTX_DEF); for (i = 0; cri != NULL && i < nitems(ses->swcr_algorithms); i++) { swd = &ses->swcr_algorithms[i]; @@ -1024,6 +1026,7 @@ swcr_freesession(device_t dev, crypto_session_t cses) ses = crypto_get_driver_session(cses); + mtx_destroy(&ses->swcr_lock); for (i = 0; i < nitems(ses->swcr_algorithms); i++) { swd = &ses->swcr_algorithms[i]; @@ -1111,7 +1114,7 @@ swcr_freesession(device_t dev, crypto_session_t cses) static int swcr_process(device_t dev, struct cryptop *crp, int hint) { - struct swcr_session *ses; + struct swcr_session *ses = NULL; struct cryptodesc *crd; struct swcr_data *sw; size_t i; @@ -1126,6 +1129,7 @@ swcr_process(device_t dev, struct cryptop *crp, int hint) } ses = crypto_get_driver_session(crp->crp_session); + mtx_lock(&ses->swcr_lock); /* Go through crypto descriptors, processing as we go */ for (crd = crp->crp_desc; crd; crd = crd->crd_next) { @@ -1215,6 +1219,8 @@ swcr_process(device_t dev, struct cryptop *crp, int hint) } done: + if (ses) + mtx_unlock(&ses->swcr_lock); crypto_done(crp); return 0; } diff --git a/freebsd/sys/opencrypto/cryptosoft.h b/freebsd/sys/opencrypto/cryptosoft.h index d88b09d4..d787dc24 100644 --- a/freebsd/sys/opencrypto/cryptosoft.h +++ b/freebsd/sys/opencrypto/cryptosoft.h @@ -58,6 +58,7 @@ struct swcr_data { }; struct swcr_session { + struct mtx swcr_lock; struct swcr_data swcr_algorithms[2]; unsigned swcr_nalgs; }; diff --git a/freebsd/sys/powerpc/include/machine/spr.h b/freebsd/sys/powerpc/include/machine/spr.h index fba367cb..8fa828e1 100644 --- a/freebsd/sys/powerpc/include/machine/spr.h +++ b/freebsd/sys/powerpc/include/machine/spr.h @@ -245,6 +245,33 @@ #define SPR_PTCR 0x1d0 /* Partition Table Control Register */ #define SPR_SPEFSCR 0x200 /* ..8 Signal Processing Engine FSCR. */ +#define SPEFSCR_SOVH 0x80000000 +#define SPEFSCR_OVH 0x40000000 +#define SPEFSCR_FGH 0x20000000 +#define SPEFSCR_FXH 0x10000000 +#define SPEFSCR_FINVH 0x08000000 +#define SPEFSCR_FDBZH 0x04000000 +#define SPEFSCR_FUNFH 0x02000000 +#define SPEFSCR_FOVFH 0x01000000 +#define SPEFSCR_FINXS 0x00200000 +#define SPEFSCR_FINVS 0x00100000 +#define SPEFSCR_FDBZS 0x00080000 +#define SPEFSCR_FUNFS 0x00040000 +#define SPEFSCR_FOVFS 0x00020000 +#define SPEFSCR_SOV 0x00008000 +#define SPEFSCR_OV 0x00004000 +#define SPEFSCR_FG 0x00002000 +#define SPEFSCR_FX 0x00001000 +#define SPEFSCR_FINV 0x00000800 +#define SPEFSCR_FDBZ 0x00000400 +#define SPEFSCR_FUNF 0x00000200 +#define SPEFSCR_FOVF 0x00000100 +#define SPEFSCR_FINXE 0x00000040 +#define SPEFSCR_FINVE 0x00000020 +#define SPEFSCR_FDBZE 0x00000010 +#define SPEFSCR_FUNFE 0x00000008 +#define SPEFSCR_FOVFE 0x00000004 +#define SPEFSCR_FRMC_M 0x00000003 #define SPR_IBAT0U 0x210 /* .6. Instruction BAT Reg 0 Upper */ #define SPR_IBAT0L 0x211 /* .6. Instruction BAT Reg 0 Lower */ #define SPR_IBAT1U 0x212 /* .6. Instruction BAT Reg 1 Upper */ diff --git a/freebsd/sys/security/audit/audit.h b/freebsd/sys/security/audit/audit.h index 055194d3..f24bc1e5 100644 --- a/freebsd/sys/security/audit/audit.h +++ b/freebsd/sys/security/audit/audit.h @@ -2,7 +2,7 @@ * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1999-2005 Apple Inc. - * Copyright (c) 2016-2017 Robert N. M. Watson + * Copyright (c) 2016-2018 Robert N. M. Watson * All rights reserved. * * This software was developed by BAE Systems, the University of Cambridge @@ -55,14 +55,23 @@ #include <sys/sysctl.h> /* - * Audit subsystem condition flags. The audit_enabled flag is set and + * Audit subsystem condition flags. The audit_trail_enabled flag is set and * removed automatically as a result of configuring log files, and can be * observed but should not be directly manipulated. The audit suspension * flag permits audit to be temporarily disabled without reconfiguring the * audit target. + * + * As DTrace can also request system-call auditing, a further + * audit_syscalls_enabled flag tracks whether newly entering system calls + * should be considered for auditing or not. + * + * XXXRW: Move trail flags to audit_private.h, as they no longer need to be + * visible outside the audit code...? */ -extern int audit_enabled; -extern int audit_suspended; +extern u_int audit_dtrace_enabled; +extern int audit_trail_enabled; +extern int audit_trail_suspended; +extern int audit_syscalls_enabled; void audit_syscall_enter(unsigned short code, struct thread *td); void audit_syscall_exit(int error, struct thread *td); @@ -139,7 +148,7 @@ void audit_thread_free(struct thread *td); /* * Define macros to wrap the audit_arg_* calls by checking the global - * audit_enabled flag before performing the actual call. + * audit_syscalls_enabled flag before performing the actual call. */ #define AUDITING_TD(td) ((td)->td_pflags & TDP_AUDITREC) @@ -369,7 +378,7 @@ void audit_thread_free(struct thread *td); } while (0) #define AUDIT_SYSCALL_ENTER(code, td) do { \ - if (audit_enabled) { \ + if (audit_syscalls_enabled) { \ audit_syscall_enter(code, td); \ } \ } while (0) @@ -377,7 +386,7 @@ void audit_thread_free(struct thread *td); /* * Wrap the audit_syscall_exit() function so that it is called only when * we have a audit record on the thread. Audit records can persist after - * auditing is disabled, so we don't just check audit_enabled here. + * auditing is disabled, so we don't just check audit_syscalls_enabled here. */ #define AUDIT_SYSCALL_EXIT(error, td) do { \ if (td->td_pflags & TDP_AUDITREC) \ diff --git a/freebsd/sys/sys/_domainset.h b/freebsd/sys/sys/_domainset.h index 30d8501c..34d8f61c 100644 --- a/freebsd/sys/sys/_domainset.h +++ b/freebsd/sys/sys/_domainset.h @@ -54,7 +54,7 @@ typedef struct _domainset domainset_t; struct domainset; struct domainset_ref { struct domainset * volatile dr_policy; - int dr_iterator; + unsigned int dr_iterator; }; #endif /* !_SYS__DOMAINSET_H_ */ diff --git a/freebsd/sys/sys/capsicum.h b/freebsd/sys/sys/capsicum.h index bf97d0b2..d40b8572 100644 --- a/freebsd/sys/sys/capsicum.h +++ b/freebsd/sys/sys/capsicum.h @@ -471,7 +471,13 @@ u_char cap_rights_to_vmprot(const cap_rights_t *havep); /* * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to * extract the rights from a capability. + * + * Dereferencing fdep requires filedesc.h, but including it would cause + * significant pollution. Instead add a macro for consumers which want it, + * most notably kern_descrip.c. */ +#define cap_rights_fde_inline(fdep) (&(fdep)->fde_rights) + const cap_rights_t *cap_rights_fde(const struct filedescent *fde); const cap_rights_t *cap_rights(struct filedesc *fdp, int fd); diff --git a/freebsd/sys/sys/cons.h b/freebsd/sys/sys/cons.h index 04784646..432631d4 100644 --- a/freebsd/sys/sys/cons.h +++ b/freebsd/sys/sys/cons.h @@ -137,6 +137,7 @@ int cngetc(void); void cngets(char *, size_t, int); void cnputc(int); void cnputs(char *); +void cnputsn(const char *, size_t); int cnunavailable(void); void constty_set(struct tty *tp); void constty_clear(void); diff --git a/freebsd/sys/sys/domainset.h b/freebsd/sys/sys/domainset.h index 81375ed0..32b35ac5 100644 --- a/freebsd/sys/sys/domainset.h +++ b/freebsd/sys/sys/domainset.h @@ -32,8 +32,8 @@ #define _SYS_DOMAINSET_H_ #include <sys/_domainset.h> - #include <sys/bitset.h> +#include <sys/queue.h> #define _NDOMAINSETBITS _BITSET_BITS #define _NDOMAINSETWORDS __bitset_words(DOMAINSET_SETSIZE) @@ -96,6 +96,12 @@ struct domainset { domainid_t ds_order[MAXMEMDOM]; /* nth domain table. */ }; +extern struct domainset domainset_prefer[MAXMEMDOM]; +#define DOMAINSET_PREF(domain) (&domainset_prefer[(domain)]) +extern struct domainset domainset_roundrobin; +#define DOMAINSET_RR() (&domainset_roundrobin) + +void domainset_init(void); void domainset_zero(void); /* diff --git a/freebsd/sys/sys/jail.h b/freebsd/sys/sys/jail.h index 6f8c9aa5..c9f24bdd 100644 --- a/freebsd/sys/sys/jail.h +++ b/freebsd/sys/sys/jail.h @@ -228,9 +228,10 @@ struct prison_racct { #define PR_ALLOW_QUOTAS 0x00000020 #define PR_ALLOW_SOCKET_AF 0x00000040 #define PR_ALLOW_MLOCK 0x00000080 +#define PR_ALLOW_READ_MSGBUF 0x00000100 #define PR_ALLOW_RESERVED_PORTS 0x00008000 #define PR_ALLOW_KMEM_ACCESS 0x00010000 /* reserved, not used yet */ -#define PR_ALLOW_ALL_STATIC 0x000180ff +#define PR_ALLOW_ALL_STATIC 0x000181ff /* * OSD methods diff --git a/freebsd/sys/sys/linker.h b/freebsd/sys/sys/linker.h index 8aae31d9..a03ea88c 100644 --- a/freebsd/sys/sys/linker.h +++ b/freebsd/sys/sys/linker.h @@ -183,6 +183,8 @@ int linker_ddb_search_symbol_name(caddr_t value, char *buf, u_int buflen, /* * stack(9) helper for situations where kernel locking is required. */ +int linker_search_symbol_name_flags(caddr_t value, char *buf, u_int buflen, + long *offset, int flags); int linker_search_symbol_name(caddr_t value, char *buf, u_int buflen, long *offset); diff --git a/freebsd/sys/sys/malloc.h b/freebsd/sys/sys/malloc.h index 33e1aab9..4e7a8df3 100644 --- a/freebsd/sys/sys/malloc.h +++ b/freebsd/sys/sys/malloc.h @@ -101,7 +101,7 @@ struct malloc_type_internal { uint32_t mti_probes[DTMALLOC_PROBE_MAX]; /* DTrace probe ID array. */ u_char mti_zone; - struct malloc_type_stats mti_stats[MAXCPU]; + struct malloc_type_stats *mti_stats; }; /* diff --git a/freebsd/sys/sys/module.h b/freebsd/sys/sys/module.h index b40870d3..89377df4 100644 --- a/freebsd/sys/sys/module.h +++ b/freebsd/sys/sys/module.h @@ -178,12 +178,12 @@ struct mod_pnp_match_info * to allow external tools to parse their internal device tables * to make an informed guess about what driver(s) to load. */ -#define MODULE_PNP_INFO(d, b, unique, t, l, n) \ +#define MODULE_PNP_INFO(d, b, unique, t, n) \ static const struct mod_pnp_match_info _module_pnp_##b##_##unique = { \ .descr = d, \ .bus = #b, \ .table = t, \ - .entry_len = l, \ + .entry_len = sizeof((t)[0]), \ .num_entry = n \ }; \ MODULE_METADATA(_md_##b##_pnpinfo_##unique, MDT_PNP_INFO, \ diff --git a/freebsd/sys/sys/mouse.h b/freebsd/sys/sys/mouse.h index 882d59c9..3bb65d5c 100644 --- a/freebsd/sys/sys/mouse.h +++ b/freebsd/sys/sys/mouse.h @@ -141,8 +141,8 @@ typedef struct synapticshw { /* iftype */ #define MOUSE_IF_UNKNOWN (-1) #define MOUSE_IF_SERIAL 0 -#define MOUSE_IF_BUS 1 -#define MOUSE_IF_INPORT 2 +/* 1 was bus */ +/* 2 was inport */ #define MOUSE_IF_PS2 3 #define MOUSE_IF_SYSMOUSE 4 #define MOUSE_IF_USB 5 @@ -200,8 +200,8 @@ typedef struct mousemode { #define MOUSE_PROTO_LOGI 2 /* Logitech, 3 bytes */ #define MOUSE_PROTO_MM 3 /* MM series, 3 bytes */ #define MOUSE_PROTO_LOGIMOUSEMAN 4 /* Logitech MouseMan 3/4 bytes */ -#define MOUSE_PROTO_BUS 5 /* MS/Logitech bus mouse */ -#define MOUSE_PROTO_INPORT 6 /* MS/ATI InPort mouse */ +/* 5 was bus mouse */ +/* 6 was inport mosue */ #define MOUSE_PROTO_PS2 7 /* PS/2 mouse, 3 bytes */ #define MOUSE_PROTO_HITTAB 8 /* Hitachi Tablet 3 bytes */ #define MOUSE_PROTO_GLIDEPOINT 9 /* ALPS GlidePoint, 3/4 bytes */ diff --git a/freebsd/sys/sys/racct.h b/freebsd/sys/sys/racct.h index ec3322bd..84de705f 100644 --- a/freebsd/sys/sys/racct.h +++ b/freebsd/sys/sys/racct.h @@ -164,6 +164,15 @@ extern struct mtx racct_lock; #define RACCT_UNLOCK() mtx_unlock(&racct_lock) #define RACCT_LOCK_ASSERT() mtx_assert(&racct_lock, MA_OWNED) +#define RACCT_PROC_LOCK(p) do { \ + if (__predict_false(racct_enable)) \ + PROC_LOCK(p); \ +} while (0) +#define RACCT_PROC_UNLOCK(p) do { \ + if (__predict_false(racct_enable)) \ + PROC_UNLOCK(p); \ +} while (0) + int racct_add(struct proc *p, int resource, uint64_t amount); void racct_add_cred(struct ucred *cred, int resource, uint64_t amount); void racct_add_force(struct proc *p, int resource, uint64_t amount); @@ -189,6 +198,9 @@ void racct_proc_throttle(struct proc *p, int timeout); #else +#define RACCT_PROC_LOCK(p) do { } while (0) +#define RACCT_PROC_UNLOCK(p) do { } while (0) + static inline int racct_add(struct proc *p, int resource, uint64_t amount) { diff --git a/freebsd/sys/sys/random.h b/freebsd/sys/sys/random.h index f32d3f66..2543bfec 100644 --- a/freebsd/sys/sys/random.h +++ b/freebsd/sys/sys/random.h @@ -67,9 +67,6 @@ read_random(void *ptr, u_int n) * Note: if you add or remove members of random_entropy_source, remember to * also update the strings in the static array random_source_descr[] in * random_harvestq.c. - * - * NOTE: complain loudly to markm@ or on the lists if this enum gets more than 32 - * distinct values (0-31)! ENTROPYSOURCE may be == 32, but not > 32. */ enum random_entropy_source { RANDOM_START = 0, @@ -102,6 +99,8 @@ enum random_entropy_source { RANDOM_PURE_DARN, ENTROPYSOURCE }; +_Static_assert(ENTROPYSOURCE <= 32, + "hardcoded assumption that values fit in a typical word-sized bitset"); #define RANDOM_HARVEST_EVERYTHING_MASK ((1 << (RANDOM_ENVIRONMENTAL_END + 1)) - 1) #define RANDOM_HARVEST_PURE_MASK (((1 << ENTROPYSOURCE) - 1) & (-1UL << RANDOM_PURE_START)) diff --git a/freebsd/sys/sys/refcount.h b/freebsd/sys/sys/refcount.h index 040584ff..41713147 100644 --- a/freebsd/sys/sys/refcount.h +++ b/freebsd/sys/sys/refcount.h @@ -62,7 +62,7 @@ refcount_release(volatile u_int *count) atomic_thread_fence_rel(); old = atomic_fetchadd_int((volatile int *)count, -1); - KASSERT(old > 0, ("negative refcount %p", count)); + KASSERT(old > 0, ("refcount %p is zero", count)); if (old > 1) return (0); @@ -77,15 +77,19 @@ refcount_release(volatile u_int *count) } /* + * This functions returns non-zero if the refcount was + * incremented. Else zero is returned. + * * A temporary hack until refcount_* APIs are sorted out. */ -static __inline int +static __inline __result_use_check int refcount_acquire_if_not_zero(volatile u_int *count) { u_int old; old = *count; for (;;) { + KASSERT(old < UINT_MAX, ("refcount %p overflowed", count)); if (old == 0) return (0); if (atomic_fcmpset_int(count, &old, old + 1)) @@ -93,13 +97,14 @@ refcount_acquire_if_not_zero(volatile u_int *count) } } -static __inline int +static __inline __result_use_check int refcount_release_if_not_last(volatile u_int *count) { u_int old; old = *count; for (;;) { + KASSERT(old > 0, ("refcount %p is zero", count)); if (old == 1) return (0); if (atomic_fcmpset_int(count, &old, old - 1)) diff --git a/freebsd/sys/sys/resourcevar.h b/freebsd/sys/sys/resourcevar.h index 9301c3e0..7c7e8458 100644 --- a/freebsd/sys/sys/resourcevar.h +++ b/freebsd/sys/sys/resourcevar.h @@ -93,12 +93,10 @@ struct racct; * (a) Constant from inception * (b) Lockless, updated using atomics * (c) Locked by global uihashtbl_lock - * (d) Locked by the ui_vmsize_mtx */ struct uidinfo { LIST_ENTRY(uidinfo) ui_hash; /* (c) hash chain of uidinfos */ - struct mtx ui_vmsize_mtx; - vm_ooffset_t ui_vmsize; /* (d) swap reservation by uid */ + u_long ui_vmsize; /* (b) pages of swap reservation by uid */ long ui_sbsize; /* (b) socket buffer space consumed */ long ui_proccnt; /* (b) number of processes */ long ui_ptscnt; /* (b) number of pseudo-terminals */ diff --git a/freebsd/sys/sys/signalvar.h b/freebsd/sys/sys/signalvar.h index 8735e3a0..0a1413b5 100644 --- a/freebsd/sys/sys/signalvar.h +++ b/freebsd/sys/sys/signalvar.h @@ -350,7 +350,7 @@ static inline int sigdeferstop(int mode) { - if (mode == SIGDEFERSTOP_NOP) + if (__predict_true(mode == SIGDEFERSTOP_NOP)) return (SIGDEFERSTOP_VAL_NCHG); return (sigdeferstop_impl(mode)); } @@ -359,7 +359,7 @@ static inline void sigallowstop(int prev) { - if (prev == SIGDEFERSTOP_VAL_NCHG) + if (__predict_true(prev == SIGDEFERSTOP_VAL_NCHG)) return; sigallowstop_impl(prev); } diff --git a/freebsd/sys/sys/sockbuf.h b/freebsd/sys/sys/sockbuf.h index 915fee0b..fc287023 100644 --- a/freebsd/sys/sys/sockbuf.h +++ b/freebsd/sys/sys/sockbuf.h @@ -167,10 +167,9 @@ int sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td); struct mbuf * sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff); +void sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len); struct mbuf * sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff); -void - sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len); struct mbuf * sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff); int sbwait(struct sockbuf *sb); diff --git a/freebsd/sys/sys/sysproto.h b/freebsd/sys/sys/sysproto.h index 89467f3b..8317e586 100644 --- a/freebsd/sys/sys/sysproto.h +++ b/freebsd/sys/sys/sysproto.h @@ -141,23 +141,23 @@ struct recvfrom_args { char buf_l_[PADL_(caddr_t)]; caddr_t buf; char buf_r_[PADR_(caddr_t)]; char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char from_l_[PADL_(struct sockaddr *__restrict)]; struct sockaddr *__restrict from; char from_r_[PADR_(struct sockaddr *__restrict)]; - char fromlenaddr_l_[PADL_(__socklen_t *__restrict)]; __socklen_t *__restrict fromlenaddr; char fromlenaddr_r_[PADR_(__socklen_t *__restrict)]; + char from_l_[PADL_(struct sockaddr *)]; struct sockaddr * from; char from_r_[PADR_(struct sockaddr *)]; + char fromlenaddr_l_[PADL_(__socklen_t *)]; __socklen_t * fromlenaddr; char fromlenaddr_r_[PADR_(__socklen_t *)]; }; struct accept_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(struct sockaddr *__restrict)]; struct sockaddr *__restrict name; char name_r_[PADR_(struct sockaddr *__restrict)]; - char anamelen_l_[PADL_(__socklen_t *__restrict)]; __socklen_t *__restrict anamelen; char anamelen_r_[PADR_(__socklen_t *__restrict)]; + char name_l_[PADL_(struct sockaddr *)]; struct sockaddr * name; char name_r_[PADR_(struct sockaddr *)]; + char anamelen_l_[PADL_(__socklen_t *)]; __socklen_t * anamelen; char anamelen_r_[PADR_(__socklen_t *)]; }; struct getpeername_args { char fdes_l_[PADL_(int)]; int fdes; char fdes_r_[PADR_(int)]; - char asa_l_[PADL_(struct sockaddr *__restrict)]; struct sockaddr *__restrict asa; char asa_r_[PADR_(struct sockaddr *__restrict)]; - char alen_l_[PADL_(__socklen_t *__restrict)]; __socklen_t *__restrict alen; char alen_r_[PADR_(__socklen_t *__restrict)]; + char asa_l_[PADL_(struct sockaddr *)]; struct sockaddr * asa; char asa_r_[PADR_(struct sockaddr *)]; + char alen_l_[PADL_(__socklen_t *)]; __socklen_t * alen; char alen_r_[PADR_(__socklen_t *)]; }; struct getsockname_args { char fdes_l_[PADL_(int)]; int fdes; char fdes_r_[PADR_(int)]; - char asa_l_[PADL_(struct sockaddr *__restrict)]; struct sockaddr *__restrict asa; char asa_r_[PADR_(struct sockaddr *__restrict)]; - char alen_l_[PADL_(__socklen_t *__restrict)]; __socklen_t *__restrict alen; char alen_r_[PADR_(__socklen_t *__restrict)]; + char asa_l_[PADL_(struct sockaddr *)]; struct sockaddr * asa; char asa_r_[PADR_(struct sockaddr *)]; + char alen_l_[PADL_(__socklen_t *)]; __socklen_t * alen; char alen_r_[PADR_(__socklen_t *)]; }; #ifndef __rtems__ struct access_args { @@ -1703,8 +1703,8 @@ struct chflagsat_args { }; struct accept4_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(struct sockaddr *__restrict)]; struct sockaddr *__restrict name; char name_r_[PADR_(struct sockaddr *__restrict)]; - char anamelen_l_[PADL_(__socklen_t *__restrict)]; __socklen_t *__restrict anamelen; char anamelen_r_[PADR_(__socklen_t *__restrict)]; + char name_l_[PADL_(struct sockaddr *)]; struct sockaddr * name; char name_r_[PADR_(struct sockaddr *)]; + char anamelen_l_[PADL_(__socklen_t *)]; __socklen_t * anamelen; char anamelen_r_[PADR_(__socklen_t *)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; }; struct pipe2_args { diff --git a/freebsd/sys/sys/systm.h b/freebsd/sys/sys/systm.h index 4145be2c..9f9b00bd 100644 --- a/freebsd/sys/sys/systm.h +++ b/freebsd/sys/sys/systm.h @@ -331,7 +331,6 @@ int vasprintf(char **ret, struct malloc_type *mtp, const char *format, int vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0); int vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0); int vsprintf(char *buf, const char *, __va_list) __printflike(2, 0); -int ttyprintf(struct tty *, const char *, ...) __printflike(2, 3); int sscanf(const char *, char const * _Nonnull, ...) __scanflike(2, 3); int vsscanf(const char * _Nonnull, char const * _Nonnull, __va_list) __scanflike(2, 0); long strtol(const char *, char **, int); @@ -384,6 +383,12 @@ void *memmove(void * _Nonnull dest, const void * _Nonnull src, size_t n); int memcmp(const void *b1, const void *b2, size_t len); #define memcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len)) +void *memset_early(void * _Nonnull buf, int c, size_t len); +#define bzero_early(buf, len) memset_early((buf), 0, (len)) +void *memcpy_early(void * _Nonnull to, const void * _Nonnull from, size_t len); +void *memmove_early(void * _Nonnull dest, const void * _Nonnull src, size_t n); +#define bcopy_early(from, to, len) memmove_early((to), (from), (len)) + #ifndef __rtems__ int copystr(const void * _Nonnull __restrict kfaddr, void * _Nonnull __restrict kdaddr, size_t len, diff --git a/freebsd/sys/sys/tty.h b/freebsd/sys/sys/tty.h index 92ad89a6..c85ca559 100644 --- a/freebsd/sys/sys/tty.h +++ b/freebsd/sys/sys/tty.h @@ -132,6 +132,13 @@ struct tty { void *t_devswsoftc; /* (c) Soft config, for drivers. */ void *t_hooksoftc; /* (t) Soft config, for hooks. */ struct cdev *t_dev; /* (c) Primary character device. */ + +#ifndef PRINTF_BUFR_SIZE +#define TTY_PRINTF_SIZE 256 +#else +#define TTY_PRINTF_SIZE PRINTF_BUFR_SIZE +#endif + char t_prbuf[TTY_PRINTF_SIZE]; /* (t) */ }; /* @@ -194,6 +201,7 @@ void tty_wakeup(struct tty *tp, int flags); /* System messages. */ int tty_checkoutq(struct tty *tp); int tty_putchar(struct tty *tp, char c); +int tty_putstrn(struct tty *tp, const char *p, size_t n); int tty_ioctl(struct tty *tp, u_long cmd, void *data, int fflag, struct thread *td); diff --git a/freebsd/sys/sys/user.h b/freebsd/sys/sys/user.h index f6aa2b3f..1218deec 100644 --- a/freebsd/sys/sys/user.h +++ b/freebsd/sys/sys/user.h @@ -349,85 +349,96 @@ struct kinfo_file { int64_t kf_offset; /* Seek location. */ union { struct { - /* Sendq size */ - uint32_t kf_sock_sendq; - /* Socket domain. */ - int kf_sock_domain0; - /* Socket type. */ - int kf_sock_type0; - /* Socket protocol. */ - int kf_sock_protocol0; - /* Socket address. */ + /* API compatiblity with FreeBSD < 12. */ + int kf_vnode_type; + int kf_sock_domain; + int kf_sock_type; + int kf_sock_protocol; struct sockaddr_storage kf_sa_local; - /* Peer address. */ struct sockaddr_storage kf_sa_peer; - /* Address of so_pcb. */ - uint64_t kf_sock_pcb; - /* Address of inp_ppcb. */ - uint64_t kf_sock_inpcb; - /* Address of unp_conn. */ - uint64_t kf_sock_unpconn; - /* Send buffer state. */ - uint16_t kf_sock_snd_sb_state; - /* Receive buffer state. */ - uint16_t kf_sock_rcv_sb_state; - /* Recvq size. */ - uint32_t kf_sock_recvq; - } kf_sock; - struct { - /* Vnode type. */ - int kf_file_type; - /* Space for future use */ - int kf_spareint[3]; - uint64_t kf_spareint64[30]; - /* Vnode filesystem id. */ - uint64_t kf_file_fsid; - /* File device. */ - uint64_t kf_file_rdev; - /* Global file id. */ - uint64_t kf_file_fileid; - /* File size. */ - uint64_t kf_file_size; - /* Vnode filesystem id, FreeBSD 11 compat. */ - uint32_t kf_file_fsid_freebsd11; - /* File device, FreeBSD 11 compat. */ - uint32_t kf_file_rdev_freebsd11; - /* File mode. */ - uint16_t kf_file_mode; - /* Round to 64 bit alignment. */ - uint16_t kf_file_pad0; - uint32_t kf_file_pad1; - } kf_file; - struct { - uint32_t kf_spareint[4]; - uint64_t kf_spareint64[32]; - uint32_t kf_sem_value; - uint16_t kf_sem_mode; - } kf_sem; - struct { - uint32_t kf_spareint[4]; - uint64_t kf_spareint64[32]; - uint64_t kf_pipe_addr; - uint64_t kf_pipe_peer; - uint32_t kf_pipe_buffer_cnt; - /* Round to 64 bit alignment. */ - uint32_t kf_pipe_pad0[3]; - } kf_pipe; - struct { - uint32_t kf_spareint[4]; - uint64_t kf_spareint64[32]; - uint32_t kf_pts_dev_freebsd11; - uint32_t kf_pts_pad0; - uint64_t kf_pts_dev; - /* Round to 64 bit alignment. */ - uint32_t kf_pts_pad1[4]; - } kf_pts; - struct { - uint32_t kf_spareint[4]; - uint64_t kf_spareint64[32]; - pid_t kf_pid; - } kf_proc; - } kf_un; + }; + union { + struct { + /* Sendq size */ + uint32_t kf_sock_sendq; + /* Socket domain. */ + int kf_sock_domain0; + /* Socket type. */ + int kf_sock_type0; + /* Socket protocol. */ + int kf_sock_protocol0; + /* Socket address. */ + struct sockaddr_storage kf_sa_local; + /* Peer address. */ + struct sockaddr_storage kf_sa_peer; + /* Address of so_pcb. */ + uint64_t kf_sock_pcb; + /* Address of inp_ppcb. */ + uint64_t kf_sock_inpcb; + /* Address of unp_conn. */ + uint64_t kf_sock_unpconn; + /* Send buffer state. */ + uint16_t kf_sock_snd_sb_state; + /* Receive buffer state. */ + uint16_t kf_sock_rcv_sb_state; + /* Recvq size. */ + uint32_t kf_sock_recvq; + } kf_sock; + struct { + /* Vnode type. */ + int kf_file_type; + /* Space for future use */ + int kf_spareint[3]; + uint64_t kf_spareint64[30]; + /* Vnode filesystem id. */ + uint64_t kf_file_fsid; + /* File device. */ + uint64_t kf_file_rdev; + /* Global file id. */ + uint64_t kf_file_fileid; + /* File size. */ + uint64_t kf_file_size; + /* Vnode filesystem id, FreeBSD 11 compat. */ + uint32_t kf_file_fsid_freebsd11; + /* File device, FreeBSD 11 compat. */ + uint32_t kf_file_rdev_freebsd11; + /* File mode. */ + uint16_t kf_file_mode; + /* Round to 64 bit alignment. */ + uint16_t kf_file_pad0; + uint32_t kf_file_pad1; + } kf_file; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + uint32_t kf_sem_value; + uint16_t kf_sem_mode; + } kf_sem; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + uint64_t kf_pipe_addr; + uint64_t kf_pipe_peer; + uint32_t kf_pipe_buffer_cnt; + /* Round to 64 bit alignment. */ + uint32_t kf_pipe_pad0[3]; + } kf_pipe; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + uint32_t kf_pts_dev_freebsd11; + uint32_t kf_pts_pad0; + uint64_t kf_pts_dev; + /* Round to 64 bit alignment. */ + uint32_t kf_pts_pad1[4]; + } kf_pts; + struct { + uint32_t kf_spareint[4]; + uint64_t kf_spareint64[32]; + pid_t kf_pid; + } kf_proc; + } kf_un; + }; uint16_t kf_status; /* Status flags. */ uint16_t kf_pad1; /* Round to 32 bit alignment. */ int _kf_ispare0; /* Space for more stuff. */ @@ -439,12 +450,6 @@ struct kinfo_file { int kf_dummy; #endif /* __rtems__ */ }; -#ifndef _KERNEL -#define kf_vnode_type kf_un.kf_file.kf_file_type -#define kf_sock_domain kf_un.kf_sock.kf_sock_domain0 -#define kf_sock_type kf_un.kf_sock.kf_sock_type0 -#define kf_sock_protocol kf_un.kf_sock.kf_sock_protocol0 -#endif /* * The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of diff --git a/freebsd/sys/sys/vmmeter.h b/freebsd/sys/sys/vmmeter.h index c41b151f..579d1675 100644 --- a/freebsd/sys/sys/vmmeter.h +++ b/freebsd/sys/sys/vmmeter.h @@ -145,6 +145,7 @@ struct vmmeter { #include <sys/domainset.h> extern struct vmmeter vm_cnt; +extern domainset_t all_domains; extern domainset_t vm_min_domains; extern domainset_t vm_severe_domains; @@ -177,7 +178,7 @@ vm_wire_count(void) /* * Return TRUE if we are under our severe low-free-pages threshold * - * This routine is typically used at the user<->system interface to determine + * These routines are typically used at the user<->system interface to determine * whether we need to block in order to avoid a low memory deadlock. */ static inline int @@ -188,7 +189,14 @@ vm_page_count_severe(void) } static inline int -vm_page_count_severe_set(domainset_t *mask) +vm_page_count_severe_domain(int domain) +{ + + return (DOMAINSET_ISSET(domain, &vm_severe_domains)); +} + +static inline int +vm_page_count_severe_set(const domainset_t *mask) { return (DOMAINSET_SUBSET(&vm_severe_domains, mask)); @@ -197,7 +205,7 @@ vm_page_count_severe_set(domainset_t *mask) /* * Return TRUE if we are under our minimum low-free-pages threshold. * - * This routine is typically used within the system to determine whether + * These routines are typically used within the system to determine whether * we can execute potentially very expensive code in terms of memory. It * is also used by the pageout daemon to calculate when to sleep, when * to wake waiters up, and when (after making a pass) to become more @@ -210,5 +218,19 @@ vm_page_count_min(void) return (!DOMAINSET_EMPTY(&vm_min_domains)); } +static inline int +vm_page_count_min_domain(int domain) +{ + + return (DOMAINSET_ISSET(domain, &vm_min_domains)); +} + +static inline int +vm_page_count_min_set(const domainset_t *mask) +{ + + return (DOMAINSET_SUBSET(&vm_min_domains, mask)); +} + #endif /* _KERNEL */ #endif /* _SYS_VMMETER_H_ */ diff --git a/freebsd/sys/vm/uma_core.c b/freebsd/sys/vm/uma_core.c index 2a1e1b07..f1ea87e8 100644 --- a/freebsd/sys/vm/uma_core.c +++ b/freebsd/sys/vm/uma_core.c @@ -86,6 +86,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_pageout.h> #include <vm/vm_param.h> #include <vm/vm_phys.h> +#include <vm/vm_pagequeue.h> #include <vm/vm_map.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> @@ -2550,6 +2551,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) * the current cache; when we re-acquire the critical section, we * must detect and handle migration if it has occurred. */ +zalloc_restart: critical_enter(); cpu = curcpu; cache = &zone->uz_cpu[cpu]; @@ -2611,9 +2613,11 @@ zalloc_start: bucket_free(zone, bucket, udata); #ifndef __rtems__ - if (zone->uz_flags & UMA_ZONE_NUMA) + if (zone->uz_flags & UMA_ZONE_NUMA) { domain = PCPU_GET(domain); - else + if (VM_DOMAIN_EMPTY(domain)) + domain = UMA_ANYDOMAIN; + } else #endif /* __rtems__ */ domain = UMA_ANYDOMAIN; @@ -2692,15 +2696,21 @@ zalloc_start: * the memory directly. */ #ifndef __rtems__ - if (cache->uc_allocbucket != NULL || - (zone->uz_flags & UMA_ZONE_NUMA && - domain != PCPU_GET(domain))) + if (cache->uc_allocbucket == NULL && + ((zone->uz_flags & UMA_ZONE_NUMA) == 0 || + domain == PCPU_GET(domain))) { #else /* __rtems__ */ - if (cache->uc_allocbucket != NULL) + if (cache->uc_allocbucket == NULL) { #endif /* __rtems__ */ - LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); - else cache->uc_allocbucket = bucket; + } else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) { + critical_exit(); + ZONE_UNLOCK(zone); + bucket_drain(zone, bucket); + bucket_free(zone, bucket, udata); + goto zalloc_restart; + } else + LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); ZONE_UNLOCK(zone); goto zalloc_start; } @@ -2765,8 +2775,10 @@ keg_first_slab(uma_keg_t keg, int domain, int rr) LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); return (slab); } +#ifndef __rtems__ if (rr) domain = (domain + 1) % vm_ndomains; +#endif /* __rtems__ */ } while (domain != start); return (NULL); @@ -2790,17 +2802,26 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags) * Round-robin for non first-touch zones when there is more than one * domain. */ +#ifndef __rtems__ if (vm_ndomains == 1) rdomain = 0; rr = rdomain == UMA_ANYDOMAIN; if (rr) { - keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains; + start = keg->uk_cursor; + do { + keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains; + domain = keg->uk_cursor; + } while (VM_DOMAIN_EMPTY(domain) && domain != start); domain = start = keg->uk_cursor; /* Only block on the second pass. */ if ((flags & (M_WAITOK | M_NOVM)) == M_WAITOK) allocflags = (allocflags & ~M_WAITOK) | M_NOWAIT; } else domain = start = rdomain; +#else /* __rtems__ */ + rr = 1; + domain = start = keg->uk_cursor; +#endif /* __rtems__ */ again: do { @@ -2845,10 +2866,13 @@ again: LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); return (slab); } +#ifndef __rtems__ if (rr) { - keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains; - domain = keg->uk_cursor; + do { + domain = (domain + 1) % vm_ndomains; + } while (VM_DOMAIN_EMPTY(domain) && domain != start); } +#endif /* __rtems__ */ } while (domain != start); /* Retry domain scan with blocking. */ @@ -3054,6 +3078,8 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags) uma_bucket_t bucket; int max; + CTR1(KTR_UMA, "zone_alloc:_bucket domain %d)", domain); + /* Don't wait for buckets, preserve caller's NOVM setting. */ bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM)); if (bucket == NULL) @@ -3121,6 +3147,13 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags) item = NULL; +#ifndef __rtems__ + if (domain != UMA_ANYDOMAIN) { + /* avoid allocs targeting empty domains */ + if (VM_DOMAIN_EMPTY(domain)) + domain = UMA_ANYDOMAIN; + } +#endif /* __rtems__ */ if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1) goto fail; atomic_add_long(&zone->uz_allocs, 1); @@ -3297,9 +3330,11 @@ zfree_start: critical_exit(); #ifndef __rtems__ - if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) + if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) { domain = PCPU_GET(domain); - else + if (VM_DOMAIN_EMPTY(domain)) + domain = UMA_ANYDOMAIN; + } else domain = 0; #endif /* __rtems__ */ zdom = &zone->uz_domain[0]; @@ -3761,7 +3796,9 @@ uma_prealloc(uma_zone_t zone, int items) dom = &keg->uk_domain[slab->us_domain]; LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link); slabs--; - domain = (domain + 1) % vm_ndomains; + do { + domain = (domain + 1) % vm_ndomains; + } while (VM_DOMAIN_EMPTY(domain)); } KEG_UNLOCK(keg); } @@ -3857,6 +3894,11 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait) vm_offset_t addr; uma_slab_t slab; + if (domain != UMA_ANYDOMAIN) { + /* avoid allocs targeting empty domains */ + if (VM_DOMAIN_EMPTY(domain)) + domain = UMA_ANYDOMAIN; + } slab = zone_alloc_item(slabzone, NULL, domain, wait); if (slab == NULL) return (NULL); |