path: root/freebsd/sys
Diffstat (limited to 'freebsd/sys')
-rw-r--r--  freebsd/sys/arm/freescale/imx/imx_gpio.c | 12
-rw-r--r--  freebsd/sys/arm/ti/ti_sdhci.c | 27
-rw-r--r--  freebsd/sys/cam/cam_periph.h | 2
-rw-r--r--  freebsd/sys/cam/scsi/scsi_all.c | 40
-rw-r--r--  freebsd/sys/dev/dwc/if_dwc.c | 9
-rw-r--r--  freebsd/sys/dev/e1000/e1000_api.c | 1
-rw-r--r--  freebsd/sys/dev/e1000/e1000_hw.h | 1
-rw-r--r--  freebsd/sys/dev/e1000/if_em.c | 55
-rw-r--r--  freebsd/sys/dev/gpio/gpiobus.c | 144
-rw-r--r--  freebsd/sys/dev/gpio/gpiobusvar.h | 19
-rw-r--r--  freebsd/sys/dev/gpio/ofw_gpiobus.c | 82
-rw-r--r--  freebsd/sys/dev/kbd/kbd.c | 21
-rw-r--r--  freebsd/sys/dev/kbd/kbdreg.h | 15
-rw-r--r--  freebsd/sys/dev/mii/mii.c | 14
-rw-r--r--  freebsd/sys/dev/nvme/nvme.c | 19
-rw-r--r--  freebsd/sys/dev/nvme/nvme.h | 12
-rw-r--r--  freebsd/sys/dev/nvme/nvme_ctrlr.c | 324
-rw-r--r--  freebsd/sys/dev/nvme/nvme_ctrlr_cmd.c | 20
-rw-r--r--  freebsd/sys/dev/nvme/nvme_pci.c | 87
-rw-r--r--  freebsd/sys/dev/nvme/nvme_private.h | 49
-rw-r--r--  freebsd/sys/dev/nvme/nvme_qpair.c | 119
-rw-r--r--  freebsd/sys/dev/nvme/nvme_sysctl.c | 6
-rw-r--r--  freebsd/sys/dev/ofw/ofw_bus_subr.h | 3
-rw-r--r--  freebsd/sys/dev/pci/pci.c | 17
-rw-r--r--  freebsd/sys/dev/pci/pci_private.h | 2
-rw-r--r--  freebsd/sys/dev/sdhci/sdhci.c | 9
-rw-r--r--  freebsd/sys/dev/usb/controller/dwc_otg_fdt.c | 26
-rw-r--r--  freebsd/sys/dev/usb/input/uep.c | 1
-rw-r--r--  freebsd/sys/dev/usb/input/ukbd.c | 5
-rw-r--r--  freebsd/sys/dev/usb/input/ums.c | 1
-rw-r--r--  freebsd/sys/dev/usb/serial/uslcom.c | 7
-rw-r--r--  freebsd/sys/dev/usb/usb_bus.h | 12
-rw-r--r--  freebsd/sys/dev/usb/usb_device.h | 14
-rw-r--r--  freebsd/sys/dev/usb/usb_generic.c | 5
-rw-r--r--  freebsd/sys/dev/usb/usb_ioctl.h | 2
-rw-r--r--  freebsd/sys/dev/usb/usb_transfer.c | 42
-rw-r--r--  freebsd/sys/fs/devfs/devfs_vnops.c | 58
-rw-r--r--  freebsd/sys/kern/kern_conf.c | 45
-rw-r--r--  freebsd/sys/kern/kern_linker.c | 12
-rw-r--r--  freebsd/sys/kern/kern_mib.c | 5
-rw-r--r--  freebsd/sys/kern/kern_mtxpool.c | 2
-rw-r--r--  freebsd/sys/kern/kern_sysctl.c | 53
-rw-r--r--  freebsd/sys/kern/kern_timeout.c | 11
-rw-r--r--  freebsd/sys/kern/subr_bus.c | 38
-rw-r--r--  freebsd/sys/kern/subr_firmware.c | 6
-rw-r--r--  freebsd/sys/kern/subr_gtaskqueue.c | 89
-rw-r--r--  freebsd/sys/kern/subr_taskqueue.c | 140
-rwxr-xr-x  freebsd/sys/kern/sys_pipe.c | 21
-rw-r--r--  freebsd/sys/kern/tty.c | 72
-rw-r--r--  freebsd/sys/kern/uipc_mbuf2.c | 4
-rw-r--r--  freebsd/sys/kern/uipc_usrreq.c | 3
-rw-r--r--  freebsd/sys/net/dlt.h | 18
-rw-r--r--  freebsd/sys/net/if.c | 128
-rw-r--r--  freebsd/sys/net/if_bridge.c | 22
-rw-r--r--  freebsd/sys/net/if_clone.c | 19
-rw-r--r--  freebsd/sys/net/if_clone.h | 3
-rw-r--r--  freebsd/sys/net/if_epair.c | 42
-rw-r--r--  freebsd/sys/net/if_lagg.c | 84
-rw-r--r--  freebsd/sys/net/if_lagg.h | 12
-rw-r--r--  freebsd/sys/net/if_llatbl.c | 20
-rw-r--r--  freebsd/sys/net/if_tap.c | 1153
-rw-r--r--  freebsd/sys/net/if_tap.h | 24
-rw-r--r--  freebsd/sys/net/if_tapvar.h | 71
-rw-r--r--  freebsd/sys/net/if_tun.c | 1132
-rw-r--r--  freebsd/sys/net/if_tuntap.c | 1923
-rw-r--r--  freebsd/sys/net/if_vlan.c | 36
-rw-r--r--  freebsd/sys/net/iflib.h | 5
-rw-r--r--  freebsd/sys/net/route.c | 17
-rw-r--r--  freebsd/sys/net/sff8472.h | 79
-rw-r--r--  freebsd/sys/net/vnet.h | 4
-rw-r--r--  freebsd/sys/net80211/ieee80211.c | 2
-rw-r--r--  freebsd/sys/netinet/in_mcast.c | 22
-rw-r--r--  freebsd/sys/netinet/ip_carp.c | 26
-rw-r--r--  freebsd/sys/netinet/ip_carp.h | 4
-rw-r--r--  freebsd/sys/netinet/ip_mroute.c | 26
-rw-r--r--  freebsd/sys/netinet/ip_output.c | 2
-rw-r--r--  freebsd/sys/netinet/ip_reass.c | 56
-rw-r--r--  freebsd/sys/netinet/sctp_asconf.c | 111
-rw-r--r--  freebsd/sys/netinet/sctp_dtrace_define.h | 177
-rw-r--r--  freebsd/sys/netinet/sctp_indata.c | 52
-rw-r--r--  freebsd/sys/netinet/sctp_input.c | 36
-rw-r--r--  freebsd/sys/netinet/sctp_os_bsd.h | 7
-rw-r--r--  freebsd/sys/netinet/sctp_output.c | 13
-rw-r--r--  freebsd/sys/netinet/sctp_pcb.c | 9
-rw-r--r--  freebsd/sys/netinet/sctp_pcb.h | 2
-rw-r--r--  freebsd/sys/netinet/sctp_usrreq.c | 10
-rw-r--r--  freebsd/sys/netinet/sctputil.c | 34
-rw-r--r--  freebsd/sys/netinet/sctputil.h | 2
-rw-r--r--  freebsd/sys/netinet/tcp_input.c | 43
-rw-r--r--  freebsd/sys/netinet/tcp_output.c | 14
-rw-r--r--  freebsd/sys/netinet/tcp_subr.c | 2
-rw-r--r--  freebsd/sys/netinet/tcp_timer.c | 9
-rw-r--r--  freebsd/sys/netinet/tcp_timer.h | 3
-rw-r--r--  freebsd/sys/netinet/tcp_usrreq.c | 106
-rw-r--r--  freebsd/sys/netinet/tcp_var.h | 3
-rw-r--r--  freebsd/sys/netinet/udp_usrreq.c | 11
-rw-r--r--  freebsd/sys/netinet/udp_var.h | 3
-rw-r--r--  freebsd/sys/netinet6/dest6.c | 45
-rw-r--r--  freebsd/sys/netinet6/frag6.c | 1107
-rw-r--r--  freebsd/sys/netinet6/icmp6.c | 347
-rw-r--r--  freebsd/sys/netinet6/in6.c | 3
-rw-r--r--  freebsd/sys/netinet6/in6_mcast.c | 24
-rw-r--r--  freebsd/sys/netinet6/in6_pcb.c | 16
-rw-r--r--  freebsd/sys/netinet6/in6_pcb.h | 2
-rw-r--r--  freebsd/sys/netinet6/in6_proto.c | 35
-rw-r--r--  freebsd/sys/netinet6/in6_src.c | 12
-rw-r--r--  freebsd/sys/netinet6/ip6_forward.c | 1
-rw-r--r--  freebsd/sys/netinet6/ip6_input.c | 215
-rw-r--r--  freebsd/sys/netinet6/ip6_mroute.c | 23
-rw-r--r--  freebsd/sys/netinet6/ip6_output.c | 216
-rw-r--r--  freebsd/sys/netinet6/ip6_var.h | 38
-rw-r--r--  freebsd/sys/netinet6/mld6.c | 26
-rw-r--r--  freebsd/sys/netinet6/mld6_var.h | 2
-rw-r--r--  freebsd/sys/netinet6/nd6.c | 138
-rw-r--r--  freebsd/sys/netinet6/nd6.h | 11
-rw-r--r--  freebsd/sys/netinet6/nd6_nbr.c | 128
-rw-r--r--  freebsd/sys/netinet6/nd6_rtr.c | 1032
-rw-r--r--  freebsd/sys/netinet6/raw_ip6.c | 64
-rw-r--r--  freebsd/sys/netinet6/route6.c | 26
-rw-r--r--  freebsd/sys/netinet6/sctp6_usrreq.c | 73
-rw-r--r--  freebsd/sys/netinet6/udp6_usrreq.c | 59
-rw-r--r--  freebsd/sys/netipsec/xform_ah.c | 16
-rw-r--r--  freebsd/sys/netipsec/xform_esp.c | 20
-rw-r--r--  freebsd/sys/netpfil/pf/pf.c | 7
-rw-r--r--  freebsd/sys/opencrypto/cryptodev.c | 124
-rw-r--r--  freebsd/sys/sys/buf.h | 2
-rw-r--r--  freebsd/sys/sys/bus.h | 1
-rw-r--r--  freebsd/sys/sys/conf.h | 2
-rw-r--r--  freebsd/sys/sys/kernel.h | 2
-rw-r--r--  freebsd/sys/sys/linker.h | 5
-rw-r--r--  freebsd/sys/sys/malloc.h | 8
-rw-r--r--  freebsd/sys/sys/mbuf.h | 4
-rw-r--r--  freebsd/sys/sys/mount.h | 1
-rw-r--r--  freebsd/sys/sys/pcpu.h | 24
-rw-r--r--  freebsd/sys/sys/proc.h | 3
-rw-r--r--  freebsd/sys/sys/signalvar.h | 1
-rw-r--r--  freebsd/sys/sys/smp.h | 2
-rw-r--r--  freebsd/sys/sys/sysctl.h | 14
-rw-r--r--  freebsd/sys/sys/systm.h | 7
-rw-r--r--  freebsd/sys/sys/taskqueue.h | 5
-rw-r--r--  freebsd/sys/sys/unpcb.h | 2
-rw-r--r--  freebsd/sys/sys/vnode.h | 4
-rw-r--r--  freebsd/sys/vm/uma_core.c | 42
-rw-r--r--  freebsd/sys/vm/vm_extern.h | 9
144 files changed, 5827 insertions, 5481 deletions
diff --git a/freebsd/sys/arm/freescale/imx/imx_gpio.c b/freebsd/sys/arm/freescale/imx/imx_gpio.c
index 983e4d74..f5b476d7 100644
--- a/freebsd/sys/arm/freescale/imx/imx_gpio.c
+++ b/freebsd/sys/arm/freescale/imx/imx_gpio.c
@@ -870,6 +870,15 @@ imx51_gpio_detach(device_t dev)
return(0);
}
+static phandle_t
+imx51_gpio_get_node(device_t bus, device_t dev)
+{
+ /*
+ * Share controller node with gpiobus device
+ */
+ return ofw_bus_get_node(bus);
+}
+
static device_method_t imx51_gpio_methods[] = {
DEVMETHOD(device_probe, imx51_gpio_probe),
DEVMETHOD(device_attach, imx51_gpio_attach),
@@ -887,6 +896,9 @@ static device_method_t imx51_gpio_methods[] = {
DEVMETHOD(pic_pre_ithread, gpio_pic_pre_ithread),
#endif
+ /* OFW methods */
+ DEVMETHOD(ofw_bus_get_node, imx51_gpio_get_node),
+
/* GPIO protocol */
DEVMETHOD(gpio_get_bus, imx51_gpio_get_bus),
DEVMETHOD(gpio_pin_max, imx51_gpio_pin_max),
diff --git a/freebsd/sys/arm/ti/ti_sdhci.c b/freebsd/sys/arm/ti/ti_sdhci.c
index a2be1f19..e3502099 100644
--- a/freebsd/sys/arm/ti/ti_sdhci.c
+++ b/freebsd/sys/arm/ti/ti_sdhci.c
@@ -483,15 +483,16 @@ ti_sdhci_hw_init(device_t dev)
/*
* The attach() routine has examined fdt data and set flags in
* slot.host.caps to reflect what voltages we can handle. Set those
- * values in the CAPA register. The manual says that these values can
- * only be set once, and that they survive a reset so unless u-boot didn't
- * set this register this code is a no-op.
+ * values in the CAPA register. Empirical testing shows that the
+ * values in this register can be overwritten at any time, but the
+ * manual says that these values should only be set once, "before
+ * initialization" whatever that means, and that they survive a reset.
*/
regval = ti_mmchs_read_4(sc, MMCHS_SD_CAPA);
if (sc->slot.host.caps & MMC_OCR_LOW_VOLTAGE)
regval |= MMCHS_SD_CAPA_VS18;
- if (sc->slot.host.caps & (MMC_OCR_320_330 | MMC_OCR_330_340))
- regval |= MMCHS_SD_CAPA_VS33;
+ if (sc->slot.host.caps & (MMC_OCR_290_300 | MMC_OCR_300_310))
+ regval |= MMCHS_SD_CAPA_VS30;
ti_mmchs_write_4(sc, MMCHS_SD_CAPA, regval);
/* Set initial host configuration (1-bit, std speed, pwr off). */
@@ -525,20 +526,16 @@ ti_sdhci_attach(device_t dev)
}
/*
- * The hardware can inherently do dual-voltage (1p8v, 3p3v) on the first
+ * The hardware can inherently do dual-voltage (1p8v, 3p0v) on the first
* device, and only 1p8v on other devices unless an external transceiver
* is used. The only way we could know about a transceiver is fdt data.
* Note that we have to do this before calling ti_sdhci_hw_init() so
- * that it can set the right values in the CAPA register, which can only
- * be done once and never reset.
+ * that it can set the right values in the CAPA register.
*/
- if (OF_hasprop(node, "ti,dual-volt")) {
- sc->slot.host.caps |= MMC_OCR_LOW_VOLTAGE | MMC_OCR_320_330 | MMC_OCR_330_340;
- } else if (OF_hasprop(node, "no-1-8-v")) {
- sc->slot.host.caps |= MMC_OCR_320_330 | MMC_OCR_330_340;
- } else
- sc->slot.host.caps |= MMC_OCR_LOW_VOLTAGE;
-
+ sc->slot.host.caps |= MMC_OCR_LOW_VOLTAGE;
+ if (sc->mmchs_clk_id == MMC1_CLK || OF_hasprop(node, "ti,dual-volt")) {
+ sc->slot.host.caps |= MMC_OCR_290_300 | MMC_OCR_300_310;
+ }
/*
* Set the offset from the device's memory start to the MMCHS registers.
diff --git a/freebsd/sys/cam/cam_periph.h b/freebsd/sys/cam/cam_periph.h
index b087b872..d5dcfed0 100644
--- a/freebsd/sys/cam/cam_periph.h
+++ b/freebsd/sys/cam/cam_periph.h
@@ -132,6 +132,8 @@ struct cam_periph {
#define CAM_PERIPH_RUN_TASK 0x40
#define CAM_PERIPH_FREE 0x80
#define CAM_PERIPH_ANNOUNCED 0x100
+#define CAM_PERIPH_RECOVERY_WAIT 0x200
+#define CAM_PERIPH_RECOVERY_WAIT_FAILED 0x400
uint32_t scheduled_priority;
uint32_t immediate_priority;
int periph_allocating;
diff --git a/freebsd/sys/cam/scsi/scsi_all.c b/freebsd/sys/cam/scsi/scsi_all.c
index 99d82fee..b547fbbd 100644
--- a/freebsd/sys/cam/scsi/scsi_all.c
+++ b/freebsd/sys/cam/scsi/scsi_all.c
@@ -1115,7 +1115,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x04, 0x08, SS_FATAL | EBUSY,
"Logical unit not ready, long write in progress") },
/* DTLPWROMAEBKVF */
- { SST(0x04, 0x09, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x09, SS_FATAL | EBUSY,
"Logical unit not ready, self-test in progress") },
/* DTLPWROMAEBKVF */
{ SST(0x04, 0x0A, SS_WAIT | ENXIO,
@@ -1133,37 +1133,37 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x04, 0x0E, SS_RDEF, /* XXX TBD */
"Logical unit not ready, security session in progress") },
/* DT WROM B */
- { SST(0x04, 0x10, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x10, SS_FATAL | ENODEV,
"Logical unit not ready, auxiliary memory not accessible") },
/* DT WRO AEB VF */
- { SST(0x04, 0x11, SS_WAIT | EBUSY,
+ { SST(0x04, 0x11, SS_WAIT | ENXIO,
"Logical unit not ready, notify (enable spinup) required") },
/* M V */
- { SST(0x04, 0x12, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x12, SS_FATAL | ENXIO,
"Logical unit not ready, offline") },
/* DT R MAEBKV */
- { SST(0x04, 0x13, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x13, SS_WAIT | EBUSY,
"Logical unit not ready, SA creation in progress") },
/* D B */
- { SST(0x04, 0x14, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x14, SS_WAIT | ENOSPC,
"Logical unit not ready, space allocation in progress") },
/* M */
- { SST(0x04, 0x15, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x15, SS_FATAL | ENXIO,
"Logical unit not ready, robotics disabled") },
/* M */
- { SST(0x04, 0x16, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x16, SS_FATAL | ENXIO,
"Logical unit not ready, configuration required") },
/* M */
- { SST(0x04, 0x17, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x17, SS_FATAL | ENXIO,
"Logical unit not ready, calibration required") },
/* M */
- { SST(0x04, 0x18, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x18, SS_FATAL | ENXIO,
"Logical unit not ready, a door is open") },
/* M */
- { SST(0x04, 0x19, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x19, SS_FATAL | ENODEV,
"Logical unit not ready, operating in sequential mode") },
/* DT B */
- { SST(0x04, 0x1A, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x1A, SS_WAIT | EBUSY,
"Logical unit not ready, START/STOP UNIT command in progress") },
/* D B */
{ SST(0x04, 0x1B, SS_WAIT | EBUSY,
@@ -1172,7 +1172,7 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x04, 0x1C, SS_START | SSQ_DECREMENT_COUNT | ENXIO,
"Logical unit not ready, additional power use not yet granted") },
/* D */
- { SST(0x04, 0x1D, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x1D, SS_WAIT | EBUSY,
"Logical unit not ready, configuration in progress") },
/* D */
{ SST(0x04, 0x1E, SS_FATAL | ENXIO,
@@ -1181,14 +1181,20 @@ static struct asc_table_entry asc_table[] = {
{ SST(0x04, 0x1F, SS_FATAL | ENXIO,
"Logical unit not ready, microcode download required") },
/* DTLPWROMAEBKVF */
- { SST(0x04, 0x20, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x20, SS_FATAL | ENXIO,
"Logical unit not ready, logical unit reset required") },
/* DTLPWROMAEBKVF */
- { SST(0x04, 0x21, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x21, SS_FATAL | ENXIO,
"Logical unit not ready, hard reset required") },
/* DTLPWROMAEBKVF */
- { SST(0x04, 0x22, SS_RDEF, /* XXX TBD */
+ { SST(0x04, 0x22, SS_FATAL | ENXIO,
"Logical unit not ready, power cycle required") },
+ /* D */
+ { SST(0x04, 0x23, SS_FATAL | ENXIO,
+ "Logical unit not ready, affiliation required") },
+ /* D */
+ { SST(0x04, 0x24, SS_FATAL | EBUSY,
+ "Depopulation in progress") },
/* DTL WROMAEBKVF */
{ SST(0x05, 0x00, SS_RDEF,
"Logical unit does not respond to selection") },
@@ -3387,7 +3393,7 @@ scsi_error_action(struct ccb_scsiio *csio, struct scsi_inquiry_data *inq_data,
if (!scsi_extract_sense_ccb((union ccb *)csio,
&error_code, &sense_key, &asc, &ascq)) {
- action = SS_RETRY | SSQ_DECREMENT_COUNT | SSQ_PRINT_SENSE | EIO;
+ action = SS_RDEF;
} else if ((error_code == SSD_DEFERRED_ERROR)
|| (error_code == SSD_DESC_DEFERRED_ERROR)) {
/*
diff --git a/freebsd/sys/dev/dwc/if_dwc.c b/freebsd/sys/dev/dwc/if_dwc.c
index 895fdfe5..7e249414 100644
--- a/freebsd/sys/dev/dwc/if_dwc.c
+++ b/freebsd/sys/dev/dwc/if_dwc.c
@@ -1239,16 +1239,13 @@ dwc_reset(device_t dev)
if (OF_hasprop(node, "snps,reset-active-low"))
pin_value = GPIO_PIN_HIGH;
- if (flags & GPIO_ACTIVE_LOW)
- pin_value = !pin_value;
-
GPIO_PIN_SETFLAGS(gpio, pin, GPIO_PIN_OUTPUT);
GPIO_PIN_SET(gpio, pin, pin_value);
- DELAY(delay_prop[0]);
+ DELAY(delay_prop[0] * 5);
GPIO_PIN_SET(gpio, pin, !pin_value);
- DELAY(delay_prop[1]);
+ DELAY(delay_prop[1] * 5);
GPIO_PIN_SET(gpio, pin, pin_value);
- DELAY(delay_prop[2]);
+ DELAY(delay_prop[2] * 5);
#endif /* __rtems__ */
return (0);
diff --git a/freebsd/sys/dev/e1000/e1000_api.c b/freebsd/sys/dev/e1000/e1000_api.c
index c351901c..21b11bc2 100644
--- a/freebsd/sys/dev/e1000/e1000_api.c
+++ b/freebsd/sys/dev/e1000/e1000_api.c
@@ -321,6 +321,7 @@ s32 e1000_set_mac_type(struct e1000_hw *hw)
case E1000_DEV_ID_PCH_ICP_I219_V8:
case E1000_DEV_ID_PCH_ICP_I219_LM9:
case E1000_DEV_ID_PCH_ICP_I219_V9:
+ case E1000_DEV_ID_PCH_ICP_I219_V10:
mac->type = e1000_pch_cnp;
break;
case E1000_DEV_ID_82575EB_COPPER:
diff --git a/freebsd/sys/dev/e1000/e1000_hw.h b/freebsd/sys/dev/e1000/e1000_hw.h
index 7e4e7f1a..6c0b5203 100644
--- a/freebsd/sys/dev/e1000/e1000_hw.h
+++ b/freebsd/sys/dev/e1000/e1000_hw.h
@@ -155,6 +155,7 @@ struct e1000_hw;
#define E1000_DEV_ID_PCH_ICP_I219_V8 0x15E0
#define E1000_DEV_ID_PCH_ICP_I219_LM9 0x15E1
#define E1000_DEV_ID_PCH_ICP_I219_V9 0x15E2
+#define E1000_DEV_ID_PCH_ICP_I219_V10 0x0D4F
#define E1000_DEV_ID_82576 0x10C9
#define E1000_DEV_ID_82576_FIBER 0x10E6
#define E1000_DEV_ID_82576_SERDES 0x10E7
diff --git a/freebsd/sys/dev/e1000/if_em.c b/freebsd/sys/dev/e1000/if_em.c
index 32eb4afe..9b52c35a 100644
--- a/freebsd/sys/dev/e1000/if_em.c
+++ b/freebsd/sys/dev/e1000/if_em.c
@@ -176,6 +176,7 @@ static pci_vendor_info_t em_vendor_info_array[] =
PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V8, "Intel(R) PRO/1000 Network Connection"),
PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_LM9, "Intel(R) PRO/1000 Network Connection"),
PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V9, "Intel(R) PRO/1000 Network Connection"),
+ PVID(0x8086, E1000_DEV_ID_PCH_ICP_I219_V10, "Intel(R) PRO/1000 Network Connection"),
/* required last entry */
PVID_END
};
@@ -1397,10 +1398,8 @@ em_intr(void *arg)
IFDI_INTR_DISABLE(ctx);
/* Link status change */
- if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
- adapter->hw.mac.get_link_status = 1;
- iflib_admin_intr_deferred(ctx);
- }
+ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
+ em_handle_link(ctx);
if (reg_icr & E1000_ICR_RXO)
adapter->rx_overruns++;
@@ -1483,22 +1482,24 @@ em_msix_link(void *arg)
if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
em_handle_link(adapter->ctx);
- } else {
- E1000_WRITE_REG(&adapter->hw, E1000_IMS,
- EM_MSIX_LINK | E1000_IMS_LSC);
- if (adapter->hw.mac.type >= igb_mac_min)
- E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
+ } else if (adapter->hw.mac.type == e1000_82574) {
+ /* Only re-arm 82574 if em_if_update_admin_status() won't. */
+ E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK |
+ E1000_IMS_LSC);
}
- /*
- * Because we must read the ICR for this interrupt
- * it may clear other causes using autoclear, for
- * this reason we simply create a soft interrupt
- * for all these vectors.
- */
- if (reg_icr && adapter->hw.mac.type < igb_mac_min) {
- E1000_WRITE_REG(&adapter->hw,
- E1000_ICS, adapter->ims);
+ if (adapter->hw.mac.type == e1000_82574) {
+ /*
+ * Because we must read the ICR for this interrupt it may
+ * clear other causes using autoclear, for this reason we
+ * simply create a soft interrupt for all these vectors.
+ */
+ if (reg_icr)
+ E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims);
+ } else {
+ /* Re-arm unconditionally */
+ E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
+ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
}
return (FILTER_HANDLED);
@@ -1514,7 +1515,6 @@ em_handle_link(void *context)
iflib_admin_intr_deferred(ctx);
}
-
/*********************************************************************
*
* Media Ioctl callback
@@ -1831,14 +1831,15 @@ em_if_update_admin_status(if_ctx_t ctx)
em_update_stats_counters(adapter);
/* Reset LAA into RAR[0] on 82571 */
- if ((adapter->hw.mac.type == e1000_82571) &&
- e1000_get_laa_state_82571(&adapter->hw))
- e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
+ if (hw->mac.type == e1000_82571 && e1000_get_laa_state_82571(hw))
+ e1000_rar_set(hw, hw->mac.addr, 0);
- if (adapter->hw.mac.type < em_mac_min)
+ if (hw->mac.type < em_mac_min)
lem_smartspeed(adapter);
-
- E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC);
+ else if (hw->mac.type == e1000_82574 &&
+ adapter->intr_type == IFLIB_INTR_MSIX)
+ E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK |
+ E1000_IMS_LSC);
}
static void
@@ -3905,6 +3906,7 @@ em_disable_aspm(struct adapter *adapter)
static void
em_update_stats_counters(struct adapter *adapter)
{
+ u64 prev_xoffrxc = adapter->stats.xoffrxc;
if(adapter->hw.phy.media_type == e1000_media_type_copper ||
(E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
@@ -3928,7 +3930,8 @@ em_update_stats_counters(struct adapter *adapter)
** For watchdog management we need to know if we have been
** paused during the last interval, so capture that here.
*/
- adapter->shared->isc_pause_frames = adapter->stats.xoffrxc;
+ if (adapter->stats.xoffrxc != prev_xoffrxc)
+ adapter->shared->isc_pause_frames = 1;
adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
diff --git a/freebsd/sys/dev/gpio/gpiobus.c b/freebsd/sys/dev/gpio/gpiobus.c
index d256ee4a..25daf717 100644
--- a/freebsd/sys/dev/gpio/gpiobus.c
+++ b/freebsd/sys/dev/gpio/gpiobus.c
@@ -80,6 +80,18 @@ static int gpiobus_pin_get(device_t, device_t, uint32_t, unsigned int*);
static int gpiobus_pin_toggle(device_t, device_t, uint32_t);
/*
+ * gpiobus_pin flags
+ * The flags in struct gpiobus_pin are not related to the flags used by the
+ * low-level controller driver in struct gpio_pin. Currently, only pins
+ * acquired via FDT data have gpiobus_pin.flags set, sourced from the flags in
+ * the FDT properties. In theory, these flags are defined per-platform. In
+ * practice they are always the flags from the dt-bindings/gpio/gpio.h file.
+ * The only one of those flags we currently support is for handling active-low
+ * pins, so we just define that flag here instead of including a GPL'd header.
+ */
+#define GPIO_ACTIVE_LOW 1
+
+/*
* XXX -> Move me to better place - gpio_subr.c?
* Also, this function must be changed when interrupt configuration
* data will be moved into struct resource.
@@ -137,6 +149,114 @@ gpio_check_flags(uint32_t caps, uint32_t flags)
return (0);
}
+int
+gpio_pin_get_by_bus_pinnum(device_t busdev, uint32_t pinnum, gpio_pin_t *ppin)
+{
+ gpio_pin_t pin;
+ int err;
+
+ err = gpiobus_acquire_pin(busdev, pinnum);
+ if (err != 0)
+ return (EBUSY);
+
+ pin = malloc(sizeof(*pin), M_DEVBUF, M_WAITOK | M_ZERO);
+
+ pin->dev = device_get_parent(busdev);
+ pin->pin = pinnum;
+ pin->flags = 0;
+
+ *ppin = pin;
+ return (0);
+}
+
+int
+gpio_pin_get_by_child_index(device_t childdev, uint32_t idx, gpio_pin_t *ppin)
+{
+ struct gpiobus_ivar *devi;
+
+ devi = GPIOBUS_IVAR(childdev);
+ if (idx >= devi->npins)
+ return (EINVAL);
+
+ return (gpio_pin_get_by_bus_pinnum(device_get_parent(childdev),
+ devi->pins[idx], ppin));
+}
+
+int
+gpio_pin_getcaps(gpio_pin_t pin, uint32_t *caps)
+{
+
+ KASSERT(pin != NULL, ("GPIO pin is NULL."));
+ KASSERT(pin->dev != NULL, ("GPIO pin device is NULL."));
+ return (GPIO_PIN_GETCAPS(pin->dev, pin->pin, caps));
+}
+
+int
+gpio_pin_is_active(gpio_pin_t pin, bool *active)
+{
+ int rv;
+ uint32_t tmp;
+
+ KASSERT(pin != NULL, ("GPIO pin is NULL."));
+ KASSERT(pin->dev != NULL, ("GPIO pin device is NULL."));
+ rv = GPIO_PIN_GET(pin->dev, pin->pin, &tmp);
+ if (rv != 0) {
+ return (rv);
+ }
+
+ if (pin->flags & GPIO_ACTIVE_LOW)
+ *active = tmp == 0;
+ else
+ *active = tmp != 0;
+ return (0);
+}
+
+void
+gpio_pin_release(gpio_pin_t gpio)
+{
+ device_t busdev;
+
+ if (gpio == NULL)
+ return;
+
+ KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL."));
+
+ busdev = GPIO_GET_BUS(gpio->dev);
+ if (busdev != NULL)
+ gpiobus_release_pin(busdev, gpio->pin);
+
+ free(gpio, M_DEVBUF);
+}
+
+int
+gpio_pin_set_active(gpio_pin_t pin, bool active)
+{
+ int rv;
+ uint32_t tmp;
+
+ if (pin->flags & GPIO_ACTIVE_LOW)
+ tmp = active ? 0 : 1;
+ else
+ tmp = active ? 1 : 0;
+
+ KASSERT(pin != NULL, ("GPIO pin is NULL."));
+ KASSERT(pin->dev != NULL, ("GPIO pin device is NULL."));
+ rv = GPIO_PIN_SET(pin->dev, pin->pin, tmp);
+ return (rv);
+}
+
+int
+gpio_pin_setflags(gpio_pin_t pin, uint32_t flags)
+{
+ int rv;
+
+ KASSERT(pin != NULL, ("GPIO pin is NULL."));
+ KASSERT(pin->dev != NULL, ("GPIO pin device is NULL."));
+
+ rv = GPIO_PIN_SETFLAGS(pin->dev, pin->pin, flags);
+ return (rv);
+}
+
static void
gpiobus_print_pins(struct gpiobus_ivar *devi, char *buf, size_t buflen)
{
@@ -372,8 +492,6 @@ gpiobus_parse_pins(struct gpiobus_softc *sc, device_t child, int mask)
devi->pins[npins++] = i;
}
- if (gpiobus_acquire_child_pins(sc->sc_busdev, child) != 0)
- return (EINVAL);
return (0);
}
@@ -427,8 +545,6 @@ gpiobus_parse_pin_list(struct gpiobus_softc *sc, device_t child,
p = endp + 1;
}
- if (gpiobus_acquire_child_pins(sc->sc_busdev, child) != 0)
- return (EINVAL);
return (0);
}
@@ -602,6 +718,21 @@ gpiobus_add_child(device_t dev, u_int order, const char *name, int unit)
return (child);
}
+static int
+gpiobus_rescan(device_t dev)
+{
+
+ /*
+ * Re-scan is supposed to remove and add children, but if someone has
+ * deleted the hints for a child we attached earlier, we have no easy
+ * way to handle that. So this just attaches new children for whom new
+ * hints or drivers have arrived since we last tried.
+ */
+ bus_enumerate_hinted_children(dev);
+ bus_generic_attach(dev);
+ return (0);
+}
+
static void
gpiobus_hinted_child(device_t bus, const char *dname, int dunit)
{
@@ -611,6 +742,10 @@ gpiobus_hinted_child(device_t bus, const char *dname, int dunit)
const char *pins;
int irq, pinmask;
+ if (device_find_child(bus, dname, dunit) != NULL) {
+ return;
+ }
+
child = BUS_ADD_CHILD(bus, 0, dname, dunit);
devi = GPIOBUS_IVAR(child);
if (resource_int_value(dname, dunit, "pins", &pinmask) == 0) {
@@ -963,6 +1098,7 @@ static device_method_t gpiobus_methods[] = {
DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
DEVMETHOD(bus_get_resource_list, gpiobus_get_resource_list),
DEVMETHOD(bus_add_child, gpiobus_add_child),
+ DEVMETHOD(bus_rescan, gpiobus_rescan),
DEVMETHOD(bus_probe_nomatch, gpiobus_probe_nomatch),
DEVMETHOD(bus_print_child, gpiobus_print_child),
DEVMETHOD(bus_child_pnpinfo_str, gpiobus_child_pnpinfo_str),
diff --git a/freebsd/sys/dev/gpio/gpiobusvar.h b/freebsd/sys/dev/gpio/gpiobusvar.h
index 3ba8993e..ff49784a 100644
--- a/freebsd/sys/dev/gpio/gpiobusvar.h
+++ b/freebsd/sys/dev/gpio/gpiobusvar.h
@@ -141,7 +141,7 @@ int ofw_gpiobus_parse_gpios(device_t, char *, struct gpiobus_pin **);
void ofw_gpiobus_register_provider(device_t);
void ofw_gpiobus_unregister_provider(device_t);
-/* Consumers interface. */
+/* Acquire a pin by parsing FDT data. */
int gpio_pin_get_by_ofw_name(device_t consumer, phandle_t node,
char *name, gpio_pin_t *gpio);
int gpio_pin_get_by_ofw_idx(device_t consumer, phandle_t node,
@@ -150,14 +150,29 @@ int gpio_pin_get_by_ofw_property(device_t consumer, phandle_t node,
char *name, gpio_pin_t *gpio);
int gpio_pin_get_by_ofw_propidx(device_t consumer, phandle_t node,
char *name, int idx, gpio_pin_t *gpio);
+#endif /* FDT */
+
+/* Acquire a pin by bus and pin number. */
+int gpio_pin_get_by_bus_pinnum(device_t _bus, uint32_t _pinnum, gpio_pin_t *_gp);
+
+/* Acquire a pin by child and index (used by direct children of gpiobus). */
+int gpio_pin_get_by_child_index(device_t _child, uint32_t _idx, gpio_pin_t *_gp);
+
+/* Release a pin acquired via any gpio_pin_get_xxx() function. */
void gpio_pin_release(gpio_pin_t gpio);
+
+/* Work with gpio pins acquired using the functions above. */
int gpio_pin_getcaps(gpio_pin_t pin, uint32_t *caps);
int gpio_pin_is_active(gpio_pin_t pin, bool *active);
int gpio_pin_set_active(gpio_pin_t pin, bool active);
int gpio_pin_setflags(gpio_pin_t pin, uint32_t flags);
-#endif
struct resource *gpio_alloc_intr_resource(device_t consumer_dev, int *rid,
u_int alloc_flags, gpio_pin_t pin, uint32_t intr_mode);
+
+/*
+ * Functions shared between gpiobus and other bus classes that derive from it;
+ * these should not be called directly by other drivers.
+ */
int gpio_check_flags(uint32_t, uint32_t);
device_t gpiobus_attach_bus(device_t);
int gpiobus_detach_bus(device_t);
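The consumer API declared above no longer depends on FDT: a pin can now also be acquired by bus/pin number or by a child's pin index. A minimal sketch of a hypothetical gpiobus child driver using these functions (driver name, pin index, and the trimmed error handling are illustrative only, not part of this change):

	static int
	foo_gpio_attach(device_t dev)
	{
		gpio_pin_t pin;
		bool active;
		int error;

		/* Acquire pin 0 of this child, as mapped via hints or FDT. */
		error = gpio_pin_get_by_child_index(dev, 0, &pin);
		if (error != 0)
			return (error);

		/* Drive the pin; GPIO_ACTIVE_LOW polarity is handled for us. */
		gpio_pin_setflags(pin, GPIO_PIN_OUTPUT);
		gpio_pin_set_active(pin, true);
		gpio_pin_is_active(pin, &active);
		device_printf(dev, "pin is %sactive\n", active ? "" : "in");

		gpio_pin_release(pin);
		return (0);
	}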
diff --git a/freebsd/sys/dev/gpio/ofw_gpiobus.c b/freebsd/sys/dev/gpio/ofw_gpiobus.c
index 1cf3aa82..bd617ead 100644
--- a/freebsd/sys/dev/gpio/ofw_gpiobus.c
+++ b/freebsd/sys/dev/gpio/ofw_gpiobus.c
@@ -49,8 +49,6 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/gpiobus_if.h>
-#define GPIO_ACTIVE_LOW 1
-
static struct ofw_gpiobus_devinfo *ofw_gpiobus_setup_devinfo(device_t,
device_t, phandle_t);
static void ofw_gpiobus_destroy_devinfo(device_t, struct ofw_gpiobus_devinfo *);
@@ -146,82 +144,6 @@ gpio_pin_get_by_ofw_name(device_t consumer, phandle_t node,
return (gpio_pin_get_by_ofw_idx(consumer, node, idx, pin));
}
-void
-gpio_pin_release(gpio_pin_t gpio)
-{
- device_t busdev;
-
- if (gpio == NULL)
- return;
-
- KASSERT(gpio->dev != NULL, ("invalid pin state"));
-
- busdev = GPIO_GET_BUS(gpio->dev);
- if (busdev != NULL)
- gpiobus_release_pin(busdev, gpio->pin);
-
- /* XXXX Unreserve pin. */
- free(gpio, M_DEVBUF);
-}
-
-int
-gpio_pin_getcaps(gpio_pin_t pin, uint32_t *caps)
-{
-
- KASSERT(pin != NULL, ("GPIO pin is NULL."));
- KASSERT(pin->dev != NULL, ("GPIO pin device is NULL."));
- return (GPIO_PIN_GETCAPS(pin->dev, pin->pin, caps));
-}
-
-int
-gpio_pin_is_active(gpio_pin_t pin, bool *active)
-{
- int rv;
- uint32_t tmp;
-
- KASSERT(pin != NULL, ("GPIO pin is NULL."));
- KASSERT(pin->dev != NULL, ("GPIO pin device is NULL."));
- rv = GPIO_PIN_GET(pin->dev, pin->pin, &tmp);
- if (rv != 0) {
- return (rv);
- }
-
- if (pin->flags & GPIO_ACTIVE_LOW)
- *active = tmp == 0;
- else
- *active = tmp != 0;
- return (0);
-}
-
-int
-gpio_pin_set_active(gpio_pin_t pin, bool active)
-{
- int rv;
- uint32_t tmp;
-
- if (pin->flags & GPIO_ACTIVE_LOW)
- tmp = active ? 0 : 1;
- else
- tmp = active ? 1 : 0;
-
- KASSERT(pin != NULL, ("GPIO pin is NULL."));
- KASSERT(pin->dev != NULL, ("GPIO pin device is NULL."));
- rv = GPIO_PIN_SET(pin->dev, pin->pin, tmp);
- return (rv);
-}
-
-int
-gpio_pin_setflags(gpio_pin_t pin, uint32_t flags)
-{
- int rv;
-
- KASSERT(pin != NULL, ("GPIO pin is NULL."));
- KASSERT(pin->dev != NULL, ("GPIO pin device is NULL."));
-
- rv = GPIO_PIN_SETFLAGS(pin->dev, pin->pin, flags);
- return (rv);
-}
-
/*
* OFW_GPIOBUS driver.
*/
@@ -498,7 +420,7 @@ ofw_gpiobus_probe(device_t dev)
return (ENXIO);
device_set_desc(dev, "OFW GPIO bus");
- return (0);
+ return (BUS_PROBE_DEFAULT);
}
static int
@@ -517,6 +439,8 @@ ofw_gpiobus_attach(device_t dev)
*/
for (child = OF_child(ofw_bus_get_node(dev)); child != 0;
child = OF_peer(child)) {
+ if (OF_hasprop(child, "gpio-hog"))
+ continue;
if (!OF_hasprop(child, "gpios"))
continue;
if (ofw_gpiobus_add_fdt_child(dev, NULL, child) == NULL)
diff --git a/freebsd/sys/dev/kbd/kbd.c b/freebsd/sys/dev/kbd/kbd.c
index b157e57e..235e8f78 100644
--- a/freebsd/sys/dev/kbd/kbd.c
+++ b/freebsd/sys/dev/kbd/kbd.c
@@ -178,6 +178,10 @@ kbd_add_driver(keyboard_driver_t *driver)
{
if (SLIST_NEXT(driver, link))
return (EINVAL);
+ if (driver->kbdsw->get_fkeystr == NULL)
+ driver->kbdsw->get_fkeystr = genkbd_get_fkeystr;
+ if (driver->kbdsw->diag == NULL)
+ driver->kbdsw->diag = genkbd_diag;
SLIST_INSERT_HEAD(&keyboard_drivers, driver, link);
return (0);
}
@@ -1516,3 +1520,20 @@ kbd_ev_event(keyboard_t *kbd, uint16_t type, uint16_t code, int32_t value)
kbdd_ioctl(kbd, KDSETREPEAT, (caddr_t)delay);
}
}
+
+static void
+kbd_drv_init(void)
+{
+ const keyboard_driver_t **list;
+ const keyboard_driver_t *p;
+
+ SET_FOREACH(list, kbddriver_set) {
+ p = *list;
+ if (p->kbdsw->get_fkeystr == NULL)
+ p->kbdsw->get_fkeystr = genkbd_get_fkeystr;
+ if (p->kbdsw->diag == NULL)
+ p->kbdsw->diag = genkbd_diag;
+ }
+}
+
+SYSINIT(kbd_drv_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, kbd_drv_init, NULL);
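Because kbd_add_driver() and the new kbd_drv_init() SYSINIT both fill in genkbd_get_fkeystr/genkbd_diag when a driver leaves those methods NULL, a keyboard driver no longer has to provide them itself. A hypothetical driver definition relying on the defaults (names are illustrative and the remaining mandatory keyboard_switch_t methods are elided):

	static keyboard_switch_t foosw = {
		.probe		= foo_probe,
		.init		= foo_init,
		.intr		= foo_intr,
		/* ... other mandatory methods ... */
		.get_fkeystr	= NULL,	/* defaulted to genkbd_get_fkeystr */
		.diag		= NULL,	/* defaulted to genkbd_diag */
	};

	KEYBOARD_DRIVER(foo, foosw, foo_configure);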
diff --git a/freebsd/sys/dev/kbd/kbdreg.h b/freebsd/sys/dev/kbd/kbdreg.h
index 07c4cfd9..886b6c49 100644
--- a/freebsd/sys/dev/kbd/kbdreg.h
+++ b/freebsd/sys/dev/kbd/kbdreg.h
@@ -205,14 +205,19 @@ typedef struct keyboard_switch {
#define kbdd_poll(kbd, on) \
(*kbdsw[(kbd)->kb_index]->poll)((kbd), (on))
#define kbdd_diag(kbd, level) \
- (*kbdsw[(kbd)->kb_index]->diag)((kbd), (leve))
+ (*kbdsw[(kbd)->kb_index]->diag)((kbd), (level))
-/* keyboard driver */
+/*
+ * Keyboard driver definition. Some of these should be immutable after definition
+ * time, e.g. one shouldn't be able to rename a driver or use a different kbdsw
+ * entirely, but patching individual methods is acceptable.
+ */
typedef struct keyboard_driver {
SLIST_ENTRY(keyboard_driver) link;
- char *name;
- keyboard_switch_t *kbdsw;
- int (*configure)(int); /* backdoor for the console driver */
+ const char * const name;
+ keyboard_switch_t * const kbdsw;
+ /* backdoor for the console driver */
+ int (* const configure)(int);
} keyboard_driver_t;
#ifdef _KERNEL
diff --git a/freebsd/sys/dev/mii/mii.c b/freebsd/sys/dev/mii/mii.c
index 2ed40543..d0428f24 100644
--- a/freebsd/sys/dev/mii/mii.c
+++ b/freebsd/sys/dev/mii/mii.c
@@ -62,6 +62,7 @@ MODULE_VERSION(miibus, 1);
#include <rtems/bsd/local/miibus_if.h>
static device_attach_t miibus_attach;
+static bus_child_detached_t miibus_child_detached;
static bus_child_location_str_t miibus_child_location_str;
static bus_child_pnpinfo_str_t miibus_child_pnpinfo_str;
static device_detach_t miibus_detach;
@@ -87,6 +88,7 @@ static device_method_t miibus_methods[] = {
/* bus interface */
DEVMETHOD(bus_print_child, miibus_print_child),
DEVMETHOD(bus_read_ivar, miibus_read_ivar),
+ DEVMETHOD(bus_child_detached, miibus_child_detached),
DEVMETHOD(bus_child_pnpinfo_str, miibus_child_pnpinfo_str),
DEVMETHOD(bus_child_location_str, miibus_child_location_str),
DEVMETHOD(bus_hinted_child, miibus_hinted_child),
@@ -162,15 +164,27 @@ static int
miibus_detach(device_t dev)
{
struct mii_data *mii;
+ struct miibus_ivars *ivars;
+ ivars = device_get_ivars(dev);
bus_generic_detach(dev);
mii = device_get_softc(dev);
ifmedia_removeall(&mii->mii_media);
+ free(ivars, M_DEVBUF);
mii->mii_ifp = NULL;
return (0);
}
+static void
+miibus_child_detached(device_t dev, device_t child)
+{
+ struct mii_attach_args *args;
+
+ args = device_get_ivars(child);
+ free(args, M_DEVBUF);
+}
+
static int
miibus_print_child(device_t dev, device_t child)
{
diff --git a/freebsd/sys/dev/nvme/nvme.c b/freebsd/sys/dev/nvme/nvme.c
index 20b328c9..00759aa3 100644
--- a/freebsd/sys/dev/nvme/nvme.c
+++ b/freebsd/sys/dev/nvme/nvme.c
@@ -134,25 +134,6 @@ nvme_attach(device_t dev)
int status;
status = nvme_ctrlr_construct(ctrlr, dev);
-
- if (status != 0) {
- nvme_ctrlr_destruct(ctrlr, dev);
- return (status);
- }
-
- /*
- * Reset controller twice to ensure we do a transition from cc.en==1 to
- * cc.en==0. This is because we don't really know what status the
- * controller was left in when boot handed off to OS. Linux doesn't do
- * this, however. If we adopt that policy, see also nvme_ctrlr_resume().
- */
- status = nvme_ctrlr_hw_reset(ctrlr);
- if (status != 0) {
- nvme_ctrlr_destruct(ctrlr, dev);
- return (status);
- }
-
- status = nvme_ctrlr_hw_reset(ctrlr);
if (status != 0) {
nvme_ctrlr_destruct(ctrlr, dev);
return (status);
diff --git a/freebsd/sys/dev/nvme/nvme.h b/freebsd/sys/dev/nvme/nvme.h
index 16b9aa5f..21ae79cb 100644
--- a/freebsd/sys/dev/nvme/nvme.h
+++ b/freebsd/sys/dev/nvme/nvme.h
@@ -1561,9 +1561,19 @@ struct nvme_get_nsid {
uint32_t nsid;
};
+struct nvme_hmb_desc {
+ uint64_t addr;
+ uint32_t size;
+ uint32_t reserved;
+};
+
#define nvme_completion_is_error(cpl) \
(NVME_STATUS_GET_SC((cpl)->status) != 0 || NVME_STATUS_GET_SCT((cpl)->status) != 0)
+#ifdef __rtems__
+/* This function is also used by user-space programs */
+#define nvme_strvis _bsd_nvme_strvis
+#endif /* __rtems__ */
void nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen);
#ifdef _KERNEL
@@ -1596,6 +1606,8 @@ int nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
/* Admin functions */
void nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
uint8_t feature, uint32_t cdw11,
+ uint32_t cdw12, uint32_t cdw13,
+ uint32_t cdw14, uint32_t cdw15,
void *payload, uint32_t payload_size,
nvme_cb_fn_t cb_fn, void *cb_arg);
void nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr,
diff --git a/freebsd/sys/dev/nvme/nvme_ctrlr.c b/freebsd/sys/dev/nvme/nvme_ctrlr.c
index 86cabfba..2c19e694 100644
--- a/freebsd/sys/dev/nvme/nvme_ctrlr.c
+++ b/freebsd/sys/dev/nvme/nvme_ctrlr.c
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/uio.h>
#include <sys/endian.h>
+#include <vm/vm.h>
#include "nvme_private.h"
#ifdef __rtems__
@@ -66,6 +67,11 @@ nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
int error;
qpair = &ctrlr->adminq;
+ qpair->id = 0;
+#ifndef __rtems__
+ qpair->cpu = CPU_FFS(&cpuset_domain[ctrlr->domain]) - 1;
+ qpair->domain = ctrlr->domain;
+#endif /* __rtems__ */
num_entries = NVME_ADMIN_ENTRIES;
TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries);
@@ -84,34 +90,39 @@ nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
* The admin queue's max xfer size is treated differently than the
* max I/O xfer size. 16KB is sufficient here - maybe even less?
*/
- error = nvme_qpair_construct(qpair,
- 0, /* qpair ID */
- 0, /* vector */
- num_entries,
- NVME_ADMIN_TRACKERS,
- ctrlr);
+ error = nvme_qpair_construct(qpair, num_entries, NVME_ADMIN_TRACKERS,
+ ctrlr);
return (error);
}
+#define QP(ctrlr, c) ((c) * (ctrlr)->num_io_queues / mp_ncpus)
+
static int
nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
{
struct nvme_qpair *qpair;
uint32_t cap_lo;
uint16_t mqes;
- int i, error, num_entries, num_trackers;
-
- num_entries = NVME_IO_ENTRIES;
- TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);
+ int c, error, i, n;
+ int num_entries, num_trackers, max_entries;
/*
- * NVMe spec sets a hard limit of 64K max entries, but
- * devices may specify a smaller limit, so we need to check
- * the MQES field in the capabilities register.
+ * NVMe spec sets a hard limit of 64K max entries, but devices may
+ * specify a smaller limit, so we need to check the MQES field in the
+ * capabilities register. We have to cap the number of entries to the
+ * current stride allows for in BAR 0/1, otherwise the remainder entries
+ * are inaccessible. MQES should reflect this, and this is just a
+ * fail-safe.
*/
+ max_entries =
+ (rman_get_size(ctrlr->resource) - nvme_mmio_offsetof(doorbell[0])) /
+ (1 << (ctrlr->dstrd + 1));
+ num_entries = NVME_IO_ENTRIES;
+ TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);
cap_lo = nvme_mmio_read_4(ctrlr, cap_lo);
mqes = NVME_CAP_LO_MQES(cap_lo);
num_entries = min(num_entries, mqes + 1);
+ num_entries = min(num_entries, max_entries);
num_trackers = NVME_IO_TRACKERS;
TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers);
@@ -119,9 +130,9 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS);
num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS);
/*
- * No need to have more trackers than entries in the submit queue.
- * Note also that for a queue size of N, we can only have (N-1)
- * commands outstanding, hence the "-1" here.
+ * No need to have more trackers than entries in the submit queue. Note
+ * also that for a queue size of N, we can only have (N-1) commands
+ * outstanding, hence the "-1" here.
*/
num_trackers = min(num_trackers, (num_entries-1));
@@ -133,32 +144,37 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
*/
ctrlr->max_hw_pend_io = num_trackers * ctrlr->num_io_queues * 3 / 4;
- /*
- * This was calculated previously when setting up interrupts, but
- * a controller could theoretically support fewer I/O queues than
- * MSI-X vectors. So calculate again here just to be safe.
- */
- ctrlr->num_cpus_per_ioq = howmany(mp_ncpus, ctrlr->num_io_queues);
-
ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair),
M_NVME, M_ZERO | M_WAITOK);
- for (i = 0; i < ctrlr->num_io_queues; i++) {
+ for (i = c = n = 0; i < ctrlr->num_io_queues; i++, c += n) {
qpair = &ctrlr->ioq[i];
/*
* Admin queue has ID=0. IO queues start at ID=1 -
* hence the 'i+1' here.
- *
+ */
+ qpair->id = i + 1;
+#ifndef __rtems__
+ if (ctrlr->num_io_queues > 1) {
+ /* Find number of CPUs served by this queue. */
+ for (n = 1; QP(ctrlr, c + n) == i; n++)
+ ;
+ /* Shuffle multiple NVMe devices between CPUs. */
+ qpair->cpu = c + (device_get_unit(ctrlr->dev)+n/2) % n;
+ qpair->domain = pcpu_find(qpair->cpu)->pc_domain;
+ } else {
+ qpair->cpu = CPU_FFS(&cpuset_domain[ctrlr->domain]) - 1;
+ qpair->domain = ctrlr->domain;
+ }
+#endif /* __rtems__ */
+
+ /*
* For I/O queues, use the controller-wide max_xfer_size
* calculated in nvme_attach().
*/
- error = nvme_qpair_construct(qpair,
- i+1, /* qpair ID */
- ctrlr->msix_enabled ? i+1 : 0, /* vector */
- num_entries,
- num_trackers,
- ctrlr);
+ error = nvme_qpair_construct(qpair, num_entries, num_trackers,
+ ctrlr);
if (error)
return (error);
@@ -167,8 +183,11 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
* interrupt thread for this controller.
*/
if (ctrlr->num_io_queues > 1)
- bus_bind_intr(ctrlr->dev, qpair->res,
- i * ctrlr->num_cpus_per_ioq);
+#ifndef __rtems__
+ bus_bind_intr(ctrlr->dev, qpair->res, qpair->cpu);
+#else /* __rtems__ */
+ bus_bind_intr(ctrlr->dev, qpair->res, QP(ctrlr, i));
+#endif /* __rtems__ */
}
return (0);
@@ -179,7 +198,7 @@ nvme_ctrlr_fail(struct nvme_controller *ctrlr)
{
int i;
- ctrlr->is_failed = TRUE;
+ ctrlr->is_failed = true;
nvme_admin_qpair_disable(&ctrlr->adminq);
nvme_qpair_fail(&ctrlr->adminq);
if (ctrlr->ioq != NULL) {
@@ -461,6 +480,8 @@ nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
*/
ctrlr->num_io_queues = min(ctrlr->num_io_queues, sq_allocated);
ctrlr->num_io_queues = min(ctrlr->num_io_queues, cq_allocated);
+ if (ctrlr->num_io_queues > vm_ndomains)
+ ctrlr->num_io_queues -= ctrlr->num_io_queues % vm_ndomains;
return (0);
}
@@ -476,7 +497,7 @@ nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
qpair = &ctrlr->ioq[i];
status.done = 0;
- nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector,
+ nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair,
nvme_completion_poll_cb, &status);
nvme_completion_poll(&status);
if (nvme_completion_is_error(&status.cpl)) {
@@ -542,7 +563,7 @@ nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
return (0);
}
-static boolean_t
+static bool
is_log_page_id_valid(uint8_t page_id)
{
@@ -554,10 +575,10 @@ is_log_page_id_valid(uint8_t page_id)
case NVME_LOG_COMMAND_EFFECT:
case NVME_LOG_RES_NOTIFICATION:
case NVME_LOG_SANITIZE_STATUS:
- return (TRUE);
+ return (true);
}
- return (FALSE);
+ return (false);
}
static uint32_t
@@ -778,7 +799,7 @@ nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
* Disable timeout here, since asynchronous event requests should by
* nature never be timed out.
*/
- req->timeout = FALSE;
+ req->timeout = false;
req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;
nvme_ctrlr_submit_admin_request(ctrlr, req);
}
@@ -837,6 +858,173 @@ nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
}
static void
+nvme_ctrlr_hmb_free(struct nvme_controller *ctrlr)
+{
+#ifndef __rtems__
+ struct nvme_hmb_chunk *hmbc;
+ int i;
+
+ if (ctrlr->hmb_desc_paddr) {
+ bus_dmamap_unload(ctrlr->hmb_desc_tag, ctrlr->hmb_desc_map);
+ bus_dmamem_free(ctrlr->hmb_desc_tag, ctrlr->hmb_desc_vaddr,
+ ctrlr->hmb_desc_map);
+ ctrlr->hmb_desc_paddr = 0;
+ }
+ if (ctrlr->hmb_desc_tag) {
+ bus_dma_tag_destroy(ctrlr->hmb_desc_tag);
+ ctrlr->hmb_desc_tag = NULL;
+ }
+ for (i = 0; i < ctrlr->hmb_nchunks; i++) {
+ hmbc = &ctrlr->hmb_chunks[i];
+ bus_dmamap_unload(ctrlr->hmb_tag, hmbc->hmbc_map);
+ bus_dmamem_free(ctrlr->hmb_tag, hmbc->hmbc_vaddr,
+ hmbc->hmbc_map);
+ }
+ ctrlr->hmb_nchunks = 0;
+ if (ctrlr->hmb_tag) {
+ bus_dma_tag_destroy(ctrlr->hmb_tag);
+ ctrlr->hmb_tag = NULL;
+ }
+ if (ctrlr->hmb_chunks) {
+ free(ctrlr->hmb_chunks, M_NVME);
+ ctrlr->hmb_chunks = NULL;
+ }
+#endif /* __rtems__ */
+}
+
+#ifndef __rtems__
+static void
+nvme_ctrlr_hmb_alloc(struct nvme_controller *ctrlr)
+{
+ struct nvme_hmb_chunk *hmbc;
+ size_t pref, min, minc, size;
+ int err, i;
+ uint64_t max;
+
+ /* Limit HMB to 5% of RAM size per device by default. */
+ max = (uint64_t)physmem * PAGE_SIZE / 20;
+ TUNABLE_UINT64_FETCH("hw.nvme.hmb_max", &max);
+
+ min = (long long unsigned)ctrlr->cdata.hmmin * 4096;
+ if (max == 0 || max < min)
+ return;
+ pref = MIN((long long unsigned)ctrlr->cdata.hmpre * 4096, max);
+ minc = MAX(ctrlr->cdata.hmminds * 4096, PAGE_SIZE);
+ if (min > 0 && ctrlr->cdata.hmmaxd > 0)
+ minc = MAX(minc, min / ctrlr->cdata.hmmaxd);
+ ctrlr->hmb_chunk = pref;
+
+again:
+ ctrlr->hmb_chunk = roundup2(ctrlr->hmb_chunk, PAGE_SIZE);
+ ctrlr->hmb_nchunks = howmany(pref, ctrlr->hmb_chunk);
+ if (ctrlr->cdata.hmmaxd > 0 && ctrlr->hmb_nchunks > ctrlr->cdata.hmmaxd)
+ ctrlr->hmb_nchunks = ctrlr->cdata.hmmaxd;
+ ctrlr->hmb_chunks = malloc(sizeof(struct nvme_hmb_chunk) *
+ ctrlr->hmb_nchunks, M_NVME, M_WAITOK);
+ err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
+ PAGE_SIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
+ ctrlr->hmb_chunk, 1, ctrlr->hmb_chunk, 0, NULL, NULL, &ctrlr->hmb_tag);
+ if (err != 0) {
+ nvme_printf(ctrlr, "HMB tag create failed %d\n", err);
+ nvme_ctrlr_hmb_free(ctrlr);
+ return;
+ }
+
+ for (i = 0; i < ctrlr->hmb_nchunks; i++) {
+ hmbc = &ctrlr->hmb_chunks[i];
+ if (bus_dmamem_alloc(ctrlr->hmb_tag,
+ (void **)&hmbc->hmbc_vaddr, BUS_DMA_NOWAIT,
+ &hmbc->hmbc_map)) {
+ nvme_printf(ctrlr, "failed to alloc HMB\n");
+ break;
+ }
+ if (bus_dmamap_load(ctrlr->hmb_tag, hmbc->hmbc_map,
+ hmbc->hmbc_vaddr, ctrlr->hmb_chunk, nvme_single_map,
+ &hmbc->hmbc_paddr, BUS_DMA_NOWAIT) != 0) {
+ bus_dmamem_free(ctrlr->hmb_tag, hmbc->hmbc_vaddr,
+ hmbc->hmbc_map);
+ nvme_printf(ctrlr, "failed to load HMB\n");
+ break;
+ }
+ bus_dmamap_sync(ctrlr->hmb_tag, hmbc->hmbc_map,
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ }
+
+ if (i < ctrlr->hmb_nchunks && i * ctrlr->hmb_chunk < min &&
+ ctrlr->hmb_chunk / 2 >= minc) {
+ ctrlr->hmb_nchunks = i;
+ nvme_ctrlr_hmb_free(ctrlr);
+ ctrlr->hmb_chunk /= 2;
+ goto again;
+ }
+ ctrlr->hmb_nchunks = i;
+ if (ctrlr->hmb_nchunks * ctrlr->hmb_chunk < min) {
+ nvme_ctrlr_hmb_free(ctrlr);
+ return;
+ }
+
+ size = sizeof(struct nvme_hmb_desc) * ctrlr->hmb_nchunks;
+ err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
+ 16, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
+ size, 1, size, 0, NULL, NULL, &ctrlr->hmb_desc_tag);
+ if (err != 0) {
+ nvme_printf(ctrlr, "HMB desc tag create failed %d\n", err);
+ nvme_ctrlr_hmb_free(ctrlr);
+ return;
+ }
+ if (bus_dmamem_alloc(ctrlr->hmb_desc_tag,
+ (void **)&ctrlr->hmb_desc_vaddr, BUS_DMA_WAITOK,
+ &ctrlr->hmb_desc_map)) {
+ nvme_printf(ctrlr, "failed to alloc HMB desc\n");
+ nvme_ctrlr_hmb_free(ctrlr);
+ return;
+ }
+ if (bus_dmamap_load(ctrlr->hmb_desc_tag, ctrlr->hmb_desc_map,
+ ctrlr->hmb_desc_vaddr, size, nvme_single_map,
+ &ctrlr->hmb_desc_paddr, BUS_DMA_NOWAIT) != 0) {
+ bus_dmamem_free(ctrlr->hmb_desc_tag, ctrlr->hmb_desc_vaddr,
+ ctrlr->hmb_desc_map);
+ nvme_printf(ctrlr, "failed to load HMB desc\n");
+ nvme_ctrlr_hmb_free(ctrlr);
+ return;
+ }
+
+ for (i = 0; i < ctrlr->hmb_nchunks; i++) {
+ ctrlr->hmb_desc_vaddr[i].addr =
+ htole64(ctrlr->hmb_chunks[i].hmbc_paddr);
+ ctrlr->hmb_desc_vaddr[i].size = htole32(ctrlr->hmb_chunk / 4096);
+ }
+ bus_dmamap_sync(ctrlr->hmb_desc_tag, ctrlr->hmb_desc_map,
+ BUS_DMASYNC_PREWRITE);
+
+ nvme_printf(ctrlr, "Allocated %lluMB host memory buffer\n",
+ (long long unsigned)ctrlr->hmb_nchunks * ctrlr->hmb_chunk
+ / 1024 / 1024);
+}
+
+static void
+nvme_ctrlr_hmb_enable(struct nvme_controller *ctrlr, bool enable, bool memret)
+{
+ struct nvme_completion_poll_status status;
+ uint32_t cdw11;
+
+ cdw11 = 0;
+ if (enable)
+ cdw11 |= 1;
+ if (memret)
+ cdw11 |= 2;
+ status.done = 0;
+ nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_HOST_MEMORY_BUFFER, cdw11,
+ ctrlr->hmb_nchunks * ctrlr->hmb_chunk / 4096, ctrlr->hmb_desc_paddr,
+ ctrlr->hmb_desc_paddr >> 32, ctrlr->hmb_nchunks, NULL, 0,
+ nvme_completion_poll_cb, &status);
+ nvme_completion_poll(&status);
+ if (nvme_completion_is_error(&status.cpl))
+ nvme_printf(ctrlr, "nvme_ctrlr_hmb_enable failed!\n");
+}
+#endif /* __rtems__ */
+
+static void
nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
{
struct nvme_controller *ctrlr = ctrlr_arg;
@@ -884,6 +1072,15 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
}
}
+#ifndef __rtems__
+ if (ctrlr->cdata.hmpre > 0 && ctrlr->hmb_nchunks == 0) {
+ nvme_ctrlr_hmb_alloc(ctrlr);
+ if (ctrlr->hmb_nchunks > 0)
+ nvme_ctrlr_hmb_enable(ctrlr, true, false);
+ } else if (ctrlr->hmb_nchunks > 0)
+ nvme_ctrlr_hmb_enable(ctrlr, true, true);
+#endif /* __rtems__ */
+
if (nvme_ctrlr_create_qpairs(ctrlr) != 0) {
nvme_ctrlr_fail(ctrlr);
return;
@@ -905,6 +1102,25 @@ void
nvme_ctrlr_start_config_hook(void *arg)
{
struct nvme_controller *ctrlr = arg;
+ int status;
+
+ /*
+ * Reset controller twice to ensure we do a transition from cc.en==1 to
+ * cc.en==0. This is because we don't really know what status the
+ * controller was left in when boot handed off to OS. Linux doesn't do
+ * this, however. If we adopt that policy, see also nvme_ctrlr_resume().
+ */
+ status = nvme_ctrlr_hw_reset(ctrlr);
+ if (status != 0) {
+ nvme_ctrlr_fail(ctrlr);
+ return;
+ }
+
+ status = nvme_ctrlr_hw_reset(ctrlr);
+ if (status != 0) {
+ nvme_ctrlr_fail(ctrlr);
+ return;
+ }
nvme_qpair_reset(&ctrlr->adminq);
nvme_admin_qpair_enable(&ctrlr->adminq);
@@ -1135,22 +1351,19 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
uint32_t cap_lo;
uint32_t cap_hi;
uint32_t to;
- uint8_t dstrd;
uint8_t mpsmin;
int status, timeout_period;
ctrlr->dev = dev;
mtx_init(&ctrlr->lock, "nvme ctrlr lock", NULL, MTX_DEF);
+#ifndef __rtems__
+ if (bus_get_domain(dev, &ctrlr->domain) != 0)
+ ctrlr->domain = 0;
+#endif /* __rtems__ */
- /*
- * Software emulators may set the doorbell stride to something
- * other than zero, but this driver is not set up to handle that.
- */
cap_hi = nvme_mmio_read_4(ctrlr, cap_hi);
- dstrd = NVME_CAP_HI_DSTRD(cap_hi);
- if (dstrd != 0)
- return (ENXIO);
+ ctrlr->dstrd = NVME_CAP_HI_DSTRD(cap_hi) + 2;
mpsmin = NVME_CAP_HI_MPSMIN(cap_hi);
ctrlr->min_page_size = 1 << (12 + mpsmin);
@@ -1186,7 +1399,7 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
TASK_INIT(&ctrlr->fail_req_task, 0, nvme_ctrlr_fail_req_task, ctrlr);
STAILQ_INIT(&ctrlr->fail_req);
- ctrlr->is_failed = FALSE;
+ ctrlr->is_failed = false;
make_dev_args_init(&md_args);
md_args.mda_devsw = &nvme_ctrlr_cdevsw;
@@ -1228,11 +1441,17 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
destroy_dev(ctrlr->cdev);
if (ctrlr->is_initialized) {
- if (!gone)
+ if (!gone) {
+#ifndef __rtems__
+ if (ctrlr->hmb_nchunks > 0)
+ nvme_ctrlr_hmb_enable(ctrlr, false, false);
+#endif /* __rtems__ */
nvme_ctrlr_delete_qpairs(ctrlr);
+ }
for (i = 0; i < ctrlr->num_io_queues; i++)
nvme_io_qpair_destroy(&ctrlr->ioq[i]);
free(ctrlr->ioq, M_NVME);
+ nvme_ctrlr_hmb_free(ctrlr);
nvme_admin_qpair_destroy(&ctrlr->adminq);
}
@@ -1312,7 +1531,7 @@ nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
{
struct nvme_qpair *qpair;
- qpair = &ctrlr->ioq[curcpu / ctrlr->num_cpus_per_ioq];
+ qpair = &ctrlr->ioq[QP(ctrlr, curcpu)];
nvme_qpair_submit_request(qpair, req);
}
@@ -1356,6 +1575,11 @@ nvme_ctrlr_suspend(struct nvme_controller *ctrlr)
return (EWOULDBLOCK);
}
+#ifndef __rtems__
+ if (ctrlr->hmb_nchunks > 0)
+ nvme_ctrlr_hmb_enable(ctrlr, false, false);
+#endif /* __rtems__ */
+
/*
* Per Section 7.6.2 of NVMe spec 1.4, to properly suspend, we need to
* delete the hardware I/O queues, and then shutdown. This properly
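The QP() macro introduced above maps a CPU number onto an I/O queue with plain integer arithmetic, and the same mapping is used both when binding each queue's interrupt and when nvme_ctrlr_submit_io_request() picks a queue from curcpu. A worked example with assumed values (16 CPUs, 4 I/O queues):

	/* QP(ctrlr, c) = c * num_io_queues / mp_ncpus = c * 4 / 16 */
	/* CPUs  0..3  -> I/O queue 0 (qpair id 1) */
	/* CPUs  4..7  -> I/O queue 1 (qpair id 2) */
	/* CPUs  8..11 -> I/O queue 2 (qpair id 3) */
	/* CPUs 12..15 -> I/O queue 3 (qpair id 4) */

nvme_ctrlr_construct_io_qpairs() walks this mapping in the other direction to count how many CPUs each queue serves (n) and then picks one of them, offset by the device unit number, as the CPU to bind that queue's interrupt to.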
diff --git a/freebsd/sys/dev/nvme/nvme_ctrlr_cmd.c b/freebsd/sys/dev/nvme/nvme_ctrlr_cmd.c
index f5c1832c..8ce51e1f 100644
--- a/freebsd/sys/dev/nvme/nvme_ctrlr_cmd.c
+++ b/freebsd/sys/dev/nvme/nvme_ctrlr_cmd.c
@@ -78,8 +78,7 @@ nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr, uint32_t nsid,
void
nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr,
- struct nvme_qpair *io_que, uint16_t vector, nvme_cb_fn_t cb_fn,
- void *cb_arg)
+ struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg)
{
struct nvme_request *req;
struct nvme_command *cmd;
@@ -95,7 +94,7 @@ nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr,
*/
cmd->cdw10 = htole32(((io_que->num_entries-1) << 16) | io_que->id);
/* 0x3 = interrupts enabled | physically contiguous */
- cmd->cdw11 = htole32((vector << 16) | 0x3);
+ cmd->cdw11 = htole32((io_que->vector << 16) | 0x3);
cmd->prp1 = htole64(io_que->cpl_bus_addr);
nvme_ctrlr_submit_admin_request(ctrlr, req);
@@ -169,7 +168,8 @@ nvme_ctrlr_cmd_delete_io_sq(struct nvme_controller *ctrlr,
void
nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature,
- uint32_t cdw11, void *payload, uint32_t payload_size,
+ uint32_t cdw11, uint32_t cdw12, uint32_t cdw13, uint32_t cdw14,
+ uint32_t cdw15, void *payload, uint32_t payload_size,
nvme_cb_fn_t cb_fn, void *cb_arg)
{
struct nvme_request *req;
@@ -181,6 +181,10 @@ nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature,
cmd->opc = NVME_OPC_SET_FEATURES;
cmd->cdw10 = htole32(feature);
cmd->cdw11 = htole32(cdw11);
+ cmd->cdw12 = htole32(cdw12);
+ cmd->cdw13 = htole32(cdw13);
+ cmd->cdw14 = htole32(cdw14);
+ cmd->cdw15 = htole32(cdw15);
nvme_ctrlr_submit_admin_request(ctrlr, req);
}
@@ -211,7 +215,7 @@ nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr,
cdw11 = ((num_queues - 1) << 16) | (num_queues - 1);
nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_NUMBER_OF_QUEUES, cdw11,
- NULL, 0, cb_fn, cb_arg);
+ 0, 0, 0, 0, NULL, 0, cb_fn, cb_arg);
}
void
@@ -222,8 +226,8 @@ nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr,
cdw11 = state;
nvme_ctrlr_cmd_set_feature(ctrlr,
- NVME_FEAT_ASYNC_EVENT_CONFIGURATION, cdw11, NULL, 0, cb_fn,
- cb_arg);
+ NVME_FEAT_ASYNC_EVENT_CONFIGURATION, cdw11, 0, 0, 0, 0, NULL, 0,
+ cb_fn, cb_arg);
}
void
@@ -248,7 +252,7 @@ nvme_ctrlr_cmd_set_interrupt_coalescing(struct nvme_controller *ctrlr,
cdw11 = ((microseconds/100) << 8) | threshold;
nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_INTERRUPT_COALESCING, cdw11,
- NULL, 0, cb_fn, cb_arg);
+ 0, 0, 0, 0, NULL, 0, cb_fn, cb_arg);
}
void
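nvme_ctrlr_cmd_set_feature() now takes cdw12 through cdw15 because some features, notably the Host Memory Buffer feature used by the new HMB code, need more than cdw11. A sketch of such a call, mirroring how nvme_ctrlr_hmb_enable() fills the dwords (the variables are placeholders; per the NVMe specification cdw11 carries the enable/memory-return bits, cdw12 the buffer size in 4 KiB units, cdw13/cdw14 the descriptor-list address, and cdw15 the descriptor count):

	nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_HOST_MEMORY_BUFFER,
	    1,				/* cdw11: enable host memory buffer */
	    hmb_bytes / 4096,		/* cdw12: HSIZE in 4 KiB units */
	    (uint32_t)desc_paddr,	/* cdw13: descriptor list, low 32 bits */
	    (uint32_t)(desc_paddr >> 32), /* cdw14: descriptor list, high 32 bits */
	    nchunks,			/* cdw15: number of descriptors */
	    NULL, 0, nvme_completion_poll_cb, &status);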
diff --git a/freebsd/sys/dev/nvme/nvme_pci.c b/freebsd/sys/dev/nvme/nvme_pci.c
index b9d46a8b..6b07a5ab 100644
--- a/freebsd/sys/dev/nvme/nvme_pci.c
+++ b/freebsd/sys/dev/nvme/nvme_pci.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <sys/conf.h>
#include <sys/proc.h>
#include <sys/smp.h>
+#include <vm/vm.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
@@ -235,7 +236,6 @@ nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
ctrlr->msix_enabled = 0;
ctrlr->num_io_queues = 1;
- ctrlr->num_cpus_per_ioq = mp_ncpus;
ctrlr->rid = 0;
ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
&ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
@@ -261,82 +261,65 @@ static void
nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
{
device_t dev;
- int per_cpu_io_queues;
+ int force_intx, num_io_queues, per_cpu_io_queues;
int min_cpus_per_ioq;
int num_vectors_requested, num_vectors_allocated;
- int num_vectors_available;
dev = ctrlr->dev;
- min_cpus_per_ioq = 1;
- TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
- if (min_cpus_per_ioq < 1) {
- min_cpus_per_ioq = 1;
- } else if (min_cpus_per_ioq > mp_ncpus) {
- min_cpus_per_ioq = mp_ncpus;
+ force_intx = 0;
+ TUNABLE_INT_FETCH("hw.nvme.force_intx", &force_intx);
+ if (force_intx || pci_msix_count(dev) < 2) {
+ nvme_ctrlr_configure_intx(ctrlr);
+ return;
}
+ num_io_queues = mp_ncpus;
+ TUNABLE_INT_FETCH("hw.nvme.num_io_queues", &num_io_queues);
+ if (num_io_queues < 1 || num_io_queues > mp_ncpus)
+ num_io_queues = mp_ncpus;
+
per_cpu_io_queues = 1;
TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
+ if (per_cpu_io_queues == 0)
+ num_io_queues = 1;
- if (per_cpu_io_queues == 0) {
- min_cpus_per_ioq = mp_ncpus;
+#ifndef __rtems__
+ min_cpus_per_ioq = smp_threads_per_core;
+#else /* __rtems__ */
+ min_cpus_per_ioq = 1;
+#endif /* __rtems__ */
+ TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
+ if (min_cpus_per_ioq > 1) {
+ num_io_queues = min(num_io_queues,
+ max(1, mp_ncpus / min_cpus_per_ioq));
}
- ctrlr->force_intx = 0;
- TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
-
- /*
- * FreeBSD currently cannot allocate more than about 190 vectors at
- * boot, meaning that systems with high core count and many devices
- * requesting per-CPU interrupt vectors will not get their full
- * allotment. So first, try to allocate as many as we may need to
- * understand what is available, then immediately release them.
- * Then figure out how many of those we will actually use, based on
- * assigning an equal number of cores to each I/O queue.
- */
+ num_io_queues = min(num_io_queues, pci_msix_count(dev) - 1);
+again:
+ if (num_io_queues > vm_ndomains)
+ num_io_queues -= num_io_queues % vm_ndomains;
/* One vector for per core I/O queue, plus one vector for admin queue. */
- num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1);
- if (pci_alloc_msix(dev, &num_vectors_available) != 0) {
- num_vectors_available = 0;
- }
- pci_release_msi(dev);
-
- if (ctrlr->force_intx || num_vectors_available < 2) {
- nvme_ctrlr_configure_intx(ctrlr);
- return;
- }
-
- /*
- * Do not use all vectors for I/O queues - one must be saved for the
- * admin queue.
- */
- ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
- howmany(mp_ncpus, num_vectors_available - 1));
-
- ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq);
- num_vectors_requested = ctrlr->num_io_queues + 1;
+ num_vectors_requested = num_io_queues + 1;
num_vectors_allocated = num_vectors_requested;
-
- /*
- * Now just allocate the number of vectors we need. This should
- * succeed, since we previously called pci_alloc_msix()
- * successfully returning at least this many vectors, but just to
- * be safe, if something goes wrong just revert to INTx.
- */
if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
nvme_ctrlr_configure_intx(ctrlr);
return;
}
-
- if (num_vectors_allocated < num_vectors_requested) {
+ if (num_vectors_allocated < 2) {
pci_release_msi(dev);
nvme_ctrlr_configure_intx(ctrlr);
return;
}
+ if (num_vectors_allocated != num_vectors_requested) {
+ pci_release_msi(dev);
+ num_io_queues = num_vectors_allocated - 1;
+ goto again;
+ }
ctrlr->msix_enabled = 1;
+ ctrlr->num_io_queues = num_io_queues;
}
static int
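
The reworked nvme_ctrlr_setup_interrupts() above negotiates the I/O queue count against the MSI-X vectors the PCI layer actually grants, retrying with fewer queues instead of pre-allocating and releasing vectors just to probe availability. A minimal sketch of that negotiation shape follows; the callback names are hypothetical stand-ins for pci_alloc_msix()/pci_release_msi() and only illustrate the retry loop, not the driver's exact code.

/*
 * Illustrative only: request one MSI-X vector per I/O queue plus one for
 * the admin queue; if the platform grants fewer, release them, shrink the
 * queue count to match what was granted and try again.
 */
static int
msix_negotiate_sketch(int want_io_queues,
    int (*alloc_msix)(int *count), void (*release_msi)(void))
{
	int requested, granted;

	for (;;) {
		requested = want_io_queues + 1;	/* +1 for the admin queue */
		granted = requested;
		if (alloc_msix(&granted) != 0)
			return (-1);		/* caller falls back to INTx */
		if (granted < 2) {
			release_msi();
			return (-1);		/* caller falls back to INTx */
		}
		if (granted == requested)
			return (want_io_queues);
		release_msi();
		want_io_queues = granted - 1;	/* fewer vectors: retry smaller */
	}
}
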
diff --git a/freebsd/sys/dev/nvme/nvme_private.h b/freebsd/sys/dev/nvme/nvme_private.h
index a49d2b94..54ce1bfc 100644
--- a/freebsd/sys/dev/nvme/nvme_private.h
+++ b/freebsd/sys/dev/nvme/nvme_private.h
@@ -147,7 +147,7 @@ struct nvme_request {
} u;
uint32_t type;
uint32_t payload_size;
- boolean_t timeout;
+ bool timeout;
nvme_cb_fn_t cb_fn;
void *cb_arg;
int32_t retries;
@@ -187,7 +187,10 @@ struct nvme_qpair {
struct nvme_controller *ctrlr;
uint32_t id;
- uint32_t phase;
+#ifndef __rtems__
+ int domain;
+ int cpu;
+#endif /* __rtems__ */
uint16_t vector;
int rid;
@@ -199,6 +202,7 @@ struct nvme_qpair {
uint32_t sq_tdbl_off;
uint32_t cq_hdbl_off;
+ uint32_t phase;
uint32_t sq_head;
uint32_t sq_tail;
uint32_t cq_head;
@@ -226,7 +230,7 @@ struct nvme_qpair {
struct nvme_tracker **act_tr;
- boolean_t is_enabled;
+ bool is_enabled;
struct mtx lock __aligned(CACHE_LINE_SIZE);
@@ -252,7 +256,9 @@ struct nvme_controller {
device_t dev;
struct mtx lock;
-
+#ifndef __rtems__
+ int domain;
+#endif /* __rtems__ */
uint32_t ready_timeout_in_ms;
uint32_t quirks;
#define QUIRK_DELAY_B4_CHK_RDY 1 /* Can't touch MMIO on disable */
@@ -272,11 +278,9 @@ struct nvme_controller {
struct resource *bar4_resource;
uint32_t msix_enabled;
- uint32_t force_intx;
uint32_t enable_aborts;
uint32_t num_io_queues;
- uint32_t num_cpus_per_ioq;
uint32_t max_hw_pend_io;
/* Fields for tracking progress during controller initialization. */
@@ -293,9 +297,6 @@ struct nvme_controller {
struct resource *res;
void *tag;
- bus_dma_tag_t hw_desc_tag;
- bus_dmamap_t hw_desc_map;
-
/** maximum i/o size in bytes */
uint32_t max_xfer_size;
@@ -311,6 +312,9 @@ struct nvme_controller {
/** timeout period in seconds */
uint32_t timeout_period;
+ /** doorbell stride */
+ uint32_t dstrd;
+
struct nvme_qpair adminq;
struct nvme_qpair *ioq;
@@ -333,8 +337,24 @@ struct nvme_controller {
uint32_t is_initialized;
uint32_t notification_sent;
- boolean_t is_failed;
+ bool is_failed;
STAILQ_HEAD(, nvme_request) fail_req;
+
+ /* Host Memory Buffer */
+#ifndef __rtems__
+ int hmb_nchunks;
+ size_t hmb_chunk;
+ bus_dma_tag_t hmb_tag;
+ struct nvme_hmb_chunk {
+ bus_dmamap_t hmbc_map;
+ void *hmbc_vaddr;
+ uint64_t hmbc_paddr;
+ } *hmb_chunks;
+ bus_dma_tag_t hmb_desc_tag;
+ bus_dmamap_t hmb_desc_map;
+ struct nvme_hmb_desc *hmb_desc_vaddr;
+ uint64_t hmb_desc_paddr;
+#endif /* __rtems__ */
};
#define nvme_mmio_offsetof(reg) \
@@ -388,7 +408,7 @@ void nvme_ctrlr_cmd_get_firmware_page(struct nvme_controller *ctrlr,
nvme_cb_fn_t cb_fn,
void *cb_arg);
void nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr,
- struct nvme_qpair *io_que, uint16_t vector,
+ struct nvme_qpair *io_que,
nvme_cb_fn_t cb_fn, void *cb_arg);
void nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr,
struct nvme_qpair *io_que,
@@ -424,9 +444,8 @@ void nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
void nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr,
struct nvme_request *req);
-int nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
- uint16_t vector, uint32_t num_entries,
- uint32_t num_trackers,
+int nvme_qpair_construct(struct nvme_qpair *qpair,
+ uint32_t num_entries, uint32_t num_trackers,
struct nvme_controller *ctrlr);
void nvme_qpair_submit_tracker(struct nvme_qpair *qpair,
struct nvme_tracker *tr);
@@ -499,7 +518,7 @@ _nvme_allocate_request(nvme_cb_fn_t cb_fn, void *cb_arg)
if (req != NULL) {
req->cb_fn = cb_fn;
req->cb_arg = cb_arg;
- req->timeout = TRUE;
+ req->timeout = true;
}
return (req);
}
diff --git a/freebsd/sys/dev/nvme/nvme_qpair.c b/freebsd/sys/dev/nvme/nvme_qpair.c
index 6c16240d..3955f09b 100644
--- a/freebsd/sys/dev/nvme/nvme_qpair.c
+++ b/freebsd/sys/dev/nvme/nvme_qpair.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
+#include <sys/domainset.h>
#include <sys/proc.h>
#include <dev/pci/pcivar.h>
@@ -358,7 +359,7 @@ nvme_qpair_print_completion(struct nvme_qpair *qpair,
cpl->cdw0);
}
-static boolean_t
+static bool
nvme_completion_is_retry(const struct nvme_completion *cpl)
{
uint8_t sct, sc, dnr;
@@ -419,11 +420,12 @@ nvme_completion_is_retry(const struct nvme_completion *cpl)
}
static void
-nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
+nvme_qpair_complete_tracker(struct nvme_tracker *tr,
struct nvme_completion *cpl, error_print_t print_on_error)
{
+ struct nvme_qpair * qpair = tr->qpair;
struct nvme_request *req;
- boolean_t retry, error, retriable;
+ bool retry, error, retriable;
req = tr->req;
error = nvme_completion_is_error(cpl);
@@ -444,8 +446,17 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
KASSERT(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n"));
- if (req->cb_fn && !retry)
- req->cb_fn(req->cb_arg, cpl);
+ if (!retry) {
+#ifndef __rtems__
+ if (req->type != NVME_REQUEST_NULL) {
+ bus_dmamap_sync(qpair->dma_tag_payload,
+ tr->payload_dma_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+ }
+#endif /* __rtems__ */
+ if (req->cb_fn)
+ req->cb_fn(req->cb_arg, cpl);
+ }
mtx_lock(&qpair->lock);
callout_stop(&tr->timer);
@@ -456,9 +467,6 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
} else {
#ifndef __rtems__
if (req->type != NVME_REQUEST_NULL) {
- bus_dmamap_sync(qpair->dma_tag_payload,
- tr->payload_dma_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(qpair->dma_tag_payload,
tr->payload_dma_map);
}
@@ -487,19 +495,22 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
}
static void
-nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
+nvme_qpair_manual_complete_tracker(
struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
error_print_t print_on_error)
{
struct nvme_completion cpl;
memset(&cpl, 0, sizeof(cpl));
+
+ struct nvme_qpair * qpair = tr->qpair;
+
cpl.sqid = qpair->id;
cpl.cid = tr->cid;
cpl.status |= (sct & NVME_STATUS_SCT_MASK) << NVME_STATUS_SCT_SHIFT;
cpl.status |= (sc & NVME_STATUS_SC_MASK) << NVME_STATUS_SC_SHIFT;
cpl.status |= (dnr & NVME_STATUS_DNR_MASK) << NVME_STATUS_DNR_SHIFT;
- nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
+ nvme_qpair_complete_tracker(tr, &cpl, print_on_error);
}
void
@@ -507,7 +518,7 @@ nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
struct nvme_request *req, uint32_t sct, uint32_t sc)
{
struct nvme_completion cpl;
- boolean_t error;
+ bool error;
memset(&cpl, 0, sizeof(cpl));
cpl.sqid = qpair->id;
@@ -596,7 +607,7 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
tr = qpair->act_tr[cpl.cid];
if (tr != NULL) {
- nvme_qpair_complete_tracker(qpair, tr, &cpl, ERROR_PRINT_ALL);
+ nvme_qpair_complete_tracker(tr, &cpl, ERROR_PRINT_ALL);
qpair->sq_head = cpl.sqhd;
done++;
} else if (!in_panic) {
@@ -630,8 +641,13 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
qpair->phase = !qpair->phase; /* 3 */
}
- nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
- qpair->cq_head);
+#ifndef __rtems__
+ bus_space_write_4(qpair->ctrlr->bus_tag, qpair->ctrlr->bus_handle,
+ qpair->cq_hdbl_off, qpair->cq_head);
+#else /* __rtems__ */
+ bus_space_write_4(qpair->ctrlr->bus_tag, qpair->ctrlr->bus_handle,
+ qpair->cq_hdbl_off, htole32(qpair->cq_head));
+#endif /* __rtems__ */
}
return (done != 0);
}
@@ -645,8 +661,8 @@ nvme_qpair_msix_handler(void *arg)
}
int
-nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
- uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
+nvme_qpair_construct(struct nvme_qpair *qpair,
+ uint32_t num_entries, uint32_t num_trackers,
struct nvme_controller *ctrlr)
{
struct nvme_tracker *tr;
@@ -655,8 +671,7 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
uint8_t *queuemem, *prpmem, *prp_list;
int i, err;
- qpair->id = id;
- qpair->vector = vector;
+ qpair->vector = ctrlr->msix_enabled ? qpair->id : 0;
qpair->num_entries = num_entries;
qpair->num_trackers = num_trackers;
qpair->ctrlr = ctrlr;
@@ -667,19 +682,19 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
* MSI-X vector resource IDs start at 1, so we add one to
* the queue's vector to get the corresponding rid to use.
*/
- qpair->rid = vector + 1;
+ qpair->rid = qpair->vector + 1;
qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
&qpair->rid, RF_ACTIVE);
bus_setup_intr(ctrlr->dev, qpair->res,
INTR_TYPE_MISC | INTR_MPSAFE, NULL,
nvme_qpair_msix_handler, qpair, &qpair->tag);
- if (id == 0) {
+ if (qpair->id == 0) {
bus_describe_intr(ctrlr->dev, qpair->res, qpair->tag,
"admin");
} else {
bus_describe_intr(ctrlr->dev, qpair->res, qpair->tag,
- "io%d", id - 1);
+ "io%d", qpair->id - 1);
}
}
@@ -717,6 +732,9 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
nvme_printf(ctrlr, "tag create failed %d\n", err);
goto out;
}
+#ifndef __rtems__
+ bus_dma_tag_set_domain(qpair->dma_tag, qpair->domain);
+#endif /* __rtems__ */
if (bus_dmamem_alloc(qpair->dma_tag, (void **)&queuemem,
BUS_DMA_NOWAIT, &qpair->queuemem_map)) {
@@ -741,8 +759,15 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
qpair->cpl_bus_addr = queuemem_phys + cmdsz;
prpmem_phys = queuemem_phys + cmdsz + cplsz;
- qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
- qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);
+ /*
+	 * Calculate the stride of the doorbell register. Many emulators set this
+ * value to correspond to a cache line. However, some hardware has set
+ * it to various small values.
+ */
+ qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[0]) +
+ (qpair->id << (ctrlr->dstrd + 1));
+ qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[0]) +
+ (qpair->id << (ctrlr->dstrd + 1)) + (1 << ctrlr->dstrd);
TAILQ_INIT(&qpair->free_tr);
TAILQ_INIT(&qpair->outstanding_tr);
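
The sq_tdbl_off/cq_hdbl_off assignments above follow the NVMe doorbell layout: queue y's submission tail doorbell sits at 0x1000 + (2y) * (4 << CAP.DSTRD) and its completion head doorbell one stride later. A small sketch of the same arithmetic, assuming ctrlr->dstrd stores the log2 of the stride in bytes (i.e. the spec's CAP.DSTRD plus two):

#include <stdint.h>

/*
 * Sketch of the offset math above.  DOORBELL_BASE is the first doorbell
 * register (0x1000 in the NVMe register map); with 'dstrd' as log2 of the
 * stride in bytes, the spec formula reduces to shifts.
 */
#define	DOORBELL_BASE	0x1000u

static inline uint32_t
sq_tdbl_off(uint32_t qid, uint32_t dstrd)
{
	return (DOORBELL_BASE + (qid << (dstrd + 1)));
}

static inline uint32_t
cq_hdbl_off(uint32_t qid, uint32_t dstrd)
{
	return (DOORBELL_BASE + (qid << (dstrd + 1)) + (1u << dstrd));
}
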
@@ -768,7 +793,8 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
(uint8_t *)roundup2((uintptr_t)prp_list, PAGE_SIZE);
}
- tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_WAITOK);
+ tr = malloc_domainset(sizeof(*tr), M_NVME,
+ DOMAINSET_PREF(qpair->domain), M_ZERO | M_WAITOK);
#ifndef __rtems__
bus_dmamap_create(qpair->dma_tag_payload, 0,
&tr->payload_dma_map);
@@ -788,8 +814,9 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
goto out;
}
- qpair->act_tr = malloc(sizeof(struct nvme_tracker *) *
- qpair->num_entries, M_NVME, M_ZERO | M_WAITOK);
+ qpair->act_tr = malloc_domainset(sizeof(struct nvme_tracker *) *
+ qpair->num_entries, M_NVME, DOMAINSET_PREF(qpair->domain),
+ M_ZERO | M_WAITOK);
return (0);
out:
@@ -819,7 +846,7 @@ nvme_qpair_destroy(struct nvme_qpair *qpair)
}
if (qpair->act_tr)
- free(qpair->act_tr, M_NVME);
+ free_domain(qpair->act_tr, M_NVME);
while (!TAILQ_EMPTY(&qpair->free_tr)) {
tr = TAILQ_FIRST(&qpair->free_tr);
@@ -828,7 +855,7 @@ nvme_qpair_destroy(struct nvme_qpair *qpair)
bus_dmamap_destroy(qpair->dma_tag_payload,
tr->payload_dma_map);
#endif /* __rtems__ */
- free(tr, M_NVME);
+ free_domain(tr, M_NVME);
}
if (qpair->dma_tag)
@@ -848,7 +875,7 @@ nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair)
tr = TAILQ_FIRST(&qpair->outstanding_tr);
while (tr != NULL) {
if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
- nvme_qpair_manual_complete_tracker(qpair, tr,
+ nvme_qpair_manual_complete_tracker(tr,
NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, 0,
ERROR_PRINT_NONE);
tr = TAILQ_FIRST(&qpair->outstanding_tr);
@@ -892,7 +919,7 @@ nvme_abort_complete(void *arg, const struct nvme_completion *status)
*/
nvme_printf(tr->qpair->ctrlr,
"abort command failed, aborting command manually\n");
- nvme_qpair_manual_complete_tracker(tr->qpair, tr,
+ nvme_qpair_manual_complete_tracker(tr,
NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 0, ERROR_PRINT_ALL);
}
}
@@ -947,8 +974,13 @@ nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
ctrlr = qpair->ctrlr;
if (req->timeout)
- callout_reset_curcpu(&tr->timer, ctrlr->timeout_period * hz,
- nvme_timeout, tr);
+#ifndef __rtems__
+ callout_reset_on(&tr->timer, ctrlr->timeout_period * hz,
+ nvme_timeout, tr, qpair->cpu);
+#else /* __rtems__ */
+ callout_reset_on(&tr->timer, ctrlr->timeout_period * hz,
+ nvme_timeout, tr, -1);
+#endif /* __rtems__ */
/* Copy the command from the tracker to the submission queue. */
memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));
@@ -970,9 +1002,13 @@ nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
wmb();
#endif /* __rtems__ */
- nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
- qpair->sq_tail);
-
+#ifndef __rtems__
+ bus_space_write_4(qpair->ctrlr->bus_tag, qpair->ctrlr->bus_handle,
+ qpair->sq_tdbl_off, qpair->sq_tail);
+#else /* __rtems__ */
+ bus_space_write_4(qpair->ctrlr->bus_tag, qpair->ctrlr->bus_handle,
+ qpair->sq_tdbl_off, htole32(qpair->sq_tail));
+#endif /* __rtems__ */
qpair->num_cmds++;
}
@@ -1199,7 +1235,7 @@ _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
* with the qpair lock held.
*/
mtx_unlock(&qpair->lock);
- nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
+ nvme_qpair_manual_complete_tracker(tr, NVME_SCT_GENERIC,
NVME_SC_DATA_TRANSFER_ERROR, DO_NOT_RETRY, ERROR_PRINT_ALL);
mtx_lock(&qpair->lock);
}
@@ -1218,7 +1254,7 @@ static void
nvme_qpair_enable(struct nvme_qpair *qpair)
{
- qpair->is_enabled = TRUE;
+ qpair->is_enabled = true;
}
void
@@ -1257,7 +1293,7 @@ nvme_admin_qpair_enable(struct nvme_qpair *qpair)
TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
nvme_printf(qpair->ctrlr,
"aborting outstanding admin command\n");
- nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
+ nvme_qpair_manual_complete_tracker(tr, NVME_SCT_GENERIC,
NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL);
}
@@ -1279,7 +1315,7 @@ nvme_io_qpair_enable(struct nvme_qpair *qpair)
*/
TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
nvme_printf(qpair->ctrlr, "aborting outstanding i/o\n");
- nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
+ nvme_qpair_manual_complete_tracker(tr, NVME_SCT_GENERIC,
NVME_SC_ABORTED_BY_REQUEST, 0, ERROR_PRINT_NO_RETRY);
}
@@ -1306,7 +1342,7 @@ nvme_qpair_disable(struct nvme_qpair *qpair)
{
struct nvme_tracker *tr;
- qpair->is_enabled = FALSE;
+ qpair->is_enabled = false;
mtx_lock(&qpair->lock);
TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
callout_stop(&tr->timer);
@@ -1358,11 +1394,10 @@ nvme_qpair_fail(struct nvme_qpair *qpair)
*/
nvme_printf(qpair->ctrlr, "failing outstanding i/o\n");
mtx_unlock(&qpair->lock);
- nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
+ nvme_qpair_manual_complete_tracker(tr, NVME_SCT_GENERIC,
NVME_SC_ABORTED_BY_REQUEST, DO_NOT_RETRY, ERROR_PRINT_ALL);
mtx_lock(&qpair->lock);
}
mtx_unlock(&qpair->lock);
}
-
diff --git a/freebsd/sys/dev/nvme/nvme_sysctl.c b/freebsd/sys/dev/nvme/nvme_sysctl.c
index 7110cb80..589f4f43 100644
--- a/freebsd/sys/dev/nvme/nvme_sysctl.c
+++ b/freebsd/sys/dev/nvme/nvme_sysctl.c
@@ -308,9 +308,9 @@ nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr)
ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);
- SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_cpus_per_ioq",
- CTLFLAG_RD, &ctrlr->num_cpus_per_ioq, 0,
- "Number of CPUs assigned per I/O queue pair");
+ SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_io_queues",
+ CTLFLAG_RD, &ctrlr->num_io_queues, 0,
+ "Number of I/O queue pairs");
SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
"int_coal_time", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
diff --git a/freebsd/sys/dev/ofw/ofw_bus_subr.h b/freebsd/sys/dev/ofw/ofw_bus_subr.h
index 218ba710..4a55037b 100644
--- a/freebsd/sys/dev/ofw/ofw_bus_subr.h
+++ b/freebsd/sys/dev/ofw/ofw_bus_subr.h
@@ -69,7 +69,8 @@ struct intr_map_data_fdt {
#define FDTCOMPAT_PNP_INFO(t, busname) \
MODULE_PNP_INFO(FDTCOMPAT_PNP_DESCR, busname, t, t, sizeof(t) / sizeof(t[0]));
-#define SIMPLEBUS_PNP_INFO(t) FDTCOMPAT_PNP_INFO(t, simplebus)
+#define OFWBUS_PNP_INFO(t) FDTCOMPAT_PNP_INFO(t, ofwbus)
+#define SIMPLEBUS_PNP_INFO(t) FDTCOMPAT_PNP_INFO(t, simplebus)
/* Generic implementation of ofw_bus_if.m methods and helper routines */
int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *, phandle_t);
diff --git a/freebsd/sys/dev/pci/pci.c b/freebsd/sys/dev/pci/pci.c
index 586efc3d..f1501208 100644
--- a/freebsd/sys/dev/pci/pci.c
+++ b/freebsd/sys/dev/pci/pci.c
@@ -108,8 +108,6 @@ static void pci_assign_interrupt(device_t bus, device_t dev,
static int pci_add_map(device_t bus, device_t dev, int reg,
struct resource_list *rl, int force, int prefetch);
static int pci_probe(device_t dev);
-static int pci_attach(device_t dev);
-static int pci_detach(device_t dev);
static void pci_load_vendor_data(void);
static int pci_describe_parse_line(char **ptr, int *vendor,
int *device, char **desc);
@@ -250,6 +248,7 @@ struct pci_quirk {
#define PCI_QUIRK_UNMAP_REG 4 /* Ignore PCI map register */
#define PCI_QUIRK_DISABLE_MSIX 5 /* MSI-X doesn't work */
#define PCI_QUIRK_MSI_INTX_BUG 6 /* PCIM_CMD_INTxDIS disables MSI */
+#define PCI_QUIRK_REALLOC_BAR 7 /* Can't allocate memory at the default address */
int arg1;
int arg2;
};
@@ -331,6 +330,12 @@ static const struct pci_quirk pci_quirks[] = {
{ 0x167814e4, PCI_QUIRK_MSI_INTX_BUG, 0, 0 }, /* BCM5715 */
{ 0x167914e4, PCI_QUIRK_MSI_INTX_BUG, 0, 0 }, /* BCM5715S */
+ /*
+ * HPE Gen 10 VGA has a memory range that can't be allocated in the
+ * expected place.
+ */
+ { 0x98741002, PCI_QUIRK_REALLOC_BAR, 0, 0 },
+
{ 0 }
};
@@ -3311,7 +3316,9 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
*/
res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
flags);
- if (pci_do_realloc_bars && res == NULL && (start != 0 || end != ~0)) {
+ if ((pci_do_realloc_bars
+ || pci_has_quirk(pci_get_devid(dev), PCI_QUIRK_REALLOC_BAR))
+ && res == NULL && (start != 0 || end != ~0)) {
/*
* If the allocation fails, try to allocate a resource for
* this BAR using any available range. The firmware felt
@@ -4404,7 +4411,7 @@ pci_attach_common(device_t dev)
return (0);
}
-static int
+int
pci_attach(device_t dev)
{
int busno, domain, error;
@@ -4425,7 +4432,7 @@ pci_attach(device_t dev)
return (bus_generic_attach(dev));
}
-static int
+int
pci_detach(device_t dev)
{
#ifdef PCI_RES_BUS
diff --git a/freebsd/sys/dev/pci/pci_private.h b/freebsd/sys/dev/pci/pci_private.h
index f468152b..d891f592 100644
--- a/freebsd/sys/dev/pci/pci_private.h
+++ b/freebsd/sys/dev/pci/pci_private.h
@@ -58,7 +58,9 @@ void pci_add_resources(device_t bus, device_t dev, int force,
uint32_t prefetchmask);
void pci_add_resources_ea(device_t bus, device_t dev, int alloc_iov);
struct pci_devinfo *pci_alloc_devinfo_method(device_t dev);
+int pci_attach(device_t dev);
int pci_attach_common(device_t dev);
+int pci_detach(device_t dev);
int pci_rescan_method(device_t dev);
void pci_driver_added(device_t dev, driver_t *driver);
int pci_ea_is_enabled(device_t dev, int rid);
diff --git a/freebsd/sys/dev/sdhci/sdhci.c b/freebsd/sys/dev/sdhci/sdhci.c
index 5d9cf26c..ed6010e8 100644
--- a/freebsd/sys/dev/sdhci/sdhci.c
+++ b/freebsd/sys/dev/sdhci/sdhci.c
@@ -904,8 +904,13 @@ sdhci_init_slot(device_t dev, struct sdhci_slot *slot, int num)
slot->host.host_ocr |= MMC_OCR_320_330 | MMC_OCR_330_340;
if (caps & SDHCI_CAN_VDD_300)
slot->host.host_ocr |= MMC_OCR_290_300 | MMC_OCR_300_310;
- /* 1.8V VDD is not supposed to be used for removable cards. */
- if ((caps & SDHCI_CAN_VDD_180) && (slot->opt & SDHCI_SLOT_EMBEDDED))
+ /*
+ * 1.8V VDD is not supposed to be used for removable cards. Hardware
+ * prior to v3.0 had no way to indicate embedded slots, but did
+	 * sometimes support 1.8V for non-removable devices.
+ */
+ if ((caps & SDHCI_CAN_VDD_180) && (slot->version < SDHCI_SPEC_300 ||
+ (slot->opt & SDHCI_SLOT_EMBEDDED)))
slot->host.host_ocr |= MMC_OCR_LOW_VOLTAGE;
if (slot->host.host_ocr == 0) {
slot_printf(slot, "Hardware doesn't report any "
diff --git a/freebsd/sys/dev/usb/controller/dwc_otg_fdt.c b/freebsd/sys/dev/usb/controller/dwc_otg_fdt.c
index a7110887..65343be9 100644
--- a/freebsd/sys/dev/usb/controller/dwc_otg_fdt.c
+++ b/freebsd/sys/dev/usb/controller/dwc_otg_fdt.c
@@ -83,6 +83,20 @@ dwc_otg_probe(device_t dev)
return (BUS_PROBE_DEFAULT);
}
+static int
+dwc_otg_irq_index(device_t dev, int *rid)
+{
+ int idx, rv;
+ phandle_t node;
+
+ node = ofw_bus_get_node(dev);
+ rv = ofw_bus_find_string_index(node, "interrupt-names", "usb", &idx);
+ if (rv != 0)
+ return (rv);
+ *rid = idx;
+ return (0);
+}
+
int
dwc_otg_attach(device_t dev)
{
@@ -135,10 +149,16 @@ dwc_otg_attach(device_t dev)
/*
- * brcm,bcm2708-usb FDT provides two interrupts,
- * we need only second one (VC_USB)
+	 * brcm,bcm2708-usb FDT provides two interrupts; we need only the USB
+ * interrupt (VC_USB). The latest FDT for it provides an
+ * interrupt-names property and swapped them around, while older ones
+ * did not have interrupt-names and put the usb interrupt in the second
+ * position. We'll attempt to use interrupt-names first with a fallback
+ * to the old method of assuming the index based on the compatible
+ * string.
*/
- rid = ofw_bus_is_compatible(dev, "brcm,bcm2708-usb") ? 1 : 0;
+ if (dwc_otg_irq_index(dev, &rid) != 0)
+ rid = ofw_bus_is_compatible(dev, "brcm,bcm2708-usb") ? 1 : 0;
sc->sc_otg.sc_irq_res =
bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE);
if (sc->sc_otg.sc_irq_res == NULL)
diff --git a/freebsd/sys/dev/usb/input/uep.c b/freebsd/sys/dev/usb/input/uep.c
index 247bfb9c..6d11b1f7 100644
--- a/freebsd/sys/dev/usb/input/uep.c
+++ b/freebsd/sys/dev/usb/input/uep.c
@@ -59,7 +59,6 @@
#else
#include <sys/ioccom.h>
#include <sys/fcntl.h>
-#include <sys/tty.h>
#endif
#define USB_DEBUG_VAR uep_debug
diff --git a/freebsd/sys/dev/usb/input/ukbd.c b/freebsd/sys/dev/usb/input/ukbd.c
index 770d1d3f..5cdaaffd 100644
--- a/freebsd/sys/dev/usb/input/ukbd.c
+++ b/freebsd/sys/dev/usb/input/ukbd.c
@@ -82,7 +82,6 @@ __FBSDID("$FreeBSD$");
#include <sys/ioccom.h>
#include <sys/filio.h>
-#include <sys/tty.h>
#include <sys/kbio.h>
#include <dev/kbd/kbdreg.h>
@@ -1372,7 +1371,7 @@ ukbd_attach(device_t dev)
sc->sc_flags |= UKBD_FLAG_ATTACHED;
if (bootverbose) {
- genkbd_diag(kbd, bootverbose);
+ kbdd_diag(kbd, bootverbose);
}
#ifdef USB_DEBUG
@@ -2287,9 +2286,7 @@ static keyboard_switch_t ukbdsw = {
.clear_state = &ukbd_clear_state,
.get_state = &ukbd_get_state,
.set_state = &ukbd_set_state,
- .get_fkeystr = &genkbd_get_fkeystr,
.poll = &ukbd_poll,
- .diag = &genkbd_diag,
};
KEYBOARD_DRIVER(ukbd, ukbdsw, ukbd_configure);
diff --git a/freebsd/sys/dev/usb/input/ums.c b/freebsd/sys/dev/usb/input/ums.c
index 4a0d1f34..65c76b4a 100644
--- a/freebsd/sys/dev/usb/input/ums.c
+++ b/freebsd/sys/dev/usb/input/ums.c
@@ -81,7 +81,6 @@ __FBSDID("$FreeBSD$");
#include <sys/ioccom.h>
#include <sys/filio.h>
-#include <sys/tty.h>
#include <sys/mouse.h>
#ifdef USB_DEBUG
diff --git a/freebsd/sys/dev/usb/serial/uslcom.c b/freebsd/sys/dev/usb/serial/uslcom.c
index 4128802d..26986b8b 100644
--- a/freebsd/sys/dev/usb/serial/uslcom.c
+++ b/freebsd/sys/dev/usb/serial/uslcom.c
@@ -315,6 +315,7 @@ static const STRUCT_USB_HOST_ID uslcom_devs[] = {
USLCOM_DEV(SILABS, HAMLINKUSB),
USLCOM_DEV(SILABS, HELICOM),
USLCOM_DEV(SILABS, HUBZ),
+ USLCOM_DEV(SILABS, BV_AV2010_10),
USLCOM_DEV(SILABS, IMS_USB_RS422),
USLCOM_DEV(SILABS, INFINITY_MIC),
USLCOM_DEV(SILABS, INGENI_ZIGBEE),
@@ -626,7 +627,11 @@ uslcom_pre_param(struct ucom_softc *ucom, struct termios *t)
case USLCOM_PARTNUM_CP2102:
case USLCOM_PARTNUM_CP2103:
default:
- maxspeed = 921600;
+ /*
+ * Datasheet for cp2102 says 921600 max. Testing shows that
+ * 1228800 and 1843200 work fine.
+ */
+ maxspeed = 1843200;
break;
}
if (t->c_ospeed <= 0 || t->c_ospeed > maxspeed)
diff --git a/freebsd/sys/dev/usb/usb_bus.h b/freebsd/sys/dev/usb/usb_bus.h
index 710436c1..07784ded 100644
--- a/freebsd/sys/dev/usb/usb_bus.h
+++ b/freebsd/sys/dev/usb/usb_bus.h
@@ -2,7 +2,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (c) 2008 Hans Petter Selasky. All rights reserved.
+ * Copyright (c) 2008-2019 Hans Petter Selasky. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -42,19 +42,10 @@ struct usb_bus_msg {
};
/*
- * The following structure defines the USB statistics structure.
- */
-struct usb_bus_stat {
- uint32_t uds_requests[4];
-};
-
-/*
* The following structure defines an USB BUS. There is one USB BUS
* for every Host or Device controller.
*/
struct usb_bus {
- struct usb_bus_stat stats_err;
- struct usb_bus_stat stats_ok;
#if USB_HAVE_ROOT_MOUNT_HOLD
struct root_hold_token *bus_roothold;
#endif
@@ -131,6 +122,7 @@ struct usb_bus {
uint8_t do_probe; /* set if USB should be re-probed */
uint8_t no_explore; /* don't explore USB ports */
uint8_t dma_bits; /* number of DMA address lines */
+ uint8_t control_ep_quirk; /* need 64kByte buffer for data stage */
};
#endif /* _USB_BUS_H_ */
diff --git a/freebsd/sys/dev/usb/usb_device.h b/freebsd/sys/dev/usb/usb_device.h
index 1cf48ea1..691b2b38 100644
--- a/freebsd/sys/dev/usb/usb_device.h
+++ b/freebsd/sys/dev/usb/usb_device.h
@@ -2,7 +2,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (c) 2008 Hans Petter Selasky. All rights reserved.
+ * Copyright (c) 2008-2019 Hans Petter Selasky. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -177,10 +177,22 @@ union usb_device_scratch {
};
/*
+ * Helper structure to keep track of USB device statistics.
+ */
+struct usb_device_statistics {
+ uint32_t uds_requests[4];
+};
+
+/*
* The following structure defines an USB device. There exists one of
* these structures for every USB device.
*/
struct usb_device {
+ /* statistics */
+ struct usb_device_statistics stats_err;
+ struct usb_device_statistics stats_ok;
+ struct usb_device_statistics stats_cancelled;
+
/* generic clear stall message */
struct usb_udev_msg cs_msg[2];
struct sx enum_sx;
diff --git a/freebsd/sys/dev/usb/usb_generic.c b/freebsd/sys/dev/usb/usb_generic.c
index d45a0bf6..b3f12249 100644
--- a/freebsd/sys/dev/usb/usb_generic.c
+++ b/freebsd/sys/dev/usb/usb_generic.c
@@ -2229,10 +2229,9 @@ ugen_ioctl_post(struct usb_fifo *f, u_long cmd, void *addr, int fflags)
for (n = 0; n != 4; n++) {
u.stat->uds_requests_fail[n] =
- f->udev->bus->stats_err.uds_requests[n];
-
+ f->udev->stats_err.uds_requests[n];
u.stat->uds_requests_ok[n] =
- f->udev->bus->stats_ok.uds_requests[n];
+ f->udev->stats_ok.uds_requests[n];
}
break;
diff --git a/freebsd/sys/dev/usb/usb_ioctl.h b/freebsd/sys/dev/usb/usb_ioctl.h
index e7e63fb9..c4023cab 100644
--- a/freebsd/sys/dev/usb/usb_ioctl.h
+++ b/freebsd/sys/dev/usb/usb_ioctl.h
@@ -224,7 +224,7 @@ struct usb_fs_uninit {
} USB_IOCTL_STRUCT_ALIGN(1);
struct usb_fs_open {
-#define USB_FS_MAX_BUFSIZE (1 << 18)
+#define USB_FS_MAX_BUFSIZE (1 << 25) /* 32 MBytes */
uint32_t max_bufsize;
#define USB_FS_MAX_FRAMES (1U << 12)
#define USB_FS_MAX_FRAMES_PRE_SCALE (1U << 31) /* for ISOCHRONOUS transfers */
diff --git a/freebsd/sys/dev/usb/usb_transfer.c b/freebsd/sys/dev/usb/usb_transfer.c
index 7ea25337..2478d937 100644
--- a/freebsd/sys/dev/usb/usb_transfer.c
+++ b/freebsd/sys/dev/usb/usb_transfer.c
@@ -111,6 +111,33 @@ static const struct usb_config usb_control_ep_cfg[USB_CTRL_XFER_MAX] = {
},
};
+static const struct usb_config usb_control_ep_quirk_cfg[USB_CTRL_XFER_MAX] = {
+
+ /* This transfer is used for generic control endpoint transfers */
+
+ [0] = {
+ .type = UE_CONTROL,
+ .endpoint = 0x00, /* Control endpoint */
+ .direction = UE_DIR_ANY,
+ .bufsize = 65535, /* bytes */
+ .callback = &usb_request_callback,
+ .usb_mode = USB_MODE_DUAL, /* both modes */
+ },
+
+ /* This transfer is used for generic clear stall only */
+
+ [1] = {
+ .type = UE_CONTROL,
+ .endpoint = 0x00, /* Control pipe */
+ .direction = UE_DIR_ANY,
+ .bufsize = sizeof(struct usb_device_request),
+ .callback = &usb_do_clear_stall_callback,
+ .timeout = 1000, /* 1 second */
+ .interval = 50, /* 50ms */
+ .usb_mode = USB_MODE_HOST,
+ },
+};
+
/* function prototypes */
static void usbd_update_max_frame_size(struct usb_xfer *);
@@ -1051,7 +1078,8 @@ usbd_transfer_setup(struct usb_device *udev,
* context, else there is a chance of
* deadlock!
*/
- if (setup_start == usb_control_ep_cfg)
+ if (setup_start == usb_control_ep_cfg ||
+ setup_start == usb_control_ep_quirk_cfg)
info->done_p =
USB_BUS_CONTROL_XFER_PROC(udev->bus);
else if (xfer_mtx == &Giant)
@@ -2595,11 +2623,14 @@ usbd_transfer_done(struct usb_xfer *xfer, usb_error_t error)
}
#endif
/* keep some statistics */
- if (xfer->error) {
- info->bus->stats_err.uds_requests
+ if (xfer->error == USB_ERR_CANCELLED) {
+ info->udev->stats_cancelled.uds_requests
+ [xfer->endpoint->edesc->bmAttributes & UE_XFERTYPE]++;
+ } else if (xfer->error != USB_ERR_NORMAL_COMPLETION) {
+ info->udev->stats_err.uds_requests
[xfer->endpoint->edesc->bmAttributes & UE_XFERTYPE]++;
} else {
- info->bus->stats_ok.uds_requests
+ info->udev->stats_ok.uds_requests
[xfer->endpoint->edesc->bmAttributes & UE_XFERTYPE]++;
}
@@ -3179,7 +3210,8 @@ repeat:
*/
iface_index = 0;
if (usbd_transfer_setup(udev, &iface_index,
- udev->ctrl_xfer, usb_control_ep_cfg, USB_CTRL_XFER_MAX, NULL,
+ udev->ctrl_xfer, udev->bus->control_ep_quirk ?
+ usb_control_ep_quirk_cfg : usb_control_ep_cfg, USB_CTRL_XFER_MAX, NULL,
&udev->device_mtx)) {
DPRINTFN(0, "could not setup default "
"USB transfer\n");
diff --git a/freebsd/sys/fs/devfs/devfs_vnops.c b/freebsd/sys/fs/devfs/devfs_vnops.c
index 86808e21..f1027e6f 100644
--- a/freebsd/sys/fs/devfs/devfs_vnops.c
+++ b/freebsd/sys/fs/devfs/devfs_vnops.c
@@ -298,38 +298,27 @@ devfs_vptocnp(struct vop_vptocnp_args *ap)
if (error != 0)
return (error);
- i = *buflen;
+ if (vp->v_type != VCHR && vp->v_type != VDIR) {
+ error = ENOENT;
+ goto finished;
+ }
+
dd = vp->v_data;
+ if (vp->v_type == VDIR && dd == dmp->dm_rootdir) {
+ *dvp = vp;
+ vref(*dvp);
+ goto finished;
+ }
- if (vp->v_type == VCHR) {
- i -= strlen(dd->de_cdp->cdp_c.si_name);
- if (i < 0) {
- error = ENOMEM;
- goto finished;
- }
- bcopy(dd->de_cdp->cdp_c.si_name, buf + i,
- strlen(dd->de_cdp->cdp_c.si_name));
- de = dd->de_dir;
- } else if (vp->v_type == VDIR) {
- if (dd == dmp->dm_rootdir) {
- *dvp = vp;
- vref(*dvp);
- goto finished;
- }
- i -= dd->de_dirent->d_namlen;
- if (i < 0) {
- error = ENOMEM;
- goto finished;
- }
- bcopy(dd->de_dirent->d_name, buf + i,
- dd->de_dirent->d_namlen);
- de = dd;
- } else {
- error = ENOENT;
+ i = *buflen;
+ i -= dd->de_dirent->d_namlen;
+ if (i < 0) {
+ error = ENOMEM;
goto finished;
}
+ bcopy(dd->de_dirent->d_name, buf + i, dd->de_dirent->d_namlen);
*buflen = i;
- de = devfs_parent_dirent(de);
+ de = devfs_parent_dirent(dd);
if (de == NULL) {
error = ENOENT;
goto finished;
@@ -828,9 +817,16 @@ out:
error = ENOTTY;
if (error == 0 && com == TIOCSCTTY) {
- /* Do nothing if reassigning same control tty */
+ /*
+ * Do nothing if reassigning same control tty, or if the
+ * control tty has already disappeared. If it disappeared,
+ * it's because we were racing with TIOCNOTTY. TIOCNOTTY
+ * already took care of releasing the old vnode and we have
+ * nothing left to do.
+ */
sx_slock(&proctree_lock);
- if (td->td_proc->p_session->s_ttyvp == vp) {
+ if (td->td_proc->p_session->s_ttyvp == vp ||
+ td->td_proc->p_session->s_ttyp == NULL) {
sx_sunlock(&proctree_lock);
return (0);
}
@@ -938,8 +934,8 @@ devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
return (EIO);
- error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
- if (error)
+ error = vn_dir_check_exec(dvp, cnp);
+ if (error != 0)
return (error);
if (cnp->cn_namelen == 1 && *pname == '.') {
diff --git a/freebsd/sys/kern/kern_conf.c b/freebsd/sys/kern/kern_conf.c
index 8605cc43..e1553915 100644
--- a/freebsd/sys/kern/kern_conf.c
+++ b/freebsd/sys/kern/kern_conf.c
@@ -174,12 +174,6 @@ dev_rel(struct cdev *dev)
dev->si_refcount--;
KASSERT(dev->si_refcount >= 0,
("dev_rel(%s) gave negative count", devtoname(dev)));
-#if 0
- if (dev->si_usecount == 0 &&
- (dev->si_flags & SI_CHEAPCLONE) && (dev->si_flags & SI_NAMED))
- ;
- else
-#endif
if (dev->si_devsw == NULL && dev->si_refcount == 0) {
LIST_REMOVE(dev, si_list);
flag = 1;
@@ -601,20 +595,41 @@ newdev(struct make_dev_args *args, struct cdev *si)
mtx_assert(&devmtx, MA_OWNED);
csw = args->mda_devsw;
+ si2 = NULL;
if (csw->d_flags & D_NEEDMINOR) {
/* We may want to return an existing device */
LIST_FOREACH(si2, &csw->d_devs, si_list) {
if (dev2unit(si2) == args->mda_unit) {
dev_free_devlocked(si);
- return (si2);
+ si = si2;
+ break;
}
}
+
+ /*
+ * If we're returning an existing device, we should make sure
+ * it isn't already initialized. This would have been caught
+	 * in consumers anyway, but it's good to catch such a case
+ * early. We still need to complete initialization of the
+ * device, and we'll use whatever make_dev_args were passed in
+ * to do so.
+ */
+ KASSERT(si2 == NULL || (si2->si_flags & SI_NAMED) == 0,
+ ("make_dev() by driver %s on pre-existing device (min=%x, name=%s)",
+ args->mda_devsw->d_name, dev2unit(si2), devtoname(si2)));
}
si->si_drv0 = args->mda_unit;
- si->si_devsw = csw;
si->si_drv1 = args->mda_si_drv1;
si->si_drv2 = args->mda_si_drv2;
- LIST_INSERT_HEAD(&csw->d_devs, si, si_list);
+ /* Only push to csw->d_devs if it's not a cloned device. */
+ if (si2 == NULL) {
+ si->si_devsw = csw;
+ LIST_INSERT_HEAD(&csw->d_devs, si, si_list);
+ } else {
+ KASSERT(si->si_devsw == csw,
+ ("%s: inconsistent devsw between clone_create() and make_dev()",
+ __func__));
+ }
return (si);
}
@@ -832,17 +847,6 @@ make_dev_sv(struct make_dev_args *args1, struct cdev **dres,
dev_refl(dev);
if ((args.mda_flags & MAKEDEV_ETERNAL) != 0)
dev->si_flags |= SI_ETERNAL;
- if (dev->si_flags & SI_CHEAPCLONE &&
- dev->si_flags & SI_NAMED) {
- /*
- * This is allowed as it removes races and generally
- * simplifies cloning devices.
- * XXX: still ??
- */
- dev_unlock_and_free();
- *dres = dev;
- return (0);
- }
KASSERT(!(dev->si_flags & SI_NAMED),
("make_dev() by driver %s on pre-existing device (min=%x, name=%s)",
args.mda_devsw->d_name, dev2unit(dev), devtoname(dev)));
@@ -1592,7 +1596,6 @@ DB_SHOW_COMMAND(cdev, db_show_cdev)
SI_FLAG(SI_ETERNAL);
SI_FLAG(SI_ALIAS);
SI_FLAG(SI_NAMED);
- SI_FLAG(SI_CHEAPCLONE);
SI_FLAG(SI_CHILD);
SI_FLAG(SI_DUMPDEV);
SI_FLAG(SI_CLONELIST);
diff --git a/freebsd/sys/kern/kern_linker.c b/freebsd/sys/kern/kern_linker.c
index 07fbd418..4571cbed 100644
--- a/freebsd/sys/kern/kern_linker.c
+++ b/freebsd/sys/kern/kern_linker.c
@@ -640,6 +640,10 @@ linker_make_file(const char *pathname, linker_class_t lc)
lf->ndeps = 0;
lf->deps = NULL;
lf->loadcnt = ++loadcnt;
+#ifdef __arm__
+ lf->exidx_addr = 0;
+ lf->exidx_size = 0;
+#endif
STAILQ_INIT(&lf->common);
TAILQ_INIT(&lf->modules);
TAILQ_INSERT_TAIL(&linker_files, lf, link);
@@ -2077,14 +2081,18 @@ linker_load_module(const char *kldname, const char *modname,
*/
KASSERT(verinfo == NULL, ("linker_load_module: verinfo"
" is not NULL"));
+ /* check if root file system is not mounted */
+ if (rootvnode == NULL || curproc->p_fd->fd_rdir == NULL)
+ return (ENXIO);
pathname = linker_search_kld(kldname);
} else {
if (modlist_lookup2(modname, verinfo) != NULL)
return (EEXIST);
+ /* check if root file system is not mounted */
+ if (rootvnode == NULL || curproc->p_fd->fd_rdir == NULL)
+ return (ENXIO);
if (kldname != NULL)
pathname = strdup(kldname, M_LINKER);
- else if (rootvnode == NULL)
- pathname = NULL;
else
/*
* Need to find a KLD with required module
diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c
index b1c02570..d7d8a356 100644
--- a/freebsd/sys/kern/kern_mib.c
+++ b/freebsd/sys/kern/kern_mib.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_config.h>
#include <sys/param.h>
+#include <sys/boot.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
@@ -87,6 +88,8 @@ SYSCTL_ROOT_NODE(CTL_HW, hw, CTLFLAG_RW, 0,
#ifndef __rtems__
SYSCTL_ROOT_NODE(CTL_MACHDEP, machdep, CTLFLAG_RW, 0,
"machine dependent");
+SYSCTL_NODE(_machdep, OID_AUTO, mitigations, CTLFLAG_RW, 0,
+ "Machine dependent platform mitigations.");
SYSCTL_ROOT_NODE(CTL_USER, user, CTLFLAG_RW, 0,
"user-level");
SYSCTL_ROOT_NODE(CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0,
@@ -148,7 +151,7 @@ SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD,
SYSCTL_NULL_INT_PTR, 0, "Whether saved set-group/user ID is available");
#endif
-char kernelname[MAXPATHLEN] = "/boot/kernel/kernel"; /* XXX bloat */
+char kernelname[MAXPATHLEN] = PATH_KERNEL; /* XXX bloat */
SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW | CTLFLAG_MPSAFE,
kernelname, sizeof kernelname, "Name of kernel file booted");
diff --git a/freebsd/sys/kern/kern_mtxpool.c b/freebsd/sys/kern/kern_mtxpool.c
index bc47d826..a7fc1078 100644
--- a/freebsd/sys/kern/kern_mtxpool.c
+++ b/freebsd/sys/kern/kern_mtxpool.c
@@ -85,7 +85,7 @@ struct mtx_pool {
#define mtx_pool_next mtx_pool_header.mtxpool_next
#ifndef __rtems__
-struct mtx_pool *mtxpool_sleep;
+struct mtx_pool __read_frequently *mtxpool_sleep;
#endif /* __rtems__ */
#if UINTPTR_MAX == UINT64_MAX /* 64 bits */
diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c
index b7ba41ea..f529704a 100644
--- a/freebsd/sys/kern/kern_sysctl.c
+++ b/freebsd/sys/kern/kern_sysctl.c
@@ -940,13 +940,18 @@ SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL);
* (be aware though, that the proper interface isn't as obvious as it
* may seem, there are various conflicting requirements.
*
- * {0,0} printf the entire MIB-tree.
- * {0,1,...} return the name of the "..." OID.
- * {0,2,...} return the next OID.
- * {0,3} return the OID of the name in "new"
- * {0,4,...} return the kind & format info for the "..." OID.
- * {0,5,...} return the description of the "..." OID.
- * {0,6,...} return the aggregation label of the "..." OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_DEBUG} printf the entire MIB-tree.
+ * {CTL_SYSCTL, CTL_SYSCTL_NAME, ...} return the name of the "..."
+ * OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_NEXT, ...} return the next OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID} return the OID of the name in
+ * "new"
+ * {CTL_SYSCTL, CTL_SYSCTL_OIDFMT, ...} return the kind & format info
+ * for the "..." OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_OIDDESCR, ...} return the description of the
+ * "..." OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_OIDLABEL, ...} return the aggregation label of
+ * the "..." OID.
*/
#ifdef SYSCTL_DEBUG
@@ -1014,8 +1019,8 @@ sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
return (ENOENT);
}
-SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE,
- 0, 0, sysctl_sysctl_debug, "-", "");
+SYSCTL_PROC(_sysctl, CTL_SYSCTL_DEBUG, debug, CTLTYPE_STRING | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, 0, 0, sysctl_sysctl_debug, "-", "");
#endif
static int
@@ -1080,8 +1085,8 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return name data for nodes that we don't permit in
* capability mode.
*/
-static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
- sysctl_sysctl_name, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NAME, name, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_name, "");
static int
sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
@@ -1167,8 +1172,8 @@ sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return next data for nodes that we don't permit in
* capability mode.
*/
-static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
- sysctl_sysctl_next, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXT, next, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, "");
static int
name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
@@ -1254,9 +1259,9 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in
* capability mode.
*/
-SYSCTL_PROC(_sysctl, 3, name2oid,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE
- | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", "");
+SYSCTL_PROC(_sysctl, CTL_SYSCTL_NAME2OID, name2oid, CTLTYPE_INT | CTLFLAG_RW |
+ CTLFLAG_ANYBODY | CTLFLAG_MPSAFE | CTLFLAG_CAPRW, 0, 0,
+ sysctl_sysctl_name2oid, "I", "");
static int
sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
@@ -1284,8 +1289,8 @@ sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
}
-static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
- sysctl_sysctl_oidfmt, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDFMT, oidfmt, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, "");
static int
sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
@@ -1309,8 +1314,8 @@ sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
return (error);
}
-static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
- sysctl_sysctl_oiddescr, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDDESCR, oiddescr, CTLFLAG_RD |
+ CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, "");
static int
sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS)
@@ -1334,8 +1339,8 @@ sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS)
return (error);
}
-static SYSCTL_NODE(_sysctl, 6, oidlabel,
- CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDLABEL, oidlabel, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, "");
/*
* Default "handler" functions.
@@ -1830,8 +1835,8 @@ kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
size_t oidlen, plen;
int error;
- oid[0] = 0; /* sysctl internal magic */
- oid[1] = 3; /* name2oid */
+ oid[0] = CTL_SYSCTL;
+ oid[1] = CTL_SYSCTL_NAME2OID;
oidlen = sizeof(oid);
error = kernel_sysctl(td, oid, 2, oid, &oidlen,
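
With the magic numbers replaced by CTL_SYSCTL_* names, the meta-sysctl interface reads the same way from userland. A hypothetical userland lookup using {CTL_SYSCTL, CTL_SYSCTL_NAME2OID}, assuming those constants are exported through <sys/sysctl.h>:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <string.h>

/*
 * Translate "kern.hostname" into its numeric OID with the
 * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID} meta-sysctl.
 */
int
main(void)
{
	int qoid[2], oid[CTL_MAXNAME];
	size_t len = sizeof(oid);
	const char *name = "kern.hostname";

	qoid[0] = CTL_SYSCTL;
	qoid[1] = CTL_SYSCTL_NAME2OID;
	if (sysctl(qoid, 2, oid, &len, name, strlen(name)) == -1) {
		perror("sysctl");
		return (1);
	}
	printf("%s has a %zu-level OID\n", name, len / sizeof(int));
	return (0);
}
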
diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c
index 2f478afc..b9162020 100644
--- a/freebsd/sys/kern/kern_timeout.c
+++ b/freebsd/sys/kern/kern_timeout.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/callout.h>
+#include <sys/domainset.h>
#include <sys/file.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
@@ -135,7 +136,8 @@ SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_
* TODO:
* allocate more timeout table slots when table overflows.
*/
-u_int callwheelsize, callwheelmask;
+static u_int __read_mostly callwheelsize;
+static u_int __read_mostly callwheelmask;
#else /* __rtems__ */
#define callwheelsize (2 * ncallout)
#define callwheelmask (callwheelsize - 1)
@@ -234,7 +236,7 @@ struct callout_cpu cc_cpu;
#define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED)
#ifndef __rtems__
-static int timeout_cpu;
+static int __read_mostly timeout_cpu;
#else /* __rtems__ */
#define timeout_cpu 0
#endif /* __rtems__ */
@@ -426,8 +428,9 @@ callout_cpu_init(struct callout_cpu *cc, int cpu)
SLIST_INIT(&cc->cc_callfree);
cc->cc_inited = 1;
#ifndef __rtems__
- cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
- M_CALLOUT, M_WAITOK);
+ cc->cc_callwheel = malloc_domainset(sizeof(struct callout_list) *
+ callwheelsize, M_CALLOUT,
+ DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK);
#endif /* __rtems__ */
for (i = 0; i < callwheelsize; i++)
LIST_INIT(&cc->cc_callwheel[i]);
diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c
index bfeb1c34..e43d0030 100644
--- a/freebsd/sys/kern/subr_bus.c
+++ b/freebsd/sys/kern/subr_bus.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/conf.h>
+#include <sys/domainset.h>
#include <sys/eventhandler.h>
#include <sys/filio.h>
#include <sys/lock.h>
@@ -2564,7 +2565,7 @@ void
device_set_softc(device_t dev, void *softc)
{
if (dev->softc && !(dev->flags & DF_EXTERNALSOFTC))
- free(dev->softc, M_BUS_SC);
+ free_domain(dev->softc, M_BUS_SC);
dev->softc = softc;
if (dev->softc)
dev->flags |= DF_EXTERNALSOFTC;
@@ -2581,7 +2582,7 @@ device_set_softc(device_t dev, void *softc)
void
device_free_softc(void *softc)
{
- free(softc, M_BUS_SC);
+ free_domain(softc, M_BUS_SC);
}
/**
@@ -2830,6 +2831,11 @@ device_set_devclass_fixed(device_t dev, const char *classname)
int
device_set_driver(device_t dev, driver_t *driver)
{
+#ifndef __rtems__
+ int domain;
+ struct domainset *policy;
+#endif /* __rtems__ */
+
if (dev->state >= DS_ATTACHED)
return (EBUSY);
@@ -2837,7 +2843,7 @@ device_set_driver(device_t dev, driver_t *driver)
return (0);
if (dev->softc && !(dev->flags & DF_EXTERNALSOFTC)) {
- free(dev->softc, M_BUS_SC);
+ free_domain(dev->softc, M_BUS_SC);
dev->softc = NULL;
}
device_set_desc(dev, NULL);
@@ -2846,8 +2852,14 @@ device_set_driver(device_t dev, driver_t *driver)
if (driver) {
kobj_init((kobj_t) dev, (kobj_class_t) driver);
if (!(dev->flags & DF_EXTERNALSOFTC) && driver->size > 0) {
- dev->softc = malloc(driver->size, M_BUS_SC,
- M_NOWAIT | M_ZERO);
+#ifndef __rtems__
+ if (bus_get_domain(dev, &domain) == 0)
+ policy = DOMAINSET_PREF(domain);
+ else
+ policy = DOMAINSET_RR();
+#endif /* __rtems__ */
+ dev->softc = malloc_domainset(driver->size, M_BUS_SC,
+ policy, M_NOWAIT | M_ZERO);
if (!dev->softc) {
kobj_delete((kobj_t) dev, NULL);
kobj_init((kobj_t) dev, &null_class);
@@ -3771,6 +3783,22 @@ bus_generic_attach(device_t dev)
}
/**
+ * @brief Helper function for delaying attaching children
+ *
+ * Many buses can't run the bus transactions that their children need in order
+ * to probe and attach until interrupts and/or timers are running. This function
+ * delays their attach until interrupts and timers are enabled.
+ */
+int
+bus_delayed_attach_children(device_t dev)
+{
+ /* Probe and attach the bus children when interrupts are available */
+ config_intrhook_oneshot((ich_func_t)bus_generic_attach, dev);
+
+ return (0);
+}
+
+/**
* @brief Helper function for implementing DEVICE_DETACH()
*
* This function can be used to help implement the DEVICE_DETACH() for
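
The bus_delayed_attach_children() helper added above simply schedules bus_generic_attach() through config_intrhook_oneshot(), so a bus driver whose children need working interrupts or timers can defer them with a one-line attach tail. A hypothetical consumer (driver and function names invented):

/*
 * Hypothetical consumer: a bus whose children need running
 * interrupts/timers defers the generic attach.
 */
static int
mybus_attach(device_t dev)
{
	/* ... map resources, initialize the bridge hardware ... */

	/* Children are probed and attached once interrupts are available. */
	return (bus_delayed_attach_children(dev));
}
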
diff --git a/freebsd/sys/kern/subr_firmware.c b/freebsd/sys/kern/subr_firmware.c
index cc8bb691..2780963c 100644
--- a/freebsd/sys/kern/subr_firmware.c
+++ b/freebsd/sys/kern/subr_firmware.c
@@ -260,7 +260,6 @@ firmware_unregister(const char *imagename)
static void
loadimage(void *arg, int npending)
{
- struct thread *td = curthread;
char *imagename = arg;
struct priv_fw *fp;
linker_file_t result;
@@ -270,11 +269,6 @@ loadimage(void *arg, int npending)
mtx_lock(&firmware_mtx);
mtx_unlock(&firmware_mtx);
- if (td->td_proc->p_fd->fd_rdir == NULL) {
- printf("%s: root not mounted yet, no way to load image\n",
- imagename);
- goto done;
- }
error = linker_reference_module(imagename, NULL, &result);
if (error != 0) {
printf("%s: could not load firmware image, error %d\n",
diff --git a/freebsd/sys/kern/subr_gtaskqueue.c b/freebsd/sys/kern/subr_gtaskqueue.c
index 173cfa08..c061c6b0 100644
--- a/freebsd/sys/kern/subr_gtaskqueue.c
+++ b/freebsd/sys/kern/subr_gtaskqueue.c
@@ -63,26 +63,26 @@ TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
TASKQGROUP_DEFINE(config, 1, 1);
struct gtaskqueue_busy {
- struct gtask *tb_running;
- TAILQ_ENTRY(gtaskqueue_busy) tb_link;
+ struct gtask *tb_running;
+ u_int tb_seq;
+ LIST_ENTRY(gtaskqueue_busy) tb_link;
};
-static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
-
typedef void (*gtaskqueue_enqueue_fn)(void *context);
struct gtaskqueue {
STAILQ_HEAD(, gtask) tq_queue;
+ LIST_HEAD(, gtaskqueue_busy) tq_active;
+ u_int tq_seq;
+ int tq_callouts;
+ struct mtx_padalign tq_mutex;
gtaskqueue_enqueue_fn tq_enqueue;
void *tq_context;
char *tq_name;
- TAILQ_HEAD(, gtaskqueue_busy) tq_active;
- struct mtx tq_mutex;
struct thread **tq_threads;
int tq_tcount;
int tq_spin;
int tq_flags;
- int tq_callouts;
taskqueue_callback_fn tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
void *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
};
@@ -121,12 +121,11 @@ gtask_dump(struct gtask *gtask)
#endif
static __inline int
-TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
- int t)
+TQ_SLEEP(struct gtaskqueue *tq, void *p, const char *wm)
{
if (tq->tq_spin)
- return (msleep_spin(p, m, wm, t));
- return (msleep(p, m, pri, wm, t));
+ return (msleep_spin(p, (struct mtx *)&tq->tq_mutex, wm, 0));
+ return (msleep(p, &tq->tq_mutex, 0, wm, 0));
}
static struct gtaskqueue *
@@ -150,7 +149,7 @@ _gtaskqueue_create(const char *name, int mflags,
}
STAILQ_INIT(&queue->tq_queue);
- TAILQ_INIT(&queue->tq_active);
+ LIST_INIT(&queue->tq_active);
queue->tq_enqueue = enqueue;
queue->tq_context = context;
queue->tq_name = tq_name;
@@ -173,7 +172,7 @@ gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
wakeup(tq);
- TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
+ TQ_SLEEP(tq, pp, "gtq_destroy");
}
}
@@ -184,7 +183,7 @@ gtaskqueue_free(struct gtaskqueue *queue)
TQ_LOCK(queue);
queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
gtaskqueue_terminate(queue->tq_threads, queue);
- KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
+ KASSERT(LIST_EMPTY(&queue->tq_active), ("Tasks still running?"));
KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
mtx_destroy(&queue->tq_mutex);
free(queue->tq_threads, M_GTASKQUEUE);
@@ -291,7 +290,7 @@ gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
* have completed or are currently executing.
*/
while (t_barrier.ta_flags & TASK_ENQUEUED)
- TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
+ TQ_SLEEP(queue, &t_barrier, "gtq_qdrain");
}
/*
@@ -302,31 +301,24 @@ gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
static void
gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
{
- struct gtaskqueue_busy tb_marker, *tb_first;
+ struct gtaskqueue_busy *tb;
+ u_int seq;
- if (TAILQ_EMPTY(&queue->tq_active))
+ if (LIST_EMPTY(&queue->tq_active))
return;
/* Block taskq_terminate().*/
queue->tq_callouts++;
- /*
- * Wait for all currently executing taskqueue threads
- * to go idle.
- */
- tb_marker.tb_running = TB_DRAIN_WAITER;
- TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
- while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
- TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
- TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
-
- /*
- * Wakeup any other drain waiter that happened to queue up
- * without any intervening active thread.
- */
- tb_first = TAILQ_FIRST(&queue->tq_active);
- if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
- wakeup(tb_first);
+ /* Wait for any active task with sequence from the past. */
+ seq = queue->tq_seq;
+restart:
+ LIST_FOREACH(tb, &queue->tq_active, tb_link) {
+ if ((int)(tb->tb_seq - seq) <= 0) {
+ TQ_SLEEP(queue, tb->tb_running, "gtq_adrain");
+ goto restart;
+ }
+ }
/* Release taskqueue_terminate(). */
queue->tq_callouts--;
@@ -358,40 +350,27 @@ static void
gtaskqueue_run_locked(struct gtaskqueue *queue)
{
struct gtaskqueue_busy tb;
- struct gtaskqueue_busy *tb_first;
struct gtask *gtask;
KASSERT(queue != NULL, ("tq is NULL"));
TQ_ASSERT_LOCKED(queue);
tb.tb_running = NULL;
+ LIST_INSERT_HEAD(&queue->tq_active, &tb, tb_link);
- while (STAILQ_FIRST(&queue->tq_queue)) {
- TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
-
- /*
- * Carefully remove the first task from the queue and
- * clear its TASK_ENQUEUED flag
- */
- gtask = STAILQ_FIRST(&queue->tq_queue);
- KASSERT(gtask != NULL, ("task is NULL"));
+ while ((gtask = STAILQ_FIRST(&queue->tq_queue)) != NULL) {
STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
gtask->ta_flags &= ~TASK_ENQUEUED;
tb.tb_running = gtask;
+ tb.tb_seq = ++queue->tq_seq;
TQ_UNLOCK(queue);
KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
gtask->ta_func(gtask->ta_context);
TQ_LOCK(queue);
- tb.tb_running = NULL;
wakeup(gtask);
-
- TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
- tb_first = TAILQ_FIRST(&queue->tq_active);
- if (tb_first != NULL &&
- tb_first->tb_running == TB_DRAIN_WAITER)
- wakeup(tb_first);
}
+ LIST_REMOVE(&tb, tb_link);
}
static int
@@ -400,7 +379,7 @@ task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
struct gtaskqueue_busy *tb;
TQ_ASSERT_LOCKED(queue);
- TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
+ LIST_FOREACH(tb, &queue->tq_active, tb_link) {
if (tb->tb_running == gtask)
return (1);
}
@@ -433,7 +412,7 @@ static void
gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
{
while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
- TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
+ TQ_SLEEP(queue, gtask, "gtq_drain");
}
void
@@ -580,7 +559,7 @@ gtaskqueue_thread_loop(void *arg)
*/
if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
break;
- TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
+ TQ_SLEEP(tq, tq, "-");
}
gtaskqueue_run_locked(tq);
/*
@@ -606,7 +585,7 @@ gtaskqueue_thread_enqueue(void *context)
tqp = context;
tq = *tqp;
- wakeup_one(tq);
+ wakeup_any(tq);
}
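
Both the gtaskqueue drain above and the taskqueue drain below replace the TB_DRAIN_WAITER marker with per-task sequence stamps: the drain snapshots tq_seq and waits only for busy entries whose stamp is not newer than the snapshot. The signed subtraction keeps that comparison correct across u_int wrap-around; a minimal sketch of the predicate:

#include <stdbool.h>

/*
 * Sketch: true when sequence 'a' was taken at or before 'b', even if the
 * unsigned counter has wrapped in between.  This is the test the drains
 * apply to tb_seq against the tq_seq snapshot.
 */
static inline bool
seq_before_eq(unsigned int a, unsigned int b)
{
	return ((int)(a - b) <= 0);
}
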
diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c
index 67e62fc8..85912248 100644
--- a/freebsd/sys/kern/subr_taskqueue.c
+++ b/freebsd/sys/kern/subr_taskqueue.c
@@ -58,26 +58,27 @@ static void taskqueue_swi_enqueue(void *);
static void taskqueue_swi_giant_enqueue(void *);
struct taskqueue_busy {
- struct task *tb_running;
- TAILQ_ENTRY(taskqueue_busy) tb_link;
+ struct task *tb_running;
+ u_int tb_seq;
+ LIST_ENTRY(taskqueue_busy) tb_link;
};
-struct task * const TB_DRAIN_WAITER = (struct task *)0x1;
-
struct taskqueue {
STAILQ_HEAD(, task) tq_queue;
+ LIST_HEAD(, taskqueue_busy) tq_active;
+ struct task *tq_hint;
+ u_int tq_seq;
+ int tq_callouts;
+ struct mtx_padalign tq_mutex;
taskqueue_enqueue_fn tq_enqueue;
void *tq_context;
char *tq_name;
- TAILQ_HEAD(, taskqueue_busy) tq_active;
- struct mtx tq_mutex;
struct thread **tq_threads;
int tq_tcount;
#ifndef __rtems__
int tq_spin;
#endif /* __rtems__ */
int tq_flags;
- int tq_callouts;
taskqueue_callback_fn tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
void *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
};
@@ -134,14 +135,13 @@ _timeout_task_init(struct taskqueue *queue, struct timeout_task *timeout_task,
}
static __inline int
-TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
- int t)
+TQ_SLEEP(struct taskqueue *tq, void *p, const char *wm)
{
#ifndef __rtems__
if (tq->tq_spin)
- return (msleep_spin(p, m, wm, t));
+ return (msleep_spin(p, (struct mtx *)&tq->tq_mutex, wm, 0));
#endif /* __rtems__ */
- return (msleep(p, m, pri, wm, t));
+ return (msleep(p, &tq->tq_mutex, 0, wm, 0));
}
static struct taskqueue *
@@ -165,7 +165,7 @@ _taskqueue_create(const char *name, int mflags,
snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
STAILQ_INIT(&queue->tq_queue);
- TAILQ_INIT(&queue->tq_active);
+ LIST_INIT(&queue->tq_active);
queue->tq_enqueue = enqueue;
queue->tq_context = context;
queue->tq_name = tq_name;
@@ -223,7 +223,7 @@ taskqueue_terminate(struct thread **pp, struct taskqueue *tq)
while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
wakeup(tq);
- TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
+ TQ_SLEEP(tq, pp, "tq_destroy");
}
}
@@ -234,7 +234,7 @@ taskqueue_free(struct taskqueue *queue)
TQ_LOCK(queue);
queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
taskqueue_terminate(queue->tq_threads, queue);
- KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
+ KASSERT(LIST_EMPTY(&queue->tq_active), ("Tasks still running?"));
KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
mtx_destroy(&queue->tq_mutex);
free(queue->tq_threads, M_TASKQUEUE);
@@ -260,21 +260,30 @@ taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
}
/*
- * Optimise the case when all tasks have the same priority.
+	 * Optimise cases when all tasks use a small set of priorities.
+	 * In case of only one priority we always insert at the end.
+	 * In case of two, tq_hint typically gives the insertion point.
+	 * In case of more than two, tq_hint should halve the search.
*/
prev = STAILQ_LAST(&queue->tq_queue, task, ta_link);
if (!prev || prev->ta_priority >= task->ta_priority) {
STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
} else {
- prev = NULL;
- for (ins = STAILQ_FIRST(&queue->tq_queue); ins;
- prev = ins, ins = STAILQ_NEXT(ins, ta_link))
+ prev = queue->tq_hint;
+ if (prev && prev->ta_priority >= task->ta_priority) {
+ ins = STAILQ_NEXT(prev, ta_link);
+ } else {
+ prev = NULL;
+ ins = STAILQ_FIRST(&queue->tq_queue);
+ }
+ for (; ins; prev = ins, ins = STAILQ_NEXT(ins, ta_link))
if (ins->ta_priority < task->ta_priority)
break;
- if (prev)
+ if (prev) {
STAILQ_INSERT_AFTER(&queue->tq_queue, prev, task, ta_link);
- else
+ queue->tq_hint = task;
+ } else
STAILQ_INSERT_HEAD(&queue->tq_queue, task, ta_link);
}
@@ -393,6 +402,7 @@ taskqueue_drain_tq_queue(struct taskqueue *queue)
*/
TASK_INIT(&t_barrier, USHRT_MAX, taskqueue_task_nop_fn, &t_barrier);
STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
+ queue->tq_hint = &t_barrier;
t_barrier.ta_pending = 1;
/*
@@ -400,7 +410,7 @@ taskqueue_drain_tq_queue(struct taskqueue *queue)
* have completed or are currently executing.
*/
while (t_barrier.ta_pending != 0)
- TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
+ TQ_SLEEP(queue, &t_barrier, "tq_qdrain");
return (1);
}
@@ -412,31 +422,24 @@ taskqueue_drain_tq_queue(struct taskqueue *queue)
static int
taskqueue_drain_tq_active(struct taskqueue *queue)
{
- struct taskqueue_busy tb_marker, *tb_first;
+ struct taskqueue_busy *tb;
+ u_int seq;
- if (TAILQ_EMPTY(&queue->tq_active))
+ if (LIST_EMPTY(&queue->tq_active))
return (0);
/* Block taskq_terminate().*/
queue->tq_callouts++;
- /*
- * Wait for all currently executing taskqueue threads
- * to go idle.
- */
- tb_marker.tb_running = TB_DRAIN_WAITER;
- TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
- while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
- TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
- TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
-
- /*
- * Wakeup any other drain waiter that happened to queue up
- * without any intervening active thread.
- */
- tb_first = TAILQ_FIRST(&queue->tq_active);
- if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
- wakeup(tb_first);
+ /* Wait for any active task with sequence from the past. */
+ seq = queue->tq_seq;
+restart:
+ LIST_FOREACH(tb, &queue->tq_active, tb_link) {
+ if ((int)(tb->tb_seq - seq) <= 0) {
+ TQ_SLEEP(queue, tb->tb_running, "tq_adrain");
+ goto restart;
+ }
+ }
/* Release taskqueue_terminate(). */
queue->tq_callouts--;
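In the reworked taskqueue_drain_tq_active() above, every running task records a monotonically increasing sequence number (tb_seq, taken from tq_seq when the task starts), and the drain only sleeps on entries whose sequence is at or before the snapshot it took, so tasks queued after the drain began are ignored. The comparison is written as a signed difference so it stays correct when the unsigned counter wraps; a small standalone check of that expression, in the same form as the hunk, is shown below.

#include <stdio.h>

/* An active entry is "from the past" if the signed difference is <= 0. */
static int
seq_is_past(unsigned int tb_seq, unsigned int snapshot)
{
	return ((int)(tb_seq - snapshot) <= 0);
}

int
main(void)
{
	printf("%d\n", seq_is_past(5U, 7U));		/* 1: started before the snapshot */
	printf("%d\n", seq_is_past(9U, 7U));		/* 0: started after, not waited for */
	printf("%d\n", seq_is_past(0xfffffffeU, 3U));	/* 1: still "past" across wraparound */
	return (0);
}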
@@ -469,42 +472,31 @@ static void
taskqueue_run_locked(struct taskqueue *queue)
{
struct taskqueue_busy tb;
- struct taskqueue_busy *tb_first;
struct task *task;
int pending;
KASSERT(queue != NULL, ("tq is NULL"));
TQ_ASSERT_LOCKED(queue);
tb.tb_running = NULL;
+ LIST_INSERT_HEAD(&queue->tq_active, &tb, tb_link);
- while (STAILQ_FIRST(&queue->tq_queue)) {
- TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
-
- /*
- * Carefully remove the first task from the queue and
- * zero its pending count.
- */
- task = STAILQ_FIRST(&queue->tq_queue);
- KASSERT(task != NULL, ("task is NULL"));
+ while ((task = STAILQ_FIRST(&queue->tq_queue)) != NULL) {
STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
+ if (queue->tq_hint == task)
+ queue->tq_hint = NULL;
pending = task->ta_pending;
task->ta_pending = 0;
tb.tb_running = task;
+ tb.tb_seq = ++queue->tq_seq;
TQ_UNLOCK(queue);
KASSERT(task->ta_func != NULL, ("task->ta_func is NULL"));
task->ta_func(task->ta_context, pending);
TQ_LOCK(queue);
- tb.tb_running = NULL;
wakeup(task);
-
- TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
- tb_first = TAILQ_FIRST(&queue->tq_active);
- if (tb_first != NULL &&
- tb_first->tb_running == TB_DRAIN_WAITER)
- wakeup(tb_first);
}
+ LIST_REMOVE(&tb, tb_link);
}
void
@@ -522,7 +514,7 @@ task_is_running(struct taskqueue *queue, struct task *task)
struct taskqueue_busy *tb;
TQ_ASSERT_LOCKED(queue);
- TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
+ LIST_FOREACH(tb, &queue->tq_active, tb_link) {
if (tb->tb_running == task)
return (1);
}
@@ -551,8 +543,11 @@ taskqueue_cancel_locked(struct taskqueue *queue, struct task *task,
u_int *pendp)
{
- if (task->ta_pending > 0)
+ if (task->ta_pending > 0) {
STAILQ_REMOVE(&queue->tq_queue, task, task, ta_link);
+ if (queue->tq_hint == task)
+ queue->tq_hint = NULL;
+ }
if (pendp != NULL)
*pendp = task->ta_pending;
task->ta_pending = 0;
@@ -603,7 +598,7 @@ taskqueue_drain(struct taskqueue *queue, struct task *task)
TQ_LOCK(queue);
while (task->ta_pending != 0 || task_is_running(queue, task))
- TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0);
+ TQ_SLEEP(queue, task, "tq_drain");
TQ_UNLOCK(queue);
}
@@ -687,7 +682,7 @@ taskqueue_swi_giant_run(void *dummy)
static int
_taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
- cpuset_t *mask, const char *name, va_list ap)
+ cpuset_t *mask, struct proc *p, const char *name, va_list ap)
{
char ktname[MAXCOMLEN + 1];
struct thread *td;
@@ -709,10 +704,10 @@ _taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
for (i = 0; i < count; i++) {
if (count == 1)
- error = kthread_add(taskqueue_thread_loop, tqp, NULL,
+ error = kthread_add(taskqueue_thread_loop, tqp, p,
&tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
else
- error = kthread_add(taskqueue_thread_loop, tqp, NULL,
+ error = kthread_add(taskqueue_thread_loop, tqp, p,
&tq->tq_threads[i], RFSTOPPED, 0,
"%s_%d", ktname, i);
if (error) {
@@ -766,7 +761,20 @@ taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
int error;
va_start(ap, name);
- error = _taskqueue_start_threads(tqp, count, pri, NULL, name, ap);
+ error = _taskqueue_start_threads(tqp, count, pri, NULL, NULL, name, ap);
+ va_end(ap);
+ return (error);
+}
+
+int
+taskqueue_start_threads_in_proc(struct taskqueue **tqp, int count, int pri,
+ struct proc *proc, const char *name, ...)
+{
+ va_list ap;
+ int error;
+
+ va_start(ap, name);
+ error = _taskqueue_start_threads(tqp, count, pri, NULL, proc, name, ap);
va_end(ap);
return (error);
}
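The new taskqueue_start_threads_in_proc() differs from taskqueue_start_threads() only in letting the caller attach the worker kthreads to an existing process instead of the default. A hedged kernel-side fragment of how a consumer might call it; the softc, sc_tq, mydrv_proc and the PWAIT priority are illustrative assumptions, not part of this patch:

/* Fragment only: assumes a driver softc with an sc_tq member and a
 * struct proc *mydrv_proc obtained elsewhere. */
sc->sc_tq = taskqueue_create("mydrv", M_WAITOK,
    taskqueue_thread_enqueue, &sc->sc_tq);
error = taskqueue_start_threads_in_proc(&sc->sc_tq, 1, PWAIT,
    mydrv_proc, "%s taskq", device_get_nameunit(sc->sc_dev));
if (error != 0)
	device_printf(sc->sc_dev, "cannot start taskqueue threads\n");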
@@ -779,7 +787,7 @@ taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count, int pri,
int error;
va_start(ap, name);
- error = _taskqueue_start_threads(tqp, count, pri, mask, name, ap);
+ error = _taskqueue_start_threads(tqp, count, pri, mask, NULL, name, ap);
va_end(ap);
return (error);
}
@@ -815,7 +823,7 @@ taskqueue_thread_loop(void *arg)
*/
if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
break;
- TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
+ TQ_SLEEP(tq, tq, "-");
}
taskqueue_run_locked(tq);
/*
diff --git a/freebsd/sys/kern/sys_pipe.c b/freebsd/sys/kern/sys_pipe.c
index e20c67ea..aef35fc1 100755
--- a/freebsd/sys/kern/sys_pipe.c
+++ b/freebsd/sys/kern/sys_pipe.c
@@ -1135,15 +1135,8 @@ retry:
goto error1;
}
- while (wpipe->pipe_map.cnt != 0) {
- if (wpipe->pipe_state & PIPE_EOF) {
- wpipe->pipe_map.cnt = 0;
- pipe_destroy_write_buffer(wpipe);
- pipeselwakeup(wpipe);
- pipeunlock(wpipe);
- error = EPIPE;
- goto error1;
- }
+ while (wpipe->pipe_map.cnt != 0 &&
+ (wpipe->pipe_state & PIPE_EOF) == 0) {
if (wpipe->pipe_state & PIPE_WANTR) {
wpipe->pipe_state &= ~PIPE_WANTR;
wakeup(wpipe);
@@ -1158,12 +1151,16 @@ retry:
break;
}
- if (wpipe->pipe_state & PIPE_EOF)
+ if ((wpipe->pipe_state & PIPE_EOF) != 0) {
+ wpipe->pipe_map.cnt = 0;
+ pipe_destroy_write_buffer(wpipe);
+ pipeselwakeup(wpipe);
error = EPIPE;
- if (error == EINTR || error == ERESTART)
+ } else if (error == EINTR || error == ERESTART) {
pipe_clone_write_buffer(wpipe);
- else
+ } else {
pipe_destroy_write_buffer(wpipe);
+ }
pipeunlock(wpipe);
KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0,
("pipe %p leaked PIPE_DIRECTW", wpipe));
diff --git a/freebsd/sys/kern/tty.c b/freebsd/sys/kern/tty.c
index 88b928b9..d0b5633c 100644
--- a/freebsd/sys/kern/tty.c
+++ b/freebsd/sys/kern/tty.c
@@ -1184,6 +1184,7 @@ void
tty_rel_gone(struct tty *tp)
{
+ tty_lock_assert(tp, MA_OWNED);
MPASS(!tty_gone(tp));
/* Simulate carrier removal. */
@@ -1198,6 +1199,73 @@ tty_rel_gone(struct tty *tp)
tty_rel_free(tp);
}
+#ifndef __rtems__
+static int
+tty_drop_ctty(struct tty *tp, struct proc *p)
+{
+ struct session *session;
+ struct vnode *vp;
+
+ /*
+ * This looks terrible, but it's generally safe as long as the tty
+ * hasn't gone away while we had the lock dropped. All of our sanity
+ * checking that this operation is OK happens after we've picked it back
+ * up, so other state changes are generally not fatal and the potential
+ * for this particular operation to happen out-of-order in a
+ * multithreaded scenario is likely a non-issue.
+ */
+ tty_unlock(tp);
+ sx_xlock(&proctree_lock);
+ tty_lock(tp);
+ if (tty_gone(tp)) {
+ sx_xunlock(&proctree_lock);
+ return (ENODEV);
+ }
+
+ /*
+ * If the session doesn't have a controlling TTY, or if we weren't
+ * invoked on the controlling TTY, we'll return ENOIOCTL as we've
+ * historically done.
+ */
+ session = p->p_session;
+ if (session->s_ttyp == NULL || session->s_ttyp != tp) {
+ sx_xunlock(&proctree_lock);
+ return (ENOTTY);
+ }
+
+ if (!SESS_LEADER(p)) {
+ sx_xunlock(&proctree_lock);
+ return (EPERM);
+ }
+
+ PROC_LOCK(p);
+ SESS_LOCK(session);
+ vp = session->s_ttyvp;
+ session->s_ttyp = NULL;
+ session->s_ttyvp = NULL;
+ session->s_ttydp = NULL;
+ SESS_UNLOCK(session);
+
+ tp->t_sessioncnt--;
+ p->p_flag &= ~P_CONTROLT;
+ PROC_UNLOCK(p);
+ sx_xunlock(&proctree_lock);
+
+ /*
+ * If we did have a vnode, release our reference. Ordinarily we manage
+ * these at the devfs layer, but we can't necessarily know that we were
+ * invoked on the vnode referenced in the session (i.e. the vnode we
+ * hold a reference to). We explicitly don't check VBAD/VI_DOOMED here
+ * to avoid a vnode leak -- in circumstances elsewhere where we'd hit a
+ * VI_DOOMED vnode, release has been deferred until the controlling TTY
+ * is either changed or released.
+ */
+ if (vp != NULL)
+ vrele(vp);
+ return (0);
+}
+#endif /* __rtems__ */
+
/*
* Exposing information about current TTY's through sysctl
*/
@@ -1738,6 +1806,10 @@ tty_generic_ioctl(struct tty *tp, u_long cmd, void *data, int fflag,
*(int *)data = NO_PID;
#endif /* __rtems__ */
return (0);
+#ifndef __rtems__
+ case TIOCNOTTY:
+ return (tty_drop_ctty(tp, td->td_proc));
+#endif /* __rtems__ */
case TIOCSCTTY: {
#ifndef __rtems__
struct proc *p = td->td_proc;
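The TIOCNOTTY case added above routes to the new tty_drop_ctty(), so a session leader can detach from its controlling terminal through the classic ioctl; with this change a non-leader gets EPERM and a terminal that is not the caller's controlling tty gets ENOTTY. A minimal userland sketch of the call (plain POSIX environment, not RTEMS, since the case is compiled out there):

#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd;

	fd = open("/dev/tty", O_RDWR);
	if (fd == -1) {
		perror("open /dev/tty");
		return (1);
	}
	/* Ask the kernel to drop this session's controlling terminal. */
	if (ioctl(fd, TIOCNOTTY) == -1)
		perror("ioctl(TIOCNOTTY)");
	close(fd);
	return (0);
}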
diff --git a/freebsd/sys/kern/uipc_mbuf2.c b/freebsd/sys/kern/uipc_mbuf2.c
index 6f98b0a2..7a0b9cf0 100644
--- a/freebsd/sys/kern/uipc_mbuf2.c
+++ b/freebsd/sys/kern/uipc_mbuf2.c
@@ -103,8 +103,8 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
int writable;
/* check invalid arguments. */
- if (m == NULL)
- panic("m == NULL in m_pulldown()");
+ KASSERT(m != NULL, ("%s: fix caller: m is NULL off %d len %d offp %p\n",
+ __func__, off, len, offp));
if (len > MCLBYTES) {
m_freem(m);
return NULL; /* impossible */
diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c
index fc4ee85d..13fca66d 100644
--- a/freebsd/sys/kern/uipc_usrreq.c
+++ b/freebsd/sys/kern/uipc_usrreq.c
@@ -2504,7 +2504,8 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
goto out;
}
- controlp = &(*controlp)->m_next;
+ if (*controlp != NULL)
+ controlp = &(*controlp)->m_next;
if (CMSG_SPACE(datalen) < clen) {
clen -= CMSG_SPACE(datalen);
cm = (struct cmsghdr *)
diff --git a/freebsd/sys/net/dlt.h b/freebsd/sys/net/dlt.h
index 639e5a7f..31ad4e01 100644
--- a/freebsd/sys/net/dlt.h
+++ b/freebsd/sys/net/dlt.h
@@ -769,8 +769,17 @@
* IPMB packet for IPMI, beginning with the I2C slave address, followed
* by the netFn and LUN, etc.. Requested by Chanthy Toeung
* <chanthy.toeung@ca.kontron.com>.
+ *
+ * XXX - this used to be called DLT_IPMB, back when we got the
+ * impression from the email thread requesting it that the packet
+ * had no extra 2-byte header. We've renamed it; if anybody used
+ * DLT_IPMB and assumed no 2-byte header, this will cause the compile
+ * to fail, at which point we'll have to figure out what to do about
+ * the two header types using the same DLT_/LINKTYPE_ value. If that
+ * doesn't happen, we'll assume nobody used it and that the redefinition
+ * is safe.
*/
-#define DLT_IPMB 199
+#define DLT_IPMB_KONTRON 199
/*
* Juniper-private data link type, as per request from
@@ -1365,6 +1374,11 @@
#define DLT_DISPLAYPORT_AUX 275
/*
+ * Linux cooked sockets v2.
+ */
+#define DLT_LINUX_SLL2 276
+
+/*
* In case the code that includes this file (directly or indirectly)
* has also included OS files that happen to define DLT_MATCHING_MAX,
* with a different value (perhaps because that OS hasn't picked up
@@ -1374,7 +1388,7 @@
#ifdef DLT_MATCHING_MAX
#undef DLT_MATCHING_MAX
#endif
-#define DLT_MATCHING_MAX 275 /* highest value in the "matching" range */
+#define DLT_MATCHING_MAX 276 /* highest value in the "matching" range */
/*
* DLT and savefile link type values are split into a class and
diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c
index d57e6983..37e1581b 100644
--- a/freebsd/sys/net/if.c
+++ b/freebsd/sys/net/if.c
@@ -34,6 +34,7 @@
* $FreeBSD$
*/
+#include <rtems/bsd/local/opt_bpf.h>
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_inet.h>
@@ -1253,16 +1254,20 @@ static void
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{
struct if_clone *ifc;
+#ifdef DEV_BPF
u_int bif_dlt, bif_hdrlen;
+#endif
void *old;
int rc;
+#ifdef DEV_BPF
/*
* if_detach_internal() will call the eventhandler to notify
* interface departure. That will detach if_bpf. We need to
* safe the dlt and hdrlen so we can re-attach it later.
*/
bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);
+#endif
/*
* Detach from current vnet, but preserve LLADDR info, do not
@@ -1309,8 +1314,10 @@ if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
if_attach_internal(ifp, 1, ifc);
+#ifdef DEV_BPF
if (ifp->if_bpf == NULL)
bpfattach(ifp, bif_dlt, bif_hdrlen);
+#endif
CURVNET_RESTORE();
}
@@ -1447,14 +1454,12 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
return (EEXIST);
}
- if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
- M_NOWAIT)) == NULL) {
+ if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL) {
IFNET_WUNLOCK();
return (ENOMEM);
}
- if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
- M_TEMP, M_NOWAIT)) == NULL) {
+ if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
free(ifgl, M_TEMP);
IFNET_WUNLOCK();
return (ENOMEM);
@@ -1465,8 +1470,7 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
break;
if (ifg == NULL) {
- if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
- M_TEMP, M_NOWAIT)) == NULL) {
+ if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL) {
free(ifgl, M_TEMP);
free(ifgm, M_TEMP);
IFNET_WUNLOCK();
@@ -1498,39 +1502,36 @@ if_addgroup(struct ifnet *ifp, const char *groupname)
}
/*
- * Remove a group from an interface
+ * Helper function to remove a group from an interface.  Expects the global
+ * ifnet lock to be write-locked, and drops it before returning.
*/
-int
-if_delgroup(struct ifnet *ifp, const char *groupname)
+static void
+_if_delgroup_locked(struct ifnet *ifp, struct ifg_list *ifgl,
+ const char *groupname)
{
- struct ifg_list *ifgl;
- struct ifg_member *ifgm;
- int freeifgl;
+ struct ifg_member *ifgm;
+ bool freeifgl;
- IFNET_WLOCK();
- CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
- if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
- break;
- if (ifgl == NULL) {
- IFNET_WUNLOCK();
- return (ENOENT);
- }
+ IFNET_WLOCK_ASSERT();
- freeifgl = 0;
IF_ADDR_WLOCK(ifp);
CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next);
IF_ADDR_WUNLOCK(ifp);
- CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
- if (ifgm->ifgm_ifp == ifp)
+ CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) {
+ if (ifgm->ifgm_ifp == ifp) {
+ CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm,
+ ifg_member, ifgm_next);
break;
-
- if (ifgm != NULL)
- CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifg_member, ifgm_next);
+ }
+ }
if (--ifgl->ifgl_group->ifg_refcnt == 0) {
- CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group, ifg_next);
- freeifgl = 1;
+ CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group,
+ ifg_next);
+ freeifgl = true;
+ } else {
+ freeifgl = false;
}
IFNET_WUNLOCK();
@@ -1543,6 +1544,26 @@ if_delgroup(struct ifnet *ifp, const char *groupname)
free(ifgl, M_TEMP);
EVENTHANDLER_INVOKE(group_change_event, groupname);
+}
+
+/*
+ * Remove a group from an interface
+ */
+int
+if_delgroup(struct ifnet *ifp, const char *groupname)
+{
+ struct ifg_list *ifgl;
+
+ IFNET_WLOCK();
+ CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
+ if (strcmp(ifgl->ifgl_group->ifg_group, groupname) == 0)
+ break;
+ if (ifgl == NULL) {
+ IFNET_WUNLOCK();
+ return (ENOENT);
+ }
+
+ _if_delgroup_locked(ifp, ifgl, groupname);
return (0);
}
@@ -1553,44 +1574,13 @@ if_delgroup(struct ifnet *ifp, const char *groupname)
static void
if_delgroups(struct ifnet *ifp)
{
- struct ifg_list *ifgl;
- struct ifg_member *ifgm;
+ struct ifg_list *ifgl;
char groupname[IFNAMSIZ];
- int ifglfree;
IFNET_WLOCK();
- while (!CK_STAILQ_EMPTY(&ifp->if_groups)) {
- ifgl = CK_STAILQ_FIRST(&ifp->if_groups);
-
+ while ((ifgl = CK_STAILQ_FIRST(&ifp->if_groups)) != NULL) {
strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ);
-
- IF_ADDR_WLOCK(ifp);
- CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next);
- IF_ADDR_WUNLOCK(ifp);
-
- CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
- if (ifgm->ifgm_ifp == ifp)
- break;
-
- if (ifgm != NULL)
- CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifg_member,
- ifgm_next);
- ifglfree = 0;
- if (--ifgl->ifgl_group->ifg_refcnt == 0) {
- CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group, ifg_next);
- ifglfree = 1;
- }
-
- IFNET_WUNLOCK();
- epoch_wait_preempt(net_epoch_preempt);
- free(ifgm, M_TEMP);
- if (ifglfree) {
- EVENTHANDLER_INVOKE(group_detach_event,
- ifgl->ifgl_group);
- free(ifgl->ifgl_group, M_TEMP);
- }
- EVENTHANDLER_INVOKE(group_change_event, groupname);
-
+ _if_delgroup_locked(ifp, ifgl, groupname);
IFNET_WLOCK();
}
IFNET_WUNLOCK();
@@ -1678,7 +1668,7 @@ if_getgroupmembers(struct ifgroupreq *ifgr)
IFNET_RLOCK();
CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
- if (!strcmp(ifg->ifg_group, ifgr->ifgr_name))
+ if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
break;
if (ifg == NULL) {
IFNET_RUNLOCK();
@@ -1957,10 +1947,13 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
- if (error != 0 &&
- !(cmd == RTM_ADD && error == EEXIST) &&
- !(cmd == RTM_DELETE && error == ENOENT))
- if_printf(ifp, "%s failed: %d\n", otype, error);
+ if (error == 0 ||
+ (cmd == RTM_ADD && error == EEXIST) ||
+ (cmd == RTM_DELETE && (error == ENOENT || error == ESRCH)))
+ return (error);
+
+ log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
+ __func__, otype, if_name(ifp), error);
return (error);
}
@@ -2951,6 +2944,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
case SIOCGIFGENERIC:
case SIOCGIFRSSKEY:
case SIOCGIFRSSHASH:
+ case SIOCGIFDOWNREASON:
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
error = (*ifp->if_ioctl)(ifp, cmd, data);
diff --git a/freebsd/sys/net/if_bridge.c b/freebsd/sys/net/if_bridge.c
index 4bfb67a8..2544c4f5 100644
--- a/freebsd/sys/net/if_bridge.c
+++ b/freebsd/sys/net/if_bridge.c
@@ -137,6 +137,14 @@ __FBSDID("$FreeBSD$");
#include <net/route.h>
+#ifdef INET6
+/*
+ * XXX: declared here to avoid including many inet6-related files;
+ * this should probably be generalized.
+ */
+extern void nd6_setmtu(struct ifnet *);
+#endif
+
/*
* Size of the route hash table. Must be a power of two.
*/
@@ -774,7 +782,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
} args;
struct ifdrv *ifd = (struct ifdrv *) data;
const struct bridge_control *bc;
- int error = 0;
+ int error = 0, oldmtu;
switch (cmd) {
@@ -820,12 +828,24 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
+ oldmtu = ifp->if_mtu;
BRIDGE_LOCK(sc);
error = (*bc->bc_func)(sc, &args);
BRIDGE_UNLOCK(sc);
if (error)
break;
+ /*
+ * Bridge MTU may change during addition of the first port.
+ * If it did, do network layer specific procedure.
+ */
+ if (ifp->if_mtu != oldmtu) {
+#ifdef INET6
+ nd6_setmtu(ifp);
+#endif
+ rt_updatemtu(ifp);
+ }
+
if (bc->bc_flags & BC_F_COPYOUT)
error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
diff --git a/freebsd/sys/net/if_clone.c b/freebsd/sys/net/if_clone.c
index 1fa79766..ac59b635 100644
--- a/freebsd/sys/net/if_clone.c
+++ b/freebsd/sys/net/if_clone.c
@@ -213,6 +213,18 @@ if_clone_create(char *name, size_t len, caddr_t params)
return (if_clone_createif(ifc, name, len, params));
}
+void
+if_clone_addif(struct if_clone *ifc, struct ifnet *ifp)
+{
+
+ if ((ifc->ifc_flags & IFC_NOGROUP) == 0)
+ if_addgroup(ifp, ifc->ifc_name);
+
+ IF_CLONE_LOCK(ifc);
+ IFC_IFLIST_INSERT(ifc, ifp);
+ IF_CLONE_UNLOCK(ifc);
+}
+
/*
* Create a clone network interface.
*/
@@ -235,12 +247,7 @@ if_clone_createif(struct if_clone *ifc, char *name, size_t len, caddr_t params)
if (ifp == NULL)
panic("%s: lookup failed for %s", __func__, name);
- if ((ifc->ifc_flags & IFC_NOGROUP) == 0)
- if_addgroup(ifp, ifc->ifc_name);
-
- IF_CLONE_LOCK(ifc);
- IFC_IFLIST_INSERT(ifc, ifp);
- IF_CLONE_UNLOCK(ifc);
+ if_clone_addif(ifc, ifp);
}
return (err);
diff --git a/freebsd/sys/net/if_clone.h b/freebsd/sys/net/if_clone.h
index 5dceacf6..b721e294 100644
--- a/freebsd/sys/net/if_clone.h
+++ b/freebsd/sys/net/if_clone.h
@@ -79,7 +79,8 @@ int if_clone_list(struct if_clonereq *);
struct if_clone *if_clone_findifc(struct ifnet *);
void if_clone_addgroup(struct ifnet *, struct if_clone *);
-/* The below interface used only by epair(4). */
+/* The below interfaces are used only by epair(4). */
+void if_clone_addif(struct if_clone *, struct ifnet *);
int if_clone_destroyif(struct if_clone *, struct ifnet *);
#endif /* _KERNEL */
diff --git a/freebsd/sys/net/if_epair.c b/freebsd/sys/net/if_epair.c
index 69ff3efc..f4a875b7 100644
--- a/freebsd/sys/net/if_epair.c
+++ b/freebsd/sys/net/if_epair.c
@@ -713,6 +713,21 @@ epair_clone_match(struct if_clone *ifc, const char *name)
return (1);
}
+static void
+epair_clone_add(struct if_clone *ifc, struct epair_softc *scb)
+{
+ struct ifnet *ifp;
+ uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
+
+ ifp = scb->ifp;
+ /* Copy epairNa etheraddr and change the last byte. */
+ memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
+ eaddr[5] = 0x0b;
+ ether_ifattach(ifp, eaddr);
+
+ if_clone_addif(ifc, ifp);
+}
+
static int
epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
{
@@ -725,24 +740,6 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
uint32_t hash;
uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */
- /*
- * We are abusing params to create our second interface.
- * Actually we already created it and called if_clone_create()
- * for it to do the official insertion procedure the moment we knew
- * it cannot fail anymore. So just do attach it here.
- */
- if (params) {
- scb = (struct epair_softc *)params;
- ifp = scb->ifp;
- /* Copy epairNa etheraddr and change the last byte. */
- memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
- eaddr[5] = 0x0b;
- ether_ifattach(ifp, eaddr);
- /* Correctly set the name for the cloner list. */
- strlcpy(name, ifp->if_xname, len);
- return (0);
- }
-
/* Try to see if a special unit was requested. */
error = ifc_name2unit(name, &unit);
if (error != 0)
@@ -893,10 +890,11 @@ epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
if_setsendqready(ifp);
/* We need to play some tricks here for the second interface. */
strlcpy(name, epairname, len);
- error = if_clone_create(name, len, (caddr_t)scb);
- if (error)
- panic("%s: if_clone_create() for our 2nd iface failed: %d",
- __func__, error);
+
+ /* Correctly set the name for the cloner list. */
+ strlcpy(name, scb->ifp->if_xname, len);
+ epair_clone_add(ifc, scb);
+
scb->if_qflush = ifp->if_qflush;
ifp->if_qflush = epair_qflush;
ifp->if_transmit = epair_transmit;
diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c
index af6f1667..2d133ec4 100644
--- a/freebsd/sys/net/if_lagg.c
+++ b/freebsd/sys/net/if_lagg.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>
+#include <net/route.h>
#include <net/vnet.h>
#if defined(INET) || defined(INET6)
@@ -75,6 +76,14 @@ __FBSDID("$FreeBSD$");
#include <net/if_lagg.h>
#include <net/ieee8023ad_lacp.h>
+#ifdef INET6
+/*
+ * XXX: declared here to avoid including many inet6-related files;
+ * this should probably be generalized.
+ */
+extern void nd6_setmtu(struct ifnet *);
+#endif
+
#define LAGG_RLOCK() struct epoch_tracker lagg_et; epoch_enter_preempt(net_epoch_preempt, &lagg_et)
#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &lagg_et)
#define LAGG_RLOCK_ASSERT() MPASS(in_epoch(net_epoch_preempt))
@@ -1154,7 +1163,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct ifnet *tpif;
struct thread *td = curthread;
char *buf, *outbuf;
- int count, buflen, len, error = 0;
+ int count, buflen, len, error = 0, oldmtu;
bzero(&rpbuf, sizeof(rpbuf));
@@ -1221,23 +1230,35 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
ro->ro_active += LAGG_PORTACTIVE(lp);
}
- ro->ro_bkt = sc->sc_bkt;
+ ro->ro_bkt = sc->sc_stride;
ro->ro_flapping = sc->sc_flapping;
ro->ro_flowid_shift = sc->flowid_shift;
LAGG_XUNLOCK(sc);
break;
case SIOCSLAGGOPTS:
- if (sc->sc_proto == LAGG_PROTO_ROUNDROBIN) {
- if (ro->ro_bkt == 0)
- sc->sc_bkt = 1; // Minimum 1 packet per iface.
- else
- sc->sc_bkt = ro->ro_bkt;
- }
error = priv_check(td, PRIV_NET_LAGG);
if (error)
break;
- if (ro->ro_opts == 0)
+
+ /*
+ * The stride option was added without defining a corresponding
+ * LAGG_OPT flag, so handle a non-zero value before checking
+ * anything else to preserve compatibility.
+ */
+ LAGG_XLOCK(sc);
+ if (ro->ro_opts == 0 && ro->ro_bkt != 0) {
+ if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN) {
+ LAGG_XUNLOCK(sc);
+ error = EINVAL;
+ break;
+ }
+ sc->sc_stride = ro->ro_bkt;
+ }
+ if (ro->ro_opts == 0) {
+ LAGG_XUNLOCK(sc);
break;
+ }
+
/*
* Set options. LACP options are stored in sc->sc_psc,
* not in sc_opts.
@@ -1248,6 +1269,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case LAGG_OPT_USE_FLOWID:
case -LAGG_OPT_USE_FLOWID:
case LAGG_OPT_FLOWIDSHIFT:
+ case LAGG_OPT_RR_LIMIT:
valid = 1;
lacp = 0;
break;
@@ -1266,8 +1288,6 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
- LAGG_XLOCK(sc);
-
if (valid == 0 ||
(lacp == 1 && sc->sc_proto != LAGG_PROTO_LACP)) {
/* Invalid combination of options specified. */
@@ -1275,14 +1295,23 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
LAGG_XUNLOCK(sc);
break; /* Return from SIOCSLAGGOPTS. */
}
+
/*
* Store new options into sc->sc_opts except for
- * FLOWIDSHIFT and LACP options.
+ * FLOWIDSHIFT, RR and LACP options.
*/
if (lacp == 0) {
if (ro->ro_opts == LAGG_OPT_FLOWIDSHIFT)
sc->flowid_shift = ro->ro_flowid_shift;
- else if (ro->ro_opts > 0)
+ else if (ro->ro_opts == LAGG_OPT_RR_LIMIT) {
+ if (sc->sc_proto != LAGG_PROTO_ROUNDROBIN ||
+ ro->ro_bkt == 0) {
+ error = EINVAL;
+ LAGG_XUNLOCK(sc);
+ break;
+ }
+ sc->sc_stride = ro->ro_bkt;
+ } else if (ro->ro_opts > 0)
sc->sc_opts |= ro->ro_opts;
else
sc->sc_opts &= ~ro->ro_opts;
@@ -1407,10 +1436,23 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
tpif->if_xname);
}
#endif
+ oldmtu = ifp->if_mtu;
LAGG_XLOCK(sc);
error = lagg_port_create(sc, tpif);
LAGG_XUNLOCK(sc);
if_rele(tpif);
+
+ /*
+ * LAGG MTU may change during addition of the first port.
+ * If it did, do network layer specific procedure.
+ */
+ if (ifp->if_mtu != oldmtu) {
+#ifdef INET6
+ nd6_setmtu(ifp);
+#endif
+ rt_updatemtu(ifp);
+ }
+
VLAN_CAPABILITIES(ifp);
break;
case SIOCSLAGGDELPORT:
@@ -1904,7 +1946,7 @@ static void
lagg_rr_attach(struct lagg_softc *sc)
{
sc->sc_seq = 0;
- sc->sc_bkt_count = sc->sc_bkt;
+ sc->sc_stride = 1;
}
static int
@@ -1913,18 +1955,8 @@ lagg_rr_start(struct lagg_softc *sc, struct mbuf *m)
struct lagg_port *lp;
uint32_t p;
- if (sc->sc_bkt_count == 0 && sc->sc_bkt > 0)
- sc->sc_bkt_count = sc->sc_bkt;
-
- if (sc->sc_bkt > 0) {
- atomic_subtract_int(&sc->sc_bkt_count, 1);
- if (atomic_cmpset_int(&sc->sc_bkt_count, 0, sc->sc_bkt))
- p = atomic_fetchadd_32(&sc->sc_seq, 1);
- else
- p = sc->sc_seq;
- } else
- p = atomic_fetchadd_32(&sc->sc_seq, 1);
-
+ p = atomic_fetchadd_32(&sc->sc_seq, 1);
+ p /= sc->sc_stride;
p %= sc->sc_count;
lp = CK_SLIST_FIRST(&sc->sc_ports);
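With the rewritten lagg_rr_start() above, port selection reduces to p = (seq / stride) % count, so each port receives sc_stride consecutive packets before the rotation advances (stride defaults to 1, i.e. plain round-robin). A quick standalone check of that arithmetic with example values:

#include <stdio.h>

int
main(void)
{
	const unsigned int stride = 3, count = 2;	/* example values */
	unsigned int seq;

	/* Packets 0-2 go to port 0, 3-5 to port 1, 6-8 to port 0, ... */
	for (seq = 0; seq < 12; seq++)
		printf("packet %2u -> port %u\n", seq,
		    (seq / stride) % count);
	return (0);
}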
diff --git a/freebsd/sys/net/if_lagg.h b/freebsd/sys/net/if_lagg.h
index f1e2d8f4..c4256a45 100644
--- a/freebsd/sys/net/if_lagg.h
+++ b/freebsd/sys/net/if_lagg.h
@@ -63,11 +63,11 @@ struct lagg_protos {
#define LAGG_PROTO_DEFAULT LAGG_PROTO_FAILOVER
#define LAGG_PROTOS { \
- { "failover", LAGG_PROTO_FAILOVER }, \
+ { "failover", LAGG_PROTO_FAILOVER }, \
{ "lacp", LAGG_PROTO_LACP }, \
{ "loadbalance", LAGG_PROTO_LOADBALANCE }, \
- { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \
- { "broadcast", LAGG_PROTO_BROADCAST }, \
+ { "roundrobin", LAGG_PROTO_ROUNDROBIN }, \
+ { "broadcast", LAGG_PROTO_BROADCAST }, \
{ "none", LAGG_PROTO_NONE }, \
{ "default", LAGG_PROTO_DEFAULT } \
}
@@ -148,11 +148,12 @@ struct lagg_reqopts {
#define LAGG_OPT_LACP_TXTEST 0x20 /* LACP debug: txtest */
#define LAGG_OPT_LACP_RXTEST 0x40 /* LACP debug: rxtest */
#define LAGG_OPT_LACP_TIMEOUT 0x80 /* LACP timeout */
+#define LAGG_OPT_RR_LIMIT 0x100 /* RR stride */
u_int ro_count; /* number of ports */
u_int ro_active; /* active port count */
u_int ro_flapping; /* number of flapping */
int ro_flowid_shift; /* shift the flowid */
- uint32_t ro_bkt; /* packet bucket for roundrobin */
+ uint32_t ro_bkt; /* stride for RR */
};
#define SIOCGLAGGOPTS _IOWR('i', 152, struct lagg_reqopts)
@@ -214,6 +215,7 @@ struct lagg_softc {
struct ifmedia sc_media; /* media config */
void *sc_psc; /* protocol data */
uint32_t sc_seq; /* sequence counter */
+ uint32_t sc_stride; /* stride for RR */
uint32_t sc_flags;
int sc_destroying; /* destroying lagg */
@@ -225,8 +227,6 @@ struct lagg_softc {
struct callout sc_callout;
u_int sc_opts;
int flowid_shift; /* shift the flowid */
- uint32_t sc_bkt; /* packates bucket for roundrobin */
- uint32_t sc_bkt_count; /* packates bucket count for roundrobin */
struct lagg_counters detached_counters; /* detached ports sum */
};
diff --git a/freebsd/sys/net/if_llatbl.c b/freebsd/sys/net/if_llatbl.c
index b220d7aa..7b5c3a91 100644
--- a/freebsd/sys/net/if_llatbl.c
+++ b/freebsd/sys/net/if_llatbl.c
@@ -81,11 +81,6 @@ RW_SYSINIT(lltable_list_lock, &lltable_list_lock, "lltable_list_lock");
static void lltable_unlink(struct lltable *llt);
static void llentries_unlink(struct lltable *llt, struct llentries *head);
-static void htable_unlink_entry(struct llentry *lle);
-static void htable_link_entry(struct lltable *llt, struct llentry *lle);
-static int htable_foreach_lle(struct lltable *llt, llt_foreach_cb_t *f,
- void *farg);
-
/*
* Dump lle state for a specific address family.
*/
@@ -182,15 +177,16 @@ static void
htable_unlink_entry(struct llentry *lle)
{
- if ((lle->la_flags & LLE_LINKED) != 0) {
- IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
- CK_LIST_REMOVE(lle, lle_next);
- lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
+ if ((lle->la_flags & LLE_LINKED) == 0)
+ return;
+
+ IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp);
+ CK_LIST_REMOVE(lle, lle_next);
+ lle->la_flags &= ~(LLE_VALID | LLE_LINKED);
#if 0
- lle->lle_tbl = NULL;
- lle->lle_head = NULL;
+ lle->lle_tbl = NULL;
+ lle->lle_head = NULL;
#endif
- }
}
struct prefix_match_data {
diff --git a/freebsd/sys/net/if_tap.c b/freebsd/sys/net/if_tap.c
deleted file mode 100644
index 4ca35b66..00000000
--- a/freebsd/sys/net/if_tap.c
+++ /dev/null
@@ -1,1153 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * BASED ON:
- * -------------------------------------------------------------------------
- *
- * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
- * Nottingham University 1987.
- */
-
-/*
- * $FreeBSD$
- * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
- */
-
-#include <rtems/bsd/local/opt_inet.h>
-
-#include <sys/param.h>
-#include <sys/conf.h>
-#include <sys/lock.h>
-#include <sys/fcntl.h>
-#include <sys/filio.h>
-#include <sys/jail.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/poll.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/selinfo.h>
-#include <sys/signalvar.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-#include <sys/sx.h>
-#include <sys/sysctl.h>
-#include <sys/systm.h>
-#include <sys/ttycom.h>
-#include <sys/uio.h>
-#include <sys/queue.h>
-
-#include <net/bpf.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/if_clone.h>
-#include <net/if_dl.h>
-#include <net/if_media.h>
-#include <net/if_types.h>
-#include <net/route.h>
-#include <net/vnet.h>
-
-#include <netinet/in.h>
-
-#include <net/if_tapvar.h>
-#include <net/if_tap.h>
-
-#define CDEV_NAME "tap"
-#define TAPDEBUG if (tapdebug) printf
-
-static const char tapname[] = "tap";
-static const char vmnetname[] = "vmnet";
-#define TAPMAXUNIT 0x7fff
-#define VMNET_DEV_MASK CLONE_FLAG0
-
-/* module */
-static int tapmodevent(module_t, int, void *);
-
-/* device */
-static void tapclone(void *, struct ucred *, char *, int,
- struct cdev **);
-static void tapcreate(struct cdev *);
-
-/* network interface */
-static void tapifstart(struct ifnet *);
-static int tapifioctl(struct ifnet *, u_long, caddr_t);
-static void tapifinit(void *);
-
-static int tap_clone_create(struct if_clone *, int, caddr_t);
-static void tap_clone_destroy(struct ifnet *);
-static struct if_clone *tap_cloner;
-static int vmnet_clone_create(struct if_clone *, int, caddr_t);
-static void vmnet_clone_destroy(struct ifnet *);
-static struct if_clone *vmnet_cloner;
-
-/* character device */
-static d_open_t tapopen;
-static d_close_t tapclose;
-static d_read_t tapread;
-static d_write_t tapwrite;
-static d_ioctl_t tapioctl;
-static d_poll_t tappoll;
-static d_kqfilter_t tapkqfilter;
-
-/* kqueue(2) */
-static int tapkqread(struct knote *, long);
-static int tapkqwrite(struct knote *, long);
-static void tapkqdetach(struct knote *);
-
-static struct filterops tap_read_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tapkqdetach,
- .f_event = tapkqread,
-};
-
-static struct filterops tap_write_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tapkqdetach,
- .f_event = tapkqwrite,
-};
-
-static struct cdevsw tap_cdevsw = {
- .d_version = D_VERSION,
- .d_flags = D_NEEDMINOR,
- .d_open = tapopen,
- .d_close = tapclose,
- .d_read = tapread,
- .d_write = tapwrite,
- .d_ioctl = tapioctl,
- .d_poll = tappoll,
- .d_name = CDEV_NAME,
- .d_kqfilter = tapkqfilter,
-};
-
-/*
- * All global variables in if_tap.c are locked with tapmtx, with the
- * exception of tapdebug, which is accessed unlocked; tapclones is
- * static at runtime.
- */
-static struct mtx tapmtx;
-static int tapdebug = 0; /* debug flag */
-static int tapuopen = 0; /* allow user open() */
-static int tapuponopen = 0; /* IFF_UP on open() */
-static int tapdclone = 1; /* enable devfs cloning */
-static SLIST_HEAD(, tap_softc) taphead; /* first device */
-static struct clonedevs *tapclones;
-
-MALLOC_DECLARE(M_TAP);
-MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface");
-SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, "");
-
-static struct sx tap_ioctl_sx;
-SX_SYSINIT(tap_ioctl_sx, &tap_ioctl_sx, "tap_ioctl");
-
-SYSCTL_DECL(_net_link);
-static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
- "Ethernet tunnel software network interface");
-SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0,
- "Allow user to open /dev/tap (based on node permissions)");
-SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
- "Bring interface up when /dev/tap is opened");
-SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
- "Enable legacy devfs interface creation");
-SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, "");
-
-DEV_MODULE(if_tap, tapmodevent, NULL);
-MODULE_VERSION(if_tap, 1);
-
-static int
-tap_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
- struct cdev *dev;
- int i;
-
- /* Find any existing device, or allocate new unit number. */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0);
- if (i) {
- dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600,
- "%s%d", tapname, unit);
- }
-
- tapcreate(dev);
- return (0);
-}
-
-/* vmnet devices are tap devices in disguise */
-static int
-vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params)
-{
- struct cdev *dev;
- int i;
-
- /* Find any existing device, or allocate new unit number. */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK);
- if (i) {
- dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT,
- GID_WHEEL, 0600, "%s%d", vmnetname, unit);
- }
-
- tapcreate(dev);
- return (0);
-}
-
-static void
-tap_destroy(struct tap_softc *tp)
-{
- struct ifnet *ifp = tp->tap_ifp;
-
- CURVNET_SET(ifp->if_vnet);
-
- destroy_dev(tp->tap_dev);
- seldrain(&tp->tap_rsel);
- knlist_clear(&tp->tap_rsel.si_note, 0);
- knlist_destroy(&tp->tap_rsel.si_note);
- ether_ifdetach(ifp);
-
- sx_xlock(&tap_ioctl_sx);
- ifp->if_softc = NULL;
- sx_xunlock(&tap_ioctl_sx);
-
- if_free(ifp);
-
- mtx_destroy(&tp->tap_mtx);
- free(tp, M_TAP);
- CURVNET_RESTORE();
-}
-
-static void
-tap_clone_destroy(struct ifnet *ifp)
-{
- struct tap_softc *tp = ifp->if_softc;
-
- mtx_lock(&tapmtx);
- SLIST_REMOVE(&taphead, tp, tap_softc, tap_next);
- mtx_unlock(&tapmtx);
- tap_destroy(tp);
-}
-
-/* vmnet devices are tap devices in disguise */
-static void
-vmnet_clone_destroy(struct ifnet *ifp)
-{
- tap_clone_destroy(ifp);
-}
-
-/*
- * tapmodevent
- *
- * module event handler
- */
-static int
-tapmodevent(module_t mod, int type, void *data)
-{
- static eventhandler_tag eh_tag = NULL;
- struct tap_softc *tp = NULL;
- struct ifnet *ifp = NULL;
-
- switch (type) {
- case MOD_LOAD:
-
- /* intitialize device */
-
- mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF);
- SLIST_INIT(&taphead);
-
- clone_setup(&tapclones);
- eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000);
- if (eh_tag == NULL) {
- clone_cleanup(&tapclones);
- mtx_destroy(&tapmtx);
- return (ENOMEM);
- }
- tap_cloner = if_clone_simple(tapname, tap_clone_create,
- tap_clone_destroy, 0);
- vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create,
- vmnet_clone_destroy, 0);
- return (0);
-
- case MOD_UNLOAD:
- /*
- * The EBUSY algorithm here can't quite atomically
- * guarantee that this is race-free since we have to
- * release the tap mtx to deregister the clone handler.
- */
- mtx_lock(&tapmtx);
- SLIST_FOREACH(tp, &taphead, tap_next) {
- mtx_lock(&tp->tap_mtx);
- if (tp->tap_flags & TAP_OPEN) {
- mtx_unlock(&tp->tap_mtx);
- mtx_unlock(&tapmtx);
- return (EBUSY);
- }
- mtx_unlock(&tp->tap_mtx);
- }
- mtx_unlock(&tapmtx);
-
- EVENTHANDLER_DEREGISTER(dev_clone, eh_tag);
- if_clone_detach(tap_cloner);
- if_clone_detach(vmnet_cloner);
- drain_dev_clone_events();
-
- mtx_lock(&tapmtx);
- while ((tp = SLIST_FIRST(&taphead)) != NULL) {
- SLIST_REMOVE_HEAD(&taphead, tap_next);
- mtx_unlock(&tapmtx);
-
- ifp = tp->tap_ifp;
-
- TAPDEBUG("detaching %s\n", ifp->if_xname);
-
- tap_destroy(tp);
- mtx_lock(&tapmtx);
- }
- mtx_unlock(&tapmtx);
- clone_cleanup(&tapclones);
-
- mtx_destroy(&tapmtx);
-
- break;
-
- default:
- return (EOPNOTSUPP);
- }
-
- return (0);
-} /* tapmodevent */
-
-
-/*
- * DEVFS handler
- *
- * We need to support two kind of devices - tap and vmnet
- */
-static void
-tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev)
-{
- char devname[SPECNAMELEN + 1];
- int i, unit, append_unit;
- int extra;
-
- if (*dev != NULL)
- return;
-
- if (!tapdclone ||
- (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0))
- return;
-
- unit = 0;
- append_unit = 0;
- extra = 0;
-
- /* We're interested in only tap/vmnet devices. */
- if (strcmp(name, tapname) == 0) {
- unit = -1;
- } else if (strcmp(name, vmnetname) == 0) {
- unit = -1;
- extra = VMNET_DEV_MASK;
- } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) {
- if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) {
- return;
- } else {
- extra = VMNET_DEV_MASK;
- }
- }
-
- if (unit == -1)
- append_unit = 1;
-
- CURVNET_SET(CRED_TO_VNET(cred));
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra);
- if (i) {
- if (append_unit) {
- /*
- * We were passed 'tun' or 'tap', with no unit specified
- * so we'll need to append it now.
- */
- namelen = snprintf(devname, sizeof(devname), "%s%d", name,
- unit);
- name = devname;
- }
-
- *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra,
- cred, UID_ROOT, GID_WHEEL, 0600, "%s", name);
- }
-
- if_clone_create(name, namelen, NULL);
- CURVNET_RESTORE();
-} /* tapclone */
-
-
-/*
- * tapcreate
- *
- * to create interface
- */
-static void
-tapcreate(struct cdev *dev)
-{
- struct ifnet *ifp = NULL;
- struct tap_softc *tp = NULL;
- unsigned short macaddr_hi;
- uint32_t macaddr_mid;
- int unit;
- const char *name = NULL;
- u_char eaddr[6];
-
- /* allocate driver storage and create device */
- tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO);
- mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF);
- mtx_lock(&tapmtx);
- SLIST_INSERT_HEAD(&taphead, tp, tap_next);
- mtx_unlock(&tapmtx);
-
- unit = dev2unit(dev);
-
- /* select device: tap or vmnet */
- if (unit & VMNET_DEV_MASK) {
- name = vmnetname;
- tp->tap_flags |= TAP_VMNET;
- } else
- name = tapname;
-
- unit &= TAPMAXUNIT;
-
- TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, dev2unit(dev));
-
- /* generate fake MAC address: 00 bd xx xx xx unit_no */
- macaddr_hi = htons(0x00bd);
- macaddr_mid = (uint32_t) ticks;
- bcopy(&macaddr_hi, eaddr, sizeof(short));
- bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t));
- eaddr[5] = (u_char)unit;
-
- /* fill the rest and attach interface */
- ifp = tp->tap_ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL)
- panic("%s%d: can not if_alloc()", name, unit);
- ifp->if_softc = tp;
- if_initname(ifp, name, unit);
- ifp->if_init = tapifinit;
- ifp->if_start = tapifstart;
- ifp->if_ioctl = tapifioctl;
- ifp->if_mtu = ETHERMTU;
- ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST);
- IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
- ifp->if_capabilities |= IFCAP_LINKSTATE;
- ifp->if_capenable |= IFCAP_LINKSTATE;
-
- dev->si_drv1 = tp;
- tp->tap_dev = dev;
-
- ether_ifattach(ifp, eaddr);
-
- mtx_lock(&tp->tap_mtx);
- tp->tap_flags |= TAP_INITED;
- mtx_unlock(&tp->tap_mtx);
-
- knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx);
-
- TAPDEBUG("interface %s is created. minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-} /* tapcreate */
-
-
-/*
- * tapopen
- *
- * to open tunnel. must be superuser
- */
-static int
-tapopen(struct cdev *dev, int flag, int mode, struct thread *td)
-{
- struct tap_softc *tp = NULL;
- struct ifnet *ifp = NULL;
- int error;
-
- if (tapuopen == 0) {
- error = priv_check(td, PRIV_NET_TAP);
- if (error)
- return (error);
- }
-
- if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT)
- return (ENXIO);
-
- tp = dev->si_drv1;
-
- mtx_lock(&tp->tap_mtx);
- if (tp->tap_flags & TAP_OPEN) {
- mtx_unlock(&tp->tap_mtx);
- return (EBUSY);
- }
-
- bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr));
-#ifndef __rtems__
- tp->tap_pid = td->td_proc->p_pid;
-#else /* __rtems__ */
- tp->tap_pid = BSD_DEFAULT_PID;
-#endif /* __rtems__ */
- tp->tap_flags |= TAP_OPEN;
- ifp = tp->tap_ifp;
-
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- if (tapuponopen)
- ifp->if_flags |= IFF_UP;
- if_link_state_change(ifp, LINK_STATE_UP);
- mtx_unlock(&tp->tap_mtx);
-
- TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev));
-
- return (0);
-} /* tapopen */
-
-
-/*
- * tapclose
- *
- * close the device - mark i/f down & delete routing info
- */
-static int
-tapclose(struct cdev *dev, int foo, int bar, struct thread *td)
-{
- struct ifaddr *ifa;
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
-
- /* junk all pending output */
- mtx_lock(&tp->tap_mtx);
- CURVNET_SET(ifp->if_vnet);
- IF_DRAIN(&ifp->if_snd);
-
- /*
- * Do not bring the interface down, and do not anything with
- * interface, if we are in VMnet mode. Just close the device.
- */
- if (((tp->tap_flags & TAP_VMNET) == 0) &&
- (ifp->if_flags & (IFF_UP | IFF_LINK0)) == IFF_UP) {
- mtx_unlock(&tp->tap_mtx);
- if_down(ifp);
- mtx_lock(&tp->tap_mtx);
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- mtx_unlock(&tp->tap_mtx);
- CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- rtinit(ifa, (int)RTM_DELETE, 0);
- }
- if_purgeaddrs(ifp);
- mtx_lock(&tp->tap_mtx);
- }
- }
-
- if_link_state_change(ifp, LINK_STATE_DOWN);
- CURVNET_RESTORE();
-
- funsetown(&tp->tap_sigio);
- selwakeuppri(&tp->tap_rsel, PZERO+1);
- KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
-
- tp->tap_flags &= ~TAP_OPEN;
- tp->tap_pid = 0;
- mtx_unlock(&tp->tap_mtx);
-
- TAPDEBUG("%s is closed. minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- return (0);
-} /* tapclose */
-
-
-/*
- * tapifinit
- *
- * network interface initialization function
- */
-static void
-tapifinit(void *xtp)
-{
- struct tap_softc *tp = (struct tap_softc *)xtp;
- struct ifnet *ifp = tp->tap_ifp;
-
- TAPDEBUG("initializing %s\n", ifp->if_xname);
-
- mtx_lock(&tp->tap_mtx);
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- mtx_unlock(&tp->tap_mtx);
-
- /* attempt to start output */
- tapifstart(ifp);
-} /* tapifinit */
-
-
-/*
- * tapifioctl
- *
- * Process an ioctl request on network interface
- */
-static int
-tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct tap_softc *tp;
- struct ifreq *ifr = (struct ifreq *)data;
- struct ifstat *ifs = NULL;
- struct ifmediareq *ifmr = NULL;
- int dummy, error = 0;
-
- sx_xlock(&tap_ioctl_sx);
- tp = ifp->if_softc;
- if (tp == NULL) {
- error = ENXIO;
- goto bad;
- }
- switch (cmd) {
- case SIOCSIFFLAGS: /* XXX -- just like vmnet does */
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- break;
-
- case SIOCGIFMEDIA:
- ifmr = (struct ifmediareq *)data;
- dummy = ifmr->ifm_count;
- ifmr->ifm_count = 1;
- ifmr->ifm_status = IFM_AVALID;
- ifmr->ifm_active = IFM_ETHER;
- if (tp->tap_flags & TAP_OPEN)
- ifmr->ifm_status |= IFM_ACTIVE;
- ifmr->ifm_current = ifmr->ifm_active;
- if (dummy >= 1) {
- int media = IFM_ETHER;
- error = copyout(&media, ifmr->ifm_ulist,
- sizeof(int));
- }
- break;
-
- case SIOCSIFMTU:
- ifp->if_mtu = ifr->ifr_mtu;
- break;
-
- case SIOCGIFSTATUS:
- ifs = (struct ifstat *)data;
- mtx_lock(&tp->tap_mtx);
- if (tp->tap_pid != 0)
- snprintf(ifs->ascii, sizeof(ifs->ascii),
- "\tOpened by PID %d\n", tp->tap_pid);
- else
- ifs->ascii[0] = '\0';
- mtx_unlock(&tp->tap_mtx);
- break;
-
- default:
- error = ether_ioctl(ifp, cmd, data);
- break;
- }
-
-bad:
- sx_xunlock(&tap_ioctl_sx);
- return (error);
-} /* tapifioctl */
-
-
-/*
- * tapifstart
- *
- * queue packets from higher level ready to put out
- */
-static void
-tapifstart(struct ifnet *ifp)
-{
- struct tap_softc *tp = ifp->if_softc;
-
- TAPDEBUG("%s starting\n", ifp->if_xname);
-
- /*
- * do not junk pending output if we are in VMnet mode.
- * XXX: can this do any harm because of queue overflow?
- */
-
- mtx_lock(&tp->tap_mtx);
- if (((tp->tap_flags & TAP_VMNET) == 0) &&
- ((tp->tap_flags & TAP_READY) != TAP_READY)) {
- struct mbuf *m;
-
- /* Unlocked read. */
- TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname,
- tp->tap_flags);
-
- for (;;) {
- IF_DEQUEUE(&ifp->if_snd, m);
- if (m != NULL) {
- m_freem(m);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- } else
- break;
- }
- mtx_unlock(&tp->tap_mtx);
-
- return;
- }
-
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
-
- if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
- if (tp->tap_flags & TAP_RWAIT) {
- tp->tap_flags &= ~TAP_RWAIT;
- wakeup(tp);
- }
-
- if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) {
- mtx_unlock(&tp->tap_mtx);
- pgsigio(&tp->tap_sigio, SIGIO, 0);
- mtx_lock(&tp->tap_mtx);
- }
-
- selwakeuppri(&tp->tap_rsel, PZERO+1);
- KNOTE_LOCKED(&tp->tap_rsel.si_note, 0);
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
- }
-
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- mtx_unlock(&tp->tap_mtx);
-} /* tapifstart */
-
-
-/*
- * tapioctl
- *
- * the cdevsw interface is now pretty minimal
- */
-static int
-tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
-{
- struct ifreq ifr;
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- struct tapinfo *tapp = NULL;
- int f;
- int error;
-#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
- defined(COMPAT_FREEBSD4)
- int ival;
-#endif
-
- switch (cmd) {
- case TAPSIFINFO:
- tapp = (struct tapinfo *)data;
- if (ifp->if_type != tapp->type)
- return (EPROTOTYPE);
- mtx_lock(&tp->tap_mtx);
- if (ifp->if_mtu != tapp->mtu) {
- strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ);
- ifr.ifr_mtu = tapp->mtu;
- CURVNET_SET(ifp->if_vnet);
- error = ifhwioctl(SIOCSIFMTU, ifp,
- (caddr_t)&ifr, td);
- CURVNET_RESTORE();
- if (error) {
- mtx_unlock(&tp->tap_mtx);
- return (error);
- }
- }
- ifp->if_baudrate = tapp->baudrate;
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case TAPGIFINFO:
- tapp = (struct tapinfo *)data;
- mtx_lock(&tp->tap_mtx);
- tapp->mtu = ifp->if_mtu;
- tapp->type = ifp->if_type;
- tapp->baudrate = ifp->if_baudrate;
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case TAPSDEBUG:
- tapdebug = *(int *)data;
- break;
-
- case TAPGDEBUG:
- *(int *)data = tapdebug;
- break;
-
- case TAPGIFNAME: {
- struct ifreq *ifr = (struct ifreq *) data;
-
- strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
- } break;
-
- case FIONBIO:
- break;
-
- case FIOASYNC:
- mtx_lock(&tp->tap_mtx);
- if (*(int *)data)
- tp->tap_flags |= TAP_ASYNC;
- else
- tp->tap_flags &= ~TAP_ASYNC;
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case FIONREAD:
- if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
- struct mbuf *mb;
-
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, mb);
- for (*(int *)data = 0; mb != NULL;
- mb = mb->m_next)
- *(int *)data += mb->m_len;
- IFQ_UNLOCK(&ifp->if_snd);
- } else
- *(int *)data = 0;
- break;
-
- case FIOSETOWN:
- return (fsetown(*(int *)data, &tp->tap_sigio));
-
- case FIOGETOWN:
- *(int *)data = fgetown(&tp->tap_sigio);
- return (0);
-
- /* this is deprecated, FIOSETOWN should be used instead */
- case TIOCSPGRP:
- return (fsetown(-(*(int *)data), &tp->tap_sigio));
-
- /* this is deprecated, FIOGETOWN should be used instead */
- case TIOCGPGRP:
- *(int *)data = -fgetown(&tp->tap_sigio);
- return (0);
-
- /* VMware/VMnet port ioctl's */
-
-#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
- defined(COMPAT_FREEBSD4)
- case _IO('V', 0):
- ival = IOCPARM_IVAL(data);
- data = (caddr_t)&ival;
- /* FALLTHROUGH */
-#endif
- case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
- f = *(int *)data;
- f &= 0x0fff;
- f &= ~IFF_CANTCHANGE;
- f |= IFF_UP;
-
- mtx_lock(&tp->tap_mtx);
- ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE);
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case SIOCGIFADDR: /* get MAC address of the remote side */
- mtx_lock(&tp->tap_mtx);
- bcopy(tp->ether_addr, data, sizeof(tp->ether_addr));
- mtx_unlock(&tp->tap_mtx);
- break;
-
- case SIOCSIFADDR: /* set MAC address of the remote side */
- mtx_lock(&tp->tap_mtx);
- bcopy(data, tp->ether_addr, sizeof(tp->ether_addr));
- mtx_unlock(&tp->tap_mtx);
- break;
-
- default:
- return (ENOTTY);
- }
- return (0);
-} /* tapioctl */
-
-
-/*
- * tapread
- *
- * the cdevsw read interface - reads a packet at a time, or at
- * least as much of a packet as can be read
- */
-static int
-tapread(struct cdev *dev, struct uio *uio, int flag)
-{
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- struct mbuf *m = NULL;
- int error = 0, len;
-
- TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev));
-
- mtx_lock(&tp->tap_mtx);
- if ((tp->tap_flags & TAP_READY) != TAP_READY) {
- mtx_unlock(&tp->tap_mtx);
-
- /* Unlocked read. */
- TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n",
- ifp->if_xname, dev2unit(dev), tp->tap_flags);
-
- return (EHOSTDOWN);
- }
-
- tp->tap_flags &= ~TAP_RWAIT;
-
- /* sleep until we get a packet */
- do {
- IF_DEQUEUE(&ifp->if_snd, m);
-
- if (m == NULL) {
- if (flag & O_NONBLOCK) {
- mtx_unlock(&tp->tap_mtx);
- return (EWOULDBLOCK);
- }
-
- tp->tap_flags |= TAP_RWAIT;
- error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1),
- "taprd", 0);
- if (error) {
- mtx_unlock(&tp->tap_mtx);
- return (error);
- }
- }
- } while (m == NULL);
- mtx_unlock(&tp->tap_mtx);
-
- /* feed packet to bpf */
- BPF_MTAP(ifp, m);
-
- /* xfer packet to user space */
- while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) {
- len = min(uio->uio_resid, m->m_len);
- if (len == 0)
- break;
-
- error = uiomove(mtod(m, void *), len, uio);
- m = m_free(m);
- }
-
- if (m != NULL) {
- TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname,
- dev2unit(dev));
- m_freem(m);
- }
-
- return (error);
-} /* tapread */
-
-
-/*
- * tapwrite
- *
- * the cdevsw write interface - an atomic write is a packet - or else!
- */
-static int
-tapwrite(struct cdev *dev, struct uio *uio, int flag)
-{
- struct ether_header *eh;
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- struct mbuf *m;
-
- TAPDEBUG("%s writing, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- if (uio->uio_resid == 0)
- return (0);
-
- if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) {
- TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n",
- ifp->if_xname, uio->uio_resid, dev2unit(dev));
-
- return (EIO);
- }
-
- if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN,
- M_PKTHDR)) == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return (ENOBUFS);
- }
-
- m->m_pkthdr.rcvif = ifp;
-
- /*
- * Only pass a unicast frame to ether_input(), if it would actually
- * have been received by non-virtual hardware.
- */
- if (m->m_len < sizeof(struct ether_header)) {
- m_freem(m);
- return (0);
- }
- eh = mtod(m, struct ether_header *);
-
- if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
- !ETHER_IS_MULTICAST(eh->ether_dhost) &&
- bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
- m_freem(m);
- return (0);
- }
-
- /* Pass packet up to parent. */
- CURVNET_SET(ifp->if_vnet);
- (*ifp->if_input)(ifp, m);
- CURVNET_RESTORE();
- if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */
-
- return (0);
-} /* tapwrite */
-
-
-/*
- * tappoll
- *
- * the poll interface, this is only useful on reads
- * really. the write detect always returns true, write never blocks
- * anyway, it either accepts the packet or drops it
- */
-static int
-tappoll(struct cdev *dev, int events, struct thread *td)
-{
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
- int revents = 0;
-
- TAPDEBUG("%s polling, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- if (events & (POLLIN | POLLRDNORM)) {
- IFQ_LOCK(&ifp->if_snd);
- if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
- TAPDEBUG("%s have data in queue. len = %d, " \
- "minor = %#x\n", ifp->if_xname,
- ifp->if_snd.ifq_len, dev2unit(dev));
-
- revents |= (events & (POLLIN | POLLRDNORM));
- } else {
- TAPDEBUG("%s waiting for data, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-
- selrecord(td, &tp->tap_rsel);
- }
- IFQ_UNLOCK(&ifp->if_snd);
- }
-
- if (events & (POLLOUT | POLLWRNORM))
- revents |= (events & (POLLOUT | POLLWRNORM));
-
- return (revents);
-} /* tappoll */
-
-
-/*
- * tap_kqfilter
- *
- * support for kevent() system call
- */
-static int
-tapkqfilter(struct cdev *dev, struct knote *kn)
-{
- struct tap_softc *tp = dev->si_drv1;
- struct ifnet *ifp = tp->tap_ifp;
-
- switch (kn->kn_filter) {
- case EVFILT_READ:
- TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tap_read_filterops;
- break;
-
- case EVFILT_WRITE:
- TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tap_write_filterops;
- break;
-
- default:
- TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- return (EINVAL);
- /* NOT REACHED */
- }
-
- kn->kn_hook = tp;
- knlist_add(&tp->tap_rsel.si_note, kn, 0);
-
- return (0);
-} /* tapkqfilter */
-
-
-/*
- * tap_kqread
- *
- * Return true if there is data in the interface queue
- */
-static int
-tapkqread(struct knote *kn, long hint)
-{
- int ret;
- struct tap_softc *tp = kn->kn_hook;
- struct cdev *dev = tp->tap_dev;
- struct ifnet *ifp = tp->tap_ifp;
-
- if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
- TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n",
- ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
- ret = 1;
- } else {
- TAPDEBUG("%s waiting for data, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- ret = 0;
- }
-
- return (ret);
-} /* tapkqread */
-
-
-/*
- * tap_kqwrite
- *
- * Always can write. Return the MTU in kn->data
- */
-static int
-tapkqwrite(struct knote *kn, long hint)
-{
- struct tap_softc *tp = kn->kn_hook;
- struct ifnet *ifp = tp->tap_ifp;
-
- kn->kn_data = ifp->if_mtu;
-
- return (1);
-} /* tapkqwrite */
-
-
-static void
-tapkqdetach(struct knote *kn)
-{
- struct tap_softc *tp = kn->kn_hook;
-
- knlist_remove(&tp->tap_rsel.si_note, kn, 0);
-} /* tapkqdetach */
-
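The kqueue hooks above make a tap descriptor usable with kevent(2): the read filter fires once frames are queued and reports the queue length in the event data. A small sketch of registering for read readiness, assuming an already created tap0 node (the path is illustrative):

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent change, ev;
	int fd, kq, n;

	fd = open("/dev/tap0", O_RDWR);		/* node name illustrative */
	kq = kqueue();
	if (fd < 0 || kq < 0)
		return (1);

	/* Register interest in read readiness on the tap descriptor. */
	EV_SET(&change, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
	n = kevent(kq, &change, 1, &ev, 1, NULL);	/* blocks until a frame is queued */
	if (n == 1)
		printf("readable, %jd frame(s) queued\n", (intmax_t)ev.data);
	close(kq);
	close(fd);
	return (0);
}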
diff --git a/freebsd/sys/net/if_tap.h b/freebsd/sys/net/if_tap.h
index 34f44b38..9718cee4 100644
--- a/freebsd/sys/net/if_tap.h
+++ b/freebsd/sys/net/if_tap.h
@@ -40,24 +40,22 @@
#ifndef _NET_IF_TAP_H_
#define _NET_IF_TAP_H_
-/* refer to if_tapvar.h for the softc stuff */
+#include <net/if_tun.h>
/* maximum receive packet size (hard limit) */
#define TAPMRU 16384
-struct tapinfo {
- int baudrate; /* linespeed */
- short mtu; /* maximum transmission unit */
- u_char type; /* ethernet, tokenring, etc. */
- u_char dummy; /* place holder */
-};
+#define tapinfo tuninfo
-/* ioctl's for get/set debug */
-#define TAPSDEBUG _IOW('t', 90, int)
-#define TAPGDEBUG _IOR('t', 89, int)
-#define TAPSIFINFO _IOW('t', 91, struct tapinfo)
-#define TAPGIFINFO _IOR('t', 92, struct tapinfo)
-#define TAPGIFNAME _IOR('t', 93, struct ifreq)
+/*
+ * ioctl's for get/set debug; these are aliases of TUN* ioctls, see net/if_tun.h
+ * for details.
+ */
+#define TAPSDEBUG TUNSDEBUG
+#define TAPGDEBUG TUNGDEBUG
+#define TAPSIFINFO TUNSIFINFO
+#define TAPGIFINFO TUNGIFINFO
+#define TAPGIFNAME TUNGIFNAME
/* VMware ioctl's */
#define VMIO_SIOCSIFFLAGS _IOWINT('V', 0)
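Because the tap ioctls are now aliases of their tun counterparts, existing consumers keep working unchanged; for example, TAPGIFNAME still reports the interface bound to an open descriptor. A short sketch, assuming devfs cloning is enabled so that opening the unnumbered node yields a fresh device:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_tap.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct ifreq ifr;
	int fd;

	fd = open("/dev/tap", O_RDWR);	/* unnumbered clone node; name illustrative */
	if (fd < 0)
		return (1);
	if (ioctl(fd, TAPGIFNAME, &ifr) == 0)
		printf("descriptor is attached to %s\n", ifr.ifr_name);
	close(fd);
	return (0);
}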
diff --git a/freebsd/sys/net/if_tapvar.h b/freebsd/sys/net/if_tapvar.h
deleted file mode 100644
index f5cf9f3e..00000000
--- a/freebsd/sys/net/if_tapvar.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * BASED ON:
- * -------------------------------------------------------------------------
- *
- * Copyright (c) 1998 Brian Somers <brian@Awfulhak.org>
- * All rights reserved.
- *
- * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
- * Nottingham University 1987.
- */
-
-/*
- * $FreeBSD$
- * $Id: if_tapvar.h,v 0.6 2000/07/11 02:16:08 max Exp $
- */
-
-#ifndef _NET_IF_TAPVAR_H_
-#define _NET_IF_TAPVAR_H_
-
-/*
- * tap_mtx locks tap_flags, tap_pid. tap_next locked with global tapmtx.
- * Other fields locked by owning subsystems.
- */
-struct tap_softc {
- struct ifnet *tap_ifp;
- u_short tap_flags; /* misc flags */
-#define TAP_OPEN (1 << 0)
-#define TAP_INITED (1 << 1)
-#define TAP_RWAIT (1 << 2)
-#define TAP_ASYNC (1 << 3)
-#define TAP_READY (TAP_OPEN|TAP_INITED)
-#define TAP_VMNET (1 << 4)
-
- u_int8_t ether_addr[ETHER_ADDR_LEN]; /* ether addr of the remote side */
-
- pid_t tap_pid; /* PID of process to open */
- struct sigio *tap_sigio; /* information for async I/O */
- struct selinfo tap_rsel; /* read select */
-
- SLIST_ENTRY(tap_softc) tap_next; /* next device in chain */
- struct cdev *tap_dev;
- struct mtx tap_mtx; /* per-softc mutex */
-};
-
-#endif /* !_NET_IF_TAPVAR_H_ */
diff --git a/freebsd/sys/net/if_tun.c b/freebsd/sys/net/if_tun.c
deleted file mode 100644
index c96b2163..00000000
--- a/freebsd/sys/net/if_tun.c
+++ /dev/null
@@ -1,1132 +0,0 @@
-#include <machine/rtems-bsd-kernel-space.h>
-
-/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */
-
-/*-
- * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
- * Nottingham University 1987.
- *
- * This source may be freely distributed, however I would be interested
- * in any changes that are made.
- *
- * This driver takes packets off the IP i/f and hands them up to a
- * user process to have its wicked way with. This driver has its
- * roots in a similar driver written by Phil Cockcroft (formerly) at
- * UCL. This driver is based much more on read/write/poll mode of
- * operation though.
- *
- * $FreeBSD$
- */
-
-#include <rtems/bsd/local/opt_inet.h>
-#include <rtems/bsd/local/opt_inet6.h>
-
-#include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/systm.h>
-#include <sys/jail.h>
-#include <sys/mbuf.h>
-#include <sys/module.h>
-#include <sys/socket.h>
-#include <sys/fcntl.h>
-#include <sys/filio.h>
-#include <sys/sockio.h>
-#include <sys/sx.h>
-#include <sys/ttycom.h>
-#include <sys/poll.h>
-#include <sys/selinfo.h>
-#include <sys/signalvar.h>
-#include <sys/filedesc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-#include <sys/conf.h>
-#include <sys/uio.h>
-#include <sys/malloc.h>
-#include <sys/random.h>
-#include <sys/ctype.h>
-
-#include <net/if.h>
-#include <net/if_var.h>
-#include <net/if_clone.h>
-#include <net/if_types.h>
-#include <net/netisr.h>
-#include <net/route.h>
-#include <net/vnet.h>
-#ifdef INET
-#include <netinet/in.h>
-#endif
-#include <net/bpf.h>
-#include <net/if_tun.h>
-
-#include <sys/queue.h>
-#include <sys/condvar.h>
-
-#include <security/mac/mac_framework.h>
-
-/*
- * tun_list is protected by global tunmtx. Other mutable fields are
- * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is
- * static for the duration of a tunnel interface.
- */
-struct tun_softc {
- TAILQ_ENTRY(tun_softc) tun_list;
- struct cdev *tun_dev;
- u_short tun_flags; /* misc flags */
-#define TUN_OPEN 0x0001
-#define TUN_INITED 0x0002
-#define TUN_RCOLL 0x0004
-#define TUN_IASET 0x0008
-#define TUN_DSTADDR 0x0010
-#define TUN_LMODE 0x0020
-#define TUN_RWAIT 0x0040
-#define TUN_ASYNC 0x0080
-#define TUN_IFHEAD 0x0100
-#define TUN_DYING 0x0200
-
-#define TUN_READY (TUN_OPEN | TUN_INITED)
-
-#ifndef __rtems__
- pid_t tun_pid; /* owning pid */
-#endif /* __rtems__ */
- struct ifnet *tun_ifp; /* the interface */
- struct sigio *tun_sigio; /* information for async I/O */
- struct selinfo tun_rsel; /* read select */
- struct mtx tun_mtx; /* protect mutable softc fields */
- struct cv tun_cv; /* protect against ref'd dev destroy */
-};
-#define TUN2IFP(sc) ((sc)->tun_ifp)
-
-#define TUNDEBUG if (tundebug) if_printf
-
-/*
- * All mutable global variables in if_tun are locked using tunmtx, with
- * the exception of tundebug, which is used unlocked, and tunclones,
- * which is static after setup.
- */
-static struct mtx tunmtx;
-static eventhandler_tag tag;
-static const char tunname[] = "tun";
-static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
-static int tundebug = 0;
-static int tundclone = 1;
-static struct clonedevs *tunclones;
-static TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
-SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
-
-static struct sx tun_ioctl_sx;
-SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");
-
-SYSCTL_DECL(_net_link);
-static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
- "IP tunnel software network interface.");
-SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
- "Enable legacy devfs interface creation.");
-
-static void tunclone(void *arg, struct ucred *cred, char *name,
- int namelen, struct cdev **dev);
-static void tuncreate(const char *name, struct cdev *dev);
-static int tunifioctl(struct ifnet *, u_long, caddr_t);
-static void tuninit(struct ifnet *);
-static int tunmodevent(module_t, int, void *);
-static int tunoutput(struct ifnet *, struct mbuf *,
- const struct sockaddr *, struct route *ro);
-static void tunstart(struct ifnet *);
-
-static int tun_clone_match(struct if_clone *ifc, const char *name);
-static int tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
-static int tun_clone_destroy(struct if_clone *, struct ifnet *);
-static struct unrhdr *tun_unrhdr;
-VNET_DEFINE_STATIC(struct if_clone *, tun_cloner);
-#define V_tun_cloner VNET(tun_cloner)
-
-static d_open_t tunopen;
-static d_close_t tunclose;
-static d_read_t tunread;
-static d_write_t tunwrite;
-static d_ioctl_t tunioctl;
-static d_poll_t tunpoll;
-static d_kqfilter_t tunkqfilter;
-
-static int tunkqread(struct knote *, long);
-static int tunkqwrite(struct knote *, long);
-static void tunkqdetach(struct knote *);
-
-static struct filterops tun_read_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tunkqdetach,
- .f_event = tunkqread,
-};
-
-static struct filterops tun_write_filterops = {
- .f_isfd = 1,
- .f_attach = NULL,
- .f_detach = tunkqdetach,
- .f_event = tunkqwrite,
-};
-
-static struct cdevsw tun_cdevsw = {
- .d_version = D_VERSION,
- .d_flags = D_NEEDMINOR,
- .d_open = tunopen,
- .d_close = tunclose,
- .d_read = tunread,
- .d_write = tunwrite,
- .d_ioctl = tunioctl,
- .d_poll = tunpoll,
- .d_kqfilter = tunkqfilter,
- .d_name = tunname,
-};
-
-static int
-tun_clone_match(struct if_clone *ifc, const char *name)
-{
- if (strncmp(tunname, name, 3) == 0 &&
- (name[3] == '\0' || isdigit(name[3])))
- return (1);
-
- return (0);
-}
-
-static int
-tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
-{
- struct cdev *dev;
- int err, unit, i;
-
- err = ifc_name2unit(name, &unit);
- if (err != 0)
- return (err);
-
- if (unit != -1) {
- /* If this unit number is still available that's okay. */
- if (alloc_unr_specific(tun_unrhdr, unit) == -1)
- return (EEXIST);
- } else {
- unit = alloc_unr(tun_unrhdr);
- }
-
- snprintf(name, IFNAMSIZ, "%s%d", tunname, unit);
-
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0);
- if (i) {
- /* No preexisting struct cdev *, create one */
- dev = make_dev(&tun_cdevsw, unit,
- UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit);
- }
- tuncreate(tunname, dev);
-
- return (0);
-}
-
-static void
-tunclone(void *arg, struct ucred *cred, char *name, int namelen,
- struct cdev **dev)
-{
- char devname[SPECNAMELEN + 1];
- int u, i, append_unit;
-
- if (*dev != NULL)
- return;
-
- /*
- * If tun cloning is enabled, only the superuser can create an
- * interface.
- */
- if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
- return;
-
- if (strcmp(name, tunname) == 0) {
- u = -1;
- } else if (dev_stdclone(name, NULL, tunname, &u) != 1)
- return; /* Don't recognise the name */
- if (u != -1 && u > IF_MAXUNIT)
- return; /* Unit number too high */
-
- if (u == -1)
- append_unit = 1;
- else
- append_unit = 0;
-
- CURVNET_SET(CRED_TO_VNET(cred));
- /* find any existing device, or allocate new unit number */
- i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
- if (i) {
- if (append_unit) {
- namelen = snprintf(devname, sizeof(devname), "%s%d",
- name, u);
- name = devname;
- }
- /* No preexisting struct cdev *, create one */
- *dev = make_dev_credf(MAKEDEV_REF, &tun_cdevsw, u, cred,
- UID_UUCP, GID_DIALER, 0600, "%s", name);
- }
-
- if_clone_create(name, namelen, NULL);
- CURVNET_RESTORE();
-}
-
-static void
-tun_destroy(struct tun_softc *tp)
-{
- struct cdev *dev;
-
- mtx_lock(&tp->tun_mtx);
- tp->tun_flags |= TUN_DYING;
- if ((tp->tun_flags & TUN_OPEN) != 0)
- cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
- else
- mtx_unlock(&tp->tun_mtx);
-
- CURVNET_SET(TUN2IFP(tp)->if_vnet);
-
- dev = tp->tun_dev;
- bpfdetach(TUN2IFP(tp));
- if_detach(TUN2IFP(tp));
-
- sx_xlock(&tun_ioctl_sx);
- TUN2IFP(tp)->if_softc = NULL;
- sx_xunlock(&tun_ioctl_sx);
-
- free_unr(tun_unrhdr, TUN2IFP(tp)->if_dunit);
- if_free(TUN2IFP(tp));
- destroy_dev(dev);
- seldrain(&tp->tun_rsel);
- knlist_clear(&tp->tun_rsel.si_note, 0);
- knlist_destroy(&tp->tun_rsel.si_note);
- mtx_destroy(&tp->tun_mtx);
- cv_destroy(&tp->tun_cv);
- free(tp, M_TUN);
- CURVNET_RESTORE();
-}
-
-static int
-tun_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
-{
- struct tun_softc *tp = ifp->if_softc;
-
- mtx_lock(&tunmtx);
- TAILQ_REMOVE(&tunhead, tp, tun_list);
- mtx_unlock(&tunmtx);
- tun_destroy(tp);
-
- return (0);
-}
-
-static void
-vnet_tun_init(const void *unused __unused)
-{
- V_tun_cloner = if_clone_advanced(tunname, 0, tun_clone_match,
- tun_clone_create, tun_clone_destroy);
-}
-VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
- vnet_tun_init, NULL);
-
-static void
-vnet_tun_uninit(const void *unused __unused)
-{
- if_clone_detach(V_tun_cloner);
-}
-VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
- vnet_tun_uninit, NULL);
-
-static void
-tun_uninit(const void *unused __unused)
-{
- struct tun_softc *tp;
-
- EVENTHANDLER_DEREGISTER(dev_clone, tag);
- drain_dev_clone_events();
-
- mtx_lock(&tunmtx);
- while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
- TAILQ_REMOVE(&tunhead, tp, tun_list);
- mtx_unlock(&tunmtx);
- tun_destroy(tp);
- mtx_lock(&tunmtx);
- }
- mtx_unlock(&tunmtx);
- delete_unrhdr(tun_unrhdr);
- clone_cleanup(&tunclones);
- mtx_destroy(&tunmtx);
-}
-SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);
-
-static int
-tunmodevent(module_t mod, int type, void *data)
-{
-
- switch (type) {
- case MOD_LOAD:
- mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
- clone_setup(&tunclones);
- tun_unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
- tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
- if (tag == NULL)
- return (ENOMEM);
- break;
- case MOD_UNLOAD:
- /* See tun_uninit, so it's done after the vnet_sysuninit() */
- break;
- default:
- return EOPNOTSUPP;
- }
- return 0;
-}
-
-static moduledata_t tun_mod = {
- "if_tun",
- tunmodevent,
- 0
-};
-
-DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
-MODULE_VERSION(if_tun, 1);
-
-static void
-tunstart(struct ifnet *ifp)
-{
- struct tun_softc *tp = ifp->if_softc;
- struct mbuf *m;
-
- TUNDEBUG(ifp,"%s starting\n", ifp->if_xname);
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, m);
- if (m == NULL) {
- IFQ_UNLOCK(&ifp->if_snd);
- return;
- }
- IFQ_UNLOCK(&ifp->if_snd);
- }
-
- mtx_lock(&tp->tun_mtx);
- if (tp->tun_flags & TUN_RWAIT) {
- tp->tun_flags &= ~TUN_RWAIT;
- wakeup(tp);
- }
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
- KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
- if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
- mtx_unlock(&tp->tun_mtx);
- pgsigio(&tp->tun_sigio, SIGIO, 0);
- } else
- mtx_unlock(&tp->tun_mtx);
-}
-
-/* XXX: should return an error code so it can fail. */
-static void
-tuncreate(const char *name, struct cdev *dev)
-{
- struct tun_softc *sc;
- struct ifnet *ifp;
-
- sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
- mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
- cv_init(&sc->tun_cv, "tun_condvar");
- sc->tun_flags = TUN_INITED;
- sc->tun_dev = dev;
- mtx_lock(&tunmtx);
- TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
- mtx_unlock(&tunmtx);
-
- ifp = sc->tun_ifp = if_alloc(IFT_PPP);
- if (ifp == NULL)
- panic("%s%d: failed to if_alloc() interface.\n",
- name, dev2unit(dev));
- if_initname(ifp, name, dev2unit(dev));
- ifp->if_mtu = TUNMTU;
- ifp->if_ioctl = tunifioctl;
- ifp->if_output = tunoutput;
- ifp->if_start = tunstart;
- ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
- ifp->if_softc = sc;
- IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
- ifp->if_snd.ifq_drv_maxlen = 0;
- IFQ_SET_READY(&ifp->if_snd);
- knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
- ifp->if_capabilities |= IFCAP_LINKSTATE;
- ifp->if_capenable |= IFCAP_LINKSTATE;
-
- if_attach(ifp);
- bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
- dev->si_drv1 = sc;
- TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
-}
-
-static int
-tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
-{
- struct ifnet *ifp;
- struct tun_softc *tp;
-
- /*
- * XXXRW: Non-atomic test and set of dev->si_drv1 requires
- * synchronization.
- */
- tp = dev->si_drv1;
- if (!tp) {
- tuncreate(tunname, dev);
- tp = dev->si_drv1;
- }
-
- mtx_lock(&tp->tun_mtx);
- if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
- mtx_unlock(&tp->tun_mtx);
- return (EBUSY);
- }
-
-#ifndef __rtems__
- tp->tun_pid = td->td_proc->p_pid;
-#endif /* __rtems__ */
- tp->tun_flags |= TUN_OPEN;
- ifp = TUN2IFP(tp);
- if_link_state_change(ifp, LINK_STATE_UP);
- TUNDEBUG(ifp, "open\n");
- mtx_unlock(&tp->tun_mtx);
-
- return (0);
-}
-
-/*
- * tunclose - close the device - mark i/f down & delete
- * routing info
- */
-static int
-tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
-{
- struct tun_softc *tp;
- struct ifnet *ifp;
-
- tp = dev->si_drv1;
- ifp = TUN2IFP(tp);
-
- mtx_lock(&tp->tun_mtx);
-#ifndef __rtems__
- /*
- * Simply close the device if this isn't the controlling process. This
- * may happen if, for instance, the tunnel has been handed off to
- * another process. The original controller should be able to close it
- * without putting us into an inconsistent state.
- */
- if (td->td_proc->p_pid != tp->tun_pid) {
- mtx_unlock(&tp->tun_mtx);
- return (0);
- }
-#endif /* __rtems__ */
-
- /*
- * junk all pending output
- */
- CURVNET_SET(ifp->if_vnet);
- IFQ_PURGE(&ifp->if_snd);
-
- if (ifp->if_flags & IFF_UP) {
- mtx_unlock(&tp->tun_mtx);
- if_down(ifp);
- mtx_lock(&tp->tun_mtx);
- }
-
- /* Delete all addresses and routes which reference this interface. */
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- struct ifaddr *ifa;
-
- ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
- mtx_unlock(&tp->tun_mtx);
- CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- /* deal w/IPv4 PtP destination; unlocked read */
- if (ifa->ifa_addr->sa_family == AF_INET) {
- rtinit(ifa, (int)RTM_DELETE,
- tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
- } else {
- rtinit(ifa, (int)RTM_DELETE, 0);
- }
- }
- if_purgeaddrs(ifp);
- mtx_lock(&tp->tun_mtx);
- }
- if_link_state_change(ifp, LINK_STATE_DOWN);
- CURVNET_RESTORE();
-
- funsetown(&tp->tun_sigio);
- selwakeuppri(&tp->tun_rsel, PZERO + 1);
- KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
- TUNDEBUG (ifp, "closed\n");
- tp->tun_flags &= ~TUN_OPEN;
-#ifndef __rtems__
- tp->tun_pid = 0;
-#endif /* __rtems__ */
-
- cv_broadcast(&tp->tun_cv);
- mtx_unlock(&tp->tun_mtx);
- return (0);
-}
-
-static void
-tuninit(struct ifnet *ifp)
-{
- struct tun_softc *tp = ifp->if_softc;
-#ifdef INET
- struct ifaddr *ifa;
-#endif
-
- TUNDEBUG(ifp, "tuninit\n");
-
- mtx_lock(&tp->tun_mtx);
- ifp->if_flags |= IFF_UP;
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- getmicrotime(&ifp->if_lastchange);
-
-#ifdef INET
- if_addr_rlock(ifp);
- CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr->sa_family == AF_INET) {
- struct sockaddr_in *si;
-
- si = (struct sockaddr_in *)ifa->ifa_addr;
- if (si->sin_addr.s_addr)
- tp->tun_flags |= TUN_IASET;
-
- si = (struct sockaddr_in *)ifa->ifa_dstaddr;
- if (si && si->sin_addr.s_addr)
- tp->tun_flags |= TUN_DSTADDR;
- }
- }
- if_addr_runlock(ifp);
-#endif
- mtx_unlock(&tp->tun_mtx);
-}
-
-/*
- * Process an ioctl request.
- */
-static int
-tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
-{
- struct ifreq *ifr = (struct ifreq *)data;
- struct tun_softc *tp;
- struct ifstat *ifs;
- int error = 0;
-
- sx_xlock(&tun_ioctl_sx);
- tp = ifp->if_softc;
- if (tp == NULL) {
- error = ENXIO;
- goto bad;
- }
- switch(cmd) {
- case SIOCGIFSTATUS:
- ifs = (struct ifstat *)data;
- mtx_lock(&tp->tun_mtx);
-#ifndef __rtems__
- if (tp->tun_pid)
- snprintf(ifs->ascii, sizeof(ifs->ascii),
- "\tOpened by PID %d\n", tp->tun_pid);
- else
-#endif /* __rtems__ */
- ifs->ascii[0] = '\0';
- mtx_unlock(&tp->tun_mtx);
- break;
- case SIOCSIFADDR:
- tuninit(ifp);
- TUNDEBUG(ifp, "address set\n");
- break;
- case SIOCSIFMTU:
- ifp->if_mtu = ifr->ifr_mtu;
- TUNDEBUG(ifp, "mtu set\n");
- break;
- case SIOCSIFFLAGS:
- case SIOCADDMULTI:
- case SIOCDELMULTI:
- break;
- default:
- error = EINVAL;
- }
-bad:
- sx_xunlock(&tun_ioctl_sx);
- return (error);
-}
-
-/*
- * tunoutput - queue packets from higher level ready to put out.
- */
-static int
-tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
- struct route *ro)
-{
- struct tun_softc *tp = ifp->if_softc;
- u_short cached_tun_flags;
- int error;
- u_int32_t af;
-
- TUNDEBUG (ifp, "tunoutput\n");
-
-#ifdef MAC
- error = mac_ifnet_check_transmit(ifp, m0);
- if (error) {
- m_freem(m0);
- return (error);
- }
-#endif
-
- /* Could be unlocked read? */
- mtx_lock(&tp->tun_mtx);
- cached_tun_flags = tp->tun_flags;
- mtx_unlock(&tp->tun_mtx);
- if ((cached_tun_flags & TUN_READY) != TUN_READY) {
- TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
- m_freem (m0);
- return (EHOSTDOWN);
- }
-
- if ((ifp->if_flags & IFF_UP) != IFF_UP) {
- m_freem (m0);
- return (EHOSTDOWN);
- }
-
- /* BPF writes need to be handled specially. */
- if (dst->sa_family == AF_UNSPEC)
- bcopy(dst->sa_data, &af, sizeof(af));
- else
- af = dst->sa_family;
-
- if (bpf_peers_present(ifp->if_bpf))
- bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
-
- /* prepend sockaddr? this may abort if the mbuf allocation fails */
- if (cached_tun_flags & TUN_LMODE) {
- /* allocate space for sockaddr */
- M_PREPEND(m0, dst->sa_len, M_NOWAIT);
-
- /* if allocation failed drop packet */
- if (m0 == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (ENOBUFS);
- } else {
- bcopy(dst, m0->m_data, dst->sa_len);
- }
- }
-
- if (cached_tun_flags & TUN_IFHEAD) {
- /* Prepend the address family */
- M_PREPEND(m0, 4, M_NOWAIT);
-
- /* if allocation failed drop packet */
- if (m0 == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (ENOBUFS);
- } else
- *(u_int32_t *)m0->m_data = htonl(af);
- } else {
-#ifdef INET
- if (af != AF_INET)
-#endif
- {
- m_freem(m0);
- return (EAFNOSUPPORT);
- }
- }
-
- error = (ifp->if_transmit)(ifp, m0);
- if (error)
- return (ENOBUFS);
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
- return (0);
-}
-
-/*
- * the cdevsw interface is now pretty minimal.
- */
-static int
-tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
- struct thread *td)
-{
- struct ifreq ifr, *ifrp;
- struct tun_softc *tp = dev->si_drv1;
- struct tuninfo *tunp;
- int error;
-
- switch (cmd) {
- case TUNGIFNAME:
- ifrp = (struct ifreq *)data;
- strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);
- break;
- case TUNSIFINFO:
- tunp = (struct tuninfo *)data;
- if (TUN2IFP(tp)->if_type != tunp->type)
- return (EPROTOTYPE);
- mtx_lock(&tp->tun_mtx);
- if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
- strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
- ifr.ifr_mtu = tunp->mtu;
- CURVNET_SET(TUN2IFP(tp)->if_vnet);
- error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
- (caddr_t)&ifr, td);
- CURVNET_RESTORE();
- if (error) {
- mtx_unlock(&tp->tun_mtx);
- return (error);
- }
- }
- TUN2IFP(tp)->if_baudrate = tunp->baudrate;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNGIFINFO:
- tunp = (struct tuninfo *)data;
- mtx_lock(&tp->tun_mtx);
- tunp->mtu = TUN2IFP(tp)->if_mtu;
- tunp->type = TUN2IFP(tp)->if_type;
- tunp->baudrate = TUN2IFP(tp)->if_baudrate;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNSDEBUG:
- tundebug = *(int *)data;
- break;
- case TUNGDEBUG:
- *(int *)data = tundebug;
- break;
- case TUNSLMODE:
- mtx_lock(&tp->tun_mtx);
- if (*(int *)data) {
- tp->tun_flags |= TUN_LMODE;
- tp->tun_flags &= ~TUN_IFHEAD;
- } else
- tp->tun_flags &= ~TUN_LMODE;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNSIFHEAD:
- mtx_lock(&tp->tun_mtx);
- if (*(int *)data) {
- tp->tun_flags |= TUN_IFHEAD;
- tp->tun_flags &= ~TUN_LMODE;
- } else
- tp->tun_flags &= ~TUN_IFHEAD;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNGIFHEAD:
- mtx_lock(&tp->tun_mtx);
- *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
- mtx_unlock(&tp->tun_mtx);
- break;
- case TUNSIFMODE:
- /* deny this if UP */
- if (TUN2IFP(tp)->if_flags & IFF_UP)
- return(EBUSY);
-
- switch (*(int *)data & ~IFF_MULTICAST) {
- case IFF_POINTOPOINT:
- case IFF_BROADCAST:
- mtx_lock(&tp->tun_mtx);
- TUN2IFP(tp)->if_flags &=
- ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
- TUN2IFP(tp)->if_flags |= *(int *)data;
- mtx_unlock(&tp->tun_mtx);
- break;
- default:
- return(EINVAL);
- }
- break;
- case TUNSIFPID:
-#ifndef __rtems__
- mtx_lock(&tp->tun_mtx);
- tp->tun_pid = curthread->td_proc->p_pid;
- mtx_unlock(&tp->tun_mtx);
-#endif /* __rtems__ */
- break;
- case FIONBIO:
- break;
- case FIOASYNC:
- mtx_lock(&tp->tun_mtx);
- if (*(int *)data)
- tp->tun_flags |= TUN_ASYNC;
- else
- tp->tun_flags &= ~TUN_ASYNC;
- mtx_unlock(&tp->tun_mtx);
- break;
- case FIONREAD:
- if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
- struct mbuf *mb;
- IFQ_LOCK(&TUN2IFP(tp)->if_snd);
- IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
- for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
- *(int *)data += mb->m_len;
- IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
- } else
- *(int *)data = 0;
- break;
- case FIOSETOWN:
- return (fsetown(*(int *)data, &tp->tun_sigio));
-
- case FIOGETOWN:
- *(int *)data = fgetown(&tp->tun_sigio);
- return (0);
-
- /* This is deprecated, FIOSETOWN should be used instead. */
- case TIOCSPGRP:
- return (fsetown(-(*(int *)data), &tp->tun_sigio));
-
- /* This is deprecated, FIOGETOWN should be used instead. */
- case TIOCGPGRP:
- *(int *)data = -fgetown(&tp->tun_sigio);
- return (0);
-
- default:
- return (ENOTTY);
- }
- return (0);
-}
-
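Among the generic ioctls handled above, FIONREAD reports the byte length of the next queued packet (0 when the queue is empty), which lets a reader size its buffer before calling read(2). A brief userspace sketch; the device node name is illustrative:

#include <sys/types.h>
#include <sys/filio.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd, nbytes;

	fd = open("/dev/tun0", O_RDWR);		/* node name illustrative */
	if (fd < 0)
		return (1);
	if (ioctl(fd, FIONREAD, &nbytes) == 0)
		printf("next queued packet is %d bytes\n", nbytes);
	close(fd);
	return (0);
}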
-/*
- * The cdevsw read interface - reads a packet at a time, or at
- * least as much of a packet as can be read.
- */
-static int
-tunread(struct cdev *dev, struct uio *uio, int flag)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
- struct mbuf *m;
- int error=0, len;
-
- TUNDEBUG (ifp, "read\n");
- mtx_lock(&tp->tun_mtx);
- if ((tp->tun_flags & TUN_READY) != TUN_READY) {
- mtx_unlock(&tp->tun_mtx);
- TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
- return (EHOSTDOWN);
- }
-
- tp->tun_flags &= ~TUN_RWAIT;
-
- do {
- IFQ_DEQUEUE(&ifp->if_snd, m);
- if (m == NULL) {
- if (flag & O_NONBLOCK) {
- mtx_unlock(&tp->tun_mtx);
- return (EWOULDBLOCK);
- }
- tp->tun_flags |= TUN_RWAIT;
- error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
- "tunread", 0);
- if (error != 0) {
- mtx_unlock(&tp->tun_mtx);
- return (error);
- }
- }
- } while (m == NULL);
- mtx_unlock(&tp->tun_mtx);
-
- while (m && uio->uio_resid > 0 && error == 0) {
- len = min(uio->uio_resid, m->m_len);
- if (len != 0)
- error = uiomove(mtod(m, void *), len, uio);
- m = m_free(m);
- }
-
- if (m) {
- TUNDEBUG(ifp, "Dropping mbuf\n");
- m_freem(m);
- }
- return (error);
-}
-
-/*
- * the cdevsw write interface - an atomic write is a packet - or else!
- */
-static int
-tunwrite(struct cdev *dev, struct uio *uio, int flag)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
- struct mbuf *m;
- uint32_t family, mru;
- int isr;
-
- TUNDEBUG(ifp, "tunwrite\n");
-
- if ((ifp->if_flags & IFF_UP) != IFF_UP)
- /* ignore silently */
- return (0);
-
- if (uio->uio_resid == 0)
- return (0);
-
- mru = TUNMRU;
- if (tp->tun_flags & TUN_IFHEAD)
- mru += sizeof(family);
- if (uio->uio_resid < 0 || uio->uio_resid > mru) {
- TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
- return (EIO);
- }
-
- if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) {
- if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
- return (ENOBUFS);
- }
-
- m->m_pkthdr.rcvif = ifp;
-#ifdef MAC
- mac_ifnet_create_mbuf(ifp, m);
-#endif
-
- /* Could be unlocked read? */
- mtx_lock(&tp->tun_mtx);
- if (tp->tun_flags & TUN_IFHEAD) {
- mtx_unlock(&tp->tun_mtx);
- if (m->m_len < sizeof(family) &&
- (m = m_pullup(m, sizeof(family))) == NULL)
- return (ENOBUFS);
- family = ntohl(*mtod(m, u_int32_t *));
- m_adj(m, sizeof(family));
- } else {
- mtx_unlock(&tp->tun_mtx);
- family = AF_INET;
- }
-
- BPF_MTAP2(ifp, &family, sizeof(family), m);
-
- switch (family) {
-#ifdef INET
- case AF_INET:
- isr = NETISR_IP;
- break;
-#endif
-#ifdef INET6
- case AF_INET6:
- isr = NETISR_IPV6;
- break;
-#endif
- default:
- m_freem(m);
- return (EAFNOSUPPORT);
- }
- random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
- if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
- if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
- CURVNET_SET(ifp->if_vnet);
- M_SETFIB(m, ifp->if_fib);
- netisr_dispatch(isr, m);
- CURVNET_RESTORE();
- return (0);
-}
-
-/*
- * tunpoll - the poll interface, this is only useful on reads
- * really. The write detect always returns true, write never blocks
- * anyway, it either accepts the packet or drops it.
- */
-static int
-tunpoll(struct cdev *dev, int events, struct thread *td)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
- int revents = 0;
- struct mbuf *m;
-
- TUNDEBUG(ifp, "tunpoll\n");
-
- if (events & (POLLIN | POLLRDNORM)) {
- IFQ_LOCK(&ifp->if_snd);
- IFQ_POLL_NOLOCK(&ifp->if_snd, m);
- if (m != NULL) {
- TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
- revents |= events & (POLLIN | POLLRDNORM);
- } else {
- TUNDEBUG(ifp, "tunpoll waiting\n");
- selrecord(td, &tp->tun_rsel);
- }
- IFQ_UNLOCK(&ifp->if_snd);
- }
- if (events & (POLLOUT | POLLWRNORM))
- revents |= events & (POLLOUT | POLLWRNORM);
-
- return (revents);
-}
-
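As the comment above notes, only the read side of tunpoll can block; write readiness is always reported. A userspace consumer can therefore simply wait in poll(2) for the next outbound packet. A minimal sketch; the device node name is illustrative:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct pollfd pfd;
	int fd;

	fd = open("/dev/tun0", O_RDWR);		/* node name illustrative */
	if (fd < 0)
		return (1);
	pfd.fd = fd;
	pfd.events = POLLIN;
	/* Blocks until the interface queues an outbound packet for us. */
	if (poll(&pfd, 1, -1) == 1 && (pfd.revents & POLLIN) != 0)
		printf("a packet is ready to read\n");
	close(fd);
	return (0);
}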
-/*
- * tunkqfilter - support for the kevent() system call.
- */
-static int
-tunkqfilter(struct cdev *dev, struct knote *kn)
-{
- struct tun_softc *tp = dev->si_drv1;
- struct ifnet *ifp = TUN2IFP(tp);
-
- switch(kn->kn_filter) {
- case EVFILT_READ:
- TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tun_read_filterops;
- break;
-
- case EVFILT_WRITE:
- TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- kn->kn_fop = &tun_write_filterops;
- break;
-
- default:
- TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
- ifp->if_xname, dev2unit(dev));
- return(EINVAL);
- }
-
- kn->kn_hook = tp;
- knlist_add(&tp->tun_rsel.si_note, kn, 0);
-
- return (0);
-}
-
-/*
- * Return true if there is data in the interface queue.
- */
-static int
-tunkqread(struct knote *kn, long hint)
-{
- int ret;
- struct tun_softc *tp = kn->kn_hook;
- struct cdev *dev = tp->tun_dev;
- struct ifnet *ifp = TUN2IFP(tp);
-
- if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
- TUNDEBUG(ifp,
- "%s have data in the queue. Len = %d, minor = %#x\n",
- ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
- ret = 1;
- } else {
- TUNDEBUG(ifp,
- "%s waiting for data, minor = %#x\n", ifp->if_xname,
- dev2unit(dev));
- ret = 0;
- }
-
- return (ret);
-}
-
-/*
- * Always can write, always return MTU in kn->data.
- */
-static int
-tunkqwrite(struct knote *kn, long hint)
-{
- struct tun_softc *tp = kn->kn_hook;
- struct ifnet *ifp = TUN2IFP(tp);
-
- kn->kn_data = ifp->if_mtu;
-
- return (1);
-}
-
-static void
-tunkqdetach(struct knote *kn)
-{
- struct tun_softc *tp = kn->kn_hook;
-
- knlist_remove(&tp->tun_rsel.si_note, kn, 0);
-}
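The removed tunwrite above honours TUN_IFHEAD: when it is set, each written packet must be preceded by a 4-byte address family in network byte order, and packets read back are framed the same way (see tunoutput). A sketch of injecting a packet in that mode; the device node name and the zeroed placeholder packet are illustrative:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <net/if_tun.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	unsigned char pkt[64];			/* placeholder for a prebuilt IPv4 packet */
	struct iovec iov[2];
	uint32_t af = htonl(AF_INET);
	int fd, on = 1;

	memset(pkt, 0, sizeof(pkt));
	fd = open("/dev/tun0", O_RDWR);		/* node name illustrative */
	if (fd < 0)
		return (1);
	ioctl(fd, TUNSIFHEAD, &on);		/* enable the 4-byte AF prefix */

	iov[0].iov_base = &af;			/* address family header ... */
	iov[0].iov_len = sizeof(af);
	iov[1].iov_base = pkt;			/* ... followed by the packet itself */
	iov[1].iov_len = sizeof(pkt);
	(void)writev(fd, iov, 2);		/* one writev() == one packet */
	close(fd);
	return (0);
}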
diff --git a/freebsd/sys/net/if_tuntap.c b/freebsd/sys/net/if_tuntap.c
new file mode 100644
index 00000000..e366aac7
--- /dev/null
+++ b/freebsd/sys/net/if_tuntap.c
@@ -0,0 +1,1923 @@
+#include <machine/rtems-bsd-kernel-space.h>
+
+/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
+ * All rights reserved.
+ * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * BASED ON:
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
+ * Nottingham University 1987.
+ *
+ * This source may be freely distributed, however I would be interested
+ * in any changes that are made.
+ *
+ * This driver takes packets off the IP i/f and hands them up to a
+ * user process to have its wicked way with. This driver has its
+ * roots in a similar driver written by Phil Cockcroft (formerly) at
+ * UCL. This driver is based much more on read/write/poll mode of
+ * operation though.
+ *
+ * $FreeBSD$
+ */
+
+#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/jail.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/socket.h>
+#include <sys/fcntl.h>
+#include <sys/filio.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/syslog.h>
+#include <sys/ttycom.h>
+#include <sys/poll.h>
+#include <sys/selinfo.h>
+#include <sys/signalvar.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/random.h>
+#include <sys/ctype.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_clone.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <net/vnet.h>
+#ifdef INET
+#include <netinet/in.h>
+#endif
+#include <net/bpf.h>
+#include <net/if_tap.h>
+#include <net/if_tun.h>
+
+#include <sys/queue.h>
+#include <sys/condvar.h>
+#include <security/mac/mac_framework.h>
+
+struct tuntap_driver;
+
+/*
+ * tun_list is protected by global tunmtx. Other mutable fields are
+ * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is
+ * static for the duration of a tunnel interface.
+ */
+struct tuntap_softc {
+ TAILQ_ENTRY(tuntap_softc) tun_list;
+ struct cdev *tun_alias;
+ struct cdev *tun_dev;
+ u_short tun_flags; /* misc flags */
+#define TUN_OPEN 0x0001
+#define TUN_INITED 0x0002
+#define TUN_UNUSED1 0x0008
+#define TUN_DSTADDR 0x0010
+#define TUN_LMODE 0x0020
+#define TUN_RWAIT 0x0040
+#define TUN_ASYNC 0x0080
+#define TUN_IFHEAD 0x0100
+#define TUN_DYING 0x0200
+#define TUN_L2 0x0400
+#define TUN_VMNET 0x0800
+
+#define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET)
+#define TUN_READY (TUN_OPEN | TUN_INITED)
+
+#ifndef __rtems__
+ pid_t tun_pid; /* owning pid */
+#endif /* __rtems__ */
+ struct ifnet *tun_ifp; /* the interface */
+ struct sigio *tun_sigio; /* async I/O info */
+ struct tuntap_driver *tun_drv; /* appropriate driver */
+ struct selinfo tun_rsel; /* read select */
+ struct mtx tun_mtx; /* softc field mutex */
+ struct cv tun_cv; /* for ref'd dev destroy */
+ struct ether_addr tun_ether; /* remote address */
+ int tun_busy; /* busy count */
+};
+#define TUN2IFP(sc) ((sc)->tun_ifp)
+
+#define TUNDEBUG if (tundebug) if_printf
+
+#define TUN_LOCK(tp) mtx_lock(&(tp)->tun_mtx)
+#define TUN_UNLOCK(tp) mtx_unlock(&(tp)->tun_mtx)
+#define TUN_LOCK_ASSERT(tp) mtx_assert(&(tp)->tun_mtx, MA_OWNED);
+
+#define TUN_VMIO_FLAG_MASK 0x0fff
+
+/*
+ * All mutable global variables in if_tun are locked using tunmtx, with
+ * the exception of tundebug, which is used unlocked, and the drivers' *clones,
+ * which are static after setup.
+ */
+static struct mtx tunmtx;
+static eventhandler_tag arrival_tag;
+static eventhandler_tag clone_tag;
+static const char tunname[] = "tun";
+static const char tapname[] = "tap";
+static const char vmnetname[] = "vmnet";
+static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
+static int tundebug = 0;
+static int tundclone = 1;
+static int tap_allow_uopen = 0; /* allow user open() */
+static int tapuponopen = 0; /* IFF_UP on open() */
+static int tapdclone = 1; /* enable devfs cloning */
+
+static TAILQ_HEAD(,tuntap_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
+SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
+
+static struct sx tun_ioctl_sx;
+SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");
+
+SYSCTL_DECL(_net_link);
+/* tun */
+static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
+ "IP tunnel software network interface");
+SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
+ "Enable legacy devfs interface creation");
+
+/* tap */
+static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0,
+ "Ethernet tunnel software network interface");
+SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0,
+ "Allow user to open /dev/tap (based on node permissions)");
+SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0,
+ "Bring interface up when /dev/tap is opened");
+SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0,
+ "Enable legacy devfs interface creation");
+SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, "");
+
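The tap knobs declared above (user_open, up_on_open, devfs_cloning) are plain integer sysctls, so they can be inspected or toggled from userspace with sysctlbyname(3) or the sysctl(8) utility. A small sketch; setting a value requires the usual privileges:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int cur, enable = 1;
	size_t len = sizeof(cur);

	/* Inspect the current value of net.link.tap.user_open. */
	if (sysctlbyname("net.link.tap.user_open", &cur, &len, NULL, 0) == 0)
		printf("user_open is currently %d\n", cur);

	/* Allow non-root open() of /dev/tap*; errors ignored in this sketch. */
	(void)sysctlbyname("net.link.tap.user_open", NULL, NULL,
	    &enable, sizeof(enable));
	return (0);
}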
+static int tun_create_device(struct tuntap_driver *drv, int unit,
+ struct ucred *cr, struct cdev **dev, const char *name);
+static int tun_busy_locked(struct tuntap_softc *tp);
+static void tun_unbusy_locked(struct tuntap_softc *tp);
+static int tun_busy(struct tuntap_softc *tp);
+static void tun_unbusy(struct tuntap_softc *tp);
+
+static int tuntap_name2info(const char *name, int *unit, int *flags);
+static void tunclone(void *arg, struct ucred *cred, char *name,
+ int namelen, struct cdev **dev);
+static void tuncreate(struct cdev *dev);
+static void tundtor(void *data);
+static void tunrename(void *arg, struct ifnet *ifp);
+static int tunifioctl(struct ifnet *, u_long, caddr_t);
+static void tuninit(struct ifnet *);
+static void tunifinit(void *xtp);
+static int tuntapmodevent(module_t, int, void *);
+static int tunoutput(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *ro);
+static void tunstart(struct ifnet *);
+static void tunstart_l2(struct ifnet *);
+
+static int tun_clone_match(struct if_clone *ifc, const char *name);
+static int tap_clone_match(struct if_clone *ifc, const char *name);
+static int vmnet_clone_match(struct if_clone *ifc, const char *name);
+static int tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
+static int tun_clone_destroy(struct if_clone *, struct ifnet *);
+
+static d_open_t tunopen;
+static d_read_t tunread;
+static d_write_t tunwrite;
+static d_ioctl_t tunioctl;
+static d_poll_t tunpoll;
+static d_kqfilter_t tunkqfilter;
+
+static int tunkqread(struct knote *, long);
+static int tunkqwrite(struct knote *, long);
+static void tunkqdetach(struct knote *);
+
+static struct filterops tun_read_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = tunkqdetach,
+ .f_event = tunkqread,
+};
+
+static struct filterops tun_write_filterops = {
+ .f_isfd = 1,
+ .f_attach = NULL,
+ .f_detach = tunkqdetach,
+ .f_event = tunkqwrite,
+};
+
+static struct tuntap_driver {
+ struct cdevsw cdevsw;
+ int ident_flags;
+ struct unrhdr *unrhdr;
+ struct clonedevs *clones;
+ ifc_match_t *clone_match_fn;
+ ifc_create_t *clone_create_fn;
+ ifc_destroy_t *clone_destroy_fn;
+} tuntap_drivers[] = {
+ {
+ .ident_flags = 0,
+ .cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_NEEDMINOR,
+ .d_open = tunopen,
+ .d_read = tunread,
+ .d_write = tunwrite,
+ .d_ioctl = tunioctl,
+ .d_poll = tunpoll,
+ .d_kqfilter = tunkqfilter,
+ .d_name = tunname,
+ },
+ .clone_match_fn = tun_clone_match,
+ .clone_create_fn = tun_clone_create,
+ .clone_destroy_fn = tun_clone_destroy,
+ },
+ {
+ .ident_flags = TUN_L2,
+ .cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_NEEDMINOR,
+ .d_open = tunopen,
+ .d_read = tunread,
+ .d_write = tunwrite,
+ .d_ioctl = tunioctl,
+ .d_poll = tunpoll,
+ .d_kqfilter = tunkqfilter,
+ .d_name = tapname,
+ },
+ .clone_match_fn = tap_clone_match,
+ .clone_create_fn = tun_clone_create,
+ .clone_destroy_fn = tun_clone_destroy,
+ },
+ {
+ .ident_flags = TUN_L2 | TUN_VMNET,
+ .cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = D_NEEDMINOR,
+ .d_open = tunopen,
+ .d_read = tunread,
+ .d_write = tunwrite,
+ .d_ioctl = tunioctl,
+ .d_poll = tunpoll,
+ .d_kqfilter = tunkqfilter,
+ .d_name = vmnetname,
+ },
+ .clone_match_fn = vmnet_clone_match,
+ .clone_create_fn = tun_clone_create,
+ .clone_destroy_fn = tun_clone_destroy,
+ },
+};
+
+struct tuntap_driver_cloner {
+ SLIST_ENTRY(tuntap_driver_cloner) link;
+ struct tuntap_driver *drv;
+ struct if_clone *cloner;
+};
+
+VNET_DEFINE_STATIC(SLIST_HEAD(, tuntap_driver_cloner), tuntap_driver_cloners) =
+ SLIST_HEAD_INITIALIZER(tuntap_driver_cloners);
+
+#define V_tuntap_driver_cloners VNET(tuntap_driver_cloners)
+
+/*
+ * Mechanism for marking a tunnel device as busy so that we can safely do some
+ * orthogonal operations (such as operations on devices) without racing against
+ * tun_destroy. tun_destroy will wait on the condvar if we're at all busy or
+ * open, to be woken up when the condition is alleviated.
+ */
+static int
+tun_busy_locked(struct tuntap_softc *tp)
+{
+
+ TUN_LOCK_ASSERT(tp);
+ if ((tp->tun_flags & TUN_DYING) != 0) {
+ /*
+ * Perhaps unintuitive, but the device is busy going away.
+ * Other interpretations of EBUSY from tun_busy make little
+ * sense, since making a busy device even more busy doesn't
+ * sound like a problem.
+ */
+ return (EBUSY);
+ }
+
+ ++tp->tun_busy;
+ return (0);
+}
+
+static void
+tun_unbusy_locked(struct tuntap_softc *tp)
+{
+
+ TUN_LOCK_ASSERT(tp);
+ KASSERT(tp->tun_busy != 0, ("tun_unbusy: called for non-busy tunnel"));
+
+ --tp->tun_busy;
+ /* Wake up anything that may be waiting on our busy tunnel. */
+ if (tp->tun_busy == 0)
+ cv_broadcast(&tp->tun_cv);
+}
+
+static int
+tun_busy(struct tuntap_softc *tp)
+{
+ int ret;
+
+ TUN_LOCK(tp);
+ ret = tun_busy_locked(tp);
+ TUN_UNLOCK(tp);
+ return (ret);
+}
+
+static void
+tun_unbusy(struct tuntap_softc *tp)
+{
+
+ TUN_LOCK(tp);
+ tun_unbusy_locked(tp);
+ TUN_UNLOCK(tp);
+}
+
+/*
+ * Sets unit and/or flags given the device name. Must be called with correct
+ * vnet context.
+ */
+static int
+tuntap_name2info(const char *name, int *outunit, int *outflags)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_driver_cloner *drvc;
+ char *dname;
+ int flags, unit;
+ bool found;
+
+ if (name == NULL)
+ return (EINVAL);
+
+ /*
+ * dev_stdclone() wants a non-const name even though it does not
+ * modify it; the second parameter only exists so it can hand back a
+ * char *, and we always pass NULL there, so just cast the const away.
+ */
+ dname = __DECONST(char *, name);
+ found = false;
+
+ KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
+ ("tuntap_driver_cloners failed to initialize"));
+ SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
+ KASSERT(drvc->drv != NULL,
+ ("tuntap_driver_cloners entry not properly initialized"));
+ drv = drvc->drv;
+
+ if (strcmp(name, drv->cdevsw.d_name) == 0) {
+ found = true;
+ unit = -1;
+ flags = drv->ident_flags;
+ break;
+ }
+
+ if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) {
+ found = true;
+ flags = drv->ident_flags;
+ break;
+ }
+ }
+
+ if (!found)
+ return (ENXIO);
+
+ if (outunit != NULL)
+ *outunit = unit;
+ if (outflags != NULL)
+ *outflags = flags;
+ return (0);
+}
+
+/*
+ * Get driver information from a set of flags specified. Masks the identifying
+ * part of the flags and compares it against all of the available
+ * tuntap_drivers. Must be called with correct vnet context.
+ */
+static struct tuntap_driver *
+tuntap_driver_from_flags(int tun_flags)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_driver_cloner *drvc;
+
+ KASSERT(!SLIST_EMPTY(&V_tuntap_driver_cloners),
+ ("tuntap_driver_cloners failed to initialize"));
+ SLIST_FOREACH(drvc, &V_tuntap_driver_cloners, link) {
+ KASSERT(drvc->drv != NULL,
+ ("tuntap_driver_cloners entry not properly initialized"));
+ drv = drvc->drv;
+ if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->ident_flags)
+ return (drv);
+ }
+
+ return (NULL);
+}
+
+static int
+tun_clone_match(struct if_clone *ifc, const char *name)
+{
+ int tunflags;
+
+ if (tuntap_name2info(name, NULL, &tunflags) == 0) {
+ if ((tunflags & TUN_L2) == 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+tap_clone_match(struct if_clone *ifc, const char *name)
+{
+ int tunflags;
+
+ if (tuntap_name2info(name, NULL, &tunflags) == 0) {
+ if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2)
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+vmnet_clone_match(struct if_clone *ifc, const char *name)
+{
+ int tunflags;
+
+ if (tuntap_name2info(name, NULL, &tunflags) == 0) {
+ if ((tunflags & TUN_VMNET) != 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
+{
+ struct tuntap_driver *drv;
+ struct cdev *dev;
+ int err, i, tunflags, unit;
+
+ tunflags = 0;
+ /* The name here tells us exactly what we're creating */
+ err = tuntap_name2info(name, &unit, &tunflags);
+ if (err != 0)
+ return (err);
+
+ drv = tuntap_driver_from_flags(tunflags);
+ if (drv == NULL)
+ return (ENXIO);
+
+ if (unit != -1) {
+ /* If this unit number is still available that's okay. */
+ if (alloc_unr_specific(drv->unrhdr, unit) == -1)
+ return (EEXIST);
+ } else {
+ unit = alloc_unr(drv->unrhdr);
+ }
+
+ snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit);
+
+ /* find any existing device, or allocate new unit number */
+ dev = NULL;
+ i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0);
+ /* No preexisting struct cdev *, create one */
+ if (i != 0)
+ i = tun_create_device(drv, unit, NULL, &dev, name);
+ if (i == 0)
+ tuncreate(dev);
+
+ return (i);
+}
+
+static void
+tunclone(void *arg, struct ucred *cred, char *name, int namelen,
+ struct cdev **dev)
+{
+ char devname[SPECNAMELEN + 1];
+ struct tuntap_driver *drv;
+ int append_unit, i, u, tunflags;
+ bool mayclone;
+
+ if (*dev != NULL)
+ return;
+
+ tunflags = 0;
+ CURVNET_SET(CRED_TO_VNET(cred));
+ if (tuntap_name2info(name, &u, &tunflags) != 0)
+ goto out; /* Not recognized */
+
+ if (u != -1 && u > IF_MAXUNIT)
+ goto out; /* Unit number too high */
+
+ mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE, 0) == 0;
+ if ((tunflags & TUN_L2) != 0) {
+ /* tap/vmnet allow user open with a sysctl */
+ mayclone = (mayclone || tap_allow_uopen) && tapdclone;
+ } else {
+ mayclone = mayclone && tundclone;
+ }
+
+ /*
+ * If this caller is not permitted to clone the requested device
+ * (per the policy computed above), bail out.
+ */
+ if (!mayclone)
+ goto out;
+
+ if (u == -1)
+ append_unit = 1;
+ else
+ append_unit = 0;
+
+ drv = tuntap_driver_from_flags(tunflags);
+ if (drv == NULL)
+ goto out;
+
+ /* find any existing device, or allocate new unit number */
+ i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0);
+ if (i) {
+ if (append_unit) {
+ namelen = snprintf(devname, sizeof(devname), "%s%d",
+ name, u);
+ name = devname;
+ }
+
+ i = tun_create_device(drv, u, cred, dev, name);
+ }
+ if (i == 0)
+ if_clone_create(name, namelen, NULL);
+out:
+ CURVNET_RESTORE();
+}
+
+static void
+tun_destroy(struct tuntap_softc *tp)
+{
+
+ TUN_LOCK(tp);
+ tp->tun_flags |= TUN_DYING;
+ if (tp->tun_busy != 0)
+ cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
+ else
+ TUN_UNLOCK(tp);
+
+ CURVNET_SET(TUN2IFP(tp)->if_vnet);
+
+ /* destroy_dev will take care of any alias. */
+ destroy_dev(tp->tun_dev);
+ seldrain(&tp->tun_rsel);
+ knlist_clear(&tp->tun_rsel.si_note, 0);
+ knlist_destroy(&tp->tun_rsel.si_note);
+ if ((tp->tun_flags & TUN_L2) != 0) {
+ ether_ifdetach(TUN2IFP(tp));
+ } else {
+ bpfdetach(TUN2IFP(tp));
+ if_detach(TUN2IFP(tp));
+ }
+ sx_xlock(&tun_ioctl_sx);
+ TUN2IFP(tp)->if_softc = NULL;
+ sx_xunlock(&tun_ioctl_sx);
+ free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
+ if_free(TUN2IFP(tp));
+ mtx_destroy(&tp->tun_mtx);
+ cv_destroy(&tp->tun_cv);
+ free(tp, M_TUN);
+ CURVNET_RESTORE();
+}
+
+static int
+tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+
+ mtx_lock(&tunmtx);
+ TAILQ_REMOVE(&tunhead, tp, tun_list);
+ mtx_unlock(&tunmtx);
+ tun_destroy(tp);
+
+ return (0);
+}
+
+static void
+vnet_tun_init(const void *unused __unused)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_driver_cloner *drvc;
+ int i;
+
+ for (i = 0; i < nitems(tuntap_drivers); ++i) {
+ drv = &tuntap_drivers[i];
+ drvc = malloc(sizeof(*drvc), M_TUN, M_WAITOK | M_ZERO);
+
+ drvc->drv = drv;
+ drvc->cloner = if_clone_advanced(drv->cdevsw.d_name, 0,
+ drv->clone_match_fn, drv->clone_create_fn,
+ drv->clone_destroy_fn);
+ SLIST_INSERT_HEAD(&V_tuntap_driver_cloners, drvc, link);
+ }
+}
+VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_tun_init, NULL);
+
+static void
+vnet_tun_uninit(const void *unused __unused)
+{
+ struct tuntap_driver_cloner *drvc;
+
+ while (!SLIST_EMPTY(&V_tuntap_driver_cloners)) {
+ drvc = SLIST_FIRST(&V_tuntap_driver_cloners);
+ SLIST_REMOVE_HEAD(&V_tuntap_driver_cloners, link);
+
+ if_clone_detach(drvc->cloner);
+ free(drvc, M_TUN);
+ }
+}
+VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
+ vnet_tun_uninit, NULL);
+
+static void
+tun_uninit(const void *unused __unused)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_softc *tp;
+ int i;
+
+ EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag);
+ EVENTHANDLER_DEREGISTER(dev_clone, clone_tag);
+ drain_dev_clone_events();
+
+ mtx_lock(&tunmtx);
+ while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
+ TAILQ_REMOVE(&tunhead, tp, tun_list);
+ mtx_unlock(&tunmtx);
+ tun_destroy(tp);
+ mtx_lock(&tunmtx);
+ }
+ mtx_unlock(&tunmtx);
+ for (i = 0; i < nitems(tuntap_drivers); ++i) {
+ drv = &tuntap_drivers[i];
+ delete_unrhdr(drv->unrhdr);
+ clone_cleanup(&drv->clones);
+ }
+ mtx_destroy(&tunmtx);
+}
+SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);
+
+static struct tuntap_driver *
+tuntap_driver_from_ifnet(const struct ifnet *ifp)
+{
+ struct tuntap_driver *drv;
+ int i;
+
+ if (ifp == NULL)
+ return (NULL);
+
+ for (i = 0; i < nitems(tuntap_drivers); ++i) {
+ drv = &tuntap_drivers[i];
+ if (strcmp(ifp->if_dname, drv->cdevsw.d_name) == 0)
+ return (drv);
+ }
+
+ return (NULL);
+}
+
+static int
+tuntapmodevent(module_t mod, int type, void *data)
+{
+ struct tuntap_driver *drv;
+ int i;
+
+ switch (type) {
+ case MOD_LOAD:
+ mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
+ for (i = 0; i < nitems(tuntap_drivers); ++i) {
+ drv = &tuntap_drivers[i];
+ clone_setup(&drv->clones);
+ drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
+ }
+ arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
+ tunrename, 0, 1000);
+ if (arrival_tag == NULL)
+ return (ENOMEM);
+ clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
+ if (clone_tag == NULL)
+ return (ENOMEM);
+ break;
+ case MOD_UNLOAD:
+ /* See tun_uninit, so it's done after the vnet_sysuninit() */
+ break;
+ default:
+ return EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static moduledata_t tuntap_mod = {
+ "if_tuntap",
+ tuntapmodevent,
+ 0
+};
+
+/* We'll only ever have these two, so no need for a macro. */
+static moduledata_t tun_mod = { "if_tun", NULL, 0 };
+static moduledata_t tap_mod = { "if_tap", NULL, 0 };
+
+DECLARE_MODULE(if_tuntap, tuntap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_tuntap, 1);
+DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_tun, 1);
+DECLARE_MODULE(if_tap, tap_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_tap, 1);
+
+static int
+tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr,
+ struct cdev **dev, const char *name)
+{
+ struct make_dev_args args;
+ struct tuntap_softc *tp;
+ int error;
+
+ tp = malloc(sizeof(*tp), M_TUN, M_WAITOK | M_ZERO);
+ mtx_init(&tp->tun_mtx, "tun_mtx", NULL, MTX_DEF);
+ cv_init(&tp->tun_cv, "tun_condvar");
+ tp->tun_flags = drv->ident_flags;
+ tp->tun_drv = drv;
+
+ make_dev_args_init(&args);
+ if (cr != NULL)
+ args.mda_flags = MAKEDEV_REF;
+ args.mda_devsw = &drv->cdevsw;
+ args.mda_cr = cr;
+ args.mda_uid = UID_UUCP;
+ args.mda_gid = GID_DIALER;
+ args.mda_mode = 0600;
+ args.mda_unit = unit;
+ args.mda_si_drv1 = tp;
+ error = make_dev_s(&args, dev, "%s", name);
+ if (error != 0) {
+ free(tp, M_TUN);
+ return (error);
+ }
+
+ KASSERT((*dev)->si_drv1 != NULL,
+ ("Failed to set si_drv1 at %s creation", name));
+ tp->tun_dev = *dev;
+ knlist_init_mtx(&tp->tun_rsel.si_note, &tp->tun_mtx);
+ mtx_lock(&tunmtx);
+ TAILQ_INSERT_TAIL(&tunhead, tp, tun_list);
+ mtx_unlock(&tunmtx);
+ return (0);
+}
+
+static void
+tunstart(struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+ struct mbuf *m;
+
+ TUNDEBUG(ifp, "starting\n");
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_LOCK(&ifp->if_snd);
+ IFQ_POLL_NOLOCK(&ifp->if_snd, m);
+ if (m == NULL) {
+ IFQ_UNLOCK(&ifp->if_snd);
+ return;
+ }
+ IFQ_UNLOCK(&ifp->if_snd);
+ }
+
+ TUN_LOCK(tp);
+ if (tp->tun_flags & TUN_RWAIT) {
+ tp->tun_flags &= ~TUN_RWAIT;
+ wakeup(tp);
+ }
+ selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
+ TUN_UNLOCK(tp);
+ pgsigio(&tp->tun_sigio, SIGIO, 0);
+ } else
+ TUN_UNLOCK(tp);
+}
+
+/*
+ * tunstart_l2
+ *
+ * queue packets from higher level ready to put out
+ */
+static void
+tunstart_l2(struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+
+ TUNDEBUG(ifp, "starting\n");
+
+ /*
+ * do not junk pending output if we are in VMnet mode.
+ * XXX: can this do any harm because of queue overflow?
+ */
+
+ TUN_LOCK(tp);
+ if (((tp->tun_flags & TUN_VMNET) == 0) &&
+ ((tp->tun_flags & TUN_READY) != TUN_READY)) {
+ struct mbuf *m;
+
+ /* Unlocked read. */
+ TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags);
+
+ for (;;) {
+ IF_DEQUEUE(&ifp->if_snd, m);
+ if (m != NULL) {
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ } else
+ break;
+ }
+ TUN_UNLOCK(tp);
+
+ return;
+ }
+
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ if (tp->tun_flags & TUN_RWAIT) {
+ tp->tun_flags &= ~TUN_RWAIT;
+ wakeup(tp);
+ }
+
+ if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) {
+ TUN_UNLOCK(tp);
+ pgsigio(&tp->tun_sigio, SIGIO, 0);
+ TUN_LOCK(tp);
+ }
+
+ selwakeuppri(&tp->tun_rsel, PZERO+1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */
+ }
+
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ TUN_UNLOCK(tp);
+} /* tunstart_l2 */
+
+/* XXX: should return an error code so it can fail. */
+static void
+tuncreate(struct cdev *dev)
+{
+ struct tuntap_driver *drv;
+ struct tuntap_softc *tp;
+ struct ifnet *ifp;
+ struct ether_addr eaddr;
+ int iflags;
+ u_char type;
+
+ tp = dev->si_drv1;
+ KASSERT(tp != NULL,
+ ("si_drv1 should have been initialized at creation"));
+
+ drv = tp->tun_drv;
+ iflags = IFF_MULTICAST;
+ if ((tp->tun_flags & TUN_L2) != 0) {
+ type = IFT_ETHER;
+ iflags |= IFF_BROADCAST | IFF_SIMPLEX;
+ } else {
+ type = IFT_PPP;
+ iflags |= IFF_POINTOPOINT;
+ }
+ ifp = tp->tun_ifp = if_alloc(type);
+ if (ifp == NULL)
+ panic("%s%d: failed to if_alloc() interface.\n",
+ drv->cdevsw.d_name, dev2unit(dev));
+ ifp->if_softc = tp;
+ if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev));
+ ifp->if_ioctl = tunifioctl;
+ ifp->if_flags = iflags;
+ IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
+ ifp->if_capabilities |= IFCAP_LINKSTATE;
+ ifp->if_capenable |= IFCAP_LINKSTATE;
+
+ if ((tp->tun_flags & TUN_L2) != 0) {
+ ifp->if_mtu = ETHERMTU;
+ ifp->if_init = tunifinit;
+ ifp->if_start = tunstart_l2;
+
+ ether_gen_addr(ifp, &eaddr);
+ ether_ifattach(ifp, eaddr.octet);
+ } else {
+ ifp->if_mtu = TUNMTU;
+ ifp->if_start = tunstart;
+ ifp->if_output = tunoutput;
+
+ ifp->if_snd.ifq_drv_maxlen = 0;
+ IFQ_SET_READY(&ifp->if_snd);
+
+ if_attach(ifp);
+ bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
+ }
+
+ TUN_LOCK(tp);
+ tp->tun_flags |= TUN_INITED;
+ TUN_UNLOCK(tp);
+
+ TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+}
+
+static void
+tunrename(void *arg __unused, struct ifnet *ifp)
+{
+ struct tuntap_softc *tp;
+ int error;
+
+ if ((ifp->if_flags & IFF_RENAMING) == 0)
+ return;
+
+ if (tuntap_driver_from_ifnet(ifp) == NULL)
+ return;
+
+ /*
+ * We need to grab the ioctl sx long enough to make sure the softc is
+ * still there. If it is, we can safely try to busy the tun device.
+ * The busy may fail if the device is currently dying, in which case
+ * we do nothing. If it doesn't fail, the busy count stops the device
+ * from dying until we've created the alias (that will then be
+ * subsequently destroyed).
+ */
+ sx_xlock(&tun_ioctl_sx);
+ tp = ifp->if_softc;
+ if (tp == NULL) {
+ sx_xunlock(&tun_ioctl_sx);
+ return;
+ }
+ error = tun_busy(tp);
+ sx_xunlock(&tun_ioctl_sx);
+ if (error != 0)
+ return;
+ if (tp->tun_alias != NULL) {
+ destroy_dev(tp->tun_alias);
+ tp->tun_alias = NULL;
+ }
+
+ if (strcmp(ifp->if_xname, tp->tun_dev->si_name) == 0)
+ goto out;
+
+ /*
+ * Failure's ok, aliases are created on a best effort basis. If a
+ * tun user/consumer decides to rename the interface to conflict with
+ * another device (non-ifnet) on the system, we will assume they know
+ * what they are doing. make_dev_alias_p won't touch tun_alias on
+ * failure, so we use it but ignore the return value.
+ */
+ make_dev_alias_p(MAKEDEV_CHECKNAME, &tp->tun_alias, tp->tun_dev, "%s",
+ ifp->if_xname);
+out:
+ tun_unbusy(tp);
+}
+
+static int
+tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+ struct ifnet *ifp;
+ struct tuntap_softc *tp;
+ int error, tunflags;
+
+ tunflags = 0;
+ CURVNET_SET(TD_TO_VNET(td));
+ error = tuntap_name2info(dev->si_name, NULL, &tunflags);
+ if (error != 0) {
+ CURVNET_RESTORE();
+ return (error); /* Shouldn't happen */
+ }
+
+ if ((tunflags & TUN_L2) != 0) {
+ /* Restrict? */
+ if (tap_allow_uopen == 0) {
+ error = priv_check(td, PRIV_NET_TAP);
+ if (error != 0) {
+ CURVNET_RESTORE();
+ return (error);
+ }
+ }
+ }
+
+ tp = dev->si_drv1;
+ KASSERT(tp != NULL,
+ ("si_drv1 should have been initialized at creation"));
+
+ TUN_LOCK(tp);
+ if ((tp->tun_flags & TUN_INITED) == 0) {
+ TUN_UNLOCK(tp);
+ CURVNET_RESTORE();
+ return (ENXIO);
+ }
+ if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
+ TUN_UNLOCK(tp);
+ CURVNET_RESTORE();
+ return (EBUSY);
+ }
+
+ error = tun_busy_locked(tp);
+ KASSERT(error == 0, ("Must be able to busy an unopen tunnel"));
+ ifp = TUN2IFP(tp);
+
+ if ((tp->tun_flags & TUN_L2) != 0) {
+ bcopy(IF_LLADDR(ifp), tp->tun_ether.octet,
+ sizeof(tp->tun_ether.octet));
+
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+
+ if (tapuponopen)
+ ifp->if_flags |= IFF_UP;
+ }
+
+#ifndef __rtems__
+ tp->tun_pid = td->td_proc->p_pid;
+#endif /* __rtems__ */
+ tp->tun_flags |= TUN_OPEN;
+
+ if_link_state_change(ifp, LINK_STATE_UP);
+ TUNDEBUG(ifp, "open\n");
+ TUN_UNLOCK(tp);
+
+ /*
+ * This can fail with either ENOENT or EBUSY. This is in the middle of
+ * d_open, so ENOENT should not be possible. EBUSY is possible, but
+ * the only cdevpriv dtor being set will be tundtor and the softc being
+ * passed is constant for a given cdev. We ignore the possible error
+ * because of this as either "unlikely" or "not actually a problem."
+ */
+ (void)devfs_set_cdevpriv(tp, tundtor);
+ CURVNET_RESTORE();
+ return (0);
+}
+
+/*
+ * tundtor - tear down the device - mark i/f down & delete
+ * routing info
+ */
+static void
+tundtor(void *data)
+{
+#ifndef __rtems__
+ struct proc *p;
+#endif /* __rtems__ */
+ struct tuntap_softc *tp;
+ struct ifnet *ifp;
+ bool l2tun;
+
+ tp = data;
+#ifndef __rtems__
+ p = curproc;
+#endif /* __rtems__ */
+ ifp = TUN2IFP(tp);
+
+ TUN_LOCK(tp);
+
+#ifndef __rtems__
+ /*
+ * Realistically, we can't be obstinate here. This only means that the
+ * tuntap device was closed out of order, and the last closer wasn't the
+ * controller. These are still good to know about, though, as software
+ * should avoid multiple processes with a tuntap device open and
+ * ill-defined transfer of control (e.g., handoff, TUNSIFPID, close in
+ * parent).
+ */
+ if (p->p_pid != tp->tun_pid) {
+ log(LOG_INFO,
+ "pid %d (%s), %s: tun/tap protocol violation, non-controlling process closed last.\n",
+ p->p_pid, p->p_comm, tp->tun_dev->si_name);
+ }
+#endif /* __rtems__ */
+
+ /*
+ * junk all pending output
+ */
+ CURVNET_SET(ifp->if_vnet);
+
+ l2tun = false;
+ if ((tp->tun_flags & TUN_L2) != 0) {
+ l2tun = true;
+ IF_DRAIN(&ifp->if_snd);
+ } else {
+ IFQ_PURGE(&ifp->if_snd);
+ }
+
+ /* For vmnet, we won't do most of the address/route bits */
+ if ((tp->tun_flags & TUN_VMNET) != 0 ||
+ (l2tun && (ifp->if_flags & IFF_LINK0) != 0))
+ goto out;
+
+ if (ifp->if_flags & IFF_UP) {
+ TUN_UNLOCK(tp);
+ if_down(ifp);
+ TUN_LOCK(tp);
+ }
+
+ /* Delete all addresses and routes which reference this interface. */
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ struct ifaddr *ifa;
+
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ TUN_UNLOCK(tp);
+ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ /* deal w/IPv4 PtP destination; unlocked read */
+ if (!l2tun && ifa->ifa_addr->sa_family == AF_INET) {
+ rtinit(ifa, (int)RTM_DELETE,
+ tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
+ } else {
+ rtinit(ifa, (int)RTM_DELETE, 0);
+ }
+ }
+ if_purgeaddrs(ifp);
+ TUN_LOCK(tp);
+ }
+
+out:
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+ CURVNET_RESTORE();
+
+ funsetown(&tp->tun_sigio);
+ selwakeuppri(&tp->tun_rsel, PZERO + 1);
+ KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
+ TUNDEBUG (ifp, "closed\n");
+ tp->tun_flags &= ~TUN_OPEN;
+#ifndef __rtems__
+ tp->tun_pid = 0;
+#endif /* __rtems__ */
+
+ tun_unbusy_locked(tp);
+ TUN_UNLOCK(tp);
+}
+
+static void
+tuninit(struct ifnet *ifp)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+#ifdef INET
+ struct ifaddr *ifa;
+#endif
+
+ TUNDEBUG(ifp, "tuninit\n");
+
+ TUN_LOCK(tp);
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ if ((tp->tun_flags & TUN_L2) == 0) {
+ ifp->if_flags |= IFF_UP;
+ getmicrotime(&ifp->if_lastchange);
+#ifdef INET
+ if_addr_rlock(ifp);
+ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family == AF_INET) {
+ struct sockaddr_in *si;
+
+ si = (struct sockaddr_in *)ifa->ifa_dstaddr;
+ if (si && si->sin_addr.s_addr) {
+ tp->tun_flags |= TUN_DSTADDR;
+ break;
+ }
+ }
+ }
+ if_addr_runlock(ifp);
+#endif
+ TUN_UNLOCK(tp);
+ } else {
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ TUN_UNLOCK(tp);
+ /* attempt to start output */
+ tunstart_l2(ifp);
+ }
+
+}
+
+/*
+ * Used only for l2 tunnel.
+ */
+static void
+tunifinit(void *xtp)
+{
+ struct tuntap_softc *tp;
+
+ tp = (struct tuntap_softc *)xtp;
+ tuninit(tp->tun_ifp);
+}
+
+/*
+ * Process an ioctl request.
+ */
+static int
+tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct ifreq *ifr = (struct ifreq *)data;
+ struct tuntap_softc *tp;
+ struct ifstat *ifs;
+ struct ifmediareq *ifmr;
+ int dummy, error = 0;
+ bool l2tun;
+
+ ifmr = NULL;
+ sx_xlock(&tun_ioctl_sx);
+ tp = ifp->if_softc;
+ if (tp == NULL) {
+ error = ENXIO;
+ goto bad;
+ }
+ l2tun = (tp->tun_flags & TUN_L2) != 0;
+ switch(cmd) {
+ case SIOCGIFSTATUS:
+ ifs = (struct ifstat *)data;
+ TUN_LOCK(tp);
+#ifndef __rtems__
+ if (tp->tun_pid)
+ snprintf(ifs->ascii, sizeof(ifs->ascii),
+ "\tOpened by PID %d\n", tp->tun_pid);
+ else
+#endif /* __rtems__ */
+ ifs->ascii[0] = '\0';
+ TUN_UNLOCK(tp);
+ break;
+ case SIOCSIFADDR:
+ if (l2tun)
+ error = ether_ioctl(ifp, cmd, data);
+ else
+ tuninit(ifp);
+ if (error == 0)
+ TUNDEBUG(ifp, "address set\n");
+ break;
+ case SIOCSIFMTU:
+ ifp->if_mtu = ifr->ifr_mtu;
+ TUNDEBUG(ifp, "mtu set\n");
+ break;
+ case SIOCSIFFLAGS:
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ break;
+ case SIOCGIFMEDIA:
+ if (!l2tun) {
+ error = EINVAL;
+ break;
+ }
+
+ ifmr = (struct ifmediareq *)data;
+ dummy = ifmr->ifm_count;
+ ifmr->ifm_count = 1;
+ ifmr->ifm_status = IFM_AVALID;
+ ifmr->ifm_active = IFM_ETHER;
+ if (tp->tun_flags & TUN_OPEN)
+ ifmr->ifm_status |= IFM_ACTIVE;
+ ifmr->ifm_current = ifmr->ifm_active;
+ if (dummy >= 1) {
+ int media = IFM_ETHER;
+ error = copyout(&media, ifmr->ifm_ulist, sizeof(int));
+ }
+ break;
+ default:
+ if (l2tun) {
+ error = ether_ioctl(ifp, cmd, data);
+ } else {
+ error = EINVAL;
+ }
+ }
+bad:
+ sx_xunlock(&tun_ioctl_sx);
+ return (error);
+}
+
+/*
+ * tunoutput - queue packets from higher level ready to put out.
+ */
+static int
+tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
+ struct route *ro)
+{
+ struct tuntap_softc *tp = ifp->if_softc;
+ u_short cached_tun_flags;
+ int error;
+ u_int32_t af;
+
+ TUNDEBUG (ifp, "tunoutput\n");
+
+#ifdef MAC
+ error = mac_ifnet_check_transmit(ifp, m0);
+ if (error) {
+ m_freem(m0);
+ return (error);
+ }
+#endif
+
+ /* Could be unlocked read? */
+ TUN_LOCK(tp);
+ cached_tun_flags = tp->tun_flags;
+ TUN_UNLOCK(tp);
+ if ((cached_tun_flags & TUN_READY) != TUN_READY) {
+ TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
+ m_freem (m0);
+ return (EHOSTDOWN);
+ }
+
+ if ((ifp->if_flags & IFF_UP) != IFF_UP) {
+ m_freem (m0);
+ return (EHOSTDOWN);
+ }
+
+ /* BPF writes need to be handled specially. */
+ if (dst->sa_family == AF_UNSPEC)
+ bcopy(dst->sa_data, &af, sizeof(af));
+ else
+ af = dst->sa_family;
+
+ if (bpf_peers_present(ifp->if_bpf))
+ bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
+
+ /* prepend sockaddr? this may abort if the mbuf allocation fails */
+ if (cached_tun_flags & TUN_LMODE) {
+ /* allocate space for sockaddr */
+ M_PREPEND(m0, dst->sa_len, M_NOWAIT);
+
+ /* if allocation failed drop packet */
+ if (m0 == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (ENOBUFS);
+ } else {
+ bcopy(dst, m0->m_data, dst->sa_len);
+ }
+ }
+
+ if (cached_tun_flags & TUN_IFHEAD) {
+ /* Prepend the address family */
+ M_PREPEND(m0, 4, M_NOWAIT);
+
+ /* if allocation failed drop packet */
+ if (m0 == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (ENOBUFS);
+ } else
+ *(u_int32_t *)m0->m_data = htonl(af);
+ } else {
+#ifdef INET
+ if (af != AF_INET)
+#endif
+ {
+ m_freem(m0);
+ return (EAFNOSUPPORT);
+ }
+ }
+
+ error = (ifp->if_transmit)(ifp, m0);
+ if (error)
+ return (ENOBUFS);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ return (0);
+}
+
+/*
+ * the cdevsw interface is now pretty minimal.
+ */
+static int
+tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
+ struct thread *td)
+{
+ struct ifreq ifr, *ifrp;
+ struct tuntap_softc *tp = dev->si_drv1;
+ struct tuninfo *tunp;
+ int error, iflags;
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+ defined(COMPAT_FREEBSD4)
+ int ival;
+#endif
+ bool l2tun;
+
+ l2tun = (tp->tun_flags & TUN_L2) != 0;
+ if (l2tun) {
+ /* tap specific ioctls */
+ switch(cmd) {
+ /* VMware/VMnet port ioctl's */
+#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \
+ defined(COMPAT_FREEBSD4)
+ case _IO('V', 0):
+ ival = IOCPARM_IVAL(data);
+ data = (caddr_t)&ival;
+ /* FALLTHROUGH */
+#endif
+ case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */
+ iflags = *(int *)data;
+ iflags &= TUN_VMIO_FLAG_MASK;
+ iflags &= ~IFF_CANTCHANGE;
+ iflags |= IFF_UP;
+
+ TUN_LOCK(tp);
+ TUN2IFP(tp)->if_flags = iflags |
+ (TUN2IFP(tp)->if_flags & IFF_CANTCHANGE);
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case SIOCGIFADDR: /* get MAC address of the remote side */
+ TUN_LOCK(tp);
+ bcopy(&tp->tun_ether.octet, data,
+ sizeof(tp->tun_ether.octet));
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case SIOCSIFADDR: /* set MAC address of the remote side */
+ TUN_LOCK(tp);
+ bcopy(data, &tp->tun_ether.octet,
+ sizeof(tp->tun_ether.octet));
+ TUN_UNLOCK(tp);
+
+ return (0);
+ }
+
+ /* Fall through to the common ioctls if unhandled */
+ } else {
+ switch (cmd) {
+ case TUNSLMODE:
+ TUN_LOCK(tp);
+ if (*(int *)data) {
+ tp->tun_flags |= TUN_LMODE;
+ tp->tun_flags &= ~TUN_IFHEAD;
+ } else
+ tp->tun_flags &= ~TUN_LMODE;
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case TUNSIFHEAD:
+ TUN_LOCK(tp);
+ if (*(int *)data) {
+ tp->tun_flags |= TUN_IFHEAD;
+ tp->tun_flags &= ~TUN_LMODE;
+ } else
+ tp->tun_flags &= ~TUN_IFHEAD;
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case TUNGIFHEAD:
+ TUN_LOCK(tp);
+ *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
+ TUN_UNLOCK(tp);
+
+ return (0);
+ case TUNSIFMODE:
+ /* deny this if UP */
+ if (TUN2IFP(tp)->if_flags & IFF_UP)
+ return (EBUSY);
+
+ switch (*(int *)data & ~IFF_MULTICAST) {
+ case IFF_POINTOPOINT:
+ case IFF_BROADCAST:
+ TUN_LOCK(tp);
+ TUN2IFP(tp)->if_flags &=
+ ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
+ TUN2IFP(tp)->if_flags |= *(int *)data;
+ TUN_UNLOCK(tp);
+
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+ case TUNSIFPID:
+#ifndef __rtems__
+ TUN_LOCK(tp);
+ tp->tun_pid = curthread->td_proc->p_pid;
+ TUN_UNLOCK(tp);
+#endif /* __rtems__ */
+
+ return (0);
+ }
+ /* Fall through to the common ioctls if unhandled */
+ }
+
+ switch (cmd) {
+ case TUNGIFNAME:
+ ifrp = (struct ifreq *)data;
+ strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);
+
+ return (0);
+ case TUNSIFINFO:
+ tunp = (struct tuninfo *)data;
+ if (TUN2IFP(tp)->if_type != tunp->type)
+ return (EPROTOTYPE);
+ TUN_LOCK(tp);
+ if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
+ strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
+ ifr.ifr_mtu = tunp->mtu;
+ CURVNET_SET(TUN2IFP(tp)->if_vnet);
+ error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
+ (caddr_t)&ifr, td);
+ CURVNET_RESTORE();
+ if (error) {
+ TUN_UNLOCK(tp);
+ return (error);
+ }
+ }
+ TUN2IFP(tp)->if_baudrate = tunp->baudrate;
+ TUN_UNLOCK(tp);
+ break;
+ case TUNGIFINFO:
+ tunp = (struct tuninfo *)data;
+ TUN_LOCK(tp);
+ tunp->mtu = TUN2IFP(tp)->if_mtu;
+ tunp->type = TUN2IFP(tp)->if_type;
+ tunp->baudrate = TUN2IFP(tp)->if_baudrate;
+ TUN_UNLOCK(tp);
+ break;
+ case TUNSDEBUG:
+ tundebug = *(int *)data;
+ break;
+ case TUNGDEBUG:
+ *(int *)data = tundebug;
+ break;
+ case FIONBIO:
+ break;
+ case FIOASYNC:
+ TUN_LOCK(tp);
+ if (*(int *)data)
+ tp->tun_flags |= TUN_ASYNC;
+ else
+ tp->tun_flags &= ~TUN_ASYNC;
+ TUN_UNLOCK(tp);
+ break;
+ case FIONREAD:
+ if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
+ struct mbuf *mb;
+ IFQ_LOCK(&TUN2IFP(tp)->if_snd);
+ IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
+ for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
+ *(int *)data += mb->m_len;
+ IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
+ } else
+ *(int *)data = 0;
+ break;
+ case FIOSETOWN:
+ return (fsetown(*(int *)data, &tp->tun_sigio));
+
+ case FIOGETOWN:
+ *(int *)data = fgetown(&tp->tun_sigio);
+ return (0);
+
+ /* This is deprecated, FIOSETOWN should be used instead. */
+ case TIOCSPGRP:
+ return (fsetown(-(*(int *)data), &tp->tun_sigio));
+
+ /* This is deprecated, FIOGETOWN should be used instead. */
+ case TIOCGPGRP:
+ *(int *)data = -fgetown(&tp->tun_sigio);
+ return (0);
+
+ default:
+ return (ENOTTY);
+ }
+ return (0);
+}
+
+/*
+ * The cdevsw read interface - reads a packet at a time, or at
+ * least as much of a packet as can be read.
+ */
+static int
+tunread(struct cdev *dev, struct uio *uio, int flag)
+{
+ struct tuntap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+ struct mbuf *m;
+ int error=0, len;
+
+ TUNDEBUG (ifp, "read\n");
+ TUN_LOCK(tp);
+ if ((tp->tun_flags & TUN_READY) != TUN_READY) {
+ TUN_UNLOCK(tp);
+ TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
+ return (EHOSTDOWN);
+ }
+
+ tp->tun_flags &= ~TUN_RWAIT;
+
+ for (;;) {
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ if (m != NULL)
+ break;
+ if (flag & O_NONBLOCK) {
+ TUN_UNLOCK(tp);
+ return (EWOULDBLOCK);
+ }
+ tp->tun_flags |= TUN_RWAIT;
+ error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
+ "tunread", 0);
+ if (error != 0) {
+ TUN_UNLOCK(tp);
+ return (error);
+ }
+ }
+ TUN_UNLOCK(tp);
+
+ if ((tp->tun_flags & TUN_L2) != 0)
+ BPF_MTAP(ifp, m);
+
+ while (m && uio->uio_resid > 0 && error == 0) {
+ len = min(uio->uio_resid, m->m_len);
+ if (len != 0)
+ error = uiomove(mtod(m, void *), len, uio);
+ m = m_free(m);
+ }
+
+ if (m) {
+ TUNDEBUG(ifp, "Dropping mbuf\n");
+ m_freem(m);
+ }
+ return (error);
+}
+
+static int
+tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m)
+{
+ struct ether_header *eh;
+ struct ifnet *ifp;
+
+ ifp = TUN2IFP(tp);
+
+ /*
+ * Only pass a unicast frame to ether_input(), if it would
+ * actually have been received by non-virtual hardware.
+ */
+ if (m->m_len < sizeof(struct ether_header)) {
+ m_freem(m);
+ return (0);
+ }
+
+ eh = mtod(m, struct ether_header *);
+
+ if (eh && (ifp->if_flags & IFF_PROMISC) == 0 &&
+ !ETHER_IS_MULTICAST(eh->ether_dhost) &&
+ bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) {
+ m_freem(m);
+ return (0);
+ }
+
+ /* Pass packet up to parent. */
+ CURVNET_SET(ifp->if_vnet);
+ (*ifp->if_input)(ifp, m);
+ CURVNET_RESTORE();
+ /* ibytes are counted in parent */
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ return (0);
+}
+
+static int
+tunwrite_l3(struct tuntap_softc *tp, struct mbuf *m)
+{
+ struct ifnet *ifp;
+ int family, isr;
+
+ ifp = TUN2IFP(tp);
+ /* Could be unlocked read? */
+ TUN_LOCK(tp);
+ if (tp->tun_flags & TUN_IFHEAD) {
+ TUN_UNLOCK(tp);
+ if (m->m_len < sizeof(family) &&
+ (m = m_pullup(m, sizeof(family))) == NULL)
+ return (ENOBUFS);
+ family = ntohl(*mtod(m, u_int32_t *));
+ m_adj(m, sizeof(family));
+ } else {
+ TUN_UNLOCK(tp);
+ family = AF_INET;
+ }
+
+ BPF_MTAP2(ifp, &family, sizeof(family), m);
+
+ switch (family) {
+#ifdef INET
+ case AF_INET:
+ isr = NETISR_IP;
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ isr = NETISR_IPV6;
+ break;
+#endif
+ default:
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+ random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN);
+ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
+ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+ CURVNET_SET(ifp->if_vnet);
+ M_SETFIB(m, ifp->if_fib);
+ netisr_dispatch(isr, m);
+ CURVNET_RESTORE();
+ return (0);
+}
+
+/*
+ * the cdevsw write interface - an atomic write is a packet - or else!
+ */
+static int
+tunwrite(struct cdev *dev, struct uio *uio, int flag)
+{
+ struct tuntap_softc *tp;
+ struct ifnet *ifp;
+ struct mbuf *m;
+ uint32_t mru;
+ int align;
+ bool l2tun;
+
+ tp = dev->si_drv1;
+ ifp = TUN2IFP(tp);
+ TUNDEBUG(ifp, "tunwrite\n");
+ if ((ifp->if_flags & IFF_UP) != IFF_UP)
+ /* ignore silently */
+ return (0);
+
+ if (uio->uio_resid == 0)
+ return (0);
+
+ l2tun = (tp->tun_flags & TUN_L2) != 0;
+ align = 0;
+ mru = l2tun ? TAPMRU : TUNMRU;
+ if (l2tun)
+ align = ETHER_ALIGN;
+ else if ((tp->tun_flags & TUN_IFHEAD) != 0)
+ mru += sizeof(uint32_t); /* family */
+ if (uio->uio_resid < 0 || uio->uio_resid > mru) {
+ TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
+ return (EIO);
+ }
+
+ if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+ return (ENOBUFS);
+ }
+
+ m->m_pkthdr.rcvif = ifp;
+#ifdef MAC
+ mac_ifnet_create_mbuf(ifp, m);
+#endif
+
+ if (l2tun)
+ return (tunwrite_l2(tp, m));
+
+ return (tunwrite_l3(tp, m));
+}
+
+/*
+ * tunpoll - the poll interface; this is really only useful for reads.
+ * The write detect always returns true: writes never block anyway,
+ * they either accept the packet or drop it.
+ */
+static int
+tunpoll(struct cdev *dev, int events, struct thread *td)
+{
+ struct tuntap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+ int revents = 0;
+
+ TUNDEBUG(ifp, "tunpoll\n");
+
+ if (events & (POLLIN | POLLRDNORM)) {
+ IFQ_LOCK(&ifp->if_snd);
+ if (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
+ revents |= events & (POLLIN | POLLRDNORM);
+ } else {
+ TUNDEBUG(ifp, "tunpoll waiting\n");
+ selrecord(td, &tp->tun_rsel);
+ }
+ IFQ_UNLOCK(&ifp->if_snd);
+ }
+ revents |= events & (POLLOUT | POLLWRNORM);
+
+ return (revents);
+}
+
+/*
+ * tunkqfilter - support for the kevent() system call.
+ */
+static int
+tunkqfilter(struct cdev *dev, struct knote *kn)
+{
+ struct tuntap_softc *tp = dev->si_drv1;
+ struct ifnet *ifp = TUN2IFP(tp);
+
+ switch(kn->kn_filter) {
+ case EVFILT_READ:
+ TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ kn->kn_fop = &tun_read_filterops;
+ break;
+
+ case EVFILT_WRITE:
+ TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ kn->kn_fop = &tun_write_filterops;
+ break;
+
+ default:
+ TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
+ ifp->if_xname, dev2unit(dev));
+ return(EINVAL);
+ }
+
+ kn->kn_hook = tp;
+ knlist_add(&tp->tun_rsel.si_note, kn, 0);
+
+ return (0);
+}
+
+/*
+ * Return true if there is data in the interface queue.
+ */
+static int
+tunkqread(struct knote *kn, long hint)
+{
+ int ret;
+ struct tuntap_softc *tp = kn->kn_hook;
+ struct cdev *dev = tp->tun_dev;
+ struct ifnet *ifp = TUN2IFP(tp);
+
+ if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
+ TUNDEBUG(ifp,
+ "%s have data in the queue. Len = %d, minor = %#x\n",
+ ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
+ ret = 1;
+ } else {
+ TUNDEBUG(ifp,
+ "%s waiting for data, minor = %#x\n", ifp->if_xname,
+ dev2unit(dev));
+ ret = 0;
+ }
+
+ return (ret);
+}
+
+/*
+ * Always can write, always return MTU in kn->data.
+ */
+static int
+tunkqwrite(struct knote *kn, long hint)
+{
+ struct tuntap_softc *tp = kn->kn_hook;
+ struct ifnet *ifp = TUN2IFP(tp);
+
+ kn->kn_data = ifp->if_mtu;
+
+ return (1);
+}
+
+static void
+tunkqdetach(struct knote *kn)
+{
+ struct tuntap_softc *tp = kn->kn_hook;
+
+ knlist_remove(&tp->tun_rsel.si_note, kn, 0);
+}
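For context only (this sketch is not part of the imported file): a minimal user-space illustration of the framing that tunread()/tunoutput() and tunwrite_l3() implement once TUNSIFHEAD is enabled, where every packet crossing the character device is prefixed by a 4-byte address family in network byte order. The /dev/tun0 path and the bare-bones error handling are assumptions of the sketch, not something provided by this change.

#include <sys/ioctl.h>
#include <net/if_tun.h>		/* TUNSIFHEAD */
#include <arpa/inet.h>		/* ntohl() */
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	unsigned char buf[2048];
	uint32_t af;
	ssize_t n;
	int fd, on = 1;

	fd = open("/dev/tun0", O_RDWR);		/* assumed device node */
	if (fd < 0)
		return (1);
	/* Ask for the 4-byte AF word handled by tunoutput()/tunwrite_l3(). */
	if (ioctl(fd, TUNSIFHEAD, &on) < 0)
		return (1);
	/* Blocks in tunread() until the stack queues a packet for us. */
	n = read(fd, buf, sizeof(buf));
	if (n >= (ssize_t)sizeof(af)) {
		memcpy(&af, buf, sizeof(af));
		printf("%zd bytes, address family %u\n", n,
		    (unsigned)ntohl(af));
	}
	close(fd);
	return (0);
}

Writes mirror the read side: the caller prepends the family word and tunwrite_l3() strips it with m_adj() before handing the packet to the matching netisr.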
diff --git a/freebsd/sys/net/if_vlan.c b/freebsd/sys/net/if_vlan.c
index 8f108b9d..2b5b3488 100644
--- a/freebsd/sys/net/if_vlan.c
+++ b/freebsd/sys/net/if_vlan.c
@@ -48,6 +48,7 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
+#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_vlan.h>
#include <rtems/bsd/local/opt_ratelimit.h>
@@ -76,6 +77,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
+#include <net/route.h>
#include <net/vnet.h>
#ifdef INET
@@ -83,6 +85,14 @@ __FBSDID("$FreeBSD$");
#include <netinet/if_ether.h>
#endif
+#ifdef INET6
+/*
+ * XXX: declared here to avoid including many inet6-related headers;
+ * should this be more generalized?
+ */
+extern void nd6_setmtu(struct ifnet *);
+#endif
+
#define VLAN_DEF_HWIDTH 4
#define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST)
@@ -1410,11 +1420,19 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid)
* Set up our interface address to reflect the underlying
* physical interface's.
*/
- bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen);
+ TASK_INIT(&ifv->lladdr_task, 0, vlan_lladdr_fn, ifv);
((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen =
p->if_addrlen;
- TASK_INIT(&ifv->lladdr_task, 0, vlan_lladdr_fn, ifv);
+ /*
+	 * Do not schedule a link address update if it is the same as the
+	 * previous parent's.  This avoids an update for each associated
+	 * llentry.
+ */
+ if (memcmp(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen) != 0) {
+ bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen);
+ taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task);
+ }
/* We are ready for operation now. */
ifp->if_drv_flags |= IFF_DRV_RUNNING;
@@ -1725,7 +1743,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct ifvlan *ifv;
struct ifvlantrunk *trunk;
struct vlanreq vlr;
- int error = 0;
+ int error = 0, oldmtu;
ifr = (struct ifreq *)data;
ifa = (struct ifaddr *) data;
@@ -1819,8 +1837,20 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = ENOENT;
break;
}
+ oldmtu = ifp->if_mtu;
error = vlan_config(ifv, p, vlr.vlr_tag);
if_rele(p);
+
+ /*
+ * VLAN MTU may change during addition of the vlandev.
+ * If it did, do network layer specific procedure.
+ */
+ if (ifp->if_mtu != oldmtu) {
+#ifdef INET6
+ nd6_setmtu(ifp);
+#endif
+ rt_updatemtu(ifp);
+ }
break;
case SIOCGETVLAN:
diff --git a/freebsd/sys/net/iflib.h b/freebsd/sys/net/iflib.h
index 2395439a..b0215daf 100644
--- a/freebsd/sys/net/iflib.h
+++ b/freebsd/sys/net/iflib.h
@@ -361,6 +361,11 @@ typedef enum {
* Interface needs admin task to ignore interface up/down status
*/
#define IFLIB_ADMIN_ALWAYS_RUN 0x10000
+/*
+ * When using a single hardware interrupt for the interface, only process RX
+ * interrupts instead of doing combined RX/TX processing.
+ */
+#define IFLIB_SINGLE_IRQ_RX_ONLY 0x40000
/*
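Purely as an illustration of how a driver opts in (hypothetical code, not part of this change; the field names are the usual iflib shared-context ones):

/* Hypothetical iflib driver: one MSI vector, RX-only interrupt work. */
static struct if_shared_ctx example_sctx = {
	.isc_magic = IFLIB_MAGIC,
	.isc_driver_version = "1.0",
	.isc_name = "example",
	/* Only RX is serviced from the shared interrupt; TX cleanup is
	 * handled outside the interrupt handler. */
	.isc_flags = IFLIB_SINGLE_IRQ_RX_ONLY,
};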
diff --git a/freebsd/sys/net/route.c b/freebsd/sys/net/route.c
index 0933c3a8..adbf91bd 100644
--- a/freebsd/sys/net/route.c
+++ b/freebsd/sys/net/route.c
@@ -865,7 +865,7 @@ rtrequest_fib(int req,
* to reflect size of the provided buffer. if no NHR_COPY is specified,
* point dst,netmask and gw @info fields to appropriate @rt values.
*
- * if @flags contains NHR_REF, do refcouting on rt_ifp.
+ * if @flags contains NHR_REF, do refcounting on rt_ifp and rt_ifa.
*
* Returns 0 on success.
*/
@@ -935,10 +935,9 @@ rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
info->rti_flags = rt->rt_flags;
info->rti_ifp = rt->rt_ifp;
info->rti_ifa = rt->rt_ifa;
- ifa_ref(info->rti_ifa);
if (flags & NHR_REF) {
- /* Do 'traditional' refcouting */
if_ref(info->rti_ifp);
+ ifa_ref(info->rti_ifa);
}
return (0);
@@ -948,8 +947,8 @@ rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
* Lookups up route entry for @dst in RIB database for fib @fibnum.
* Exports entry data to @info using rt_exportinfo().
*
- * if @flags contains NHR_REF, refcouting is performed on rt_ifp.
- * All references can be released later by calling rib_free_info()
+ * If @flags contains NHR_REF, refcounting is performed on rt_ifp and rt_ifa.
+ * All references can be released later by calling rib_free_info().
*
* Returns 0 on success.
* Returns ENOENT for lookup failure, ENOMEM for export failure.
@@ -995,6 +994,7 @@ void
rib_free_info(struct rt_addrinfo *info)
{
+ ifa_free(info->rti_ifa);
if_rele(info->rti_ifp);
}
@@ -1627,9 +1627,12 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
error = rt_getifa_fib(info, fibnum);
if (error)
return (error);
+ } else {
+ ifa_ref(info->rti_ifa);
}
rt = uma_zalloc(V_rtzone, M_NOWAIT);
if (rt == NULL) {
+ ifa_free(info->rti_ifa);
return (ENOBUFS);
}
rt->rt_flags = RTF_UP | flags;
@@ -1638,6 +1641,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
* Add the gateway. Possibly re-malloc-ing the storage for it.
*/
if ((error = rt_setgate(rt, dst, gateway)) != 0) {
+ ifa_free(info->rti_ifa);
uma_zfree(V_rtzone, rt);
return (error);
}
@@ -1661,7 +1665,6 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
* examine the ifa and ifa->ifa_ifp if it so desires.
*/
ifa = info->rti_ifa;
- ifa_ref(ifa);
rt->rt_ifa = ifa;
rt->rt_ifp = ifa->ifa_ifp;
rt->rt_weight = 1;
@@ -2101,7 +2104,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
* Do the actual request
*/
bzero((caddr_t)&info, sizeof(info));
- ifa_ref(ifa);
info.rti_ifa = ifa;
info.rti_flags = flags |
(ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED;
@@ -2116,7 +2118,6 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
info.rti_info[RTAX_NETMASK] = netmask;
error = rtrequest1_fib(cmd, &info, &rt, fibnum);
-
if (error == 0 && rt != NULL) {
/*
* notify any listening routing agents of the change
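A short kernel-context sketch (not part of the change; fibnum, dst and the use_nexthop() consumer are placeholders) of the convention these hunks settle on: with NHR_REF the lookup references both rt_ifp and rt_ifa, and rib_free_info() is the single call that releases them.

	struct rt_addrinfo info;

	bzero(&info, sizeof(info));
	if (rib_lookup_info(fibnum, dst, NHR_REF, 0, &info) == 0) {
		/* Both info.rti_ifp and info.rti_ifa hold references here. */
		use_nexthop(info.rti_ifp, info.rti_ifa);
		rib_free_info(&info);	/* drops the ifp and ifa references */
	}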
diff --git a/freebsd/sys/net/sff8472.h b/freebsd/sys/net/sff8472.h
index d38fcfc0..9fa465a1 100644
--- a/freebsd/sys/net/sff8472.h
+++ b/freebsd/sys/net/sff8472.h
@@ -379,7 +379,7 @@ enum {
/*
* Table 3.2 Identifier values.
- * Identifier constants has taken from SFF-8024 rev 4.2 table 4.1
+ * Identifier constants are taken from SFF-8024 rev 4.6 table 4.1
* (as referenced by table 3.2 footer)
* */
enum {
@@ -396,10 +396,10 @@ enum {
SFF_8024_ID_X2 = 0xA, /* X2 */
SFF_8024_ID_DWDM_SFP = 0xB, /* DWDM-SFP */
SFF_8024_ID_QSFP = 0xC, /* QSFP */
- SFF_8024_ID_QSFPPLUS = 0xD, /* QSFP+ */
+ SFF_8024_ID_QSFPPLUS = 0xD, /* QSFP+ or later */
SFF_8024_ID_CXP = 0xE, /* CXP */
- SFF_8024_ID_HD4X = 0xF, /* Shielded Mini Multilane HD 4X */
- SFF_8024_ID_HD8X = 0x10, /* Shielded Mini Multilane HD 8X */
+ SFF_8024_ID_HD4X = 0xF, /* Shielded Mini Multilane HD 4X */
+ SFF_8024_ID_HD8X = 0x10, /* Shielded Mini Multilane HD 8X */
SFF_8024_ID_QSFP28 = 0x11, /* QSFP28 or later */
SFF_8024_ID_CXP2 = 0x12, /* CXP2 (aka CXP28) */
SFF_8024_ID_CDFP = 0x13, /* CDFP (Style 1/Style 2) */
@@ -408,34 +408,49 @@ enum {
SFF_8024_ID_CDFP3 = 0x16, /* CDFP (Style3) */
SFF_8024_ID_MICROQSFP = 0x17, /* microQSFP */
SFF_8024_ID_QSFP_DD = 0x18, /* QSFP-DD 8X Pluggable Transceiver */
- SFF_8024_ID_LAST = SFF_8024_ID_QSFP_DD
- };
-
-static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = {"Unknown",
- "GBIC",
- "SFF",
- "SFP/SFP+/SFP28",
- "XBI",
- "Xenpak",
- "XFP",
- "XFF",
- "XFP-E",
- "XPAK",
- "X2",
- "DWDM-SFP/SFP+",
- "QSFP",
- "QSFP+",
- "CXP",
- "HD4X",
- "HD8X",
- "QSFP28",
- "CXP2",
- "CDFP",
- "SMM4",
- "SMM8",
- "CDFP3",
- "microQSFP",
- "QSFP-DD"};
+ SFF_8024_ID_OSFP8X = 0x19, /* OSFP 8X Pluggable Transceiver */
+ SFF_8024_ID_SFP_DD = 0x1A, /* SFP-DD 2X Pluggable Transceiver */
+ SFF_8024_ID_DSFP = 0x1B, /* DSFP Dual SFF Pluggable Transceiver */
+ SFF_8024_ID_X4ML = 0x1C, /* x4 MiniLink/OcuLink */
+ SFF_8024_ID_X8ML = 0x1D, /* x8 MiniLink */
+ SFF_8024_ID_QSFP_CMIS = 0x1E, /* QSFP+ or later w/ Common Management
+ Interface Specification */
+ SFF_8024_ID_LAST = SFF_8024_ID_QSFP_CMIS
+};
+
+static const char *sff_8024_id[SFF_8024_ID_LAST + 1] = {
+ "Unknown",
+ "GBIC",
+ "SFF",
+ "SFP/SFP+/SFP28",
+ "XBI",
+ "Xenpak",
+ "XFP",
+ "XFF",
+ "XFP-E",
+ "XPAK",
+ "X2",
+ "DWDM-SFP/SFP+",
+ "QSFP",
+ "QSFP+",
+ "CXP",
+ "HD4X",
+ "HD8X",
+ "QSFP28",
+ "CXP2",
+ "CDFP",
+ "SMM4",
+ "SMM8",
+ "CDFP3",
+ "microQSFP",
+ "QSFP-DD",
+	"OSFP8X",
+ "SFP-DD",
+ "DSFP",
+ "x4MiniLink/OcuLink",
+ "x8MiniLink",
+	"QSFP+(CMIS)"
+};
/* Keep compatibility with old definitions */
#define SFF_8472_ID_UNKNOWN SFF_8024_ID_UNKNOWN
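The table above is indexed directly by the identifier byte read from a module's EEPROM, so consumers need a bounds check against SFF_8024_ID_LAST; a small hypothetical helper (not part of the change) might read:

static const char *
sff_8024_id_name(uint8_t id)
{

	/* Identifiers newer than this table are reported as unknown. */
	if (id > SFF_8024_ID_LAST)
		return ("Unknown");
	return (sff_8024_id[id]);
}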
diff --git a/freebsd/sys/net/vnet.h b/freebsd/sys/net/vnet.h
index b4168750..2d69a8a9 100644
--- a/freebsd/sys/net/vnet.h
+++ b/freebsd/sys/net/vnet.h
@@ -325,6 +325,8 @@ struct vnet_sysinit {
};
#define VNET_SYSINIT(ident, subsystem, order, func, arg) \
+ CTASSERT((subsystem) > SI_SUB_VNET && \
+ (subsystem) <= SI_SUB_VNET_DONE); \
static struct vnet_sysinit ident ## _vnet_init = { \
subsystem, \
order, \
@@ -337,6 +339,8 @@ struct vnet_sysinit {
vnet_deregister_sysinit, &ident ## _vnet_init)
#define VNET_SYSUNINIT(ident, subsystem, order, func, arg) \
+ CTASSERT((subsystem) > SI_SUB_VNET && \
+ (subsystem) <= SI_SUB_VNET_DONE); \
static struct vnet_sysinit ident ## _vnet_uninit = { \
subsystem, \
order, \
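The added assertions turn out-of-range registrations into compile-time errors instead of boot-time surprises. A hypothetical per-VNET constructor registered inside the allowed window, here at SI_SUB_PROTO_DOMAIN, passes the new check:

static void
example_vnet_init(const void *unused __unused)
{

	/* Allocate or initialise per-VNET state; runs once for each vnet. */
}
VNET_SYSINIT(example_vnet_sysinit, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY,
    example_vnet_init, NULL);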
diff --git a/freebsd/sys/net80211/ieee80211.c b/freebsd/sys/net80211/ieee80211.c
index 927905bb..f003c769 100644
--- a/freebsd/sys/net80211/ieee80211.c
+++ b/freebsd/sys/net80211/ieee80211.c
@@ -1388,6 +1388,8 @@ getflags(const uint8_t bands[], uint32_t flags[], int ht40, int vht80)
/*
* Add one 20 MHz channel into specified channel list.
+ * You MUST NOT mix bands when calling this.  It will not add 5 GHz
+ * channels if you have any B/G/N band bit set.
*/
/* XXX VHT */
int
diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c
index ff442399..cbb6c6d3 100644
--- a/freebsd/sys/netinet/in_mcast.c
+++ b/freebsd/sys/netinet/in_mcast.c
@@ -2207,7 +2207,11 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
__func__);
goto out_inp_locked;
}
- inm_acquire(imf->imf_inm);
+ /*
+ * NOTE: Refcount from in_joingroup_locked()
+ * is protecting membership.
+ */
+ ip_mfilter_insert(&imo->imo_head, imf);
} else {
CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
IN_MULTI_LIST_LOCK();
@@ -2231,8 +2235,6 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
goto out_inp_locked;
}
}
- if (is_new)
- ip_mfilter_insert(&imo->imo_head, imf);
imf_commit(imf);
imf = NULL;
@@ -2401,6 +2403,12 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
if (is_final) {
ip_mfilter_remove(&imo->imo_head, imf);
imf_leave(imf);
+
+ /*
+ * Give up the multicast address record to which
+ * the membership points.
+ */
+ (void) in_leavegroup_locked(imf->imf_inm, imf);
} else {
if (imf->imf_st[0] == MCAST_EXCLUDE) {
error = EADDRNOTAVAIL;
@@ -2455,14 +2463,8 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
out_inp_locked:
INP_WUNLOCK(inp);
- if (is_final && imf) {
- /*
- * Give up the multicast address record to which
- * the membership points.
- */
- (void) in_leavegroup_locked(imf->imf_inm, imf);
+ if (is_final && imf)
ip_mfilter_free(imf);
- }
IN_MULTI_UNLOCK();
return (error);
diff --git a/freebsd/sys/netinet/ip_carp.c b/freebsd/sys/netinet/ip_carp.c
index 02a24bb8..30b09198 100644
--- a/freebsd/sys/netinet/ip_carp.c
+++ b/freebsd/sys/netinet/ip_carp.c
@@ -568,13 +568,16 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
}
/* verify that we have a complete carp packet */
- len = m->m_len;
- IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
- if (ch == NULL) {
- CARPSTATS_INC(carps_badlen);
- CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
- return (IPPROTO_DONE);
+ if (m->m_len < *offp + sizeof(*ch)) {
+ len = m->m_len;
+ m = m_pullup(m, *offp + sizeof(*ch));
+ if (m == NULL) {
+ CARPSTATS_INC(carps_badlen);
+ CARP_DEBUG("%s: packet size %u too small\n", __func__, len);
+ return (IPPROTO_DONE);
+ }
}
+ ch = (struct carp_header *)(mtod(m, char *) + *offp);
/* verify the CARP checksum */
@@ -1189,7 +1192,7 @@ carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr)
return (ifa);
}
-caddr_t
+char *
carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr)
{
struct ifaddr *ifa;
@@ -1231,14 +1234,15 @@ carp_forus(struct ifnet *ifp, u_char *dhost)
CIF_LOCK(ifp->if_carp);
IFNET_FOREACH_CARP(ifp, sc) {
- CARP_LOCK(sc);
+ /*
+		 * CARP_LOCK() is not taken here: it would protect nothing and
+		 * it can deadlock with if_bridge, which calls this while
+		 * holding its own lock.
+ */
if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr),
ETHER_ADDR_LEN)) {
- CARP_UNLOCK(sc);
CIF_UNLOCK(ifp->if_carp);
return (1);
}
- CARP_UNLOCK(sc);
}
CIF_UNLOCK(ifp->if_carp);
@@ -1848,7 +1852,7 @@ carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td)
carp_carprcp(&carpr, sc, priveleged);
carpr.carpr_count = count;
error = copyout(&carpr,
- (caddr_t)ifr_data_get_ptr(ifr) +
+ (char *)ifr_data_get_ptr(ifr) +
(i * sizeof(carpr)), sizeof(carpr));
if (error) {
CIF_UNLOCK(ifp->if_carp);
diff --git a/freebsd/sys/netinet/ip_carp.h b/freebsd/sys/netinet/ip_carp.h
index fc591ac3..f8ee38dd 100644
--- a/freebsd/sys/netinet/ip_carp.h
+++ b/freebsd/sys/netinet/ip_carp.h
@@ -149,7 +149,7 @@ int carp_output (struct ifnet *, struct mbuf *,
int carp_master(struct ifaddr *);
int carp_iamatch(struct ifaddr *, uint8_t **);
struct ifaddr *carp_iamatch6(struct ifnet *, struct in6_addr *);
-caddr_t carp_macmatch6(struct ifnet *, struct mbuf *, const struct in6_addr *);
+char * carp_macmatch6(struct ifnet *, struct mbuf *, const struct in6_addr *);
int carp_forus(struct ifnet *, u_char *);
/* These are external networking stack hooks for CARP */
@@ -174,7 +174,7 @@ extern int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#ifdef INET6
/* netinet6/nd6_nbr.c */
extern struct ifaddr *(*carp_iamatch6_p)(struct ifnet *, struct in6_addr *);
-extern caddr_t (*carp_macmatch6_p)(struct ifnet *, struct mbuf *,
+extern char * (*carp_macmatch6_p)(struct ifnet *, struct mbuf *,
const struct in6_addr *);
#endif
#endif
diff --git a/freebsd/sys/netinet/ip_mroute.c b/freebsd/sys/netinet/ip_mroute.c
index 3dd887f3..3b27781e 100644
--- a/freebsd/sys/netinet/ip_mroute.c
+++ b/freebsd/sys/netinet/ip_mroute.c
@@ -181,10 +181,14 @@ static struct mtx mfc_mtx;
VNET_DEFINE_STATIC(vifi_t, numvifs);
#define V_numvifs VNET(numvifs)
-VNET_DEFINE_STATIC(struct vif, viftable[MAXVIFS]);
+VNET_DEFINE_STATIC(struct vif *, viftable);
#define V_viftable VNET(viftable)
+/*
+ * No one should be able to "query" this before initialisation happened in
+ * vnet_mroute_init(), so we should still be fine.
+ */
SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_VNET | CTLFLAG_RD,
- &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]",
+ &VNET_NAME(viftable), sizeof(*V_viftable) * MAXVIFS, "S,vif[MAXVIFS]",
"IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
static struct mtx vif_mtx;
@@ -212,7 +216,7 @@ static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
* expiration time. Periodically, the entries are analysed and processed.
*/
#define BW_METER_BUCKETS 1024
-VNET_DEFINE_STATIC(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]);
+VNET_DEFINE_STATIC(struct bw_meter **, bw_meter_timers);
#define V_bw_meter_timers VNET(bw_meter_timers)
VNET_DEFINE_STATIC(struct callout, bw_meter_ch);
#define V_bw_meter_ch VNET(bw_meter_ch)
@@ -222,7 +226,7 @@ VNET_DEFINE_STATIC(struct callout, bw_meter_ch);
* Pending upcalls are stored in a vector which is flushed when
* full, or periodically
*/
-VNET_DEFINE_STATIC(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]);
+VNET_DEFINE_STATIC(struct bw_upcall *, bw_upcalls);
#define V_bw_upcalls VNET(bw_upcalls)
VNET_DEFINE_STATIC(u_int, bw_upcalls_n); /* # of pending upcalls */
#define V_bw_upcalls_n VNET(bw_upcalls_n)
@@ -766,7 +770,7 @@ X_ip_mrouter_done(void)
bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize);
V_bw_upcalls_n = 0;
- bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
+ bzero(V_bw_meter_timers, BW_METER_BUCKETS * sizeof(*V_bw_meter_timers));
MFC_UNLOCK();
@@ -2807,7 +2811,14 @@ vnet_mroute_init(const void *unused __unused)
{
V_nexpire = malloc(mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO);
- bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
+
+ V_viftable = mallocarray(MAXVIFS, sizeof(*V_viftable),
+ M_MRTABLE, M_WAITOK|M_ZERO);
+ V_bw_meter_timers = mallocarray(BW_METER_BUCKETS,
+ sizeof(*V_bw_meter_timers), M_MRTABLE, M_WAITOK|M_ZERO);
+ V_bw_upcalls = mallocarray(BW_UPCALLS_MAX, sizeof(*V_bw_upcalls),
+ M_MRTABLE, M_WAITOK|M_ZERO);
+
callout_init(&V_expire_upcalls_ch, 1);
callout_init(&V_bw_upcalls_ch, 1);
callout_init(&V_bw_meter_ch, 1);
@@ -2820,6 +2831,9 @@ static void
vnet_mroute_uninit(const void *unused __unused)
{
+ free(V_bw_upcalls, M_MRTABLE);
+ free(V_bw_meter_timers, M_MRTABLE);
+ free(V_viftable, M_MRTABLE);
free(V_nexpire, M_MRTABLE);
V_nexpire = NULL;
}
diff --git a/freebsd/sys/netinet/ip_output.c b/freebsd/sys/netinet/ip_output.c
index c9eb7aa3..343874e5 100644
--- a/freebsd/sys/netinet/ip_output.c
+++ b/freebsd/sys/netinet/ip_output.c
@@ -655,6 +655,7 @@ sendit:
in_pcboutput_txrtlmt(inp, ifp, m);
/* stamp send tag on mbuf */
m->m_pkthdr.snd_tag = inp->inp_snd_tag;
+ m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
} else {
m->m_pkthdr.snd_tag = NULL;
}
@@ -707,6 +708,7 @@ sendit:
in_pcboutput_txrtlmt(inp, ifp, m);
/* stamp send tag on mbuf */
m->m_pkthdr.snd_tag = inp->inp_snd_tag;
+ m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
} else {
m->m_pkthdr.snd_tag = NULL;
}
diff --git a/freebsd/sys/netinet/ip_reass.c b/freebsd/sys/netinet/ip_reass.c
index 70a6edae..036d19fe 100644
--- a/freebsd/sys/netinet/ip_reass.c
+++ b/freebsd/sys/netinet/ip_reass.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/eventhandler.h>
+#include <sys/kernel.h>
#include <sys/hash.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
@@ -48,7 +49,10 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/if_var.h>
#include <net/rss_config.h>
#include <net/netisr.h>
#include <net/vnet.h>
@@ -182,6 +186,7 @@ ip_reass(struct mbuf *m)
struct ip *ip;
struct mbuf *p, *q, *nq, *t;
struct ipq *fp;
+ struct ifnet *srcifp;
struct ipqhead *head;
int i, hlen, next, tmpmax;
u_int8_t ecn, ecn0;
@@ -242,6 +247,11 @@ ip_reass(struct mbuf *m)
}
/*
+ * Store receive network interface pointer for later.
+ */
+ srcifp = m->m_pkthdr.rcvif;
+
+ /*
* Attempt reassembly; if it succeeds, proceed.
* ip_reass() will return a different mbuf.
*/
@@ -491,8 +501,11 @@ ip_reass(struct mbuf *m)
m->m_len += (ip->ip_hl << 2);
m->m_data -= (ip->ip_hl << 2);
/* some debugging cruft by sklower, below, will go away soon */
- if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */
+ if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
m_fixhdr(m);
+ /* set valid receive interface pointer */
+ m->m_pkthdr.rcvif = srcifp;
+ }
IPSTAT_INC(ips_reassembled);
IPQ_UNLOCK(hash);
@@ -608,6 +621,46 @@ ipreass_drain(void)
}
}
+/*
+ * Drain off all datagram fragments belonging to
+ * the given network interface.
+ */
+static void
+ipreass_cleanup(void *arg __unused, struct ifnet *ifp)
+{
+ struct ipq *fp, *temp;
+ struct mbuf *m;
+ int i;
+
+ KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));
+
+ CURVNET_SET_QUIET(ifp->if_vnet);
+
+ /*
+ * Skip processing if IPv4 reassembly is not initialised or
+ * torn down by ipreass_destroy().
+ */
+ if (V_ipq_zone == NULL) {
+ CURVNET_RESTORE();
+ return;
+ }
+
+ for (i = 0; i < IPREASS_NHASH; i++) {
+ IPQ_LOCK(i);
+ /* Scan fragment list. */
+ TAILQ_FOREACH_SAFE(fp, &V_ipq[i].head, ipq_list, temp) {
+ for (m = fp->ipq_frags; m != NULL; m = m->m_nextpkt) {
+ /* clear no longer valid rcvif pointer */
+ if (m->m_pkthdr.rcvif == ifp)
+ m->m_pkthdr.rcvif = NULL;
+ }
+ }
+ IPQ_UNLOCK(i);
+ }
+ CURVNET_RESTORE();
+}
+EVENTHANDLER_DEFINE(ifnet_departure_event, ipreass_cleanup, NULL, 0);
+
#ifdef VIMAGE
/*
* Destroy IP reassembly structures.
@@ -618,6 +671,7 @@ ipreass_destroy(void)
ipreass_drain();
uma_zdestroy(V_ipq_zone);
+ V_ipq_zone = NULL;
for (int i = 0; i < IPREASS_NHASH; i++)
mtx_destroy(&V_ipq[i].lock);
}
diff --git a/freebsd/sys/netinet/sctp_asconf.c b/freebsd/sys/netinet/sctp_asconf.c
index 4de01ed7..a13f4040 100644
--- a/freebsd/sys/netinet/sctp_asconf.c
+++ b/freebsd/sys/netinet/sctp_asconf.c
@@ -107,42 +107,47 @@ sctp_asconf_error_response(uint32_t id, uint16_t cause, uint8_t *error_tlv,
struct mbuf *m_reply = NULL;
struct sctp_asconf_paramhdr *aph;
struct sctp_error_cause *error;
+ size_t buf_len;
+ uint16_t i, param_length, cause_length, padding_length;
uint8_t *tlv;
- m_reply = sctp_get_mbuf_for_msg((sizeof(struct sctp_asconf_paramhdr) +
- tlv_length +
- sizeof(struct sctp_error_cause)),
- 0, M_NOWAIT, 1, MT_DATA);
+ if (error_tlv == NULL) {
+ tlv_length = 0;
+ }
+ cause_length = sizeof(struct sctp_error_cause) + tlv_length;
+ param_length = sizeof(struct sctp_asconf_paramhdr) + cause_length;
+ padding_length = tlv_length % 4;
+ if (padding_length != 0) {
+ padding_length = 4 - padding_length;
+ }
+ buf_len = param_length + padding_length;
+ if (buf_len > MLEN) {
+ SCTPDBG(SCTP_DEBUG_ASCONF1,
+ "asconf_error_response: tlv_length (%xh) too big\n",
+ tlv_length);
+ return (NULL);
+ }
+ m_reply = sctp_get_mbuf_for_msg(buf_len, 0, M_NOWAIT, 1, MT_DATA);
if (m_reply == NULL) {
SCTPDBG(SCTP_DEBUG_ASCONF1,
"asconf_error_response: couldn't get mbuf!\n");
return (NULL);
}
aph = mtod(m_reply, struct sctp_asconf_paramhdr *);
- error = (struct sctp_error_cause *)(aph + 1);
-
- aph->correlation_id = id;
aph->ph.param_type = htons(SCTP_ERROR_CAUSE_IND);
+ aph->ph.param_length = htons(param_length);
+ aph->correlation_id = id;
+ error = (struct sctp_error_cause *)(aph + 1);
error->code = htons(cause);
- error->length = tlv_length + sizeof(struct sctp_error_cause);
- aph->ph.param_length = error->length +
- sizeof(struct sctp_asconf_paramhdr);
-
- if (aph->ph.param_length > MLEN) {
- SCTPDBG(SCTP_DEBUG_ASCONF1,
- "asconf_error_response: tlv_length (%xh) too big\n",
- tlv_length);
- sctp_m_freem(m_reply); /* discard */
- return (NULL);
- }
+ error->length = htons(cause_length);
if (error_tlv != NULL) {
tlv = (uint8_t *)(error + 1);
memcpy(tlv, error_tlv, tlv_length);
+ for (i = 0; i < padding_length; i++) {
+ tlv[tlv_length + i] = 0;
+ }
}
- SCTP_BUF_LEN(m_reply) = aph->ph.param_length;
- error->length = htons(error->length);
- aph->ph.param_length = htons(aph->ph.param_length);
-
+ SCTP_BUF_LEN(m_reply) = buf_len;
return (m_reply);
}
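A worked example of the new sizing, assuming the usual 8-byte struct sctp_asconf_paramhdr and 4-byte struct sctp_error_cause: for a 6-byte error TLV, cause_length = 4 + 6 = 10, param_length = 8 + 10 = 18 and padding_length = 4 - (6 % 4) = 2, so buf_len = 20. The mbuf length is set to the padded 20 bytes, param_length stays at the unpadded 18, and the two pad bytes following the copied TLV are zeroed.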
@@ -171,10 +176,16 @@ sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *ap
#endif
aparam_length = ntohs(aph->ph.param_length);
+ if (aparam_length < sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_paramhdr)) {
+ return (NULL);
+ }
ph = (struct sctp_paramhdr *)(aph + 1);
param_type = ntohs(ph->param_type);
#if defined(INET) || defined(INET6)
param_length = ntohs(ph->param_length);
+ if (param_length + sizeof(struct sctp_asconf_paramhdr) != aparam_length) {
+ return (NULL);
+ }
#endif
sa = &store.sa;
switch (param_type) {
@@ -238,6 +249,7 @@ sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *ap
"process_asconf_add_ip: using source addr ");
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, src);
}
+ net = NULL;
/* add the address */
if (bad_address) {
m_reply = sctp_asconf_error_response(aph->correlation_id,
@@ -252,17 +264,19 @@ sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *ap
SCTP_CAUSE_RESOURCE_SHORTAGE, (uint8_t *)aph,
aparam_length);
} else {
- /* notify upper layer */
- sctp_ulp_notify(SCTP_NOTIFY_ASCONF_ADD_IP, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
if (response_required) {
m_reply =
sctp_asconf_success_response(aph->correlation_id);
}
- sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb, net);
- sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
- stcb, net);
- if (send_hb) {
- sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED);
+ if (net != NULL) {
+ /* notify upper layer */
+ sctp_ulp_notify(SCTP_NOTIFY_ASCONF_ADD_IP, stcb, 0, sa, SCTP_SO_NOT_LOCKED);
+ sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb, net);
+ sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
+ stcb, net);
+ if (send_hb) {
+ sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED);
+ }
}
}
return (m_reply);
@@ -271,7 +285,7 @@ sctp_process_asconf_add_ip(struct sockaddr *src, struct sctp_asconf_paramhdr *ap
static int
sctp_asconf_del_remote_addrs_except(struct sctp_tcb *stcb, struct sockaddr *src)
{
- struct sctp_nets *src_net, *net;
+ struct sctp_nets *src_net, *net, *nnet;
/* make sure the source address exists as a destination net */
src_net = sctp_findnet(stcb, src);
@@ -281,10 +295,9 @@ sctp_asconf_del_remote_addrs_except(struct sctp_tcb *stcb, struct sockaddr *src)
}
/* delete all destination addresses except the source */
- TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
+ TAILQ_FOREACH_SAFE(net, &stcb->asoc.nets, sctp_next, nnet) {
if (net != src_net) {
/* delete this address */
- sctp_remove_net(stcb, net);
SCTPDBG(SCTP_DEBUG_ASCONF1,
"asconf_del_remote_addrs_except: deleting ");
SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1,
@@ -292,6 +305,7 @@ sctp_asconf_del_remote_addrs_except(struct sctp_tcb *stcb, struct sockaddr *src)
/* notify upper layer */
sctp_ulp_notify(SCTP_NOTIFY_ASCONF_DELETE_IP, stcb, 0,
(struct sockaddr *)&net->ro._l_addr, SCTP_SO_NOT_LOCKED);
+ sctp_remove_net(stcb, net);
}
}
return (0);
@@ -322,10 +336,16 @@ sctp_process_asconf_delete_ip(struct sockaddr *src,
#endif
aparam_length = ntohs(aph->ph.param_length);
+ if (aparam_length < sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_paramhdr)) {
+ return (NULL);
+ }
ph = (struct sctp_paramhdr *)(aph + 1);
param_type = ntohs(ph->param_type);
#if defined(INET) || defined(INET6)
param_length = ntohs(ph->param_length);
+ if (param_length + sizeof(struct sctp_asconf_paramhdr) != aparam_length) {
+ return (NULL);
+ }
#endif
sa = &store.sa;
switch (param_type) {
@@ -453,10 +473,16 @@ sctp_process_asconf_set_primary(struct sockaddr *src,
#endif
aparam_length = ntohs(aph->ph.param_length);
+ if (aparam_length < sizeof(struct sctp_asconf_paramhdr) + sizeof(struct sctp_paramhdr)) {
+ return (NULL);
+ }
ph = (struct sctp_paramhdr *)(aph + 1);
param_type = ntohs(ph->param_type);
#if defined(INET) || defined(INET6)
param_length = ntohs(ph->param_length);
+ if (param_length + sizeof(struct sctp_asconf_paramhdr) != aparam_length) {
+ return (NULL);
+ }
#endif
sa = &store.sa;
switch (param_type) {
@@ -675,8 +701,8 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
sctp_m_freem(m_ack);
return;
}
- /* param_length is already validated in process_control... */
- offset += ntohs(p_addr->ph.param_length); /* skip lookup addr */
+ /* skip lookup addr */
+ offset += SCTP_SIZE32(ntohs(p_addr->ph.param_length));
/* get pointer to first asconf param in ASCONF */
aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, sizeof(struct sctp_asconf_paramhdr), (uint8_t *)&aparam_buf);
if (aph == NULL) {
@@ -705,6 +731,7 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
if (param_length <= sizeof(struct sctp_paramhdr)) {
SCTPDBG(SCTP_DEBUG_ASCONF1, "handle_asconf: param length (%u) too short\n", param_length);
sctp_m_freem(m_ack);
+ return;
}
/* get the entire parameter */
aph = (struct sctp_asconf_paramhdr *)sctp_m_getptr(m, offset, param_length, aparam_buf);
@@ -760,8 +787,6 @@ sctp_handle_asconf(struct mbuf *m, unsigned int offset,
if (m_result != NULL) {
SCTP_BUF_NEXT(m_tail) = m_result;
m_tail = m_result;
- /* update lengths, make sure it's aligned too */
- SCTP_BUF_LEN(m_result) = SCTP_SIZE32(SCTP_BUF_LEN(m_result));
ack_cp->ch.chunk_length += SCTP_BUF_LEN(m_result);
/* set flag to force success reports */
error = 1;
@@ -1956,12 +1981,10 @@ sctp_addr_mgmt_assoc(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
case AF_INET:
{
struct sockaddr_in *sin;
- struct in6pcb *inp6;
- inp6 = (struct in6pcb *)&inp->ip_inp.inp;
/* invalid if we are a v6 only endpoint */
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
- SCTP_IPV6_V6ONLY(inp6))
+ SCTP_IPV6_V6ONLY(&inp->ip_inp.inp))
return;
sin = &ifa->address.sin;
@@ -2034,11 +2057,9 @@ sctp_asconf_iterator_ep(struct sctp_inpcb *inp, void *ptr, uint32_t val SCTP_UNU
case AF_INET:
{
/* invalid if we are a v6 only endpoint */
- struct in6pcb *inp6;
- inp6 = (struct in6pcb *)&inp->ip_inp.inp;
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
- SCTP_IPV6_V6ONLY(inp6)) {
+ SCTP_IPV6_V6ONLY(&inp->ip_inp.inp)) {
cnt_invalid++;
if (asc->cnt == cnt_invalid)
return (1);
@@ -2149,13 +2170,11 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
case AF_INET:
{
/* invalid if we are a v6 only endpoint */
- struct in6pcb *inp6;
struct sockaddr_in *sin;
- inp6 = (struct in6pcb *)&inp->ip_inp.inp;
/* invalid if we are a v6 only endpoint */
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
- SCTP_IPV6_V6ONLY(inp6))
+ SCTP_IPV6_V6ONLY(&inp->ip_inp.inp))
continue;
sin = &ifa->address.sin;
@@ -2172,7 +2191,7 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
continue;
}
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
- SCTP_IPV6_V6ONLY(inp6)) {
+ SCTP_IPV6_V6ONLY(&inp->ip_inp.inp)) {
cnt_invalid++;
if (asc->cnt == cnt_invalid)
return;
diff --git a/freebsd/sys/netinet/sctp_dtrace_define.h b/freebsd/sys/netinet/sctp_dtrace_define.h
deleted file mode 100644
index ad7c8526..00000000
--- a/freebsd/sys/netinet/sctp_dtrace_define.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
- * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * a) Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * b) Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the distribution.
- *
- * c) Neither the name of Cisco Systems, Inc. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#ifndef _NETINET_SCTP_DTRACE_DEFINE_H_
-#define _NETINET_SCTP_DTRACE_DEFINE_H_
-
-#include <sys/kernel.h>
-#include <sys/sdt.h>
-
-SDT_PROVIDER_DECLARE(sctp);
-
-/********************************************************/
-/* Cwnd probe - tracks changes in the congestion window on a netp */
-/********************************************************/
-/* Initial */
-SDT_PROBE_DEFINE5(sctp, cwnd, net, init,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /* The port number of the local side << 16 |
- * port number of remote in network byte
- * order. */
- "uintptr_t", /* The pointer to the struct sctp_nets *
- * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
-
-/* ACK-INCREASE */
-SDT_PROBE_DEFINE5(sctp, cwnd, net, ack,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /* The port number of the local side << 16 |
- * port number of remote in network byte
- * order. */
- "uintptr_t", /* The pointer to the struct sctp_nets *
- * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
-
-/* ACK-INCREASE */
-SDT_PROBE_DEFINE5(sctp, cwnd, net, rttvar,
- "uint64_t", /* The Vtag << 32 | localport << 16 |
- * remoteport */
- "uint64_t", /* obw | nbw */
- "uint64_t", /* bwrtt | newrtt */
- "uint64_t", /* flight */
- "uint64_t"); /* (cwnd << 32) | point << 16 | retval(0/1) */
-
-SDT_PROBE_DEFINE5(sctp, cwnd, net, rttstep,
- "uint64_t", /* The Vtag << 32 | localport << 16 |
- * remoteport */
- "uint64_t", /* obw | nbw */
- "uint64_t", /* bwrtt | newrtt */
- "uint64_t", /* flight */
- "uint64_t"); /* (cwnd << 32) | point << 16 | retval(0/1) */
-
-/* FastRetransmit-DECREASE */
-SDT_PROBE_DEFINE5(sctp, cwnd, net, fr,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /* The port number of the local side << 16 |
- * port number of remote in network byte
- * order. */
- "uintptr_t", /* The pointer to the struct sctp_nets *
- * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
-
-/* TimeOut-DECREASE */
-SDT_PROBE_DEFINE5(sctp, cwnd, net, to,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /* The port number of the local side << 16 |
- * port number of remote in network byte
- * order. */
- "uintptr_t", /* The pointer to the struct sctp_nets *
- * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
-
-/* BurstLimit-DECREASE */
-SDT_PROBE_DEFINE5(sctp, cwnd, net, bl,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /* The port number of the local side << 16 |
- * port number of remote in network byte
- * order. */
- "uintptr_t", /* The pointer to the struct sctp_nets *
- * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
-
-/* ECN-DECREASE */
-SDT_PROBE_DEFINE5(sctp, cwnd, net, ecn,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /* The port number of the local side << 16 |
- * port number of remote in network byte
- * order. */
- "uintptr_t", /* The pointer to the struct sctp_nets *
- * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
-
-/* PacketDrop-DECREASE */
-SDT_PROBE_DEFINE5(sctp, cwnd, net, pd,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /* The port number of the local side << 16 |
- * port number of remote in network byte
- * order. */
- "uintptr_t", /* The pointer to the struct sctp_nets *
- * changing */
- "int", /* The old value of the cwnd */
- "int"); /* The new value of the cwnd */
-
-/********************************************************/
-/* Rwnd probe - tracks changes in the receiver window for an assoc */
-/********************************************************/
-SDT_PROBE_DEFINE4(sctp, rwnd, assoc, val,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /* The port number of the local side << 16 |
- * port number of remote in network byte
- * order. */
- "int", /* The up/down amount */
- "int"); /* The new value of the cwnd */
-
-/********************************************************/
-/* flight probe - tracks changes in the flight size on a net or assoc */
-/********************************************************/
-SDT_PROBE_DEFINE5(sctp, flightsize, net, val,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /* The port number of the local side << 16 |
- * port number of remote in network byte
- * order. */
- "uintptr_t", /* The pointer to the struct sctp_nets *
- * changing */
- "int", /* The up/down amount */
- "int"); /* The new value of the cwnd */
-
-/********************************************************/
-/* The total flight version */
-/********************************************************/
-SDT_PROBE_DEFINE4(sctp, flightsize, assoc, val,
- "uint32_t", /* The Vtag for this end */
- "uint32_t", /* The port number of the local side << 16 |
- * port number of remote in network byte
- * order. */
- "int", /* The up/down amount */
- "int"); /* The new value of the cwnd */
-
-#endif
diff --git a/freebsd/sys/netinet/sctp_indata.c b/freebsd/sys/netinet/sctp_indata.c
index c4a11fec..1b28cc38 100644
--- a/freebsd/sys/netinet/sctp_indata.c
+++ b/freebsd/sys/netinet/sctp_indata.c
@@ -474,6 +474,11 @@ sctp_clean_up_control(struct sctp_tcb *stcb, struct sctp_queued_to_read *control
chk->data = NULL;
sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
}
+ sctp_free_remote_addr(control->whoFrom);
+ if (control->data) {
+ sctp_m_freem(control->data);
+ control->data = NULL;
+ }
sctp_free_a_readq(stcb, control);
}
@@ -713,6 +718,7 @@ sctp_add_to_tail_pointer(struct sctp_queued_to_read *control, struct mbuf *m, ui
}
if (control->tail_mbuf == NULL) {
/* TSNH */
+ sctp_m_freem(control->data);
control->data = m;
sctp_setup_tail_pointer(control);
return;
@@ -2116,10 +2122,13 @@ sctp_process_a_data_chunk(struct sctp_tcb *stcb, struct sctp_association *asoc,
struct mbuf *mm;
control->data = dmbuf;
+ control->tail_mbuf = NULL;
for (mm = control->data; mm; mm = mm->m_next) {
control->length += SCTP_BUF_LEN(mm);
+ if (SCTP_BUF_NEXT(mm) == NULL) {
+ control->tail_mbuf = mm;
+ }
}
- control->tail_mbuf = NULL;
control->end_added = 1;
control->last_frag_seen = 1;
control->first_frag_seen = 1;
@@ -3110,13 +3119,12 @@ sctp_process_segment_range(struct sctp_tcb *stcb, struct sctp_tmit_chunk **p_tp1
* update RTO too ?
*/
if (tp1->do_rtt) {
- if (*rto_ok) {
- tp1->whoTo->RTO =
- sctp_calculate_rto(stcb,
- &stcb->asoc,
- tp1->whoTo,
- &tp1->sent_rcv_time,
- SCTP_RTT_FROM_DATA);
+ if (*rto_ok &&
+ sctp_calculate_rto(stcb,
+ &stcb->asoc,
+ tp1->whoTo,
+ &tp1->sent_rcv_time,
+ SCTP_RTT_FROM_DATA)) {
*rto_ok = 0;
}
if (tp1->whoTo->rto_needed == 0) {
@@ -4088,16 +4096,12 @@ sctp_express_handle_sack(struct sctp_tcb *stcb, uint32_t cumack,
/* update RTO too? */
if (tp1->do_rtt) {
- if (rto_ok) {
- tp1->whoTo->RTO =
- /*
- * sa_ignore
- * NO_NULL_CHK
- */
- sctp_calculate_rto(stcb,
- asoc, tp1->whoTo,
- &tp1->sent_rcv_time,
- SCTP_RTT_FROM_DATA);
+ if (rto_ok &&
+ sctp_calculate_rto(stcb,
+ &stcb->asoc,
+ tp1->whoTo,
+ &tp1->sent_rcv_time,
+ SCTP_RTT_FROM_DATA)) {
rto_ok = 0;
}
if (tp1->whoTo->rto_needed == 0) {
@@ -4706,12 +4710,12 @@ hopeless_peer:
/* update RTO too? */
if (tp1->do_rtt) {
- if (rto_ok) {
- tp1->whoTo->RTO =
- sctp_calculate_rto(stcb,
- asoc, tp1->whoTo,
- &tp1->sent_rcv_time,
- SCTP_RTT_FROM_DATA);
+ if (rto_ok &&
+ sctp_calculate_rto(stcb,
+ &stcb->asoc,
+ tp1->whoTo,
+ &tp1->sent_rcv_time,
+ SCTP_RTT_FROM_DATA)) {
rto_ok = 0;
}
if (tp1->whoTo->rto_needed == 0) {
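All three hunks above switch to the reworked sctp_calculate_rto() contract introduced further down in sctputil.c: the function now writes the result into net->RTO itself and returns non-zero only when a measurement was actually taken, so the caller simply clears its per-SACK rto_ok flag. A minimal sketch of the caller side, mirroring the names used in the hunks above:

    /* Sketch only: mirrors the caller pattern in the hunks above. */
    int rto_ok = 1;                     /* at most one RTO update per SACK */

    if (tp1->do_rtt) {
            if (rto_ok &&
                sctp_calculate_rto(stcb, &stcb->asoc, tp1->whoTo,
                    &tp1->sent_rcv_time, SCTP_RTT_FROM_DATA)) {
                    /* A valid sample was taken; net->RTO is updated. */
                    rto_ok = 0;
            }
            if (tp1->whoTo->rto_needed == 0) {
                    tp1->whoTo->rto_needed = 1;
            }
    }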
diff --git a/freebsd/sys/netinet/sctp_input.c b/freebsd/sys/netinet/sctp_input.c
index 3f4e2f5f..4191d24c 100644
--- a/freebsd/sys/netinet/sctp_input.c
+++ b/freebsd/sys/netinet/sctp_input.c
@@ -467,6 +467,10 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
if (!cookie_found) {
uint16_t len;
+ /* Only report the missing cookie parameter */
+ if (op_err != NULL) {
+ sctp_m_freem(op_err);
+ }
len = (uint16_t)(sizeof(struct sctp_error_missing_param) + sizeof(uint16_t));
/* We abort with an error of missing mandatory param */
op_err = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
@@ -550,7 +554,7 @@ sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
/* calculate the RTO */
- net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered,
+ sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered,
SCTP_RTT_FROM_NON_DATA);
retval = sctp_send_cookie_echo(m, offset, initack_limit, stcb, net);
return (retval);
@@ -650,7 +654,7 @@ sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
tv.tv_sec = cp->heartbeat.hb_info.time_value_1;
tv.tv_usec = cp->heartbeat.hb_info.time_value_2;
/* Now lets do a RTO with this */
- r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv,
+ sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv,
SCTP_RTT_FROM_NON_DATA);
if (!(r_net->dest_state & SCTP_ADDR_REACHABLE)) {
r_net->dest_state |= SCTP_ADDR_REACHABLE;
@@ -705,34 +709,37 @@ static int
sctp_handle_nat_colliding_state(struct sctp_tcb *stcb)
{
/*
- * return 0 means we want you to proceed with the abort non-zero
- * means no abort processing
+ * Return 0 means we want you to proceed with the abort; non-zero
+ * means no abort processing.
*/
+ uint32_t new_vtag;
struct sctpasochead *head;
if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) ||
(SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) {
+ new_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
atomic_add_int(&stcb->asoc.refcnt, 1);
SCTP_TCB_UNLOCK(stcb);
SCTP_INP_INFO_WLOCK();
SCTP_TCB_LOCK(stcb);
atomic_subtract_int(&stcb->asoc.refcnt, 1);
+ } else {
+ return (0);
}
if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) {
/* generate a new vtag and send init */
LIST_REMOVE(stcb, sctp_asocs);
- stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
+ stcb->asoc.my_vtag = new_vtag;
head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
/*
* put it in the bucket in the vtag hash of assoc's for the
* system
*/
LIST_INSERT_HEAD(head, stcb, sctp_asocs);
- sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
SCTP_INP_INFO_WUNLOCK();
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
return (1);
- }
- if (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) {
+ } else {
/*
* treat like a case where the cookie expired i.e.: - dump
* current cookie. - generate a new vtag. - resend init.
@@ -742,15 +749,15 @@ sctp_handle_nat_colliding_state(struct sctp_tcb *stcb)
SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT);
sctp_stop_all_cookie_timers(stcb);
sctp_toss_old_cookies(stcb, &stcb->asoc);
- stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
+ stcb->asoc.my_vtag = new_vtag;
head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
/*
* put it in the bucket in the vtag hash of assoc's for the
* system
*/
LIST_INSERT_HEAD(head, stcb, sctp_asocs);
- sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
SCTP_INP_INFO_WUNLOCK();
+ sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
return (1);
}
return (0);
@@ -1676,8 +1683,7 @@ sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
old.tv_sec = cookie->time_entered.tv_sec;
old.tv_usec = cookie->time_entered.tv_usec;
net->hb_responded = 1;
- net->RTO = sctp_calculate_rto(stcb, asoc, net,
- &old,
+ sctp_calculate_rto(stcb, asoc, net, &old,
SCTP_RTT_FROM_NON_DATA);
if (stcb->asoc.sctp_autoclose_ticks &&
@@ -2401,8 +2407,7 @@ sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
/* calculate the RTT and set the encaps port */
old.tv_sec = cookie->time_entered.tv_sec;
old.tv_usec = cookie->time_entered.tv_usec;
- (*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp,
- &old, SCTP_RTT_FROM_NON_DATA);
+ sctp_calculate_rto(stcb, asoc, *netp, &old, SCTP_RTT_FROM_NON_DATA);
}
/* respond with a COOKIE-ACK */
sctp_send_cookie_ack(stcb);
@@ -2978,8 +2983,7 @@ sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
SCTP_STAT_INCR_GAUGE32(sctps_currestab);
if (asoc->overall_error_count == 0) {
- net->RTO = sctp_calculate_rto(stcb, asoc, net,
- &asoc->time_entered,
+ sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered,
SCTP_RTT_FROM_NON_DATA);
}
(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
diff --git a/freebsd/sys/netinet/sctp_os_bsd.h b/freebsd/sys/netinet/sctp_os_bsd.h
index abe8e2c9..3db2d5e2 100644
--- a/freebsd/sys/netinet/sctp_os_bsd.h
+++ b/freebsd/sys/netinet/sctp_os_bsd.h
@@ -97,9 +97,6 @@ __FBSDID("$FreeBSD$");
#include <crypto/sha1.h>
#include <crypto/sha2/sha256.h>
-#ifndef in6pcb
-#define in6pcb inpcb
-#endif
/* Declare all the malloc names for all the various mallocs */
MALLOC_DECLARE(SCTP_M_MAP);
MALLOC_DECLARE(SCTP_M_STRMI);
@@ -368,7 +365,7 @@ typedef struct callout sctp_os_timer_t;
*/
/* get the v6 hop limit */
-#define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct in6pcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
+#define SCTP_GET_HLIM(inp, ro) in6_selecthlim((struct inpcb *)&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
/* is the endpoint v6only? */
#define SCTP_IPV6_V6ONLY(inp) (((struct inpcb *)inp)->inp_flags & IN6P_IPV6_V6ONLY)
@@ -431,7 +428,7 @@ typedef struct rtentry sctp_rtentry_t;
m_clrprotoflags(o_pak); \
if (local_stcb && local_stcb->sctp_ep) \
result = ip6_output(o_pak, \
- ((struct in6pcb *)(local_stcb->sctp_ep))->in6p_outputopts, \
+ ((struct inpcb *)(local_stcb->sctp_ep))->in6p_outputopts, \
(ro), 0, 0, ifp, NULL); \
else \
result = ip6_output(o_pak, NULL, (ro), 0, 0, ifp, NULL); \
diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c
index 9221080d..522825da 100644
--- a/freebsd/sys/netinet/sctp_output.c
+++ b/freebsd/sys/netinet/sctp_output.c
@@ -4338,7 +4338,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
* at the SCTP layer. So use the value from
* the IP layer.
*/
- flowlabel = ntohl(((struct in6pcb *)inp)->in6p_flowinfo);
+ flowlabel = ntohl(((struct inpcb *)inp)->inp_flow);
}
flowlabel &= 0x000fffff;
len = SCTP_MIN_OVERHEAD;
@@ -4393,7 +4393,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
* at the SCTP layer. So use the value from
* the IP layer.
*/
- tos_value = (ntohl(((struct in6pcb *)inp)->in6p_flowinfo) >> 20) & 0xff;
+ tos_value = (ntohl(((struct inpcb *)inp)->inp_flow) >> 20) & 0xff;
}
tos_value &= 0xfc;
if (ecn_ok) {
@@ -7874,8 +7874,8 @@ sctp_med_chunk_output(struct sctp_inpcb *inp,
int bundle_at, ctl_cnt, no_data_chunks, eeor_mode;
unsigned int mtu, r_mtu, omtu, mx_mtu, to_out;
int tsns_sent = 0;
- uint32_t auth_offset = 0;
- struct sctp_auth_chunk *auth = NULL;
+ uint32_t auth_offset;
+ struct sctp_auth_chunk *auth;
uint16_t auth_keyid;
int override_ok = 1;
int skip_fill_up = 0;
@@ -8070,6 +8070,8 @@ again_one_more_time:
}
bundle_at = 0;
endoutchain = outchain = NULL;
+ auth = NULL;
+ auth_offset = 0;
no_fragmentflg = 1;
one_chunk = 0;
if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
@@ -9061,8 +9063,7 @@ sctp_send_cookie_echo(struct mbuf *m,
pad = 4 - pad;
}
if (pad > 0) {
- cookie = sctp_pad_lastmbuf(cookie, pad, NULL);
- if (cookie == NULL) {
+ if (sctp_pad_lastmbuf(cookie, pad, NULL) == NULL) {
return (-8);
}
}
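With in6pcb folded into inpcb, the two flowlabel/tos hunks above read the flow information directly from inp_flow. The 32-bit value (kept in network byte order) is laid out as version:4, traffic class:8, flow label:20, so the extraction is roughly this sketch (the inp pointer is hypothetical):

    uint32_t flowinfo, flowlabel;
    uint8_t tos_value;

    flowinfo = ntohl(inp->inp_flow);            /* hypothetical inpcb */
    flowlabel = flowinfo & 0x000fffff;          /* low 20 bits: flow label */
    tos_value = (flowinfo >> 20) & 0xff;        /* next 8 bits: traffic class */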
diff --git a/freebsd/sys/netinet/sctp_pcb.c b/freebsd/sys/netinet/sctp_pcb.c
index 10e4768e..c72cb5a9 100644
--- a/freebsd/sys/netinet/sctp_pcb.c
+++ b/freebsd/sys/netinet/sctp_pcb.c
@@ -49,7 +49,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/sctp_output.h>
#include <netinet/sctp_timer.h>
#include <netinet/sctp_bsd_addr.h>
-#include <netinet/sctp_dtrace_define.h>
#if defined(INET) || defined(INET6)
#include <netinet/udp.h>
#endif
@@ -3647,12 +3646,8 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from)
#ifdef INET6
- if (ip_pcb->inp_vflag & INP_IPV6) {
- struct in6pcb *in6p;
-
- in6p = (struct in6pcb *)inp;
- ip6_freepcbopts(in6p->in6p_outputopts);
- }
+ if (ip_pcb->inp_vflag & INP_IPV6)
+ ip6_freepcbopts(((struct inpcb *)inp)->in6p_outputopts);
#endif /* INET6 */
ip_pcb->inp_vflag = 0;
/* free up authentication fields */
diff --git a/freebsd/sys/netinet/sctp_pcb.h b/freebsd/sys/netinet/sctp_pcb.h
index 0f5aca88..cbe51c7d 100644
--- a/freebsd/sys/netinet/sctp_pcb.h
+++ b/freebsd/sys/netinet/sctp_pcb.h
@@ -362,7 +362,7 @@ struct sctp_inpcb {
*/
union {
struct inpcb inp;
- char align[(sizeof(struct in6pcb) + SCTP_ALIGNM1) &
+ char align[(sizeof(struct inpcb) + SCTP_ALIGNM1) &
~SCTP_ALIGNM1];
} ip_inp;
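The align[] member above only has to round sizeof(struct inpcb) up to the next SCTP alignment boundary; assuming SCTP_ALIGNM1 is the usual power-of-two mask (alignment minus one), the expression is the standard round-up idiom:

    /* Sketch: generic round-up, assuming align is a power of two. */
    #define ROUND_UP(x, align)  (((x) + ((align) - 1)) & ~((align) - 1))
    /* e.g. ROUND_UP(41, 8) == 48; the union is padded accordingly. */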
diff --git a/freebsd/sys/netinet/sctp_usrreq.c b/freebsd/sys/netinet/sctp_usrreq.c
index 01759156..0783462a 100644
--- a/freebsd/sys/netinet/sctp_usrreq.c
+++ b/freebsd/sys/netinet/sctp_usrreq.c
@@ -1414,10 +1414,8 @@ sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
}
if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
(num_v4 > 0)) {
- struct in6pcb *inp6;
- inp6 = (struct in6pcb *)inp;
- if (SCTP_IPV6_V6ONLY(inp6)) {
+ if (SCTP_IPV6_V6ONLY(inp)) {
/*
* if IPV6_V6ONLY flag, ignore connections destined
* to a v4 addr or v4-mapped addr
@@ -6918,14 +6916,14 @@ sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
#ifdef INET6
case AF_INET6:
{
- struct sockaddr_in6 *sin6p;
+ struct sockaddr_in6 *sin6;
if (addr->sa_len != sizeof(struct sockaddr_in6)) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
return (EINVAL);
}
- sin6p = (struct sockaddr_in6 *)addr;
- if (p != NULL && (error = prison_remote_ip6(p->td_ucred, &sin6p->sin6_addr)) != 0) {
+ sin6 = (struct sockaddr_in6 *)addr;
+ if (p != NULL && (error = prison_remote_ip6(p->td_ucred, &sin6->sin6_addr)) != 0) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
return (error);
}
diff --git a/freebsd/sys/netinet/sctputil.c b/freebsd/sys/netinet/sctputil.c
index c7d4499c..6ae999b0 100644
--- a/freebsd/sys/netinet/sctputil.c
+++ b/freebsd/sys/netinet/sctputil.c
@@ -2471,25 +2471,24 @@ sctp_mtu_size_reset(struct sctp_inpcb *inp,
/*
- * given an association and starting time of the current RTT period return
- * RTO in number of msecs net should point to the current network
+ * Given an association and starting time of the current RTT period, update
+ * RTO in number of msecs. net should point to the current network.
+ * Return 1, if an RTO update was performed, return 0 if no update was
+ * performed due to invalid starting point.
*/
-uint32_t
+int
sctp_calculate_rto(struct sctp_tcb *stcb,
struct sctp_association *asoc,
struct sctp_nets *net,
struct timeval *old,
int rtt_from_sack)
{
- /*-
- * given an association and the starting time of the current RTT
- * period (in value1/value2) return RTO in number of msecs.
- */
+ struct timeval now;
+ uint64_t rtt_us; /* RTT in us */
int32_t rtt; /* RTT in ms */
uint32_t new_rto;
int first_measure = 0;
- struct timeval now;
/************************/
/* 1. calculate new RTT */
@@ -2500,10 +2499,19 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
} else {
(void)SCTP_GETTIME_TIMEVAL(&now);
}
+ if ((old->tv_sec > now.tv_sec) ||
+ ((old->tv_sec == now.tv_sec) && (old->tv_usec > now.tv_usec))) {
+ /* The starting point is in the future. */
+ return (0);
+ }
timevalsub(&now, old);
+ rtt_us = (uint64_t)1000000 * (uint64_t)now.tv_sec + (uint64_t)now.tv_usec;
+ if (rtt_us > SCTP_RTO_UPPER_BOUND * 1000) {
+ /* The RTT is larger than a sane value. */
+ return (0);
+ }
/* store the current RTT in us */
- net->rtt = (uint64_t)1000000 * (uint64_t)now.tv_sec +
- (uint64_t)now.tv_usec;
+ net->rtt = rtt_us;
/* compute rtt in ms */
rtt = (int32_t)(net->rtt / 1000);
if ((asoc->cc_functions.sctp_rtt_calculated) && (rtt_from_sack == SCTP_RTT_FROM_DATA)) {
@@ -2535,7 +2543,7 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
* Paper "Congestion Avoidance and Control", Annex A.
*
* (net->lastsa >> SCTP_RTT_SHIFT) is the srtt
- * (net->lastsa >> SCTP_RTT_VAR_SHIFT) is the rttvar
+ * (net->lastsv >> SCTP_RTT_VAR_SHIFT) is the rttvar
*/
if (net->RTO_measured) {
rtt -= (net->lastsa >> SCTP_RTT_SHIFT);
@@ -2576,8 +2584,8 @@ sctp_calculate_rto(struct sctp_tcb *stcb,
if (new_rto > stcb->asoc.maxrto) {
new_rto = stcb->asoc.maxrto;
}
- /* we are now returning the RTO */
- return (new_rto);
+ net->RTO = new_rto;
+ return (1);
}
/*
diff --git a/freebsd/sys/netinet/sctputil.h b/freebsd/sys/netinet/sctputil.h
index 690e6125..c67c021f 100644
--- a/freebsd/sys/netinet/sctputil.h
+++ b/freebsd/sys/netinet/sctputil.h
@@ -133,7 +133,7 @@ uint32_t sctp_get_next_mtu(uint32_t);
void
sctp_timeout_handler(void *);
-uint32_t
+int
sctp_calculate_rto(struct sctp_tcb *, struct sctp_association *,
struct sctp_nets *, struct timeval *, int);
diff --git a/freebsd/sys/netinet/tcp_input.c b/freebsd/sys/netinet/tcp_input.c
index 05891306..fc111d9c 100644
--- a/freebsd/sys/netinet/tcp_input.c
+++ b/freebsd/sys/netinet/tcp_input.c
@@ -131,9 +131,9 @@ __FBSDID("$FreeBSD$");
const int tcprexmtthresh = 3;
-int tcp_log_in_vain = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
- &tcp_log_in_vain, 0,
+VNET_DEFINE(int, tcp_log_in_vain) = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(tcp_log_in_vain), 0,
"Log all incoming TCP segments to closed ports");
VNET_DEFINE(int, blackhole) = 0;
@@ -536,11 +536,19 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
int
tcp6_input(struct mbuf **mp, int *offp, int proto)
{
- struct mbuf *m = *mp;
+ struct mbuf *m;
struct in6_ifaddr *ia6;
struct ip6_hdr *ip6;
- IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
+ m = *mp;
+ if (m->m_len < *offp + sizeof(struct tcphdr)) {
+ m = m_pullup(m, *offp + sizeof(struct tcphdr));
+ if (m == NULL) {
+ *mp = m;
+ TCPSTAT_INC(tcps_rcvshort);
+ return (IPPROTO_DONE);
+ }
+ }
/*
* draft-itojun-ipv6-tcp-to-anycast
@@ -549,17 +557,17 @@ tcp6_input(struct mbuf **mp, int *offp, int proto)
ip6 = mtod(m, struct ip6_hdr *);
ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) {
- struct ip6_hdr *ip6;
ifa_free(&ia6->ia_ifa);
- ip6 = mtod(m, struct ip6_hdr *);
icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
(caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
+ *mp = NULL;
return (IPPROTO_DONE);
}
if (ia6)
ifa_free(&ia6->ia_ifa);
+ *mp = m;
return (tcp_input(mp, offp, proto));
}
#endif /* INET6 */
@@ -618,15 +626,6 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
#ifdef INET6
if (isipv6) {
- /* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */
-
- if (m->m_len < (sizeof(*ip6) + sizeof(*th))) {
- m = m_pullup(m, sizeof(*ip6) + sizeof(*th));
- if (m == NULL) {
- TCPSTAT_INC(tcps_rcvshort);
- return (IPPROTO_DONE);
- }
- }
ip6 = mtod(m, struct ip6_hdr *);
th = (struct tcphdr *)((caddr_t)ip6 + off0);
@@ -735,7 +734,13 @@ tcp_input(struct mbuf **mp, int *offp, int proto)
if (off > sizeof (struct tcphdr)) {
#ifdef INET6
if (isipv6) {
- IP6_EXTHDR_CHECK(m, off0, off, IPPROTO_DONE);
+ if (m->m_len < off0 + off) {
+ m = m_pullup(m, off0 + off);
+ if (m == NULL) {
+ TCPSTAT_INC(tcps_rcvshort);
+ return (IPPROTO_DONE);
+ }
+ }
ip6 = mtod(m, struct ip6_hdr *);
th = (struct tcphdr *)((caddr_t)ip6 + off0);
}
@@ -883,8 +888,8 @@ findpcb:
* Log communication attempts to ports that are not
* in use.
*/
- if ((tcp_log_in_vain == 1 && (thflags & TH_SYN)) ||
- tcp_log_in_vain == 2) {
+ if ((V_tcp_log_in_vain == 1 && (thflags & TH_SYN)) ||
+ V_tcp_log_in_vain == 2) {
if ((s = tcp_log_vain(NULL, th, (void *)ip, ip6)))
log(LOG_INFO, "%s; %s: Connection attempt "
"to closed port\n", s, __func__);
diff --git a/freebsd/sys/netinet/tcp_output.c b/freebsd/sys/netinet/tcp_output.c
index 3e024fdb..dc75c68d 100644
--- a/freebsd/sys/netinet/tcp_output.c
+++ b/freebsd/sys/netinet/tcp_output.c
@@ -933,6 +933,20 @@ send:
if (tp->t_flags & TF_NEEDFIN)
sendalot = 1;
} else {
+ if (optlen + ipoptlen >= tp->t_maxseg) {
+ /*
+ * Since we don't have enough space to put
+ * the IP header chain and the TCP header in
+ * one packet as required by RFC 7112, don't
+ * send it. Also ensure that at least one
+ * byte of the payload can be put into the
+ * TCP segment.
+ */
+ SOCKBUF_UNLOCK(&so->so_snd);
+ error = EMSGSIZE;
+ sack_rxmit = 0;
+ goto out;
+ }
len = tp->t_maxseg - optlen - ipoptlen;
sendalot = 1;
if (dont_sendalot)
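The new check above rejects the case where TCP options plus IP options already fill the whole segment, since the old computation would then have produced len == 0 and a header-only segment. A minimal sketch of the same test, with hypothetical numbers:

    /* Sketch: does the segment still have room for payload? */
    static int
    segment_has_room(int t_maxseg, int optlen, int ipoptlen)
    {
            return (optlen + ipoptlen < t_maxseg);
    }
    /* e.g. segment_has_room(1460, 40, 1420) == 0, so the sender fails
     * with EMSGSIZE instead of emitting a payload-less segment. */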
diff --git a/freebsd/sys/netinet/tcp_subr.c b/freebsd/sys/netinet/tcp_subr.c
index 44ec38c7..eae696c1 100644
--- a/freebsd/sys/netinet/tcp_subr.c
+++ b/freebsd/sys/netinet/tcp_subr.c
@@ -3114,7 +3114,7 @@ tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
{
/* Is logging enabled? */
- if (tcp_log_in_vain == 0)
+ if (V_tcp_log_in_vain == 0)
return (NULL);
return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
diff --git a/freebsd/sys/netinet/tcp_timer.c b/freebsd/sys/netinet/tcp_timer.c
index cf6ceff5..e1b9ec59 100644
--- a/freebsd/sys/netinet/tcp_timer.c
+++ b/freebsd/sys/netinet/tcp_timer.c
@@ -127,9 +127,10 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
&tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
"Retransmission Timer Slop");
-int tcp_always_keepalive = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
- &tcp_always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
+VNET_DEFINE(int, tcp_always_keepalive) = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_VNET|CTLFLAG_RW,
+ &VNET_NAME(tcp_always_keepalive) , 0,
+ "Assume SO_KEEPALIVE on all TCP connections");
int tcp_fast_finwait2_recycle = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
@@ -433,7 +434,7 @@ tcp_timer_keep(void *xtp)
TCPSTAT_INC(tcps_keeptimeo);
if (tp->t_state < TCPS_ESTABLISHED)
goto dropit;
- if ((tcp_always_keepalive ||
+ if ((V_tcp_always_keepalive ||
inp->inp_socket->so_options & SO_KEEPALIVE) &&
tp->t_state <= TCPS_CLOSING) {
if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
diff --git a/freebsd/sys/netinet/tcp_timer.h b/freebsd/sys/netinet/tcp_timer.h
index 3e985bdf..fe3616c2 100644
--- a/freebsd/sys/netinet/tcp_timer.h
+++ b/freebsd/sys/netinet/tcp_timer.h
@@ -203,10 +203,11 @@ extern int tcp_backoff[];
extern int tcp_totbackoff;
extern int tcp_rexmit_drop_options;
-extern int tcp_always_keepalive;
extern int tcp_finwait2_timeout;
extern int tcp_fast_finwait2_recycle;
+VNET_DECLARE(int, tcp_always_keepalive);
+#define V_tcp_always_keepalive VNET(tcp_always_keepalive)
VNET_DECLARE(int, tcp_pmtud_blackhole_detect);
#define V_tcp_pmtud_blackhole_detect VNET(tcp_pmtud_blackhole_detect)
VNET_DECLARE(int, tcp_pmtud_blackhole_mss);
diff --git a/freebsd/sys/netinet/tcp_usrreq.c b/freebsd/sys/netinet/tcp_usrreq.c
index 809ea35d..eab13eeb 100644
--- a/freebsd/sys/netinet/tcp_usrreq.c
+++ b/freebsd/sys/netinet/tcp_usrreq.c
@@ -346,23 +346,25 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
- struct sockaddr_in6 *sin6p;
+ struct sockaddr_in6 *sin6;
+ u_char vflagsav;
- sin6p = (struct sockaddr_in6 *)nam;
- if (nam->sa_len != sizeof (*sin6p))
+ sin6 = (struct sockaddr_in6 *)nam;
+ if (nam->sa_len != sizeof (*sin6))
return (EINVAL);
/*
* Must check for multicast addresses and disallow binding
* to them.
*/
- if (sin6p->sin6_family == AF_INET6 &&
- IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
+ if (sin6->sin6_family == AF_INET6 &&
+ IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return (EAFNOSUPPORT);
TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
INP_WLOCK(inp);
+ vflagsav = inp->inp_vflag;
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = EINVAL;
goto out;
@@ -374,12 +376,12 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
inp->inp_vflag |= INP_IPV6;
#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
- if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
+ if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
inp->inp_vflag |= INP_IPV4;
- else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+ else if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
struct sockaddr_in sin;
- in6_sin6_2_sin(&sin, sin6p);
+ in6_sin6_2_sin(&sin, sin6);
if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
error = EAFNOSUPPORT;
INP_HASH_WUNLOCK(&V_tcbinfo);
@@ -397,6 +399,8 @@ tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
error = in6_pcbbind(inp, nam, td->td_ucred);
INP_HASH_WUNLOCK(&V_tcbinfo);
out:
+ if (error != 0)
+ inp->inp_vflag = vflagsav;
TCPDEBUG2(PRU_BIND);
TCP_PROBE2(debug__user, tp, PRU_BIND);
INP_WUNLOCK(inp);
@@ -459,6 +463,7 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
+ u_char vflagsav;
TCPDEBUG0;
inp = sotoinpcb(so);
@@ -468,6 +473,7 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
error = EINVAL;
goto out;
}
+ vflagsav = inp->inp_vflag;
tp = intotcpcb(inp);
TCPDEBUG1();
SOCK_LOCK(so);
@@ -493,6 +499,9 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
if (IS_FASTOPEN(tp->t_flags))
tp->t_tfo_pending = tcp_fastopen_alloc_counter();
+ if (error != 0)
+ inp->inp_vflag = vflagsav;
+
out:
TCPDEBUG2(PRU_LISTEN);
TCP_PROBE2(debug__user, tp, PRU_LISTEN);
@@ -568,23 +577,27 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
- struct sockaddr_in6 *sin6p;
+ struct sockaddr_in6 *sin6;
+ u_int8_t incflagsav;
+ u_char vflagsav;
TCPDEBUG0;
- sin6p = (struct sockaddr_in6 *)nam;
- if (nam->sa_len != sizeof (*sin6p))
+ sin6 = (struct sockaddr_in6 *)nam;
+ if (nam->sa_len != sizeof (*sin6))
return (EINVAL);
/*
* Must disallow TCP ``connections'' to multicast addresses.
*/
- if (sin6p->sin6_family == AF_INET6
- && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
+ if (sin6->sin6_family == AF_INET6
+ && IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
return (EAFNOSUPPORT);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
INP_WLOCK(inp);
+ vflagsav = inp->inp_vflag;
+ incflagsav = inp->inp_inc.inc_flags;
if (inp->inp_flags & INP_TIMEWAIT) {
error = EADDRINUSE;
goto out;
@@ -601,7 +614,7 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
* therefore probably require the hash lock, which isn't held here.
* Is this a significant problem?
*/
- if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
struct sockaddr_in sin;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
@@ -613,16 +626,16 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
}
- in6_sin6_2_sin(&sin, sin6p);
+ in6_sin6_2_sin(&sin, sin6);
if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
error = EAFNOSUPPORT;
goto out;
}
- inp->inp_vflag |= INP_IPV4;
- inp->inp_vflag &= ~INP_IPV6;
if ((error = prison_remote_ip4(td->td_ucred,
&sin.sin_addr)) != 0)
goto out;
+ inp->inp_vflag |= INP_IPV4;
+ inp->inp_vflag &= ~INP_IPV6;
if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
goto out;
#ifdef TCP_OFFLOAD
@@ -640,11 +653,11 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
}
}
#endif
+ if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr)) != 0)
+ goto out;
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
inp->inp_inc.inc_flags |= INC_ISIPV6;
- if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
- goto out;
if ((error = tcp6_connect(tp, nam, td)) != 0)
goto out;
#ifdef TCP_OFFLOAD
@@ -657,6 +670,15 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = tp->t_fb->tfb_tcp_output(tp);
out:
+ /*
+ * If the implicit bind in the connect call fails, restore
+ * the flags we modified.
+ */
+ if (error != 0 && inp->inp_lport == 0) {
+ inp->inp_vflag = vflagsav;
+ inp->inp_inc.inc_flags = incflagsav;
+ }
+
TCPDEBUG2(PRU_CONNECT);
TCP_PROBE2(debug__user, tp, PRU_CONNECT);
INP_WUNLOCK(inp);
@@ -912,6 +934,9 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
#ifdef INET6
int isipv6;
#endif
+ u_int8_t incflagsav;
+ u_char vflagsav;
+ bool restoreflags;
TCPDEBUG0;
/*
@@ -923,6 +948,9 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
+ vflagsav = inp->inp_vflag;
+ incflagsav = inp->inp_inc.inc_flags;
+ restoreflags = false;
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
if (control)
m_freem(control);
@@ -974,22 +1002,22 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
#ifdef INET6
case AF_INET6:
{
- struct sockaddr_in6 *sin6p;
+ struct sockaddr_in6 *sin6;
- sin6p = (struct sockaddr_in6 *)nam;
- if (sin6p->sin6_len != sizeof(struct sockaddr_in6)) {
+ sin6 = (struct sockaddr_in6 *)nam;
+ if (sin6->sin6_len != sizeof(*sin6)) {
if (m)
m_freem(m);
error = EINVAL;
goto out;
}
- if (IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
+ if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
if (m)
m_freem(m);
error = EAFNOSUPPORT;
goto out;
}
- if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
#ifdef INET
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
error = EINVAL;
@@ -1003,9 +1031,10 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
m_freem(m);
goto out;
}
+ restoreflags = true;
inp->inp_vflag &= ~INP_IPV6;
sinp = &sin;
- in6_sin6_2_sin(sinp, sin6p);
+ in6_sin6_2_sin(sinp, sin6);
if (IN_MULTICAST(
ntohl(sinp->sin_addr.s_addr))) {
error = EAFNOSUPPORT;
@@ -1033,10 +1062,11 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
error = EAFNOSUPPORT;
goto out;
}
+ restoreflags = true;
inp->inp_vflag &= ~INP_IPV4;
inp->inp_inc.inc_flags |= INC_ISIPV6;
if ((error = prison_remote_ip6(td->td_ucred,
- &sin6p->sin6_addr))) {
+ &sin6->sin6_addr))) {
if (m)
m_freem(m);
goto out;
@@ -1083,6 +1113,14 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
error = tcp_connect(tp,
(struct sockaddr *)sinp, td);
#endif
+ /*
+ * The bind operation in tcp_connect succeeded. We
+ * no longer want to restore the flags if later
+ * operations fail.
+ */
+ if (error == 0 || inp->inp_lport != 0)
+ restoreflags = false;
+
if (error)
goto out;
if (IS_FASTOPEN(tp->t_flags))
@@ -1153,6 +1191,14 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
error = tcp_connect(tp,
(struct sockaddr *)sinp, td);
#endif
+ /*
+ * The bind operation in tcp_connect succeeded. We
+ * no longer want to restore the flags if later
+ * operations fail.
+ */
+ if (error == 0 || inp->inp_lport != 0)
+ restoreflags = false;
+
if (error)
goto out;
tp->snd_wnd = TTCP_CLIENT_SND_WND;
@@ -1171,6 +1217,14 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
TCP_LOG_USERSEND, error,
0, NULL, false);
out:
+ /*
+ * If the request was unsuccessful and we changed flags,
+ * restore the original flags.
+ */
+ if (error != 0 && restoreflags) {
+ inp->inp_vflag = vflagsav;
+ inp->inp_inc.inc_flags = incflagsav;
+ }
TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
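The tcp6_usr_bind()/tcp6_usr_connect()/tcp_usr_send() changes above all apply the same idea: snapshot inp_vflag and inp_inc.inc_flags before an implicit bind may change them, and restore the snapshot only if the operation failed before a local port was assigned. A compressed, hypothetical sketch of that pattern:

    /* Sketch only; not kernel code. */
    static int
    connect_with_flag_restore(struct inpcb *inp,
        int (*do_connect)(struct inpcb *))
    {
            u_char vflagsav = inp->inp_vflag;
            uint8_t incflagsav = inp->inp_inc.inc_flags;
            int error;

            error = do_connect(inp);        /* may flip INP_IPV4/INP_IPV6 */
            if (error != 0 && inp->inp_lport == 0) {
                    /* Implicit bind failed: undo address-family changes. */
                    inp->inp_vflag = vflagsav;
                    inp->inp_inc.inc_flags = incflagsav;
            }
            return (error);
    }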
diff --git a/freebsd/sys/netinet/tcp_var.h b/freebsd/sys/netinet/tcp_var.h
index cca8623e..13d20294 100644
--- a/freebsd/sys/netinet/tcp_var.h
+++ b/freebsd/sys/netinet/tcp_var.h
@@ -745,7 +745,8 @@ SYSCTL_DECL(_net_inet_tcp_sack);
MALLOC_DECLARE(M_TCPLOG);
#endif
-extern int tcp_log_in_vain;
+VNET_DECLARE(int, tcp_log_in_vain);
+#define V_tcp_log_in_vain VNET(tcp_log_in_vain)
/*
* Global TCP tunables shared between different stacks.
diff --git a/freebsd/sys/netinet/udp_usrreq.c b/freebsd/sys/netinet/udp_usrreq.c
index f89660d6..8462d0ee 100644
--- a/freebsd/sys/netinet/udp_usrreq.c
+++ b/freebsd/sys/netinet/udp_usrreq.c
@@ -122,9 +122,9 @@ VNET_DEFINE(int, udp_cksum) = 1;
SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(udp_cksum), 0, "compute udp checksum");
-int udp_log_in_vain = 0;
-SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW,
- &udp_log_in_vain, 0, "Log all incoming UDP packets");
+VNET_DEFINE(int, udp_log_in_vain) = 0;
+SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(udp_log_in_vain), 0, "Log all incoming UDP packets");
VNET_DEFINE(int, udp_blackhole) = 0;
SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
@@ -427,14 +427,13 @@ udp_input(struct mbuf **mp, int *offp, int proto)
/*
* Get IP and UDP header together in first mbuf.
*/
- ip = mtod(m, struct ip *);
if (m->m_len < iphlen + sizeof(struct udphdr)) {
if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == NULL) {
UDPSTAT_INC(udps_hdrops);
return (IPPROTO_DONE);
}
- ip = mtod(m, struct ip *);
}
+ ip = mtod(m, struct ip *);
uh = (struct udphdr *)((caddr_t)ip + iphlen);
cscov_partial = (proto == IPPROTO_UDPLITE) ? 1 : 0;
@@ -695,7 +694,7 @@ udp_input(struct mbuf **mp, int *offp, int proto)
ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD |
INPLOOKUP_RLOCKPCB, ifp, m);
if (inp == NULL) {
- if (udp_log_in_vain) {
+ if (V_udp_log_in_vain) {
char src[INET_ADDRSTRLEN];
char dst[INET_ADDRSTRLEN];
diff --git a/freebsd/sys/netinet/udp_var.h b/freebsd/sys/netinet/udp_var.h
index 01545582..ecca2a54 100644
--- a/freebsd/sys/netinet/udp_var.h
+++ b/freebsd/sys/netinet/udp_var.h
@@ -153,9 +153,10 @@ extern u_long udp_sendspace;
extern u_long udp_recvspace;
VNET_DECLARE(int, udp_cksum);
VNET_DECLARE(int, udp_blackhole);
+VNET_DECLARE(int, udp_log_in_vain);
#define V_udp_cksum VNET(udp_cksum)
#define V_udp_blackhole VNET(udp_blackhole)
-extern int udp_log_in_vain;
+#define V_udp_log_in_vain VNET(udp_log_in_vain)
static __inline struct inpcbinfo *
udp_get_inpcbinfo(int protocol)
diff --git a/freebsd/sys/netinet6/dest6.c b/freebsd/sys/netinet6/dest6.c
index 50a836ba..354457e2 100644
--- a/freebsd/sys/netinet6/dest6.c
+++ b/freebsd/sys/netinet6/dest6.c
@@ -66,30 +66,35 @@ __FBSDID("$FreeBSD$");
int
dest6_input(struct mbuf **mp, int *offp, int proto)
{
- struct mbuf *m = *mp;
- int off = *offp, dstoptlen, optlen;
+ struct mbuf *m;
+ int off, dstoptlen, optlen;
struct ip6_dest *dstopts;
u_int8_t *opt;
- /* validation of the length of the header */
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, sizeof(*dstopts), IPPROTO_DONE);
+ m = *mp;
+ off = *offp;
+
+ /* Validation of the length of the header. */
+ if (m->m_len < off + sizeof(*dstopts)) {
+ m = m_pullup(m, off + sizeof(*dstopts));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = m;
+ return (IPPROTO_DONE);
+ }
+ }
dstopts = (struct ip6_dest *)(mtod(m, caddr_t) + off);
-#else
- IP6_EXTHDR_GET(dstopts, struct ip6_dest *, m, off, sizeof(*dstopts));
- if (dstopts == NULL)
- return IPPROTO_DONE;
-#endif
dstoptlen = (dstopts->ip6d_len + 1) << 3;
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, dstoptlen, IPPROTO_DONE);
+ if (m->m_len < off + dstoptlen) {
+ m = m_pullup(m, off + dstoptlen);
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = m;
+ return (IPPROTO_DONE);
+ }
+ }
dstopts = (struct ip6_dest *)(mtod(m, caddr_t) + off);
-#else
- IP6_EXTHDR_GET(dstopts, struct ip6_dest *, m, off, dstoptlen);
- if (dstopts == NULL)
- return IPPROTO_DONE;
-#endif
off += dstoptlen;
dstoptlen -= sizeof(struct ip6_dest);
opt = (u_int8_t *)dstopts + sizeof(struct ip6_dest);
@@ -112,17 +117,21 @@ dest6_input(struct mbuf **mp, int *offp, int proto)
default: /* unknown option */
optlen = ip6_unknown_opt(opt, m,
opt - mtod(m, u_int8_t *));
- if (optlen == -1)
+ if (optlen == -1) {
+ *mp = NULL;
return (IPPROTO_DONE);
+ }
optlen += 2;
break;
}
}
*offp = off;
+ *mp = m;
return (dstopts->ip6d_nxt);
bad:
m_freem(m);
+ *mp = NULL;
return (IPPROTO_DONE);
}
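dest6_input() now follows the stricter input-handler contract visible above: every return path leaves *mp pointing at the surviving mbuf chain (possibly a new head after m_pullup()), or at NULL when the chain was consumed or freed. A condensed sketch of that discipline for a hypothetical extension-header handler:

    /* Sketch only: hypothetical handler showing the *mp discipline. */
    static int
    example_exthdr_input(struct mbuf **mp, int *offp, int proto)
    {
            struct mbuf *m = *mp;
            const int hdrlen = 8;           /* hypothetical header size */

            if (m->m_len < *offp + hdrlen) {
                    m = m_pullup(m, *offp + hdrlen);
                    if (m == NULL) {
                            *mp = NULL;     /* chain already freed */
                            return (IPPROTO_DONE);
                    }
            }
            *offp += hdrlen;                /* consumed this header */
            *mp = m;                        /* hand back the live chain */
            return (IPPROTO_DONE);          /* or the next-header value */
    }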
diff --git a/freebsd/sys/netinet6/frag6.c b/freebsd/sys/netinet6/frag6.c
index 0b0c7b91..443c684a 100644
--- a/freebsd/sys/netinet6/frag6.c
+++ b/freebsd/sys/netinet6/frag6.c
@@ -5,6 +5,7 @@
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
+ * Copyright (c) 2019 Netflix, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -40,20 +41,18 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/domain.h>
+#include <sys/eventhandler.h>
#include <sys/hash.h>
+#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
-#include <sys/domain.h>
-#include <sys/eventhandler.h>
#include <sys/protosw.h>
+#include <sys/queue.h>
#include <sys/socket.h>
-#include <sys/errno.h>
-#include <sys/time.h>
-#include <sys/kernel.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
-#include <machine/atomic.h>
-
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
@@ -65,48 +64,85 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
-#include <netinet/in_systm.h> /* for ECN definitions */
-#include <netinet/ip.h> /* for ECN definitions */
+#include <netinet/in_systm.h> /* For ECN definitions. */
+#include <netinet/ip.h> /* For ECN definitions. */
+#ifdef MAC
#include <security/mac/mac_framework.h>
+#endif
/*
- * Reassembly headers are stored in hash buckets.
+ * A "big picture" of how IPv6 fragment queues are all linked together.
+ *
+ * struct ip6qbucket ip6qb[...]; hashed buckets
+ * ||||||||
+ * |
+ * +--- TAILQ(struct ip6q, packets) *q6; tailq entries holding
+ * |||||||| fragmented packets
+ * | (1 per original packet)
+ * |
+ * +--- TAILQ(struct ip6asfrag, ip6q_frags) *af6; tailq entries of IPv6
+ * | *ip6af;fragment packets
+ * | for one original packet
+ * + *mbuf
*/
+
+/* Reassembly headers are stored in hash buckets. */
#define IP6REASS_NHASH_LOG2 10
#define IP6REASS_NHASH (1 << IP6REASS_NHASH_LOG2)
#define IP6REASS_HMASK (IP6REASS_NHASH - 1)
-static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *,
- uint32_t bucket __unused);
-static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused);
-static void frag6_insque_head(struct ip6q *, struct ip6q *,
- uint32_t bucket);
-static void frag6_remque(struct ip6q *, uint32_t bucket);
-static void frag6_freef(struct ip6q *, uint32_t bucket);
-
+TAILQ_HEAD(ip6qhead, ip6q);
struct ip6qbucket {
- struct ip6q ip6q;
+ struct ip6qhead packets;
struct mtx lock;
int count;
};
-VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
-volatile u_int frag6_nfrags = 0;
-VNET_DEFINE_STATIC(struct ip6qbucket, ip6q[IP6REASS_NHASH]);
-VNET_DEFINE_STATIC(uint32_t, ip6q_hashseed);
+struct ip6asfrag {
+ TAILQ_ENTRY(ip6asfrag) ip6af_tq;
+ struct mbuf *ip6af_m;
+ int ip6af_offset; /* Offset in ip6af_m to next header. */
+ int ip6af_frglen; /* Fragmentable part length. */
+ int ip6af_off; /* Fragment offset. */
+ bool ip6af_mff; /* More fragment bit in frag off. */
+};
+
+static MALLOC_DEFINE(M_FRAG6, "frag6", "IPv6 fragment reassembly header");
+
+#ifdef VIMAGE
+/* A flag to indicate if IPv6 fragmentation is initialized. */
+VNET_DEFINE_STATIC(bool, frag6_on);
+#define V_frag6_on VNET(frag6_on)
+#endif
+
+/* System wide (global) maximum and count of packets in reassembly queues. */
+static int ip6_maxfrags;
+static volatile u_int frag6_nfrags = 0;
+/* Maximum and current packets in per-VNET reassembly queue. */
+VNET_DEFINE_STATIC(int, ip6_maxfragpackets);
+VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
+#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets)
#define V_frag6_nfragpackets VNET(frag6_nfragpackets)
-#define V_ip6q VNET(ip6q)
-#define V_ip6q_hashseed VNET(ip6q_hashseed)
-#define IP6Q_LOCK(i) mtx_lock(&V_ip6q[(i)].lock)
-#define IP6Q_TRYLOCK(i) mtx_trylock(&V_ip6q[(i)].lock)
-#define IP6Q_LOCK_ASSERT(i) mtx_assert(&V_ip6q[(i)].lock, MA_OWNED)
-#define IP6Q_UNLOCK(i) mtx_unlock(&V_ip6q[(i)].lock)
-#define IP6Q_HEAD(i) (&V_ip6q[(i)].ip6q)
+/* Maximum per-VNET reassembly queues per bucket and fragments per packet. */
+VNET_DEFINE_STATIC(int, ip6_maxfragbucketsize);
+VNET_DEFINE_STATIC(int, ip6_maxfragsperpacket);
+#define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize)
+#define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket)
+
+/* Per-VNET reassembly queue buckets. */
+VNET_DEFINE_STATIC(struct ip6qbucket, ip6qb[IP6REASS_NHASH]);
+VNET_DEFINE_STATIC(uint32_t, ip6qb_hashseed);
+#define V_ip6qb VNET(ip6qb)
+#define V_ip6qb_hashseed VNET(ip6qb_hashseed)
-static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
+#define IP6QB_LOCK(_b) mtx_lock(&V_ip6qb[(_b)].lock)
+#define IP6QB_TRYLOCK(_b) mtx_trylock(&V_ip6qb[(_b)].lock)
+#define IP6QB_LOCK_ASSERT(_b) mtx_assert(&V_ip6qb[(_b)].lock, MA_OWNED)
+#define IP6QB_UNLOCK(_b) mtx_unlock(&V_ip6qb[(_b)].lock)
+#define IP6QB_HEAD(_b) (&V_ip6qb[(_b)].packets)
/*
* By default, limit the number of IP6 fragments across all reassembly
@@ -124,11 +160,18 @@ static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
#define IP6_MAXFRAGS (nmbclusters / 32)
#define IP6_MAXFRAGPACKETS (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50))
+
/*
- * Initialise reassembly queue and fragment identifier.
+ * Sysctls and helper function.
*/
-void
-frag6_set_bucketsize()
+SYSCTL_DECL(_net_inet6_ip6);
+
+SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, frag6_nfrags,
+ CTLFLAG_RD, __DEVOLATILE(u_int *, &frag6_nfrags), 0,
+ "Global number of IPv6 fragments across all reassembly queues.");
+
+static void
+frag6_set_bucketsize(void)
{
int i;
@@ -136,68 +179,180 @@ frag6_set_bucketsize()
V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1);
}
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
+ CTLFLAG_RW, &ip6_maxfrags, 0,
+ "Maximum allowed number of outstanding IPv6 packet fragments. "
+ "A value of 0 means no fragmented packets will be accepted, while a "
+ "a value of -1 means no limit");
+
+static int
+sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
+{
+ int error, val;
+
+ val = V_ip6_maxfragpackets;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || !req->newptr)
+ return (error);
+ V_ip6_maxfragpackets = val;
+ frag6_set_bucketsize();
+ return (0);
+}
+SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
+ sysctl_ip6_maxfragpackets, "I",
+ "Default maximum number of outstanding fragmented IPv6 packets. "
+ "A value of 0 means no fragmented packets will be accepted, while a "
+ "a value of -1 means no limit");
+SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, frag6_nfragpackets,
+ CTLFLAG_VNET | CTLFLAG_RD,
+ __DEVOLATILE(u_int *, &VNET_NAME(frag6_nfragpackets)), 0,
+ "Per-VNET number of IPv6 fragments across all reassembly queues.");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
+ "Maximum allowed number of fragments per packet");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
+ "Maximum number of reassembly queues per hash bucket");
+
+
+/*
+ * Remove the IPv6 fragmentation header from the mbuf.
+ */
+int
+ip6_deletefraghdr(struct mbuf *m, int offset, int wait __unused)
+{
+ struct ip6_hdr *ip6;
+
+ KASSERT(m->m_len >= offset + sizeof(struct ip6_frag),
+ ("%s: ext headers not contigous in mbuf %p m_len %d >= "
+ "offset %d + %zu\n", __func__, m, m->m_len, offset,
+ sizeof(struct ip6_frag)));
+
+ /* Delete frag6 header. */
+ ip6 = mtod(m, struct ip6_hdr *);
+ bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag), offset);
+ m->m_data += sizeof(struct ip6_frag);
+ m->m_len -= sizeof(struct ip6_frag);
+ m->m_flags |= M_FRAGMENTED;
+
+ return (0);
+}
+
+/*
+ * Free a fragment reassembly header and all associated datagrams.
+ */
static void
-frag6_change(void *tag)
+frag6_freef(struct ip6q *q6, uint32_t bucket)
{
- VNET_ITERATOR_DECL(vnet_iter);
+ struct ip6_hdr *ip6;
+ struct ip6asfrag *af6;
+ struct mbuf *m;
- ip6_maxfrags = IP6_MAXFRAGS;
- VNET_LIST_RLOCK_NOSLEEP();
- VNET_FOREACH(vnet_iter) {
- CURVNET_SET(vnet_iter);
- V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
- frag6_set_bucketsize();
- CURVNET_RESTORE();
+ IP6QB_LOCK_ASSERT(bucket);
+
+ while ((af6 = TAILQ_FIRST(&q6->ip6q_frags)) != NULL) {
+
+ m = af6->ip6af_m;
+ TAILQ_REMOVE(&q6->ip6q_frags, af6, ip6af_tq);
+
+ /*
+ * Return ICMP time exceeded error for the 1st fragment.
+ * Just free other fragments.
+ */
+ if (af6->ip6af_off == 0 && m->m_pkthdr.rcvif != NULL) {
+
+ /* Adjust pointer. */
+ ip6 = mtod(m, struct ip6_hdr *);
+
+ /* Restore source and destination addresses. */
+ ip6->ip6_src = q6->ip6q_src;
+ ip6->ip6_dst = q6->ip6q_dst;
+
+ icmp6_error(m, ICMP6_TIME_EXCEEDED,
+ ICMP6_TIME_EXCEED_REASSEMBLY, 0);
+ } else
+ m_freem(m);
+
+ free(af6, M_FRAG6);
}
- VNET_LIST_RUNLOCK_NOSLEEP();
+
+ TAILQ_REMOVE(IP6QB_HEAD(bucket), q6, ip6q_tq);
+ V_ip6qb[bucket].count--;
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
+#ifdef MAC
+ mac_ip6q_destroy(q6);
+#endif
+ free(q6, M_FRAG6);
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
}
-void
-frag6_init(void)
+/*
+ * Drain off all datagram fragments belonging to
+ * the given network interface.
+ */
+static void
+frag6_cleanup(void *arg __unused, struct ifnet *ifp)
{
+ struct ip6qhead *head;
struct ip6q *q6;
- int i;
+ struct ip6asfrag *af6;
+ uint32_t bucket;
- V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
- frag6_set_bucketsize();
- for (i = 0; i < IP6REASS_NHASH; i++) {
- q6 = IP6Q_HEAD(i);
- q6->ip6q_next = q6->ip6q_prev = q6;
- mtx_init(&V_ip6q[i].lock, "ip6qlock", NULL, MTX_DEF);
- V_ip6q[i].count = 0;
- }
- V_ip6q_hashseed = arc4random();
- V_ip6_maxfragsperpacket = 64;
- if (!IS_DEFAULT_VNET(curvnet))
+ KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));
+
+ CURVNET_SET_QUIET(ifp->if_vnet);
+#ifdef VIMAGE
+ /*
+ * Skip processing if IPv6 reassembly is not initialised or
+ * torn down by frag6_destroy().
+ */
+ if (!V_frag6_on) {
+ CURVNET_RESTORE();
return;
+ }
+#endif
- ip6_maxfrags = IP6_MAXFRAGS;
- EVENTHANDLER_REGISTER(nmbclusters_change,
- frag6_change, NULL, EVENTHANDLER_PRI_ANY);
+ for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
+ IP6QB_LOCK(bucket);
+ head = IP6QB_HEAD(bucket);
+ /* Scan fragment list. */
+ TAILQ_FOREACH(q6, head, ip6q_tq) {
+ TAILQ_FOREACH(af6, &q6->ip6q_frags, ip6af_tq) {
+
+ /* Clear no longer valid rcvif pointer. */
+ if (af6->ip6af_m->m_pkthdr.rcvif == ifp)
+ af6->ip6af_m->m_pkthdr.rcvif = NULL;
+ }
+ }
+ IP6QB_UNLOCK(bucket);
+ }
+ CURVNET_RESTORE();
}
+EVENTHANDLER_DEFINE(ifnet_departure_event, frag6_cleanup, NULL, 0);
/*
- * In RFC2460, fragment and reassembly rule do not agree with each other,
- * in terms of next header field handling in fragment header.
+ * Like in RFC2460, in RFC8200, fragment and reassembly rules do not agree with
+ * each other, in terms of next header field handling in fragment header.
* While the sender will use the same value for all of the fragmented packets,
- * receiver is suggested not to check the consistency.
+ * receiver is suggested not to check for consistency.
*
- * fragment rule (p20):
- * (2) A Fragment header containing:
- * The Next Header value that identifies the first header of
- * the Fragmentable Part of the original packet.
+ * Fragment rules (p18,p19):
+ * (2) A Fragment header containing:
+ * The Next Header value that identifies the first header
+ * after the Per-Fragment headers of the original packet.
* -> next header field is same for all fragments
*
- * reassembly rule (p21):
- * The Next Header field of the last header of the Unfragmentable
- * Part is obtained from the Next Header field of the first
+ * Reassembly rule (p20):
+ * The Next Header field of the last header of the Per-Fragment
+ * headers is obtained from the Next Header field of the first
* fragment's Fragment header.
* -> should grab it from the first fragment only
*
* The following note also contradicts with fragment rule - no one is going to
* send different fragment with different next header field.
*
- * additional note (p22):
+ * Additional note (p22) [not an error]:
* The Next Header values in the Fragment headers of different
* fragments of the same original packet may differ. Only the value
* from the Offset zero fragment packet is used for reassembly.
@@ -206,91 +361,111 @@ frag6_init(void)
* There is no explicit reason given in the RFC. Historical reason maybe?
*/
/*
- * Fragment input
+ * Fragment input.
*/
int
frag6_input(struct mbuf **mp, int *offp, int proto)
{
- struct mbuf *m = *mp, *t;
+ struct mbuf *m, *t;
struct ip6_hdr *ip6;
struct ip6_frag *ip6f;
- struct ip6q *head, *q6;
- struct ip6asfrag *af6, *ip6af, *af6dwn;
- struct in6_ifaddr *ia;
- int offset = *offp, nxt, i, next;
- int first_frag = 0;
- int fragoff, frgpartlen; /* must be larger than u_int16_t */
+ struct ip6qhead *head;
+ struct ip6q *q6;
+ struct ip6asfrag *af6, *ip6af, *af6tmp;
+ struct in6_ifaddr *ia6;
+ struct ifnet *dstifp, *srcifp;
uint32_t hashkey[(sizeof(struct in6_addr) * 2 +
sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)];
- uint32_t hash, *hashkeyp;
- struct ifnet *dstifp;
- u_int8_t ecn, ecn0;
+ uint32_t bucket, *hashkeyp;
+ int fragoff, frgpartlen; /* Must be larger than uint16_t. */
+ int nxt, offset, plen;
+ uint8_t ecn, ecn0;
+ bool only_frag;
#ifdef RSS
- struct m_tag *mtag;
struct ip6_direct_ctx *ip6dc;
+ struct m_tag *mtag;
#endif
-#if 0
- char ip6buf[INET6_ADDRSTRLEN];
-#endif
+ m = *mp;
+ offset = *offp;
+
+ M_ASSERTPKTHDR(m);
+ if (m->m_len < offset + sizeof(struct ip6_frag)) {
+ m = m_pullup(m, offset + sizeof(struct ip6_frag));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = NULL;
+ return (IPPROTO_DONE);
+ }
+ }
ip6 = mtod(m, struct ip6_hdr *);
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
- ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
-#else
- IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
- if (ip6f == NULL)
- return (IPPROTO_DONE);
-#endif
dstifp = NULL;
- /* find the destination interface of the packet. */
- ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
- if (ia != NULL) {
- dstifp = ia->ia_ifp;
- ifa_free(&ia->ia_ifa);
+ /* Find the destination interface of the packet. */
+ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
+ if (ia6 != NULL) {
+ dstifp = ia6->ia_ifp;
+ ifa_free(&ia6->ia_ifa);
}
- /* jumbo payload can't contain a fragment header */
+
+ /* Jumbo payload cannot contain a fragment header. */
if (ip6->ip6_plen == 0) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
in6_ifstat_inc(dstifp, ifs6_reass_fail);
- return IPPROTO_DONE;
+ *mp = NULL;
+ return (IPPROTO_DONE);
}
/*
- * check whether fragment packet's fragment length is
- * multiple of 8 octets.
+ * Check whether fragment packet's fragment length is a
+ * multiple of 8 octets (unless it is the last one).
* sizeof(struct ip6_frag) == 8
* sizeof(struct ip6_hdr) = 40
*/
+ ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
(((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offsetof(struct ip6_hdr, ip6_plen));
in6_ifstat_inc(dstifp, ifs6_reass_fail);
- return IPPROTO_DONE;
+ *mp = NULL;
+ return (IPPROTO_DONE);
}
IP6STAT_INC(ip6s_fragments);
in6_ifstat_inc(dstifp, ifs6_reass_reqd);
- /* offset now points to data portion */
- offset += sizeof(struct ip6_frag);
-
/*
- * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
- * upfront, unrelated to any reassembly. Just skip the fragment header.
+ * Handle "atomic" fragments (offset and m bit set to 0) upfront,
+ * unrelated to any reassembly. We need to remove the frag hdr
+ * which is ugly.
+ * See RFC 6946 and section 4.5 of RFC 8200.
*/
if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
- /* XXX-BZ we want dedicated counters for this. */
- IP6STAT_INC(ip6s_reassembled);
+ IP6STAT_INC(ip6s_atomicfrags);
+ nxt = ip6f->ip6f_nxt;
+ /*
+ * Set nxt(-hdr field value) to the original value.
+ * We cannot just set ip6->ip6_nxt as there might be
+ * an unfragmentable part with extension headers and
+ * we must update the last one.
+ */
+ m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
+ (caddr_t)&nxt);
+ ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) -
+ sizeof(struct ip6_frag));
+ if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0)
+ goto dropfrag2;
+ m->m_pkthdr.len -= sizeof(struct ip6_frag);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
- *offp = offset;
- m->m_flags |= M_FRAGMENTED;
- return (ip6f->ip6f_nxt);
+ *mp = m;
+ return (nxt);
}
+ /* Offset now points to data portion. */
+ offset += sizeof(struct ip6_frag);
+
/* Get fragment length and discard 0-byte fragments. */
frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
if (frgpartlen == 0) {
@@ -298,31 +473,48 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
offsetof(struct ip6_hdr, ip6_plen));
in6_ifstat_inc(dstifp, ifs6_reass_fail);
IP6STAT_INC(ip6s_fragdropped);
- return IPPROTO_DONE;
+ *mp = NULL;
+ return (IPPROTO_DONE);
}
- hashkeyp = hashkey;
- memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
- hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
- memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
- hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
- *hashkeyp = ip6f->ip6f_ident;
- hash = jenkins_hash32(hashkey, nitems(hashkey), V_ip6q_hashseed);
- hash &= IP6REASS_HMASK;
- head = IP6Q_HEAD(hash);
- IP6Q_LOCK(hash);
-
/*
- * Enforce upper bound on number of fragments.
+ * Enforce upper bound on number of fragments for the entire system.
* If maxfrag is 0, never accept fragments.
* If maxfrag is -1, accept all fragments without limitation.
*/
if (ip6_maxfrags < 0)
;
else if (atomic_load_int(&frag6_nfrags) >= (u_int)ip6_maxfrags)
- goto dropfrag;
+ goto dropfrag2;
+
+ /*
+ * Validate that a full header chain to the ULP is present in the
+ * packet containing the first fragment as per RFC RFC7112 and
+ * RFC 8200 pages 18,19:
+ * The first fragment packet is composed of:
+ * (3) Extension headers, if any, and the Upper-Layer header. These
+ * headers must be in the first fragment. ...
+ */
+ fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
+ /* XXX TODO. thj has D16851 open for this. */
+ /* Send ICMPv6 4,3 in case of violation. */
- for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next)
+ /* Store receive network interface pointer for later. */
+ srcifp = m->m_pkthdr.rcvif;
+
+ /* Generate a hash value for fragment bucket selection. */
+ hashkeyp = hashkey;
+ memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
+ hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
+ memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
+ hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
+ *hashkeyp = ip6f->ip6f_ident;
+ bucket = jenkins_hash32(hashkey, nitems(hashkey), V_ip6qb_hashseed);
+ bucket &= IP6REASS_HMASK;
+ IP6QB_LOCK(bucket);
+ head = IP6QB_HEAD(bucket);
+
+ TAILQ_FOREACH(q6, head, ip6q_tq)
if (ip6f->ip6f_ident == q6->ip6q_ident &&
IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
@@ -332,11 +524,11 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
)
break;
- if (q6 == head) {
- /*
- * the first fragment to arrive, create a reassembly queue.
- */
- first_frag = 1;
+ only_frag = false;
+ if (q6 == NULL) {
+
+ /* A first fragment to arrive creates a reassembly queue. */
+ only_frag = true;
/*
* Enforce upper bound on number of fragmented packets
@@ -347,30 +539,27 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
*/
if (V_ip6_maxfragpackets < 0)
;
- else if (V_ip6q[hash].count >= V_ip6_maxfragbucketsize ||
+ else if (V_ip6qb[bucket].count >= V_ip6_maxfragbucketsize ||
atomic_load_int(&V_frag6_nfragpackets) >=
(u_int)V_ip6_maxfragpackets)
goto dropfrag;
- atomic_add_int(&V_frag6_nfragpackets, 1);
- q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
- M_NOWAIT);
+
+	/* Allocate IPv6 fragment packet queue entry. */
+ q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FRAG6,
+ M_NOWAIT | M_ZERO);
if (q6 == NULL)
goto dropfrag;
- bzero(q6, sizeof(*q6));
#ifdef MAC
if (mac_ip6q_init(q6, M_NOWAIT) != 0) {
- free(q6, M_FTABLE);
+ free(q6, M_FRAG6);
goto dropfrag;
}
mac_ip6q_create(m, q6);
#endif
- frag6_insque_head(q6, head, hash);
+ atomic_add_int(&V_frag6_nfragpackets, 1);
- /* ip6q_nxt will be filled afterwards, from 1st fragment */
- q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
-#ifdef notyet
- q6->ip6q_nxtp = (u_char *)nxtp;
-#endif
+ /* ip6q_nxt will be filled afterwards, from 1st fragment. */
+ TAILQ_INIT(&q6->ip6q_frags);
q6->ip6q_ident = ip6f->ip6f_ident;
q6->ip6q_ttl = IPV6_FRAGTTL;
q6->ip6q_src = ip6->ip6_src;
@@ -379,18 +568,24 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
(ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
- q6->ip6q_nfrag = 0;
+	/* Add the fragmented packet to the bucket. */
+ TAILQ_INSERT_HEAD(head, q6, ip6q_tq);
+ V_ip6qb[bucket].count++;
}
/*
- * If it's the 1st fragment, record the length of the
+ * If it is the 1st fragment, record the length of the
* unfragmentable part and the next header of the fragment header.
+	 * Assume the first 1st fragment to arrive will be correct.
+ * We do not have any duplicate checks here yet so another packet
+ * with fragoff == 0 could come and overwrite the ip6q_unfrglen
+ * and worse, the next header, at any time.
*/
- fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
- if (fragoff == 0) {
+ if (fragoff == 0 && q6->ip6q_unfrglen == -1) {
q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
sizeof(struct ip6_frag);
q6->ip6q_nxt = ip6f->ip6f_nxt;
+ /* XXX ECN? */
}
/*
@@ -401,39 +596,66 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
if (q6->ip6q_unfrglen >= 0) {
/* The 1st fragment has already arrived. */
if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
+ if (only_frag) {
+ TAILQ_REMOVE(head, q6, ip6q_tq);
+ V_ip6qb[bucket].count--;
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
+#ifdef MAC
+ mac_ip6q_destroy(q6);
+#endif
+ free(q6, M_FRAG6);
+ }
+ IP6QB_UNLOCK(bucket);
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offset - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
- IP6Q_UNLOCK(hash);
+ *mp = NULL;
return (IPPROTO_DONE);
}
} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
+ if (only_frag) {
+ TAILQ_REMOVE(head, q6, ip6q_tq);
+ V_ip6qb[bucket].count--;
+ atomic_subtract_int(&V_frag6_nfragpackets, 1);
+#ifdef MAC
+ mac_ip6q_destroy(q6);
+#endif
+ free(q6, M_FRAG6);
+ }
+ IP6QB_UNLOCK(bucket);
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
offset - sizeof(struct ip6_frag) +
offsetof(struct ip6_frag, ip6f_offlg));
- IP6Q_UNLOCK(hash);
+ *mp = NULL;
return (IPPROTO_DONE);
}
+
/*
- * If it's the first fragment, do the above check for each
+ * If it is the first fragment, do the above check for each
* fragment already stored in the reassembly queue.
*/
- if (fragoff == 0) {
- for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
- af6 = af6dwn) {
- af6dwn = af6->ip6af_down;
-
- if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
- IPV6_MAXPACKET) {
- struct mbuf *merr = IP6_REASS_MBUF(af6);
+ if (fragoff == 0 && !only_frag) {
+ TAILQ_FOREACH_SAFE(af6, &q6->ip6q_frags, ip6af_tq, af6tmp) {
+
+ if (q6->ip6q_unfrglen + af6->ip6af_off +
+ af6->ip6af_frglen > IPV6_MAXPACKET) {
struct ip6_hdr *ip6err;
- int erroff = af6->ip6af_offset;
+ struct mbuf *merr;
+ int erroff;
+
+ merr = af6->ip6af_m;
+ erroff = af6->ip6af_offset;
- /* dequeue the fragment. */
- frag6_deq(af6, hash);
- free(af6, M_FTABLE);
+ /* Dequeue the fragment. */
+ TAILQ_REMOVE(&q6->ip6q_frags, af6, ip6af_tq);
+ q6->ip6q_nfrag--;
+ atomic_subtract_int(&frag6_nfrags, 1);
+ free(af6, M_FRAG6);
- /* adjust pointer. */
+ /* Set a valid receive interface pointer. */
+ merr->m_pkthdr.rcvif = srcifp;
+
+ /* Adjust pointer. */
ip6err = mtod(merr, struct ip6_hdr *);
/*
@@ -451,239 +673,182 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
}
}
- ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE,
- M_NOWAIT);
+	/* Allocate an IPv6 fragment queue entry for this fragmented part. */
+ ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FRAG6,
+ M_NOWAIT | M_ZERO);
if (ip6af == NULL)
goto dropfrag;
- bzero(ip6af, sizeof(*ip6af));
- ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
+ ip6af->ip6af_mff = (ip6f->ip6f_offlg & IP6F_MORE_FRAG) ? true : false;
ip6af->ip6af_off = fragoff;
ip6af->ip6af_frglen = frgpartlen;
ip6af->ip6af_offset = offset;
- IP6_REASS_MBUF(ip6af) = m;
+ ip6af->ip6af_m = m;
- if (first_frag) {
- af6 = (struct ip6asfrag *)q6;
- goto insert;
+ if (only_frag) {
+ /*
+ * Do a manual insert rather than a hard-to-understand cast
+ * to a different type relying on data structure order to work.
+ */
+ TAILQ_INSERT_HEAD(&q6->ip6q_frags, ip6af, ip6af_tq);
+ goto postinsert;
}
+	/* Do duplicate, condition, and boundary checks. */
/*
* Handle ECN by comparing this segment with the first one;
* if CE is set, do not lose CE.
- * drop if CE and not-ECT are mixed for the same packet.
+ * Drop if CE and not-ECT are mixed for the same packet.
*/
ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
ecn0 = q6->ip6q_ecn;
if (ecn == IPTOS_ECN_CE) {
if (ecn0 == IPTOS_ECN_NOTECT) {
- free(ip6af, M_FTABLE);
+ free(ip6af, M_FRAG6);
goto dropfrag;
}
if (ecn0 != IPTOS_ECN_CE)
q6->ip6q_ecn = IPTOS_ECN_CE;
}
if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
- free(ip6af, M_FTABLE);
+ free(ip6af, M_FRAG6);
goto dropfrag;
}
- /*
- * Find a segment which begins after this one does.
- */
- for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
- af6 = af6->ip6af_down)
+ /* Find a fragmented part which begins after this one does. */
+ TAILQ_FOREACH(af6, &q6->ip6q_frags, ip6af_tq)
if (af6->ip6af_off > ip6af->ip6af_off)
break;
-#if 0
- /*
- * If there is a preceding segment, it may provide some of
- * our data already. If so, drop the data from the incoming
- * segment. If it provides all of our data, drop us.
- */
- if (af6->ip6af_up != (struct ip6asfrag *)q6) {
- i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
- - ip6af->ip6af_off;
- if (i > 0) {
- if (i >= ip6af->ip6af_frglen)
- goto dropfrag;
- m_adj(IP6_REASS_MBUF(ip6af), i);
- ip6af->ip6af_off += i;
- ip6af->ip6af_frglen -= i;
- }
- }
-
- /*
- * While we overlap succeeding segments trim them or,
- * if they are completely covered, dequeue them.
- */
- while (af6 != (struct ip6asfrag *)q6 &&
- ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
- i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
- if (i < af6->ip6af_frglen) {
- af6->ip6af_frglen -= i;
- af6->ip6af_off += i;
- m_adj(IP6_REASS_MBUF(af6), i);
- break;
- }
- af6 = af6->ip6af_down;
- m_freem(IP6_REASS_MBUF(af6->ip6af_up));
- frag6_deq(af6->ip6af_up, hash);
- }
-#else
/*
	 * If the incoming fragment overlaps some existing fragments in
- * the reassembly queue, drop it, since it is dangerous to override
- * existing fragments from a security point of view.
- * We don't know which fragment is the bad guy - here we trust
- * fragment that came in earlier, with no real reason.
- *
- * Note: due to changes after disabling this part, mbuf passed to
- * m_adj() below now does not meet the requirement.
+ * the reassembly queue, drop both the new fragment and the
+ * entire reassembly queue. However, if the new fragment
+ * is an exact duplicate of an existing fragment, only silently
+	 * drop the new fragment and leave the fragmentation queue
+ * unchanged, as allowed by the RFC. (RFC 8200, 4.5)
*/
- if (af6->ip6af_up != (struct ip6asfrag *)q6) {
- i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
- - ip6af->ip6af_off;
- if (i > 0) {
-#if 0 /* suppress the noisy log */
- log(LOG_ERR, "%d bytes of a fragment from %s "
- "overlaps the previous fragment\n",
- i, ip6_sprintf(ip6buf, &q6->ip6q_src));
-#endif
- free(ip6af, M_FTABLE);
+ if (af6 != NULL)
+ af6tmp = TAILQ_PREV(af6, ip6fraghead, ip6af_tq);
+ else
+ af6tmp = TAILQ_LAST(&q6->ip6q_frags, ip6fraghead);
+ if (af6tmp != NULL) {
+ if (af6tmp->ip6af_off + af6tmp->ip6af_frglen -
+ ip6af->ip6af_off > 0) {
+ if (af6tmp->ip6af_off != ip6af->ip6af_off ||
+ af6tmp->ip6af_frglen != ip6af->ip6af_frglen)
+ frag6_freef(q6, bucket);
+ free(ip6af, M_FRAG6);
goto dropfrag;
}
}
- if (af6 != (struct ip6asfrag *)q6) {
- i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
- if (i > 0) {
-#if 0 /* suppress the noisy log */
- log(LOG_ERR, "%d bytes of a fragment from %s "
- "overlaps the succeeding fragment",
- i, ip6_sprintf(ip6buf, &q6->ip6q_src));
-#endif
- free(ip6af, M_FTABLE);
+ if (af6 != NULL) {
+ if (ip6af->ip6af_off + ip6af->ip6af_frglen -
+ af6->ip6af_off > 0) {
+ if (af6->ip6af_off != ip6af->ip6af_off ||
+ af6->ip6af_frglen != ip6af->ip6af_frglen)
+ frag6_freef(q6, bucket);
+ free(ip6af, M_FRAG6);
goto dropfrag;
}
}
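The two checks above compare the newcomer against its would-be predecessor and successor in the offset-sorted queue. The standalone sketch below restates the resulting policy using simplified, host-order fragment records (hypothetical types, not the kernel structures): any byte of overlap that is not an exact duplicate discards both the new fragment and the whole queue, while an exact duplicate silently discards only the new copy and leaves the queue alone.

    #include <stdbool.h>
    #include <stddef.h>

    struct frag_rec {
        int off;    /* fragment offset within the payload, bytes */
        int len;    /* fragment length, bytes                    */
    };

    enum overlap_action { KEEP, DROP_NEW, DROP_QUEUE_AND_NEW };

    static enum overlap_action
    overlap_check(const struct frag_rec *have, const struct frag_rec *arriving)
    {
        if (have == NULL)
            return (KEEP);
        /* Disjoint ranges: nothing to decide. */
        if (have->off + have->len <= arriving->off ||
            arriving->off + arriving->len <= have->off)
            return (KEEP);
        /* Exact duplicate: silently drop the new copy, queue unchanged. */
        if (have->off == arriving->off && have->len == arriving->len)
            return (DROP_NEW);
        /* Partial overlap: distrust both and start over. */
        return (DROP_QUEUE_AND_NEW);
    }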
-#endif
-insert:
#ifdef MAC
- if (!first_frag)
- mac_ip6q_update(m, q6);
+ mac_ip6q_update(m, q6);
#endif
/*
- * Stick new segment in its place;
- * check for complete reassembly.
- * If not complete, check fragment limit.
- * Move to front of packet queue, as we are
- * the most recently active fragmented packet.
+ * Stick new segment in its place; check for complete reassembly.
+ * If not complete, check fragment limit. Move to front of packet
+ * queue, as we are the most recently active fragmented packet.
*/
- frag6_enq(ip6af, af6->ip6af_up, hash);
+ if (af6 != NULL)
+ TAILQ_INSERT_BEFORE(af6, ip6af, ip6af_tq);
+ else
+ TAILQ_INSERT_TAIL(&q6->ip6q_frags, ip6af, ip6af_tq);
+postinsert:
atomic_add_int(&frag6_nfrags, 1);
q6->ip6q_nfrag++;
-#if 0 /* xxx */
- if (q6 != head->ip6q_next) {
- frag6_remque(q6, hash);
- frag6_insque_head(q6, head, hash);
- }
-#endif
- next = 0;
- for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
- af6 = af6->ip6af_down) {
- if (af6->ip6af_off != next) {
+
+ plen = 0;
+ TAILQ_FOREACH(af6, &q6->ip6q_frags, ip6af_tq) {
+ if (af6->ip6af_off != plen) {
if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
- IP6STAT_INC(ip6s_fragdropped);
- frag6_freef(q6, hash);
+ IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag);
+ frag6_freef(q6, bucket);
}
- IP6Q_UNLOCK(hash);
- return IPPROTO_DONE;
+ IP6QB_UNLOCK(bucket);
+ *mp = NULL;
+ return (IPPROTO_DONE);
}
- next += af6->ip6af_frglen;
+ plen += af6->ip6af_frglen;
}
- if (af6->ip6af_up->ip6af_mff) {
+ af6 = TAILQ_LAST(&q6->ip6q_frags, ip6fraghead);
+ if (af6->ip6af_mff) {
if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
- IP6STAT_INC(ip6s_fragdropped);
- frag6_freef(q6, hash);
+ IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag);
+ frag6_freef(q6, bucket);
}
- IP6Q_UNLOCK(hash);
- return IPPROTO_DONE;
+ IP6QB_UNLOCK(bucket);
+ *mp = NULL;
+ return (IPPROTO_DONE);
}
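After insertion the queue is walked to decide whether the datagram is complete: fragments are kept sorted by offset, so completeness means every fragment begins exactly where the previous one ended and the final fragment has the M (more-fragments) bit clear. A hedged sketch of that test over a plain array follows; the types are simplified stand-ins, not the kernel's ip6asfrag records.

    #include <stdbool.h>
    #include <stddef.h>

    struct frag_piece {
        int  off;   /* offset within the reassembled payload */
        int  len;   /* payload bytes in this fragment        */
        bool mff;   /* more-fragments flag                   */
    };

    static bool
    reassembly_complete(const struct frag_piece *frags, size_t n, int *plen)
    {
        int next = 0;

        for (size_t i = 0; i < n; i++) {
            if (frags[i].off != next)
                return (false);     /* hole before this fragment */
            next += frags[i].len;
        }
        if (n == 0 || frags[n - 1].mff)
            return (false);         /* tail still missing */
        *plen = next;
        return (true);
    }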
- /*
- * Reassembly is complete; concatenate fragments.
- */
- ip6af = q6->ip6q_down;
- t = m = IP6_REASS_MBUF(ip6af);
- af6 = ip6af->ip6af_down;
- frag6_deq(ip6af, hash);
- while (af6 != (struct ip6asfrag *)q6) {
+ /* Reassembly is complete; concatenate fragments. */
+ ip6af = TAILQ_FIRST(&q6->ip6q_frags);
+ t = m = ip6af->ip6af_m;
+ TAILQ_REMOVE(&q6->ip6q_frags, ip6af, ip6af_tq);
+ while ((af6 = TAILQ_FIRST(&q6->ip6q_frags)) != NULL) {
m->m_pkthdr.csum_flags &=
- IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
+ af6->ip6af_m->m_pkthdr.csum_flags;
m->m_pkthdr.csum_data +=
- IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;
-
- af6dwn = af6->ip6af_down;
- frag6_deq(af6, hash);
- while (t->m_next)
- t = t->m_next;
- m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
- m_demote_pkthdr(IP6_REASS_MBUF(af6));
- m_cat(t, IP6_REASS_MBUF(af6));
- free(af6, M_FTABLE);
- af6 = af6dwn;
+ af6->ip6af_m->m_pkthdr.csum_data;
+
+ TAILQ_REMOVE(&q6->ip6q_frags, af6, ip6af_tq);
+ t = m_last(t);
+ m_adj(af6->ip6af_m, af6->ip6af_offset);
+ m_demote_pkthdr(af6->ip6af_m);
+ m_cat(t, af6->ip6af_m);
+ free(af6, M_FRAG6);
}
while (m->m_pkthdr.csum_data & 0xffff0000)
m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
(m->m_pkthdr.csum_data >> 16);
- /* adjust offset to point where the original next header starts */
+ /* Adjust offset to point where the original next header starts. */
offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
- free(ip6af, M_FTABLE);
+ free(ip6af, M_FRAG6);
ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
+ ip6->ip6_plen = htons((u_short)plen + offset - sizeof(struct ip6_hdr));
if (q6->ip6q_ecn == IPTOS_ECN_CE)
ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
nxt = q6->ip6q_nxt;
-#ifdef notyet
- *q6->ip6q_nxtp = (u_char)(nxt & 0xff);
-#endif
- if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
- frag6_remque(q6, hash);
- atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
-#ifdef MAC
- mac_ip6q_destroy(q6);
-#endif
- free(q6, M_FTABLE);
- atomic_subtract_int(&V_frag6_nfragpackets, 1);
+ TAILQ_REMOVE(head, q6, ip6q_tq);
+ V_ip6qb[bucket].count--;
+ atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
- goto dropfrag;
- }
+ ip6_deletefraghdr(m, offset, M_NOWAIT);
- /*
- * Store NXT to the original.
- */
+ /* Set nxt(-hdr field value) to the original value. */
m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
(caddr_t)&nxt);
- frag6_remque(q6, hash);
- atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
mac_ip6q_reassemble(q6, m);
mac_ip6q_destroy(q6);
#endif
- free(q6, M_FTABLE);
+ free(q6, M_FRAG6);
atomic_subtract_int(&V_frag6_nfragpackets, 1);
if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
- int plen = 0;
+
+ plen = 0;
for (t = m; t; t = t->m_next)
plen += t->m_len;
m->m_pkthdr.len = plen;
+ /* Set a valid receive interface pointer. */
+ m->m_pkthdr.rcvif = srcifp;
}
#ifdef RSS
@@ -699,211 +864,94 @@ insert:
m_tag_prepend(m, mtag);
#endif
- IP6Q_UNLOCK(hash);
+ IP6QB_UNLOCK(bucket);
IP6STAT_INC(ip6s_reassembled);
in6_ifstat_inc(dstifp, ifs6_reass_ok);
#ifdef RSS
- /*
- * Queue/dispatch for reprocessing.
- */
+ /* Queue/dispatch for reprocessing. */
netisr_dispatch(NETISR_IPV6_DIRECT, m);
- return IPPROTO_DONE;
+ *mp = NULL;
+ return (IPPROTO_DONE);
#endif
- /*
- * Tell launch routine the next header
- */
-
+ /* Tell launch routine the next header. */
*mp = m;
*offp = offset;
- return nxt;
+ return (nxt);
- dropfrag:
- IP6Q_UNLOCK(hash);
+dropfrag:
+ IP6QB_UNLOCK(bucket);
+dropfrag2:
in6_ifstat_inc(dstifp, ifs6_reass_fail);
IP6STAT_INC(ip6s_fragdropped);
m_freem(m);
- return IPPROTO_DONE;
-}
-
-/*
- * Free a fragment reassembly header and all
- * associated datagrams.
- */
-static void
-frag6_freef(struct ip6q *q6, uint32_t bucket)
-{
- struct ip6asfrag *af6, *down6;
-
- IP6Q_LOCK_ASSERT(bucket);
-
- for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
- af6 = down6) {
- struct mbuf *m = IP6_REASS_MBUF(af6);
-
- down6 = af6->ip6af_down;
- frag6_deq(af6, bucket);
-
- /*
- * Return ICMP time exceeded error for the 1st fragment.
- * Just free other fragments.
- */
- if (af6->ip6af_off == 0) {
- struct ip6_hdr *ip6;
-
- /* adjust pointer */
- ip6 = mtod(m, struct ip6_hdr *);
-
- /* restore source and destination addresses */
- ip6->ip6_src = q6->ip6q_src;
- ip6->ip6_dst = q6->ip6q_dst;
-
- icmp6_error(m, ICMP6_TIME_EXCEEDED,
- ICMP6_TIME_EXCEED_REASSEMBLY, 0);
- } else
- m_freem(m);
- free(af6, M_FTABLE);
- }
- frag6_remque(q6, bucket);
- atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
-#ifdef MAC
- mac_ip6q_destroy(q6);
-#endif
- free(q6, M_FTABLE);
- atomic_subtract_int(&V_frag6_nfragpackets, 1);
-}
-
-/*
- * Put an ip fragment on a reassembly chain.
- * Like insque, but pointers in middle of structure.
- */
-static void
-frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
- uint32_t bucket __unused)
-{
-
- IP6Q_LOCK_ASSERT(bucket);
-
- af6->ip6af_up = up6;
- af6->ip6af_down = up6->ip6af_down;
- up6->ip6af_down->ip6af_up = af6;
- up6->ip6af_down = af6;
-}
-
-/*
- * To frag6_enq as remque is to insque.
- */
-static void
-frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
-{
-
- IP6Q_LOCK_ASSERT(bucket);
-
- af6->ip6af_up->ip6af_down = af6->ip6af_down;
- af6->ip6af_down->ip6af_up = af6->ip6af_up;
-}
-
-static void
-frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
-{
-
- IP6Q_LOCK_ASSERT(bucket);
- KASSERT(IP6Q_HEAD(bucket) == old,
- ("%s: attempt to insert at head of wrong bucket"
- " (bucket=%u, old=%p)", __func__, bucket, old));
-
- new->ip6q_prev = old;
- new->ip6q_next = old->ip6q_next;
- old->ip6q_next->ip6q_prev= new;
- old->ip6q_next = new;
- V_ip6q[bucket].count++;
-}
-
-static void
-frag6_remque(struct ip6q *p6, uint32_t bucket)
-{
-
- IP6Q_LOCK_ASSERT(bucket);
-
- p6->ip6q_prev->ip6q_next = p6->ip6q_next;
- p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
- V_ip6q[bucket].count--;
+ *mp = NULL;
+ return (IPPROTO_DONE);
}
/*
* IPv6 reassembling timer processing;
- * if a timer expires on a reassembly
- * queue, discard it.
+ * if a timer expires on a reassembly queue, discard it.
*/
void
frag6_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- struct ip6q *head, *q6;
- int i;
+ struct ip6qhead *head;
+ struct ip6q *q6, *q6tmp;
+ uint32_t bucket;
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- for (i = 0; i < IP6REASS_NHASH; i++) {
- IP6Q_LOCK(i);
- head = IP6Q_HEAD(i);
- q6 = head->ip6q_next;
- if (q6 == NULL) {
- /*
- * XXXJTL: This should never happen. This
- * should turn into an assertion.
- */
- IP6Q_UNLOCK(i);
- continue;
- }
- while (q6 != head) {
- --q6->ip6q_ttl;
- q6 = q6->ip6q_next;
- if (q6->ip6q_prev->ip6q_ttl == 0) {
- IP6STAT_INC(ip6s_fragtimeout);
+ for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
+ IP6QB_LOCK(bucket);
+ head = IP6QB_HEAD(bucket);
+ TAILQ_FOREACH_SAFE(q6, head, ip6q_tq, q6tmp)
+ if (--q6->ip6q_ttl == 0) {
+ IP6STAT_ADD(ip6s_fragtimeout,
+ q6->ip6q_nfrag);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(q6->ip6q_prev, i);
+ frag6_freef(q6, bucket);
}
- }
/*
* If we are over the maximum number of fragments
* (due to the limit being lowered), drain off
* enough to get down to the new limit.
* Note that we drain all reassembly queues if
* maxfragpackets is 0 (fragmentation is disabled),
- * and don't enforce a limit when maxfragpackets
+ * and do not enforce a limit when maxfragpackets
* is negative.
*/
while ((V_ip6_maxfragpackets == 0 ||
(V_ip6_maxfragpackets > 0 &&
- V_ip6q[i].count > V_ip6_maxfragbucketsize)) &&
- head->ip6q_prev != head) {
- IP6STAT_INC(ip6s_fragoverflow);
+ V_ip6qb[bucket].count > V_ip6_maxfragbucketsize)) &&
+ (q6 = TAILQ_LAST(head, ip6qhead)) != NULL) {
+ IP6STAT_ADD(ip6s_fragoverflow, q6->ip6q_nfrag);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(head->ip6q_prev, i);
+ frag6_freef(q6, bucket);
}
- IP6Q_UNLOCK(i);
+ IP6QB_UNLOCK(bucket);
}
/*
* If we are still over the maximum number of fragmented
* packets, drain off enough to get down to the new limit.
*/
- i = 0;
+ bucket = 0;
while (V_ip6_maxfragpackets >= 0 &&
atomic_load_int(&V_frag6_nfragpackets) >
(u_int)V_ip6_maxfragpackets) {
- IP6Q_LOCK(i);
- head = IP6Q_HEAD(i);
- if (head->ip6q_prev != head) {
- IP6STAT_INC(ip6s_fragoverflow);
+ IP6QB_LOCK(bucket);
+ q6 = TAILQ_LAST(IP6QB_HEAD(bucket), ip6qhead);
+ if (q6 != NULL) {
+ IP6STAT_ADD(ip6s_fragoverflow, q6->ip6q_nfrag);
/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(head->ip6q_prev, i);
+ frag6_freef(q6, bucket);
}
- IP6Q_UNLOCK(i);
- i = (i + 1) % IP6REASS_NHASH;
+ IP6QB_UNLOCK(bucket);
+ bucket = (bucket + 1) % IP6REASS_NHASH;
}
CURVNET_RESTORE();
}
@@ -911,55 +959,102 @@ frag6_slowtimo(void)
}
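frag6_slowtimo() above decrements each queue's TTL once per slow-timeout tick, discards queues that hit zero, and then trims buckets that exceed the configured limits. The sketch below models only the TTL sweep for a single bucket, using <sys/queue.h> macros and heap-allocated records as stand-ins; it is illustrative, not the kernel routine, and bucket_slowtimo()/frag_queue are assumed names.

    #include <stdlib.h>
    #include <sys/queue.h>

    struct frag_queue {
        TAILQ_ENTRY(frag_queue) link;
        int ttl;                /* remaining slow-timeout ticks */
    };
    TAILQ_HEAD(fq_head, frag_queue);

    /* One tick: age every queue in the bucket, dropping expired ones. */
    static void
    bucket_slowtimo(struct fq_head *head)
    {
        struct frag_queue *q, *tmp;

        for (q = TAILQ_FIRST(head); q != NULL; q = tmp) {
            tmp = TAILQ_NEXT(q, link);
            if (--q->ttl == 0) {
                TAILQ_REMOVE(head, q, link);
                free(q);        /* the real code also frees the fragments */
            }
        }
    }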
/*
+ * Eventhandler to adjust limits in case nmbclusters changes.
+ */
+static void
+frag6_change(void *tag)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+
+ ip6_maxfrags = IP6_MAXFRAGS;
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
+ frag6_set_bucketsize();
+ CURVNET_RESTORE();
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+}
+
+/*
+ * Initialise reassembly queue and fragment identifier.
+ */
+void
+frag6_init(void)
+{
+ uint32_t bucket;
+
+ V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
+ frag6_set_bucketsize();
+ for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
+ TAILQ_INIT(IP6QB_HEAD(bucket));
+ mtx_init(&V_ip6qb[bucket].lock, "ip6qb", NULL, MTX_DEF);
+ V_ip6qb[bucket].count = 0;
+ }
+ V_ip6qb_hashseed = arc4random();
+ V_ip6_maxfragsperpacket = 64;
+#ifdef VIMAGE
+ V_frag6_on = true;
+#endif
+ if (!IS_DEFAULT_VNET(curvnet))
+ return;
+
+ ip6_maxfrags = IP6_MAXFRAGS;
+ EVENTHANDLER_REGISTER(nmbclusters_change,
+ frag6_change, NULL, EVENTHANDLER_PRI_ANY);
+}
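frag6_init() above gives every bucket its own TAILQ head, mutex, and counter, and seeds the bucket hash once. A user-space sketch of the same per-bucket layout follows, with pthread mutexes standing in for mtx(9), random() for arc4random(), and an assumed bucket count; all names here are illustrative.

    #include <pthread.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    #define NBUCKETS 64                 /* assumed; kernel uses IP6REASS_NHASH */

    struct frag_queue;                  /* one per (src, dst, ident) tuple */
    TAILQ_HEAD(fq_head, frag_queue);

    struct frag_bucket {
        struct fq_head  head;
        pthread_mutex_t lock;
        int             count;
    };

    static struct frag_bucket buckets[NBUCKETS];
    static uint32_t hashseed;

    static void
    frag_buckets_init(void)
    {
        for (int i = 0; i < NBUCKETS; i++) {
            TAILQ_INIT(&buckets[i].head);
            pthread_mutex_init(&buckets[i].lock, NULL);
            buckets[i].count = 0;
        }
        hashseed = (uint32_t)random();  /* the kernel uses arc4random() */
    }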
+
+/*
* Drain off all datagram fragments.
*/
+static void
+frag6_drain_one(void)
+{
+ struct ip6q *q6;
+ uint32_t bucket;
+
+ for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
+ IP6QB_LOCK(bucket);
+ while ((q6 = TAILQ_FIRST(IP6QB_HEAD(bucket))) != NULL) {
+ IP6STAT_INC(ip6s_fragdropped);
+ /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
+ frag6_freef(q6, bucket);
+ }
+ IP6QB_UNLOCK(bucket);
+ }
+}
+
void
frag6_drain(void)
{
VNET_ITERATOR_DECL(vnet_iter);
- struct ip6q *head;
- int i;
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- for (i = 0; i < IP6REASS_NHASH; i++) {
- if (IP6Q_TRYLOCK(i) == 0)
- continue;
- head = IP6Q_HEAD(i);
- while (head->ip6q_next != head) {
- IP6STAT_INC(ip6s_fragdropped);
- /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
- frag6_freef(head->ip6q_next, i);
- }
- IP6Q_UNLOCK(i);
- }
+ frag6_drain_one();
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
}
-int
-ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
+#ifdef VIMAGE
+/*
+ * Clear up IPv6 reassembly structures.
+ */
+void
+frag6_destroy(void)
{
- struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
- struct mbuf *t;
-
- /* Delete frag6 header. */
- if (m->m_len >= offset + sizeof(struct ip6_frag)) {
- /* This is the only possible case with !PULLDOWN_TEST. */
- bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
- offset);
- m->m_data += sizeof(struct ip6_frag);
- m->m_len -= sizeof(struct ip6_frag);
- } else {
- /* This comes with no copy if the boundary is on cluster. */
- if ((t = m_split(m, offset, wait)) == NULL)
- return (ENOMEM);
- m_adj(t, sizeof(struct ip6_frag));
- m_cat(m, t);
+ uint32_t bucket;
+
+ frag6_drain_one();
+ V_frag6_on = false;
+ for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
+ KASSERT(V_ip6qb[bucket].count == 0,
+ ("%s: V_ip6qb[%d] (%p) count not 0 (%d)", __func__,
+ bucket, &V_ip6qb[bucket], V_ip6qb[bucket].count));
+ mtx_destroy(&V_ip6qb[bucket].lock);
}
-
- m->m_flags |= M_FRAGMENTED;
- return (0);
}
+#endif
diff --git a/freebsd/sys/netinet6/icmp6.c b/freebsd/sys/netinet6/icmp6.c
index 6dd25e98..293ff85f 100644
--- a/freebsd/sys/netinet6/icmp6.c
+++ b/freebsd/sys/netinet6/icmp6.c
@@ -234,16 +234,13 @@ icmp6_error2(struct mbuf *m, int type, int code, int param,
if (ifp == NULL)
return;
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
-#else
if (m->m_len < sizeof(struct ip6_hdr)) {
m = m_pullup(m, sizeof(struct ip6_hdr));
- if (m == NULL)
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
return;
+ }
}
-#endif
-
ip6 = mtod(m, struct ip6_hdr *);
if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0)
@@ -278,15 +275,13 @@ icmp6_error(struct mbuf *m, int type, int code, int param)
}
#endif
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
-#else
if (m->m_len < sizeof(struct ip6_hdr)) {
m = m_pullup(m, sizeof(struct ip6_hdr));
- if (m == NULL)
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
return;
+ }
}
-#endif
oip6 = mtod(m, struct ip6_hdr *);
/*
@@ -324,17 +319,16 @@ icmp6_error(struct mbuf *m, int type, int code, int param)
if (off >= 0 && nxt == IPPROTO_ICMPV6) {
struct icmp6_hdr *icp;
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), );
- icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
-#else
- IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
- sizeof(*icp));
- if (icp == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return;
+ if (m->m_len < off + sizeof(struct icmp6_hdr)) {
+ m = m_pullup(m, off + sizeof(struct icmp6_hdr));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ return;
+ }
}
-#endif
+ oip6 = mtod(m, struct ip6_hdr *);
+ icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
+
if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
icp->icmp6_type == ND_REDIRECT) {
/*
@@ -351,8 +345,6 @@ icmp6_error(struct mbuf *m, int type, int code, int param)
/* non-ICMPv6 - send the error */
}
- oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */
-
/* Finally, do rate limitation check. */
if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
ICMP6STAT_INC(icp6s_toofreq);
@@ -403,35 +395,38 @@ icmp6_error(struct mbuf *m, int type, int code, int param)
int
icmp6_input(struct mbuf **mp, int *offp, int proto)
{
- struct mbuf *m = *mp, *n;
+ struct mbuf *m, *n;
struct ifnet *ifp;
struct ip6_hdr *ip6, *nip6;
struct icmp6_hdr *icmp6, *nicmp6;
- int off = *offp;
- int icmp6len = m->m_pkthdr.len - *offp;
- int code, sum, noff;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
- int ip6len, error;
+ int code, error, icmp6len, ip6len, noff, off, sum;
- ifp = m->m_pkthdr.rcvif;
+ m = *mp;
+ off = *offp;
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE);
- /* m might change if M_LOOP. So, call mtod after this */
-#endif
+ if (m->m_len < off + sizeof(struct icmp6_hdr)) {
+ m = m_pullup(m, off + sizeof(struct icmp6_hdr));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = m;
+ return (IPPROTO_DONE);
+ }
+ }
/*
* Locate icmp6 structure in mbuf, and check
* that not corrupted and of at least minimum length
*/
- ip6 = mtod(m, struct ip6_hdr *);
- ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
+ icmp6len = m->m_pkthdr.len - off;
if (icmp6len < sizeof(struct icmp6_hdr)) {
ICMP6STAT_INC(icp6s_tooshort);
goto freeit;
}
+ ip6 = mtod(m, struct ip6_hdr *);
+ ifp = m->m_pkthdr.rcvif;
/*
* Check multicast group membership.
* Note: SSM filters are not applied for ICMPv6 traffic.
@@ -447,20 +442,9 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
}
}
- /*
- * calculate the checksum
- */
-#ifndef PULLDOWN_TEST
+ /* Calculate the checksum. */
icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
-#else
- IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
- if (icmp6 == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return IPPROTO_DONE;
- }
-#endif
code = icmp6->icmp6_code;
-
if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
nd6log((LOG_ERR,
"ICMP6 checksum error(%d|%x) %s\n",
@@ -475,6 +459,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
icmp6_ifstat_inc(ifp, ifs6_in_error);
+ ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
icmp6_ifstat_inc(ifp, ifs6_in_dstunreach);
@@ -587,8 +572,14 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
n->m_pkthdr.len = n0len + (noff - off);
n->m_next = n0;
} else {
- IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
- sizeof(*nicmp6));
+ if (n->m_len < off + sizeof(*nicmp6)) {
+ n = m_pullup(n, off + sizeof(*nicmp6));
+ if (n == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ break;
+ }
+ }
+ nicmp6 = (struct icmp6_hdr *)(mtod(n, caddr_t) + off);
noff = off;
}
if (n) {
@@ -621,8 +612,10 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
*/
if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0)
goto freeit;
- if (mld_input(m, off, icmp6len) != 0)
+ if (mld_input(&m, off, icmp6len) != 0) {
+ *mp = NULL;
return (IPPROTO_DONE);
+ }
/* m stays. */
break;
@@ -641,10 +634,15 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
goto badlen;
if (mode == FQDN) {
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo),
- IPPROTO_DONE);
-#endif
+ if (m->m_len < off + sizeof(struct icmp6_nodeinfo)) {
+ m = m_pullup(m, off +
+ sizeof(struct icmp6_nodeinfo));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = m;
+ return (IPPROTO_DONE);
+ }
+ }
n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
if (n)
n = ni6_input(n, off);
@@ -734,7 +732,14 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
if (icmp6len < sizeof(struct nd_router_solicit))
goto badlen;
if (send_sendso_input_hook != NULL) {
- IP6_EXTHDR_CHECK(m, off, icmp6len, IPPROTO_DONE);
+ if (m->m_len < off + icmp6len) {
+ m = m_pullup(m, off + icmp6len);
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = NULL;
+ return (IPPROTO_DONE);
+ }
+ }
error = send_sendso_input_hook(m, ifp, SND_IN, ip6len);
if (error == 0) {
m = NULL;
@@ -853,6 +858,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
deliver:
if (icmp6_notify_error(&m, off, icmp6len, code) != 0) {
/* In this case, m should've been freed. */
+ *mp = NULL;
return (IPPROTO_DONE);
}
break;
@@ -869,38 +875,40 @@ icmp6_input(struct mbuf **mp, int *offp, int proto)
/* deliver the packet to appropriate sockets */
icmp6_rip6_input(&m, *offp);
- return IPPROTO_DONE;
+ *mp = m;
+ return (IPPROTO_DONE);
freeit:
m_freem(m);
- return IPPROTO_DONE;
+ *mp = NULL;
+ return (IPPROTO_DONE);
}
static int
icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
{
- struct mbuf *m = *mp;
+ struct mbuf *m;
struct icmp6_hdr *icmp6;
struct ip6_hdr *eip6;
u_int32_t notifymtu;
struct sockaddr_in6 icmp6src, icmp6dst;
+ m = *mp;
+
if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
ICMP6STAT_INC(icp6s_tooshort);
goto freeit;
}
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off,
- sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1);
- icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
-#else
- IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
- sizeof(*icmp6) + sizeof(struct ip6_hdr));
- if (icmp6 == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return (-1);
+
+ if (m->m_len < off + sizeof(*icmp6) + sizeof(struct ip6_hdr)) {
+ m = m_pullup(m, off + sizeof(*icmp6) + sizeof(struct ip6_hdr));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = m;
+ return (-1);
+ }
}
-#endif
+ icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
eip6 = (struct ip6_hdr *)(icmp6 + 1);
/* Detect the upper level protocol */
@@ -924,19 +932,17 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
case IPPROTO_AH:
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, 0,
- eoff + sizeof(struct ip6_ext), -1);
- eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff);
-#else
- IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
- eoff, sizeof(*eh));
- if (eh == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return (-1);
+ if (m->m_len < eoff + sizeof(struct ip6_ext)) {
+ m = m_pullup(m, eoff +
+ sizeof(struct ip6_ext));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = m;
+ return (-1);
+ }
}
-#endif
-
+ eh = (struct ip6_ext *)
+ (mtod(m, caddr_t) + eoff);
if (nxt == IPPROTO_AH)
eoff += (eh->ip6e_len + 2) << 2;
else
@@ -952,18 +958,16 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
* information that depends on the final
* destination (e.g. path MTU).
*/
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1);
+ if (m->m_len < eoff + sizeof(*rth)) {
+ m = m_pullup(m, eoff + sizeof(*rth));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = m;
+ return (-1);
+ }
+ }
rth = (struct ip6_rthdr *)
(mtod(m, caddr_t) + eoff);
-#else
- IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
- eoff, sizeof(*rth));
- if (rth == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return (-1);
- }
-#endif
rthlen = (rth->ip6r_len + 1) << 3;
/*
* XXX: currently there is no
@@ -977,19 +981,17 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
int hops;
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1);
+ if (m->m_len < eoff + rthlen) {
+ m = m_pullup(m, eoff + rthlen);
+ if (m == NULL) {
+ IP6STAT_INC(
+ ip6s_exthdrtoolong);
+ *mp = m;
+ return (-1);
+ }
+ }
rth0 = (struct ip6_rthdr0 *)
(mtod(m, caddr_t) + eoff);
-#else
- IP6_EXTHDR_GET(rth0,
- struct ip6_rthdr0 *, m,
- eoff, rthlen);
- if (rth0 == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return (-1);
- }
-#endif
/* just ignore a bogus header */
if ((rth0->ip6r0_len % 2) == 0 &&
(hops = rth0->ip6r0_len/2))
@@ -999,19 +1001,17 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
nxt = rth->ip6r_nxt;
break;
case IPPROTO_FRAGMENT:
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, 0, eoff +
- sizeof(struct ip6_frag), -1);
+ if (m->m_len < eoff + sizeof(struct ip6_frag)) {
+ m = m_pullup(m, eoff +
+ sizeof(struct ip6_frag));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = m;
+ return (-1);
+ }
+ }
fh = (struct ip6_frag *)(mtod(m, caddr_t) +
eoff);
-#else
- IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
- eoff, sizeof(*fh));
- if (fh == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return (-1);
- }
-#endif
/*
* Data after a fragment header is meaningless
* unless it is the first fragment, but
@@ -1037,16 +1037,7 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
}
}
notify:
-#ifndef PULLDOWN_TEST
icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
-#else
- IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
- sizeof(*icmp6) + sizeof(struct ip6_hdr));
- if (icmp6 == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return (-1);
- }
-#endif
/*
* retrieve parameters from the inner IPv6 header, and convert
@@ -1104,6 +1095,7 @@ icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
freeit:
m_freem(m);
+ *mp = NULL;
return (-1);
}
@@ -1191,15 +1183,7 @@ ni6_input(struct mbuf *m, int off)
struct in6_ifaddr *ia6 = NULL;
ip6 = mtod(m, struct ip6_hdr *);
-#ifndef PULLDOWN_TEST
ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
-#else
- IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
- if (ni6 == NULL) {
- /* m is already reclaimed */
- return (NULL);
- }
-#endif
/*
* Validate IPv6 source address.
@@ -1296,7 +1280,6 @@ ni6_input(struct mbuf *m, int off)
*
* We do not do proxy at this moment.
*/
- /* m_pulldown instead of copy? */
m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
subjlen, (caddr_t)&in6_subj);
if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL))
@@ -1340,10 +1323,19 @@ ni6_input(struct mbuf *m, int off)
mtx_unlock(&pr->pr_mtx);
if (!n || n->m_next || n->m_len == 0)
goto bad;
- IP6_EXTHDR_GET(subj, char *, m,
- off + sizeof(struct icmp6_nodeinfo), subjlen);
- if (subj == NULL)
- goto bad;
+ if (m->m_len < off + sizeof(struct icmp6_nodeinfo) +
+ subjlen) {
+ m = m_pullup(m, off +
+ sizeof(struct icmp6_nodeinfo) + subjlen);
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ goto bad;
+ }
+ }
+ /* ip6 possibly invalid but not used after. */
+ ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
+ subj = (char *)(mtod(m, caddr_t) + off +
+ sizeof(struct icmp6_nodeinfo));
if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
n->m_len)) {
goto bad;
@@ -1906,23 +1898,15 @@ icmp6_rip6_input(struct mbuf **mp, int off)
{
struct mbuf *m = *mp;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
- struct inpcb *in6p;
+ struct inpcb *inp;
struct inpcb *last = NULL;
struct sockaddr_in6 fromsa;
struct icmp6_hdr *icmp6;
struct epoch_tracker et;
struct mbuf *opts = NULL;
-#ifndef PULLDOWN_TEST
- /* this is assumed to be safe. */
+ /* This is assumed to be safe; icmp6_input() does a pullup. */
icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
-#else
- IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
- if (icmp6 == NULL) {
- /* m is already reclaimed */
- return (IPPROTO_DONE);
- }
-#endif
/*
* XXX: the address may have embedded scope zone ID, which should be
@@ -1934,29 +1918,30 @@ icmp6_rip6_input(struct mbuf **mp, int off)
fromsa.sin6_addr = ip6->ip6_src;
if (sa6_recoverscope(&fromsa)) {
m_freem(m);
+ *mp = NULL;
return (IPPROTO_DONE);
}
INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
- CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) {
- if ((in6p->inp_vflag & INP_IPV6) == 0)
+ CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
+ if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
- if (in6p->inp_ip_p != IPPROTO_ICMPV6)
+ if (inp->inp_ip_p != IPPROTO_ICMPV6)
continue;
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
- !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
+ !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
continue;
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
- !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
+ !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
continue;
- INP_RLOCK(in6p);
- if (__predict_false(in6p->inp_flags2 & INP_FREED)) {
- INP_RUNLOCK(in6p);
+ INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_RUNLOCK(inp);
continue;
}
if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
- in6p->in6p_icmp6filt)) {
- INP_RUNLOCK(in6p);
+ inp->in6p_icmp6filt)) {
+ INP_RUNLOCK(inp);
continue;
}
if (last != NULL) {
@@ -2017,7 +2002,7 @@ icmp6_rip6_input(struct mbuf **mp, int off)
}
INP_RUNLOCK(last);
}
- last = in6p;
+ last = inp;
}
INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) {
@@ -2059,7 +2044,8 @@ icmp6_rip6_input(struct mbuf **mp, int off)
m_freem(m);
IP6STAT_DEC(ip6s_delivered);
}
- return IPPROTO_DONE;
+ *mp = NULL;
+ return (IPPROTO_DONE);
}
/*
@@ -2237,24 +2223,17 @@ void
icmp6_redirect_input(struct mbuf *m, int off)
{
struct ifnet *ifp;
- struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct ip6_hdr *ip6;
struct nd_redirect *nd_rd;
- int icmp6len = ntohs(ip6->ip6_plen);
- char *lladdr = NULL;
- int lladdrlen = 0;
- int is_router;
- int is_onlink;
- struct in6_addr src6 = ip6->ip6_src;
- struct in6_addr redtgt6;
- struct in6_addr reddst6;
+ struct in6_addr src6, redtgt6, reddst6;
union nd_opts ndopts;
char ip6buf[INET6_ADDRSTRLEN];
+ char *lladdr;
+ int icmp6len, is_onlink, is_router, lladdrlen;
M_ASSERTPKTHDR(m);
KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__));
- ifp = m->m_pkthdr.rcvif;
-
/* XXX if we are router, we don't update route by icmp6 redirect */
if (V_ip6_forwarding)
goto freeit;
@@ -2265,25 +2244,29 @@ icmp6_redirect_input(struct mbuf *m, int off)
if(m->m_flags & M_FRAGMENTED)
goto freeit;
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, icmp6len,);
- nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
-#else
- IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
- if (nd_rd == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return;
+ ip6 = mtod(m, struct ip6_hdr *);
+ icmp6len = ntohs(ip6->ip6_plen);
+ if (m->m_len < off + icmp6len) {
+ m = m_pullup(m, off + icmp6len);
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ return;
+ }
}
-#endif
+ ip6 = mtod(m, struct ip6_hdr *);
+ nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
+
+ ifp = m->m_pkthdr.rcvif;
redtgt6 = nd_rd->nd_rd_target;
reddst6 = nd_rd->nd_rd_dst;
- if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) ||
- in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) {
+ if (in6_setscope(&redtgt6, ifp, NULL) ||
+ in6_setscope(&reddst6, ifp, NULL)) {
goto freeit;
}
/* validation */
+ src6 = ip6->ip6_src;
if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
nd6log((LOG_ERR,
"ICMP6 redirect sent from %s rejected; "
@@ -2369,6 +2352,8 @@ icmp6_redirect_input(struct mbuf *m, int off)
goto freeit;
}
+ lladdr = NULL;
+ lladdrlen = 0;
if (ndopts.nd_opts_tgt_lladdr) {
lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
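Throughout this file the PULLDOWN_TEST branches are removed and every header access is preceded by the same idiom: if the first off + len bytes are not already contiguous in the lead mbuf, m_pullup() is called, and a NULL result bumps a counter and bails out. The toy chain below models that contract in user space; struct buf, pull_up(), and their ownership rules are assumptions for the sketch only and every segment is assumed to come from malloc().

    #include <stdlib.h>
    #include <string.h>

    struct buf {
        struct buf *next;       /* rest of the chain      */
        size_t      len;        /* valid bytes in data[]  */
        char        data[256];
    };

    /*
     * Make the first "need" bytes contiguous in the head segment by
     * pulling bytes forward from later segments.  Returns NULL if that
     * cannot be done, leaving cleanup to the caller's error path.
     */
    static struct buf *
    pull_up(struct buf *head, size_t need)
    {
        if (need > sizeof(head->data))
            return (NULL);
        while (head->len < need && head->next != NULL) {
            struct buf *n = head->next;
            size_t take = need - head->len;

            if (take > n->len)
                take = n->len;
            memcpy(head->data + head->len, n->data, take);
            head->len += take;
            memmove(n->data, n->data + take, n->len - take);
            n->len -= take;
            if (n->len == 0) {
                head->next = n->next;
                free(n);
            }
        }
        return (head->len >= need ? head : NULL);
    }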
diff --git a/freebsd/sys/netinet6/in6.c b/freebsd/sys/netinet6/in6.c
index 078efe45..a42b7bf7 100644
--- a/freebsd/sys/netinet6/in6.c
+++ b/freebsd/sys/netinet6/in6.c
@@ -86,6 +86,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <net/if.h>
@@ -2027,8 +2028,6 @@ in6_if2idlen(struct ifnet *ifp)
}
}
-#include <sys/sysctl.h>
-
struct in6_llentry {
struct llentry base;
};
diff --git a/freebsd/sys/netinet6/in6_mcast.c b/freebsd/sys/netinet6/in6_mcast.c
index 1ac10633..cf7c7ff2 100644
--- a/freebsd/sys/netinet6/in6_mcast.c
+++ b/freebsd/sys/netinet6/in6_mcast.c
@@ -1830,7 +1830,7 @@ ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt)
* Returns NULL if no ifp could be found.
*/
static struct ifnet *
-in6p_lookup_mcast_ifp(const struct inpcb *in6p,
+in6p_lookup_mcast_ifp(const struct inpcb *inp,
const struct sockaddr_in6 *gsin6)
{
struct nhop6_basic nh6;
@@ -1838,13 +1838,13 @@ in6p_lookup_mcast_ifp(const struct inpcb *in6p,
uint32_t scopeid;
uint32_t fibnum;
- KASSERT(in6p->inp_vflag & INP_IPV6,
+ KASSERT(inp->inp_vflag & INP_IPV6,
("%s: not INP_IPV6 inpcb", __func__));
KASSERT(gsin6->sin6_family == AF_INET6,
("%s: not AF_INET6 group", __func__));
in6_splitscope(&gsin6->sin6_addr, &dst, &scopeid);
- fibnum = in6p ? in6p->inp_inc.inc_fibnum : RT_DEFAULT_FIB;
+ fibnum = inp ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB;
if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6) != 0)
return (NULL);
@@ -2111,6 +2111,7 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
* NOTE: Refcount from in6_joingroup_locked()
* is protecting membership.
*/
+ ip6_mfilter_insert(&imo->im6o_head, imf);
} else {
CTR1(KTR_MLD, "%s: merge inm state", __func__);
IN6_MULTI_LIST_LOCK();
@@ -2136,9 +2137,6 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt)
}
}
- if (is_new)
- ip6_mfilter_insert(&imo->im6o_head, imf);
-
im6f_commit(imf);
imf = NULL;
@@ -2330,6 +2328,12 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
if (is_final) {
ip6_mfilter_remove(&imo->im6o_head, imf);
im6f_leave(imf);
+
+ /*
+ * Give up the multicast address record to which
+ * the membership points.
+ */
+ (void)in6_leavegroup_locked(inm, imf);
} else {
if (imf->im6f_st[0] == MCAST_EXCLUDE) {
error = EADDRNOTAVAIL;
@@ -2386,14 +2390,8 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt)
out_in6p_locked:
INP_WUNLOCK(inp);
- if (is_final && imf) {
- /*
- * Give up the multicast address record to which
- * the membership points.
- */
- (void)in6_leavegroup_locked(inm, imf);
+ if (is_final && imf)
ip6_mfilter_free(imf);
- }
IN6_MULTI_UNLOCK();
return (error);
diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c
index 3c89cdf4..903bc09b 100644
--- a/freebsd/sys/netinet6/in6_pcb.c
+++ b/freebsd/sys/netinet6/in6_pcb.c
@@ -816,20 +816,20 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
void
in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
{
- struct inpcb *in6p;
+ struct inpcb *inp;
struct in6_multi *inm;
struct in6_mfilter *imf;
struct ip6_moptions *im6o;
INP_INFO_WLOCK(pcbinfo);
- CK_LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
- INP_WLOCK(in6p);
- if (__predict_false(in6p->inp_flags2 & INP_FREED)) {
- INP_WUNLOCK(in6p);
+ CK_LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
+ INP_WLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+ INP_WUNLOCK(inp);
continue;
}
- im6o = in6p->in6p_moptions;
- if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) {
+ im6o = inp->in6p_moptions;
+ if ((inp->inp_vflag & INP_IPV6) && im6o != NULL) {
/*
* Unselect the outgoing ifp for multicast if it
* is being detached.
@@ -853,7 +853,7 @@ restart:
goto restart;
}
}
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
}
INP_INFO_WUNLOCK(pcbinfo);
}
diff --git a/freebsd/sys/netinet6/in6_pcb.h b/freebsd/sys/netinet6/in6_pcb.h
index 2c6bcdc6..56ea6eeb 100644
--- a/freebsd/sys/netinet6/in6_pcb.h
+++ b/freebsd/sys/netinet6/in6_pcb.h
@@ -113,7 +113,7 @@ int in6_getpeeraddr(struct socket *so, struct sockaddr **nam);
int in6_getsockaddr(struct socket *so, struct sockaddr **nam);
int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam);
int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam);
-int in6_selecthlim(struct in6pcb *, struct ifnet *);
+int in6_selecthlim(struct inpcb *, struct ifnet *);
int in6_pcbsetport(struct in6_addr *, struct inpcb *, struct ucred *);
void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m, int);
#endif /* _KERNEL */
diff --git a/freebsd/sys/netinet6/in6_proto.c b/freebsd/sys/netinet6/in6_proto.c
index cf62e60c..a16818ce 100644
--- a/freebsd/sys/netinet6/in6_proto.c
+++ b/freebsd/sys/netinet6/in6_proto.c
@@ -386,10 +386,6 @@ VNET_DEFINE(int, ip6_accept_rtadv) = 0;
VNET_DEFINE(int, ip6_no_radr) = 0;
VNET_DEFINE(int, ip6_norbit_raif) = 0;
VNET_DEFINE(int, ip6_rfc6204w3) = 0;
-VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */
-int ip6_maxfrags; /* initialized in frag6.c:frag6_init() */
-VNET_DEFINE(int, ip6_maxfragbucketsize);/* initialized in frag6.c:frag6_init() */
-VNET_DEFINE(int, ip6_maxfragsperpacket); /* initialized in frag6.c:frag6_init() */
VNET_DEFINE(int, ip6_log_interval) = 5;
VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we
* process? */
@@ -476,20 +472,6 @@ sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS)
return (0);
}
-static int
-sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
-{
- int error, val;
-
- val = V_ip6_maxfragpackets;
- error = sysctl_handle_int(oidp, &val, 0, req);
- if (error != 0 || !req->newptr)
- return (error);
- V_ip6_maxfragpackets = val;
- frag6_set_bucketsize();
- return (0);
-}
-
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0,
"Enable forwarding of IPv6 packets between interfaces");
@@ -502,12 +484,6 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim,
SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
ip6stat,
"IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
-SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
- CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
- sysctl_ip6_maxfragpackets, "I",
- "Default maximum number of outstanding fragmented IPv6 packets. "
- "A value of 0 means no fragmented packets will be accepted, while a "
- "a value of -1 means no limit");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0,
"Default value of per-interface flag for accepting ICMPv6 RA messages");
@@ -577,17 +553,6 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,
"Use the default scope zone when none is specified");
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
- CTLFLAG_RW, &ip6_maxfrags, 0,
- "Maximum allowed number of outstanding IPv6 packet fragments. "
- "A value of 0 means no fragmented packets will be accepted, while a "
- "a value of -1 means no limit");
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
- "Maximum number of reassembly queues per hash bucket");
-SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
- "Maximum allowed number of fragments per packet");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0,
"Enable path MTU discovery for multicast packets");
diff --git a/freebsd/sys/netinet6/in6_src.c b/freebsd/sys/netinet6/in6_src.c
index 170eaf18..0bd8bba4 100644
--- a/freebsd/sys/netinet6/in6_src.c
+++ b/freebsd/sys/netinet6/in6_src.c
@@ -933,21 +933,21 @@ in6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* 3. The system default hoplimit.
*/
int
-in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp)
+in6_selecthlim(struct inpcb *inp, struct ifnet *ifp)
{
- if (in6p && in6p->in6p_hops >= 0)
- return (in6p->in6p_hops);
+ if (inp && inp->in6p_hops >= 0)
+ return (inp->in6p_hops);
else if (ifp)
return (ND_IFINFO(ifp)->chlim);
- else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
+ else if (inp && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
struct nhop6_basic nh6;
struct in6_addr dst;
uint32_t fibnum, scopeid;
int hlim;
- fibnum = in6p->inp_inc.inc_fibnum;
- in6_splitscope(&in6p->in6p_faddr, &dst, &scopeid);
+ fibnum = inp->inp_inc.inc_fibnum;
+ in6_splitscope(&inp->in6p_faddr, &dst, &scopeid);
if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6)==0){
hlim = ND_IFINFO(nh6.nh_ifp)->chlim;
return (hlim);
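in6_selecthlim() above picks the hop limit in a fixed priority order: an explicit per-socket value, then the known interface's advertised limit, then (for a connected socket) the limit of the interface the route points at, and finally the system default. A hedged sketch of that order with plain ints follows; select_hlim() and its parameter names are illustrative only, with zero or a negative value meaning "not available".

    /* Hop-limit selection order; a simplified model of the routine above. */
    static int
    select_hlim(int pcb_hops, int ifp_chlim, int route_chlim, int def_hlim)
    {
        if (pcb_hops >= 0)      /* set on the socket (IPV6_UNICAST_HOPS) */
            return (pcb_hops);
        if (ifp_chlim > 0)      /* known outgoing interface */
            return (ifp_chlim);
        if (route_chlim > 0)    /* connected socket: route's interface */
            return (route_chlim);
        return (def_hlim);      /* system default */
    }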
diff --git a/freebsd/sys/netinet6/ip6_forward.c b/freebsd/sys/netinet6/ip6_forward.c
index 80535efe..97a7a6c6 100644
--- a/freebsd/sys/netinet6/ip6_forward.c
+++ b/freebsd/sys/netinet6/ip6_forward.c
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_ipsec.h>
#include <rtems/bsd/local/opt_ipstealth.h>
+#include <rtems/bsd/local/opt_sctp.h>
#include <sys/param.h>
#include <sys/systm.h>
diff --git a/freebsd/sys/netinet6/ip6_input.c b/freebsd/sys/netinet6/ip6_input.c
index 25ab624c..6800d002 100644
--- a/freebsd/sys/netinet6/ip6_input.c
+++ b/freebsd/sys/netinet6/ip6_input.c
@@ -205,9 +205,6 @@ struct rmlock in6_ifaddr_lock;
RM_SYSINIT(in6_ifaddr_lock, &in6_ifaddr_lock, "in6_ifaddr_lock");
static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
-#ifdef PULLDOWN_TEST
-static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
-#endif
/*
* IP6 initialization: fill in IP6 protocol switch table.
@@ -396,6 +393,7 @@ ip6_destroy(void *unused __unused)
}
IFNET_RUNLOCK();
+ frag6_destroy();
nd6_destroy();
in6_ifattach_destroy();
@@ -406,20 +404,22 @@ VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL);
#endif
static int
-ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off,
+ip6_input_hbh(struct mbuf **mp, uint32_t *plen, uint32_t *rtalert, int *off,
int *nxt, int *ours)
{
+ struct mbuf *m;
struct ip6_hdr *ip6;
struct ip6_hbh *hbh;
- if (ip6_hopopts_input(plen, rtalert, &m, off)) {
+ if (ip6_hopopts_input(plen, rtalert, mp, off)) {
#if 0 /*touches NULL pointer*/
- in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
+ in6_ifstat_inc((*mp)->m_pkthdr.rcvif, ifs6_in_discard);
#endif
goto out; /* m have already been freed */
}
/* adjust pointer */
+ m = *mp;
ip6 = mtod(m, struct ip6_hdr *);
/*
@@ -441,17 +441,8 @@ ip6_input_hbh(struct mbuf *m, uint32_t *plen, uint32_t *rtalert, int *off,
(caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
goto out;
}
-#ifndef PULLDOWN_TEST
/* ip6_hopopts_input() ensures that mbuf is contiguous */
hbh = (struct ip6_hbh *)(ip6 + 1);
-#else
- IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
- sizeof(struct ip6_hbh));
- if (hbh == NULL) {
- IP6STAT_INC(ip6s_tooshort);
- goto out;
- }
-#endif
*nxt = hbh->ip6h_nxt;
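ip6_input_hbh() now takes a struct mbuf ** because the pullup inside ip6_hopopts_input() may replace or consume the chain; on failure the callee frees the packet and the caller must observe NULL rather than a stale pointer. The sketch below shows that ownership convention with toy types; ensure_len() and struct pkt are assumptions for illustration, not the kernel API.

    #include <stdlib.h>
    #include <string.h>

    struct pkt {
        size_t len;
        char  *data;
    };

    /*
     * Grow *pp's buffer to "need" bytes.  On failure the packet is freed
     * and *pp is set to NULL so the caller cannot touch it again; this
     * mirrors the "*mp = NULL and return" convention in the diff above.
     */
    static int
    ensure_len(struct pkt **pp, size_t need)
    {
        struct pkt *p = *pp;
        char *nd;

        if (p->len >= need)
            return (0);
        nd = realloc(p->data, need);
        if (nd == NULL) {
            free(p->data);
            free(p);
            *pp = NULL;
            return (-1);
        }
        memset(nd + p->len, 0, need - p->len);
        p->data = nd;
        p->len = need;
        return (0);
    }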
/*
@@ -602,7 +593,6 @@ ip6_input(struct mbuf *m)
in6_ifstat_inc(rcvif, ifs6_in_receive);
IP6STAT_INC(ip6s_total);
-#ifndef PULLDOWN_TEST
/*
* L2 bridge code and some other code can return mbuf chain
* that does not conform to KAME requirement. too bad.
@@ -624,9 +614,6 @@ ip6_input(struct mbuf *m)
m_freem(m);
m = n;
}
- IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */);
-#endif
-
if (m->m_len < sizeof(struct ip6_hdr)) {
if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
IP6STAT_INC(ip6s_toosmall);
@@ -693,11 +680,10 @@ ip6_input(struct mbuf *m)
* and bypass security checks (act as if it was from 127.0.0.1 by using
* IPv6 src ::ffff:127.0.0.1). Be cautious.
*
- * This check chokes if we are in an SIIT cloud. As none of BSDs
- * support IPv4-less kernel compilation, we cannot support SIIT
- * environment at all. So, it makes more sense for us to reject any
- * malicious packets for non-SIIT environment, than try to do a
- * partial support for SIIT environment.
+ * We have supported IPv6-only kernels for a few years and this issue
+ * has not come up. The world seems to move mostly towards not using
+ * v4mapped on the wire, so it makes sense for us to keep rejecting
+ * any such packets.
*/
if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
@@ -859,7 +845,7 @@ passin:
*/
plen = (u_int32_t)ntohs(ip6->ip6_plen);
if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
- if (ip6_input_hbh(m, &plen, &rtalert, &off, &nxt, &ours) != 0)
+ if (ip6_input_hbh(&m, &plen, &rtalert, &off, &nxt, &ours) != 0)
return;
} else
nxt = ip6->ip6_nxt;
@@ -915,24 +901,6 @@ passin:
return;
}
- ip6 = mtod(m, struct ip6_hdr *);
-
- /*
- * Malicious party may be able to use IPv4 mapped addr to confuse
- * tcp/udp stack and bypass security checks (act as if it was from
- * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious.
- *
- * For SIIT end node behavior, you may want to disable the check.
- * However, you will become vulnerable to attacks using IPv4 mapped
- * source.
- */
- if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
- IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
- IP6STAT_INC(ip6s_badscope);
- in6_ifstat_inc(rcvif, ifs6_in_addrerr);
- goto bad;
- }
-
/*
* Tell launch routine the next header
*/
@@ -987,33 +955,33 @@ ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
struct ip6_hbh *hbh;
/* validation of the length of the header */
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
+ if (m->m_len < off + sizeof(*hbh)) {
+ m = m_pullup(m, off + sizeof(*hbh));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = NULL;
+ return (-1);
+ }
+ }
hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
hbhlen = (hbh->ip6h_len + 1) << 3;
- IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
- hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
-#else
- IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
- sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
- if (hbh == NULL) {
- IP6STAT_INC(ip6s_tooshort);
- return -1;
- }
- hbhlen = (hbh->ip6h_len + 1) << 3;
- IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
- hbhlen);
- if (hbh == NULL) {
- IP6STAT_INC(ip6s_tooshort);
- return -1;
+ if (m->m_len < off + hbhlen) {
+ m = m_pullup(m, off + hbhlen);
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = NULL;
+ return (-1);
+ }
}
-#endif
+ hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
off += hbhlen;
hbhlen -= sizeof(struct ip6_hbh);
if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
- hbhlen, rtalertp, plenp) < 0)
+ hbhlen, rtalertp, plenp) < 0) {
+ *mp = NULL;
return (-1);
+ }
*offp = off;
*mp = m;
@@ -1198,10 +1166,9 @@ ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
* Create the "control" list for this pcb.
* These functions will not modify mbuf chain at all.
*
- * With KAME mbuf chain restriction:
* The routine will be called from upper layer handlers like tcp6_input().
* Thus the routine assumes that the caller (tcp6_input) have already
- * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
+ * called m_pullup() and all the extension headers are located in the
* very first mbuf on the mbuf chain.
*
* ip6_savecontrol_v4 will handle those options that are possible to be
@@ -1409,15 +1376,16 @@ ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp,
}
void
-ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
+ip6_savecontrol(struct inpcb *inp, struct mbuf *m, struct mbuf **mp)
{
- struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct ip6_hdr *ip6;
int v4only = 0;
- mp = ip6_savecontrol_v4(in6p, m, mp, &v4only);
+ mp = ip6_savecontrol_v4(inp, m, mp, &v4only);
if (v4only)
return;
+ ip6 = mtod(m, struct ip6_hdr *);
/*
* IPV6_HOPOPTS socket option. Recall that we required super-user
* privilege for the option (see ip6_ctloutput), but it might be too
@@ -1425,7 +1393,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
* returned to normal user.
* See also RFC 2292 section 6 (or RFC 3542 section 8).
*/
- if ((in6p->inp_flags & IN6P_HOPOPTS) != 0) {
+ if ((inp->inp_flags & IN6P_HOPOPTS) != 0) {
/*
		 * Check if a hop-by-hop options header is contained in the
* received packet, and if so, store the options as ancillary
@@ -1435,29 +1403,10 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
*/
if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
struct ip6_hbh *hbh;
- int hbhlen = 0;
-#ifdef PULLDOWN_TEST
- struct mbuf *ext;
-#endif
+ int hbhlen;
-#ifndef PULLDOWN_TEST
hbh = (struct ip6_hbh *)(ip6 + 1);
hbhlen = (hbh->ip6h_len + 1) << 3;
-#else
- ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
- ip6->ip6_nxt);
- if (ext == NULL) {
- IP6STAT_INC(ip6s_tooshort);
- return;
- }
- hbh = mtod(ext, struct ip6_hbh *);
- hbhlen = (hbh->ip6h_len + 1) << 3;
- if (hbhlen != ext->m_len) {
- m_freem(ext);
- IP6STAT_INC(ip6s_tooshort);
- return;
- }
-#endif
/*
* XXX: We copy the whole header even if a
@@ -1467,17 +1416,14 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
* Note: this constraint is removed in RFC3542
*/
*mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
- IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
+ IS2292(inp, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
IPPROTO_IPV6);
if (*mp)
mp = &(*mp)->m_next;
-#ifdef PULLDOWN_TEST
- m_freem(ext);
-#endif
}
}
- if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
+ if ((inp->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
/*
@@ -1490,9 +1436,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
while (1) { /* is explicit loop prevention necessary? */
struct ip6_ext *ip6e = NULL;
int elen;
-#ifdef PULLDOWN_TEST
- struct mbuf *ext = NULL;
-#endif
/*
* if it is not an extension header, don't try to
@@ -1508,7 +1451,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
goto loopend;
}
-#ifndef PULLDOWN_TEST
if (off + sizeof(*ip6e) > m->m_len)
goto loopend;
ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
@@ -1518,42 +1460,25 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
elen = (ip6e->ip6e_len + 1) << 3;
if (off + elen > m->m_len)
goto loopend;
-#else
- ext = ip6_pullexthdr(m, off, nxt);
- if (ext == NULL) {
- IP6STAT_INC(ip6s_tooshort);
- return;
- }
- ip6e = mtod(ext, struct ip6_ext *);
- if (nxt == IPPROTO_AH)
- elen = (ip6e->ip6e_len + 2) << 2;
- else
- elen = (ip6e->ip6e_len + 1) << 3;
- if (elen != ext->m_len) {
- m_freem(ext);
- IP6STAT_INC(ip6s_tooshort);
- return;
- }
-#endif
switch (nxt) {
case IPPROTO_DSTOPTS:
- if (!(in6p->inp_flags & IN6P_DSTOPTS))
+ if (!(inp->inp_flags & IN6P_DSTOPTS))
break;
*mp = sbcreatecontrol((caddr_t)ip6e, elen,
- IS2292(in6p,
+ IS2292(inp,
IPV6_2292DSTOPTS, IPV6_DSTOPTS),
IPPROTO_IPV6);
if (*mp)
mp = &(*mp)->m_next;
break;
case IPPROTO_ROUTING:
- if (!(in6p->inp_flags & IN6P_RTHDR))
+ if (!(inp->inp_flags & IN6P_RTHDR))
break;
*mp = sbcreatecontrol((caddr_t)ip6e, elen,
- IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR),
+ IS2292(inp, IPV6_2292RTHDR, IPV6_RTHDR),
IPPROTO_IPV6);
if (*mp)
mp = &(*mp)->m_next;
@@ -1569,9 +1494,6 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
* the code just in case (nxt overwritten or
* other cases).
*/
-#ifdef PULLDOWN_TEST
- m_freem(ext);
-#endif
goto loopend;
}
@@ -1580,16 +1502,12 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
off += elen;
nxt = ip6e->ip6e_nxt;
ip6e = NULL;
-#ifdef PULLDOWN_TEST
- m_freem(ext);
- ext = NULL;
-#endif
}
loopend:
;
}
- if (in6p->inp_flags2 & INP_RECVFLOWID) {
+ if (inp->inp_flags2 & INP_RECVFLOWID) {
uint32_t flowid, flow_type;
flowid = m->m_pkthdr.flowid;
@@ -1610,7 +1528,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
}
#ifdef RSS
- if (in6p->inp_flags2 & INP_RECVRSSBUCKETID) {
+ if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
uint32_t flowid, flow_type;
uint32_t rss_bucketid;
@@ -1669,49 +1587,6 @@ ip6_notify_pmtu(struct inpcb *inp, struct sockaddr_in6 *dst, u_int32_t mtu)
sorwakeup(so);
}
-#ifdef PULLDOWN_TEST
-/*
- * pull single extension header from mbuf chain. returns single mbuf that
- * contains the result, or NULL on error.
- */
-static struct mbuf *
-ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
-{
- struct ip6_ext ip6e;
- size_t elen;
- struct mbuf *n;
-
-#ifdef DIAGNOSTIC
- switch (nxt) {
- case IPPROTO_DSTOPTS:
- case IPPROTO_ROUTING:
- case IPPROTO_HOPOPTS:
- case IPPROTO_AH: /* is it possible? */
- break;
- default:
- printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
- }
-#endif
-
- m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
- if (nxt == IPPROTO_AH)
- elen = (ip6e.ip6e_len + 2) << 2;
- else
- elen = (ip6e.ip6e_len + 1) << 3;
-
- if (elen > MLEN)
- n = m_getcl(M_NOWAIT, MT_DATA, 0);
- else
- n = m_get(M_NOWAIT, MT_DATA);
- if (n == NULL)
- return NULL;
-
- m_copydata(m, off, elen, mtod(n, caddr_t));
- n->m_len = elen;
- return n;
-}
-#endif
-
/*
* Get pointer to the previous header followed by the header
* currently processed.
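The ip6_input.c hunks above all apply the same conversion: the old PULLDOWN_TEST-era IP6_EXTHDR_CHECK()/IP6_EXTHDR_GET() macros are replaced with explicit m_pullup() guards, and the statistics bumps and cleanup are inlined at each call site. A minimal, hedged sketch of that guard follows; the function name is illustrative only and stands in for the per-caller open-coded checks.

#include <sys/param.h>
#include <sys/mbuf.h>

/*
 * Sketch: make sure the first mbuf holds at least "off + len"
 * contiguous bytes before casting into it.  m_pullup() may return a
 * different mbuf, or NULL after freeing the whole chain, so callers
 * must keep using the returned pointer and re-derive any pointers
 * previously obtained via mtod().
 */
static struct mbuf *
exthdr_pullup(struct mbuf *m, int off, int len)
{
	if (m->m_len < off + len) {
		m = m_pullup(m, off + len);
		/* On failure the chain is already freed; just return NULL. */
	}
	return (m);
}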
diff --git a/freebsd/sys/netinet6/ip6_mroute.c b/freebsd/sys/netinet6/ip6_mroute.c
index 9dee53b0..437d6da7 100644
--- a/freebsd/sys/netinet6/ip6_mroute.c
+++ b/freebsd/sys/netinet6/ip6_mroute.c
@@ -1722,12 +1722,10 @@ pim6_input(struct mbuf *m, int off, int proto, void *arg __unused)
PIM6STAT_INC(pim6s_rcv_total);
- ip6 = mtod(m, struct ip6_hdr *);
- pimlen = m->m_pkthdr.len - off;
-
/*
* Validate lengths
*/
+ pimlen = m->m_pkthdr.len - off;
if (pimlen < PIM_MINLEN) {
PIM6STAT_INC(pim6s_rcv_tooshort);
MRT6_DLOG(DEBUG_PIM, "PIM packet too short");
@@ -1749,20 +1747,15 @@ pim6_input(struct mbuf *m, int off, int proto, void *arg __unused)
	 * Make sure that the IP6 and PIM headers are in contiguous memory, and
* possibly the PIM REGISTER header
*/
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, minlen, IPPROTO_DONE);
- /* adjust pointer */
+ if (m->m_len < off + minlen) {
+ m = m_pullup(m, off + minlen);
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ return (IPPROTO_DONE);
+ }
+ }
ip6 = mtod(m, struct ip6_hdr *);
-
- /* adjust mbuf to point to the PIM header */
pim = (struct pim *)((caddr_t)ip6 + off);
-#else
- IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen);
- if (pim == NULL) {
- PIM6STAT_INC(pim6s_rcv_tooshort);
- return (IPPROTO_DONE);
- }
-#endif
#define PIM6_CHECKSUM
#ifdef PIM6_CHECKSUM
diff --git a/freebsd/sys/netinet6/ip6_output.c b/freebsd/sys/netinet6/ip6_output.c
index e941ac49..73312ca6 100644
--- a/freebsd/sys/netinet6/ip6_output.c
+++ b/freebsd/sys/netinet6/ip6_output.c
@@ -968,6 +968,7 @@ passout:
in_pcboutput_txrtlmt(inp, ifp, m);
/* stamp send tag on mbuf */
m->m_pkthdr.snd_tag = inp->inp_snd_tag;
+ m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
} else {
m->m_pkthdr.snd_tag = NULL;
}
@@ -1083,6 +1084,7 @@ sendorfree:
in_pcboutput_txrtlmt(inp, ifp, m);
/* stamp send tag on mbuf */
m->m_pkthdr.snd_tag = inp->inp_snd_tag;
+ m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
} else {
m->m_pkthdr.snd_tag = NULL;
}
@@ -1421,7 +1423,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
{
int optdatalen, uproto;
void *optdata;
- struct inpcb *in6p = sotoinpcb(so);
+ struct inpcb *inp = sotoinpcb(so);
int error, optval;
int level, op, optname;
int optlen;
@@ -1456,43 +1458,43 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
sopt->sopt_dir == SOPT_SET) {
switch (sopt->sopt_name) {
case SO_REUSEADDR:
- INP_WLOCK(in6p);
+ INP_WLOCK(inp);
if ((so->so_options & SO_REUSEADDR) != 0)
- in6p->inp_flags2 |= INP_REUSEADDR;
+ inp->inp_flags2 |= INP_REUSEADDR;
else
- in6p->inp_flags2 &= ~INP_REUSEADDR;
- INP_WUNLOCK(in6p);
+ inp->inp_flags2 &= ~INP_REUSEADDR;
+ INP_WUNLOCK(inp);
error = 0;
break;
case SO_REUSEPORT:
- INP_WLOCK(in6p);
+ INP_WLOCK(inp);
if ((so->so_options & SO_REUSEPORT) != 0)
- in6p->inp_flags2 |= INP_REUSEPORT;
+ inp->inp_flags2 |= INP_REUSEPORT;
else
- in6p->inp_flags2 &= ~INP_REUSEPORT;
- INP_WUNLOCK(in6p);
+ inp->inp_flags2 &= ~INP_REUSEPORT;
+ INP_WUNLOCK(inp);
error = 0;
break;
case SO_REUSEPORT_LB:
- INP_WLOCK(in6p);
+ INP_WLOCK(inp);
if ((so->so_options & SO_REUSEPORT_LB) != 0)
- in6p->inp_flags2 |= INP_REUSEPORT_LB;
+ inp->inp_flags2 |= INP_REUSEPORT_LB;
else
- in6p->inp_flags2 &= ~INP_REUSEPORT_LB;
- INP_WUNLOCK(in6p);
+ inp->inp_flags2 &= ~INP_REUSEPORT_LB;
+ INP_WUNLOCK(inp);
error = 0;
break;
case SO_SETFIB:
- INP_WLOCK(in6p);
- in6p->inp_inc.inc_fibnum = so->so_fibnum;
- INP_WUNLOCK(in6p);
+ INP_WLOCK(inp);
+ inp->inp_inc.inc_fibnum = so->so_fibnum;
+ INP_WUNLOCK(inp);
error = 0;
break;
case SO_MAX_PACING_RATE:
#ifdef RATELIMIT
- INP_WLOCK(in6p);
- in6p->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
- INP_WUNLOCK(in6p);
+ INP_WLOCK(inp);
+ inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
+ INP_WUNLOCK(inp);
error = 0;
#else
error = EOPNOTSUPP;
@@ -1526,7 +1528,7 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
error = soopt_mcopyin(sopt, m); /* XXX */
if (error != 0)
break;
- error = ip6_pcbopts(&in6p->in6p_outputopts,
+ error = ip6_pcbopts(&inp->in6p_outputopts,
m, so, sopt);
m_freem(m); /* XXX */
break;
@@ -1597,57 +1599,57 @@ ip6_ctloutput(struct socket *so, struct sockopt *sopt)
error = EINVAL;
else {
/* -1 = kernel default */
- in6p->in6p_hops = optval;
- if ((in6p->inp_vflag &
+ inp->in6p_hops = optval;
+ if ((inp->inp_vflag &
INP_IPV4) != 0)
- in6p->inp_ip_ttl = optval;
+ inp->inp_ip_ttl = optval;
}
break;
#define OPTSET(bit) \
do { \
- INP_WLOCK(in6p); \
+ INP_WLOCK(inp); \
if (optval) \
- in6p->inp_flags |= (bit); \
+ inp->inp_flags |= (bit); \
else \
- in6p->inp_flags &= ~(bit); \
- INP_WUNLOCK(in6p); \
+ inp->inp_flags &= ~(bit); \
+ INP_WUNLOCK(inp); \
} while (/*CONSTCOND*/ 0)
#define OPTSET2292(bit) \
do { \
- INP_WLOCK(in6p); \
- in6p->inp_flags |= IN6P_RFC2292; \
+ INP_WLOCK(inp); \
+ inp->inp_flags |= IN6P_RFC2292; \
if (optval) \
- in6p->inp_flags |= (bit); \
+ inp->inp_flags |= (bit); \
else \
- in6p->inp_flags &= ~(bit); \
- INP_WUNLOCK(in6p); \
+ inp->inp_flags &= ~(bit); \
+ INP_WUNLOCK(inp); \
} while (/*CONSTCOND*/ 0)
-#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
+#define OPTBIT(bit) (inp->inp_flags & (bit) ? 1 : 0)
#define OPTSET2_N(bit, val) do { \
if (val) \
- in6p->inp_flags2 |= bit; \
+ inp->inp_flags2 |= bit; \
else \
- in6p->inp_flags2 &= ~bit; \
+ inp->inp_flags2 &= ~bit; \
} while (0)
#define OPTSET2(bit, val) do { \
- INP_WLOCK(in6p); \
+ INP_WLOCK(inp); \
OPTSET2_N(bit, val); \
- INP_WUNLOCK(in6p); \
+ INP_WUNLOCK(inp); \
} while (0)
-#define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0)
+#define OPTBIT2(bit) (inp->inp_flags2 & (bit) ? 1 : 0)
#define OPTSET2292_EXCLUSIVE(bit) \
do { \
- INP_WLOCK(in6p); \
+ INP_WLOCK(inp); \
if (OPTBIT(IN6P_RFC2292)) { \
error = EINVAL; \
} else { \
if (optval) \
- in6p->inp_flags |= (bit); \
+ inp->inp_flags |= (bit); \
else \
- in6p->inp_flags &= ~(bit); \
+ inp->inp_flags &= ~(bit); \
} \
- INP_WUNLOCK(in6p); \
+ INP_WUNLOCK(inp); \
} while (/*CONSTCOND*/ 0)
case IPV6_RECVPKTINFO:
@@ -1663,17 +1665,17 @@ do { \
error = EINVAL;
break;
}
- INP_WLOCK(in6p);
- if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- INP_WUNLOCK(in6p);
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(inp);
return (ECONNRESET);
}
- optp = &in6p->in6p_outputopts;
+ optp = &inp->in6p_outputopts;
error = ip6_pcbopt(IPV6_HOPLIMIT,
(u_char *)&optval, sizeof(optval),
optp, (td != NULL) ? td->td_ucred :
NULL, uproto);
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
break;
}
@@ -1724,16 +1726,16 @@ do { \
* available only prior to bind(2).
* see ipng mailing list, Jun 22 2001.
*/
- if (in6p->inp_lport ||
- !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
+ if (inp->inp_lport ||
+ !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
error = EINVAL;
break;
}
OPTSET(IN6P_IPV6_V6ONLY);
if (optval)
- in6p->inp_vflag &= ~INP_IPV4;
+ inp->inp_vflag &= ~INP_IPV4;
else
- in6p->inp_vflag |= INP_IPV4;
+ inp->inp_vflag |= INP_IPV4;
break;
case IPV6_RECVTCLASS:
/* cannot mix with RFC2292 XXX */
@@ -1757,10 +1759,10 @@ do { \
case IPV6_RSS_LISTEN_BUCKET:
if ((optval >= 0) &&
(optval < rss_getnumbuckets())) {
- INP_WLOCK(in6p);
- in6p->inp_rss_listen_bucket = optval;
+ INP_WLOCK(inp);
+ inp->inp_rss_listen_bucket = optval;
OPTSET2_N(INP_RSS_BUCKET_SET, 1);
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
} else {
error = EINVAL;
}
@@ -1783,17 +1785,17 @@ do { \
break;
{
struct ip6_pktopts **optp;
- INP_WLOCK(in6p);
- if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- INP_WUNLOCK(in6p);
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(inp);
return (ECONNRESET);
}
- optp = &in6p->in6p_outputopts;
+ optp = &inp->in6p_outputopts;
error = ip6_pcbopt(optname,
(u_char *)&optval, sizeof(optval),
optp, (td != NULL) ? td->td_ucred :
NULL, uproto);
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
break;
}
@@ -1875,16 +1877,16 @@ do { \
break;
optlen = sopt->sopt_valsize;
optbuf = optbuf_storage;
- INP_WLOCK(in6p);
- if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
- INP_WUNLOCK(in6p);
+ INP_WLOCK(inp);
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+ INP_WUNLOCK(inp);
return (ECONNRESET);
}
- optp = &in6p->in6p_outputopts;
+ optp = &inp->in6p_outputopts;
error = ip6_pcbopt(optname, optbuf, optlen,
optp, (td != NULL) ? td->td_ucred : NULL,
uproto);
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
break;
}
#undef OPTSET
@@ -1901,7 +1903,7 @@ do { \
case MCAST_LEAVE_GROUP:
case MCAST_JOIN_SOURCE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
- error = ip6_setmoptions(in6p, sopt);
+ error = ip6_setmoptions(inp, sopt);
break;
case IPV6_PORTRANGE:
@@ -1910,34 +1912,34 @@ do { \
if (error)
break;
- INP_WLOCK(in6p);
+ INP_WLOCK(inp);
switch (optval) {
case IPV6_PORTRANGE_DEFAULT:
- in6p->inp_flags &= ~(INP_LOWPORT);
- in6p->inp_flags &= ~(INP_HIGHPORT);
+ inp->inp_flags &= ~(INP_LOWPORT);
+ inp->inp_flags &= ~(INP_HIGHPORT);
break;
case IPV6_PORTRANGE_HIGH:
- in6p->inp_flags &= ~(INP_LOWPORT);
- in6p->inp_flags |= INP_HIGHPORT;
+ inp->inp_flags &= ~(INP_LOWPORT);
+ inp->inp_flags |= INP_HIGHPORT;
break;
case IPV6_PORTRANGE_LOW:
- in6p->inp_flags &= ~(INP_HIGHPORT);
- in6p->inp_flags |= INP_LOWPORT;
+ inp->inp_flags &= ~(INP_HIGHPORT);
+ inp->inp_flags |= INP_LOWPORT;
break;
default:
error = EINVAL;
break;
}
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
break;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
case IPV6_IPSEC_POLICY:
if (IPSEC_ENABLED(ipv6)) {
- error = IPSEC_PCBCTL(ipv6, in6p, sopt);
+ error = IPSEC_PCBCTL(ipv6, inp, sopt);
break;
}
/* FALLTHROUGH */
@@ -2005,7 +2007,7 @@ do { \
break;
case IPV6_UNICAST_HOPS:
- optval = in6p->in6p_hops;
+ optval = inp->in6p_hops;
break;
case IPV6_RECVPKTINFO:
@@ -2031,7 +2033,7 @@ do { \
case IPV6_PORTRANGE:
{
int flags;
- flags = in6p->inp_flags;
+ flags = inp->inp_flags;
if (flags & INP_HIGHPORT)
optval = IPV6_PORTRANGE_HIGH;
else if (flags & INP_LOWPORT)
@@ -2057,11 +2059,11 @@ do { \
break;
case IPV6_FLOWID:
- optval = in6p->inp_flowid;
+ optval = inp->inp_flowid;
break;
case IPV6_FLOWTYPE:
- optval = in6p->inp_flowtype;
+ optval = inp->inp_flowtype;
break;
case IPV6_RECVFLOWID:
@@ -2070,8 +2072,8 @@ do { \
#ifdef RSS
case IPV6_RSSBUCKETID:
retval =
- rss_hash2bucket(in6p->inp_flowid,
- in6p->inp_flowtype,
+ rss_hash2bucket(inp->inp_flowid,
+ inp->inp_flowtype,
&rss_bucket);
if (retval == 0)
optval = rss_bucket;
@@ -2107,12 +2109,12 @@ do { \
			 * XXX: we do not consider the case of source
* routing, or optional information to specify
* the outgoing interface.
- * Copy faddr out of in6p to avoid holding lock
+ * Copy faddr out of inp to avoid holding lock
* on inp during route lookup.
*/
- INP_RLOCK(in6p);
- bcopy(&in6p->in6p_faddr, &addr, sizeof(addr));
- INP_RUNLOCK(in6p);
+ INP_RLOCK(inp);
+ bcopy(&inp->in6p_faddr, &addr, sizeof(addr));
+ INP_RUNLOCK(inp);
error = ip6_getpmtu_ctl(so->so_fibnum,
&addr, &pmtu);
if (error)
@@ -2164,20 +2166,20 @@ do { \
case IPV6_DONTFRAG:
case IPV6_USE_MIN_MTU:
case IPV6_PREFER_TEMPADDR:
- error = ip6_getpcbopt(in6p, optname, sopt);
+ error = ip6_getpcbopt(inp, optname, sopt);
break;
case IPV6_MULTICAST_IF:
case IPV6_MULTICAST_HOPS:
case IPV6_MULTICAST_LOOP:
case IPV6_MSFILTER:
- error = ip6_getmoptions(in6p, sopt);
+ error = ip6_getmoptions(inp, sopt);
break;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
case IPV6_IPSEC_POLICY:
if (IPSEC_ENABLED(ipv6)) {
- error = IPSEC_PCBCTL(ipv6, in6p, sopt);
+ error = IPSEC_PCBCTL(ipv6, inp, sopt);
break;
}
/* FALLTHROUGH */
@@ -2197,7 +2199,7 @@ ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
{
int error = 0, optval, optlen;
const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
- struct inpcb *in6p = sotoinpcb(so);
+ struct inpcb *inp = sotoinpcb(so);
int level, op, optname;
level = sopt->sopt_level;
@@ -2240,14 +2242,14 @@ ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
if (optval != icmp6off)
error = EINVAL;
} else
- in6p->in6p_cksum = optval;
+ inp->in6p_cksum = optval;
break;
case SOPT_GET:
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
optval = icmp6off;
else
- optval = in6p->in6p_cksum;
+ optval = inp->in6p_cksum;
error = sooptcopyout(sopt, &optval, sizeof(optval));
break;
@@ -2346,16 +2348,16 @@ ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
#define GET_PKTOPT_VAR(field, lenexpr) do { \
if (pktopt && pktopt->field) { \
- INP_RUNLOCK(in6p); \
+ INP_RUNLOCK(inp); \
optdata = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK); \
malloc_optdata = true; \
- INP_RLOCK(in6p); \
- if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \
- INP_RUNLOCK(in6p); \
+ INP_RLOCK(inp); \
+ if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \
+ INP_RUNLOCK(inp); \
free(optdata, M_TEMP); \
return (ECONNRESET); \
} \
- pktopt = in6p->in6p_outputopts; \
+ pktopt = inp->in6p_outputopts; \
if (pktopt && pktopt->field) { \
optdatalen = min(lenexpr, sopt->sopt_valsize); \
bcopy(&pktopt->field, optdata, optdatalen); \
@@ -2374,7 +2376,7 @@ ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
pktopt->field->sa_len)
static int
-ip6_getpcbopt(struct inpcb *in6p, int optname, struct sockopt *sopt)
+ip6_getpcbopt(struct inpcb *inp, int optname, struct sockopt *sopt)
{
void *optdata = NULL;
bool malloc_optdata = false;
@@ -2386,8 +2388,8 @@ ip6_getpcbopt(struct inpcb *in6p, int optname, struct sockopt *sopt)
int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
struct ip6_pktopts *pktopt;
- INP_RLOCK(in6p);
- pktopt = in6p->in6p_outputopts;
+ INP_RLOCK(inp);
+ pktopt = inp->in6p_outputopts;
switch (optname) {
case IPV6_PKTINFO:
@@ -2447,10 +2449,10 @@ ip6_getpcbopt(struct inpcb *in6p, int optname, struct sockopt *sopt)
#ifdef DIAGNOSTIC
panic("ip6_getpcbopt: unexpected option\n");
#endif
- INP_RUNLOCK(in6p);
+ INP_RUNLOCK(inp);
return (ENOPROTOOPT);
}
- INP_RUNLOCK(in6p);
+ INP_RUNLOCK(inp);
error = sooptcopyout(sopt, optdata, optdatalen);
if (malloc_optdata)
@@ -3135,23 +3137,23 @@ ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
* Compute IPv6 extension header length.
*/
int
-ip6_optlen(struct inpcb *in6p)
+ip6_optlen(struct inpcb *inp)
{
int len;
- if (!in6p->in6p_outputopts)
+ if (!inp->in6p_outputopts)
return 0;
len = 0;
#define elen(x) \
(((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
- len += elen(in6p->in6p_outputopts->ip6po_hbh);
- if (in6p->in6p_outputopts->ip6po_rthdr)
+ len += elen(inp->in6p_outputopts->ip6po_hbh);
+ if (inp->in6p_outputopts->ip6po_rthdr)
/* dest1 is valid with rthdr only */
- len += elen(in6p->in6p_outputopts->ip6po_dest1);
- len += elen(in6p->in6p_outputopts->ip6po_rthdr);
- len += elen(in6p->in6p_outputopts->ip6po_dest2);
+ len += elen(inp->in6p_outputopts->ip6po_dest1);
+ len += elen(inp->in6p_outputopts->ip6po_rthdr);
+ len += elen(inp->in6p_outputopts->ip6po_dest2);
return len;
#undef elen
}
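For reference, the elen() macro in ip6_optlen() above encodes the standard IPv6 extension-header length rule: ip6e_len counts 8-octet units, not including the first 8 octets (AH is the exception; as the removed ip6_pullexthdr() code showed, its length is (ip6e_len + 2) 32-bit words). A tiny self-contained illustration of the generic case:

#include <stdio.h>

/*
 * Bytes occupied by a generic IPv6 extension header whose ip6e_len
 * field counts 8-octet units excluding the first unit.
 */
static int
ext_hdr_bytes(unsigned int ip6e_len)
{
	return ((ip6e_len + 1) << 3);
}

int
main(void)
{
	/* ip6e_len 0 -> 8 bytes, 1 -> 16 bytes, 2 -> 24 bytes. */
	printf("%d %d %d\n", ext_hdr_bytes(0), ext_hdr_bytes(1),
	    ext_hdr_bytes(2));
	return (0);
}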
diff --git a/freebsd/sys/netinet6/ip6_var.h b/freebsd/sys/netinet6/ip6_var.h
index b66f5cfb..05881f08 100644
--- a/freebsd/sys/netinet6/ip6_var.h
+++ b/freebsd/sys/netinet6/ip6_var.h
@@ -68,39 +68,27 @@
#include <sys/epoch.h>
+#ifdef _KERNEL
+struct ip6asfrag; /* frag6.c */
+TAILQ_HEAD(ip6fraghead, ip6asfrag);
+
/*
* IP6 reassembly queue structure. Each fragment
* being reassembled is attached to one of these structures.
*/
struct ip6q {
- struct ip6asfrag *ip6q_down;
- struct ip6asfrag *ip6q_up;
+ struct ip6fraghead ip6q_frags;
u_int32_t ip6q_ident;
u_int8_t ip6q_nxt;
u_int8_t ip6q_ecn;
u_int8_t ip6q_ttl;
struct in6_addr ip6q_src, ip6q_dst;
- struct ip6q *ip6q_next;
- struct ip6q *ip6q_prev;
+ TAILQ_ENTRY(ip6q) ip6q_tq;
int ip6q_unfrglen; /* len of unfragmentable part */
-#ifdef notyet
- u_char *ip6q_nxtp;
-#endif
int ip6q_nfrag; /* # of fragments */
struct label *ip6q_label;
};
-
-struct ip6asfrag {
- struct ip6asfrag *ip6af_down;
- struct ip6asfrag *ip6af_up;
- struct mbuf *ip6af_m;
- int ip6af_offset; /* offset in ip6af_m to next header */
- int ip6af_frglen; /* fragmentable part length */
- int ip6af_off; /* fragment offset */
- u_int16_t ip6af_mff; /* more fragment bit in frag off */
-};
-
-#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m))
+#endif /* _KERNEL */
/*
* IP6 reinjecting structure.
@@ -207,6 +195,7 @@ struct ip6stat {
uint64_t ip6s_localout; /* total ip packets generated here */
uint64_t ip6s_odropped; /* lost packets due to nobufs, etc. */
uint64_t ip6s_reassembled; /* total packets reassembled ok */
+ uint64_t ip6s_atomicfrags; /* atomic fragments */
uint64_t ip6s_fragmented; /* datagrams successfully fragmented */
uint64_t ip6s_ofragments; /* output fragments created */
uint64_t ip6s_cantfrag; /* don't fragment flag was set, etc. */
@@ -298,12 +287,6 @@ VNET_DECLARE(int, ip6_v6only);
VNET_DECLARE(struct socket *, ip6_mrouter); /* multicast routing daemon */
VNET_DECLARE(int, ip6_sendredirects); /* send IP redirects when forwarding? */
-VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly
- * queue */
-extern int ip6_maxfrags; /* Maximum fragments in reassembly
- * queue */
-VNET_DECLARE(int, ip6_maxfragbucketsize); /* Maximum reassembly queues per bucket */
-VNET_DECLARE(int, ip6_maxfragsperpacket); /* Maximum fragments per packet */
VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */
VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */
VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA
@@ -317,9 +300,6 @@ VNET_DECLARE(int, ip6_hdrnestlimit); /* upper limit of # of extension
VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */
#define V_ip6_mrouter VNET(ip6_mrouter)
#define V_ip6_sendredirects VNET(ip6_sendredirects)
-#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets)
-#define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize)
-#define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket)
#define V_ip6_accept_rtadv VNET(ip6_accept_rtadv)
#define V_ip6_no_radr VNET(ip6_no_radr)
#define V_ip6_norbit_raif VNET(ip6_norbit_raif)
@@ -406,8 +386,8 @@ int ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int,
int route6_input(struct mbuf **, int *, int);
-void frag6_set_bucketsize(void);
void frag6_init(void);
+void frag6_destroy(void);
int frag6_input(struct mbuf **, int *, int);
void frag6_slowtimo(void);
void frag6_drain(void);
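The ip6_var.h hunk above replaces the hand-maintained ip6q_up/ip6q_down and ip6q_next/ip6q_prev pointers with <sys/queue.h> tail queues. A hedged, userland-compilable sketch of the same shape is below; the member names are placeholders, not the real ip6asfrag layout, which now lives in frag6.c.

#include <sys/queue.h>

struct frag {				/* stand-in for struct ip6asfrag */
	TAILQ_ENTRY(frag) frag_tq;	/* linkage inside one reassembly queue */
	int frag_off;			/* fragment offset */
};
TAILQ_HEAD(fraghead, frag);		/* stand-in for struct ip6fraghead */

/* Walk the fragment list without touching any manual prev/next fields. */
static int
total_fragments(struct fraghead *head)
{
	struct frag *f;
	int n = 0;

	TAILQ_FOREACH(f, head, frag_tq)
		n++;
	return (n);
}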
diff --git a/freebsd/sys/netinet6/mld6.c b/freebsd/sys/netinet6/mld6.c
index a0d045d5..e7b400ae 100644
--- a/freebsd/sys/netinet6/mld6.c
+++ b/freebsd/sys/netinet6/mld6.c
@@ -1254,20 +1254,27 @@ out_locked:
* Return IPPROTO_DONE if we freed m. Otherwise, return 0.
*/
int
-mld_input(struct mbuf *m, int off, int icmp6len)
+mld_input(struct mbuf **mp, int off, int icmp6len)
{
struct ifnet *ifp;
struct ip6_hdr *ip6;
+ struct mbuf *m;
struct mld_hdr *mld;
int mldlen;
+ m = *mp;
CTR3(KTR_MLD, "%s: called w/mbuf (%p,%d)", __func__, m, off);
ifp = m->m_pkthdr.rcvif;
- ip6 = mtod(m, struct ip6_hdr *);
-
/* Pullup to appropriate size. */
+ if (m->m_len < off + sizeof(*mld)) {
+ m = m_pullup(m, off + sizeof(*mld));
+ if (m == NULL) {
+ ICMP6STAT_INC(icp6s_badlen);
+ return (IPPROTO_DONE);
+ }
+ }
mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
if (mld->mld_type == MLD_LISTENER_QUERY &&
icmp6len >= sizeof(struct mldv2_query)) {
@@ -1275,11 +1282,16 @@ mld_input(struct mbuf *m, int off, int icmp6len)
} else {
mldlen = sizeof(struct mld_hdr);
}
- IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
- if (mld == NULL) {
- ICMP6STAT_INC(icp6s_badlen);
- return (IPPROTO_DONE);
+ if (m->m_len < off + mldlen) {
+ m = m_pullup(m, off + mldlen);
+ if (m == NULL) {
+ ICMP6STAT_INC(icp6s_badlen);
+ return (IPPROTO_DONE);
+ }
}
+ *mp = m;
+ ip6 = mtod(m, struct ip6_hdr *);
+ mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
/*
* Userland needs to see all of this traffic for implementing
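The mld6.c hunk changes mld_input() to take a struct mbuf ** precisely because the internal m_pullup() calls may substitute a different mbuf; the caller has to observe the update through *mp. A caller-side sketch of that convention follows; the dispatcher name is illustrative only (it is not the actual icmp6_input() code), and kernel context with the mld6_var.h prototype is assumed.

/*
 * Illustrative caller: keep the local mbuf pointer in sync with what
 * mld_input() leaves in *mp.  Per the comment in mld6.c, a return of
 * IPPROTO_DONE means the mbuf was consumed or freed.
 */
static int
dispatch_mld(struct mbuf **mp, int off, int icmp6len)
{
	if (mld_input(mp, off, icmp6len) == IPPROTO_DONE) {
		*mp = NULL;	/* defensive: the mbuf is gone */
		return (IPPROTO_DONE);
	}
	/* *mp may now point at a different (pulled-up) mbuf. */
	return (0);
}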
diff --git a/freebsd/sys/netinet6/mld6_var.h b/freebsd/sys/netinet6/mld6_var.h
index 8dc2ffa4..0aedde27 100644
--- a/freebsd/sys/netinet6/mld6_var.h
+++ b/freebsd/sys/netinet6/mld6_var.h
@@ -167,7 +167,7 @@ struct mld_ifsoftc *
void mld_domifdetach(struct ifnet *);
void mld_fasttimo(void);
void mld_ifdetach(struct ifnet *, struct in6_multi_head *);
-int mld_input(struct mbuf *, int, int);
+int mld_input(struct mbuf **, int, int);
void mld_slowtimo(void);
#ifdef SYSCTL_DECL
diff --git a/freebsd/sys/netinet6/nd6.c b/freebsd/sys/netinet6/nd6.c
index 140dde59..aea8168e 100644
--- a/freebsd/sys/netinet6/nd6.c
+++ b/freebsd/sys/netinet6/nd6.c
@@ -117,7 +117,6 @@ VNET_DEFINE(int, nd6_debug) = 0;
static eventhandler_tag lle_event_eh, iflladdr_event_eh;
-VNET_DEFINE(struct nd_drhead, nd_defrouter);
VNET_DEFINE(struct nd_prhead, nd_prefix);
VNET_DEFINE(struct rwlock, nd6_lock);
VNET_DEFINE(uint64_t, nd6_list_genid);
@@ -147,9 +146,11 @@ static int nd6_need_cache(struct ifnet *);
VNET_DEFINE_STATIC(struct callout, nd6_slowtimo_ch);
#define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch)
-VNET_DEFINE(struct callout, nd6_timer_ch);
+VNET_DEFINE_STATIC(struct callout, nd6_timer_ch);
#define V_nd6_timer_ch VNET(nd6_timer_ch)
+SYSCTL_DECL(_net_inet6_icmp6);
+
static void
nd6_lle_event(void *arg __unused, struct llentry *lle, int evt)
{
@@ -219,7 +220,7 @@ nd6_init(void)
rw_init(&V_nd6_lock, "nd6 list");
LIST_INIT(&V_nd_prefix);
- TAILQ_INIT(&V_nd_defrouter);
+ nd6_defrouter_init();
/* Start timers. */
callout_init(&V_nd6_slowtimo_ch, 0);
@@ -894,27 +895,15 @@ void
nd6_timer(void *arg)
{
CURVNET_SET((struct vnet *) arg);
- struct nd_drhead drq;
struct nd_prhead prl;
- struct nd_defrouter *dr, *ndr;
struct nd_prefix *pr, *npr;
struct ifnet *ifp;
struct in6_ifaddr *ia6, *nia6;
uint64_t genid;
- TAILQ_INIT(&drq);
LIST_INIT(&prl);
- ND6_WLOCK();
- TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr)
- if (dr->expire && dr->expire < time_uptime)
- defrouter_unlink(dr, &drq);
- ND6_WUNLOCK();
-
- while ((dr = TAILQ_FIRST(&drq)) != NULL) {
- TAILQ_REMOVE(&drq, dr, dr_entry);
- defrouter_del(dr);
- }
+ nd6_defrouter_timer();
/*
* expire interface addresses.
@@ -1137,34 +1126,15 @@ regen_tmpaddr(struct in6_ifaddr *ia6)
void
nd6_purge(struct ifnet *ifp)
{
- struct nd_drhead drq;
struct nd_prhead prl;
- struct nd_defrouter *dr, *ndr;
struct nd_prefix *pr, *npr;
- TAILQ_INIT(&drq);
LIST_INIT(&prl);
- /*
- * Nuke default router list entries toward ifp.
- * We defer removal of default router list entries that is installed
- * in the routing table, in order to keep additional side effects as
- * small as possible.
- */
- ND6_WLOCK();
- TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
- if (dr->installed)
- continue;
- if (dr->ifp == ifp)
- defrouter_unlink(dr, &drq);
- }
- TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
- if (!dr->installed)
- continue;
- if (dr->ifp == ifp)
- defrouter_unlink(dr, &drq);
- }
+ /* Purge default router list entries toward ifp. */
+ nd6_defrouter_purge(ifp);
+ ND6_WLOCK();
/*
* Remove prefixes on ifp. We should have already removed addresses on
* this interface, so no addresses should be referencing these prefixes.
@@ -1175,11 +1145,7 @@ nd6_purge(struct ifnet *ifp)
}
ND6_WUNLOCK();
- /* Delete the unlinked router and prefix objects. */
- while ((dr = TAILQ_FIRST(&drq)) != NULL) {
- TAILQ_REMOVE(&drq, dr, dr_entry);
- defrouter_del(dr);
- }
+ /* Delete the unlinked prefix objects. */
while ((pr = LIST_FIRST(&prl)) != NULL) {
LIST_REMOVE(pr, ndpr_entry);
nd6_prefix_del(pr);
@@ -1365,7 +1331,7 @@ restart:
* as on-link, and thus, as a neighbor.
*/
if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV &&
- TAILQ_EMPTY(&V_nd_defrouter) &&
+ nd6_defrouter_list_empty() &&
V_nd6_defifindex == ifp->if_index) {
return (1);
}
@@ -1808,22 +1774,9 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
case SIOCSRTRFLUSH_IN6:
{
/* flush all the default routers */
- struct nd_drhead drq;
- struct nd_defrouter *dr;
-
- TAILQ_INIT(&drq);
defrouter_reset();
-
- ND6_WLOCK();
- while ((dr = TAILQ_FIRST(&V_nd_defrouter)) != NULL)
- defrouter_unlink(dr, &drq);
- ND6_WUNLOCK();
- while ((dr = TAILQ_FIRST(&drq)) != NULL) {
- TAILQ_REMOVE(&drq, dr, dr_entry);
- defrouter_del(dr);
- }
-
+ nd6_defrouter_flush_all();
defrouter_select();
break;
}
@@ -2367,13 +2320,7 @@ nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
}
}
if (lle == NULL) {
- if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
- m_freem(m);
- return (ENOBUFS);
- }
-
- if (m != NULL)
- m_freem(m);
+ m_freem(m);
return (ENOBUFS);
}
@@ -2616,59 +2563,6 @@ clear_llinfo_pqueue(struct llentry *ln)
ln->la_hold = NULL;
}
-static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
-static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
-
-SYSCTL_DECL(_net_inet6_icmp6);
-SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
- CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
- NULL, 0, nd6_sysctl_drlist, "S,in6_defrouter",
- "NDP default router list");
-SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
- CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
- NULL, 0, nd6_sysctl_prlist, "S,in6_prefix",
- "NDP prefix list");
-SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
-SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
- CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
-
-static int
-nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
-{
- struct in6_defrouter d;
- struct nd_defrouter *dr;
- int error;
-
- if (req->newptr != NULL)
- return (EPERM);
-
- error = sysctl_wire_old_buffer(req, 0);
- if (error != 0)
- return (error);
-
- bzero(&d, sizeof(d));
- d.rtaddr.sin6_family = AF_INET6;
- d.rtaddr.sin6_len = sizeof(d.rtaddr);
-
- ND6_RLOCK();
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
- d.rtaddr.sin6_addr = dr->rtaddr;
- error = sa6_recoverscope(&d.rtaddr);
- if (error != 0)
- break;
- d.flags = dr->raflags;
- d.rtlifetime = dr->rtlifetime;
- d.expire = dr->expire + (time_second - time_uptime);
- d.if_index = dr->ifp->if_index;
- error = SYSCTL_OUT(req, &d, sizeof(d));
- if (error != 0)
- break;
- }
- ND6_RUNLOCK();
- return (error);
-}
-
static int
nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
{
@@ -2742,3 +2636,11 @@ out:
ND6_RUNLOCK();
return (error);
}
+SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, nd6_sysctl_prlist, "S,in6_prefix",
+ "NDP prefix list");
+SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, "");
+SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), "");
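The default-router maintenance that nd6.c used to open-code in nd6_timer(), nd6_purge() and the SIOCSRTRFLUSH_IN6 handler — unlink entries onto a private queue while holding the write lock, then free them after dropping it — is now hidden behind the nd6_defrouter_*() helpers added in nd6_rtr.c. A generic sketch of that deferred-free idiom is below; the lock markers and the free step are placeholders for ND6_WLOCK()/ND6_WUNLOCK() and defrouter_del(), which do considerably more in the kernel.

#include <sys/queue.h>
#include <stdlib.h>

struct router {
	TAILQ_ENTRY(router) entry;
	int expired;
};
TAILQ_HEAD(rtrhead, router);

/*
 * Sketch: move expired entries to a local queue under the writer lock,
 * then perform the expensive teardown with the lock released.
 */
static void
expire_routers(struct rtrhead *global)
{
	struct rtrhead dead;
	struct router *r, *tmp;

	TAILQ_INIT(&dead);
	/* ND6_WLOCK() in the real code. */
	TAILQ_FOREACH_SAFE(r, global, entry, tmp) {
		if (r->expired) {
			TAILQ_REMOVE(global, r, entry);
			TAILQ_INSERT_TAIL(&dead, r, entry);
		}
	}
	/* ND6_WUNLOCK() in the real code. */

	while ((r = TAILQ_FIRST(&dead)) != NULL) {
		TAILQ_REMOVE(&dead, r, entry);
		free(r);	/* defrouter_del() in the real code */
	}
}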
diff --git a/freebsd/sys/netinet6/nd6.h b/freebsd/sys/netinet6/nd6.h
index cabfeec0..71c99b1b 100644
--- a/freebsd/sys/netinet6/nd6.h
+++ b/freebsd/sys/netinet6/nd6.h
@@ -329,7 +329,6 @@ VNET_DECLARE(int, nd6_mmaxtries);
VNET_DECLARE(int, nd6_useloopback);
VNET_DECLARE(int, nd6_maxnudhint);
VNET_DECLARE(int, nd6_gctimer);
-VNET_DECLARE(struct nd_drhead, nd_defrouter);
VNET_DECLARE(struct nd_prhead, nd_prefix);
VNET_DECLARE(int, nd6_debug);
VNET_DECLARE(int, nd6_onlink_ns_rfc4861);
@@ -340,7 +339,6 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861);
#define V_nd6_useloopback VNET(nd6_useloopback)
#define V_nd6_maxnudhint VNET(nd6_maxnudhint)
#define V_nd6_gctimer VNET(nd6_gctimer)
-#define V_nd_defrouter VNET(nd_defrouter)
#define V_nd_prefix VNET(nd_prefix)
#define V_nd6_debug VNET(nd6_debug)
#define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861)
@@ -470,6 +468,8 @@ void nd6_dad_stop(struct ifaddr *);
/* nd6_rtr.c */
void nd6_rs_input(struct mbuf *, int, int);
void nd6_ra_input(struct mbuf *, int, int);
+struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
+struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *);
void defrouter_reset(void);
void defrouter_select_fib(int fibnum);
void defrouter_select(void);
@@ -478,6 +478,11 @@ void defrouter_rele(struct nd_defrouter *);
bool defrouter_remove(struct in6_addr *, struct ifnet *);
void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *);
void defrouter_del(struct nd_defrouter *);
+bool nd6_defrouter_list_empty(void);
+void nd6_defrouter_flush_all(void);
+void nd6_defrouter_purge(struct ifnet *);
+void nd6_defrouter_timer(void);
+void nd6_defrouter_init(void);
int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *,
struct nd_prefix **);
void nd6_prefix_unlink(struct nd_prefix *, struct nd_prhead *);
@@ -487,8 +492,6 @@ void nd6_prefix_rele(struct nd_prefix *);
int nd6_prefix_onlink(struct nd_prefix *);
int nd6_prefix_offlink(struct nd_prefix *);
void pfxlist_onlink_check(void);
-struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
-struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *);
struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *);
void rt6_flush(struct in6_addr *, struct ifnet *);
int nd6_setdefaultiface(int);
diff --git a/freebsd/sys/netinet6/nd6_nbr.c b/freebsd/sys/netinet6/nd6_nbr.c
index 49810020..634eea06 100644
--- a/freebsd/sys/netinet6/nd6_nbr.c
+++ b/freebsd/sys/netinet6/nd6_nbr.c
@@ -122,53 +122,53 @@ VNET_DEFINE_STATIC(int, dad_maxtry) = 15; /* max # of *tries* to
void
nd6_ns_input(struct mbuf *m, int off, int icmp6len)
{
- struct ifnet *ifp = m->m_pkthdr.rcvif;
- struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct ifnet *ifp;
+ struct ip6_hdr *ip6;
struct nd_neighbor_solicit *nd_ns;
- struct in6_addr saddr6 = ip6->ip6_src;
- struct in6_addr daddr6 = ip6->ip6_dst;
- struct in6_addr taddr6;
- struct in6_addr myaddr6;
- char *lladdr = NULL;
- struct ifaddr *ifa = NULL;
- int lladdrlen = 0;
- int anycast = 0, proxy = 0, tentative = 0;
- int tlladdr;
- int rflag;
- union nd_opts ndopts;
+ struct in6_addr daddr6, myaddr6, saddr6, taddr6;
+ struct ifaddr *ifa;
struct sockaddr_dl proxydl;
+ union nd_opts ndopts;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+ char *lladdr;
+ int anycast, lladdrlen, proxy, rflag, tentative, tlladdr;
+
+ ifa = NULL;
/* RFC 6980: Nodes MUST silently ignore fragments */
if(m->m_flags & M_FRAGMENTED)
goto freeit;
- rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0;
- if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif)
- rflag = 0;
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, icmp6len,);
- nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off);
-#else
- IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len);
- if (nd_ns == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return;
- }
-#endif
- ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */
- taddr6 = nd_ns->nd_ns_target;
- if (in6_setscope(&taddr6, ifp, NULL) != 0)
- goto bad;
-
+ ifp = m->m_pkthdr.rcvif;
+ ip6 = mtod(m, struct ip6_hdr *);
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n",
ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
- goto bad;
+ goto bads;
}
+ if (m->m_len < off + icmp6len) {
+ m = m_pullup(m, off + icmp6len);
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ return;
+ }
+ }
+ ip6 = mtod(m, struct ip6_hdr *);
+ nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off);
+
+ saddr6 = ip6->ip6_src;
+ daddr6 = ip6->ip6_dst;
+ taddr6 = nd_ns->nd_ns_target;
+ if (in6_setscope(&taddr6, ifp, NULL) != 0)
+ goto bad;
+
+ rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0;
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif)
+ rflag = 0;
+
if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
/* dst has to be a solicited node multicast address. */
if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL &&
@@ -216,6 +216,8 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
goto freeit;
}
+ lladdr = NULL;
+ lladdrlen = 0;
if (ndopts.nd_opts_src_lladdr) {
lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
@@ -255,6 +257,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/* (2) check. */
+ proxy = 0;
if (ifa == NULL) {
struct sockaddr_dl rt_gateway;
struct rt_addrinfo info;
@@ -381,6 +384,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
ip6_sprintf(ip6bufs, &daddr6)));
nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n",
ip6_sprintf(ip6bufs, &taddr6)));
+ bads:
ICMP6STAT_INC(icp6s_badns);
if (ifa != NULL)
ifa_free(ifa);
@@ -615,32 +619,32 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6,
void
nd6_na_input(struct mbuf *m, int off, int icmp6len)
{
- struct ifnet *ifp = m->m_pkthdr.rcvif;
- struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
- struct nd_neighbor_advert *nd_na;
- struct in6_addr daddr6 = ip6->ip6_dst;
- struct in6_addr taddr6;
- int flags;
- int is_router;
- int is_solicited;
- int is_override;
- char *lladdr = NULL;
- int lladdrlen = 0;
- int checklink = 0;
+ struct ifnet *ifp;
+ struct ip6_hdr *ip6;
struct ifaddr *ifa;
- struct llentry *ln = NULL;
- union nd_opts ndopts;
- struct mbuf *chain = NULL;
+ struct llentry *ln;
+ struct mbuf *chain;
+ struct nd_neighbor_advert *nd_na;
+ struct in6_addr daddr6, taddr6;
struct sockaddr_in6 sin6;
+ union nd_opts ndopts;
u_char linkhdr[LLE_MAX_LINKHDR];
- size_t linkhdrsize;
- int lladdr_off;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+ char *lladdr;
+ size_t linkhdrsize;
+ int flags, is_override, is_router, is_solicited;
+ int lladdr_off, lladdrlen, checklink;
+
+ chain = NULL;
+ ln = NULL;
+ checklink = 0;
/* RFC 6980: Nodes MUST silently ignore fragments */
if(m->m_flags & M_FRAGMENTED)
goto freeit;
+ ifp = m->m_pkthdr.rcvif;
+ ip6 = mtod(m, struct ip6_hdr *);
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_na_input: invalid hlim (%d) from %s to %s on %s\n",
@@ -649,22 +653,20 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
goto bad;
}
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, icmp6len,);
- nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off);
-#else
- IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len);
- if (nd_na == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return;
+ if (m->m_len < off + icmp6len) {
+ m = m_pullup(m, off + icmp6len);
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ return;
+ }
}
-#endif
+ ip6 = mtod(m, struct ip6_hdr *);
+ nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off);
flags = nd_na->nd_na_flags_reserved;
is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);
- memset(&sin6, 0, sizeof(sin6));
taddr6 = nd_na->nd_na_target;
if (in6_setscope(&taddr6, ifp, NULL))
@@ -676,6 +678,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
ip6_sprintf(ip6bufs, &taddr6)));
goto bad;
}
+
+ daddr6 = ip6->ip6_dst;
if (IN6_IS_ADDR_MULTICAST(&daddr6))
if (is_solicited) {
nd6log((LOG_ERR,
@@ -692,6 +696,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
goto freeit;
}
+ lladdr = NULL;
+ lladdrlen = 0;
if (ndopts.nd_opts_tgt_lladdr) {
lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
@@ -889,8 +895,10 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* rt->rt_flags &= ~RTF_REJECT;
*/
ln->la_asked = 0;
- if (ln->la_hold != NULL)
+ if (ln->la_hold != NULL) {
+ memset(&sin6, 0, sizeof(sin6));
nd6_grab_holdchain(ln, &chain, &sin6);
+ }
freeit:
if (ln != NULL)
LLE_WUNLOCK(ln);
diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c
index a60e7c66..9dddedf4 100644
--- a/freebsd/sys/netinet6/nd6_rtr.c
+++ b/freebsd/sys/netinet6/nd6_rtr.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/errno.h>
#include <sys/rmlock.h>
#include <sys/rwlock.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
@@ -74,24 +75,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/icmp6.h>
#include <netinet6/scope6_var.h>
-static int rtpref(struct nd_defrouter *);
static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
static int prelist_update(struct nd_prefixctl *, struct nd_defrouter *,
struct mbuf *, int);
-static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int);
-static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *,
- struct nd_defrouter *);
-static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
-static void pfxrtr_del(struct nd_pfxrouter *);
-static struct nd_pfxrouter *find_pfxlist_reachable_router(struct nd_prefix *);
-static void defrouter_delreq(struct nd_defrouter *);
-static void nd6_rtmsg(int, struct rtentry *);
-static int in6_init_prefix_ltimes(struct nd_prefix *);
-static void in6_init_address_ltimes(struct nd_prefix *,
- struct in6_addrlifetime *);
-
-static int rt6_deleteroute(const struct rtentry *, void *);
+VNET_DEFINE_STATIC(struct nd_drhead, nd6_defrouter);
+#define V_nd6_defrouter VNET(nd6_defrouter)
VNET_DECLARE(int, nd6_recalc_reachtm_interval);
#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval)
@@ -108,6 +97,8 @@ VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME;
VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;
+SYSCTL_DECL(_net_inet6_icmp6);
+
/* RTPREF_MEDIUM has to be 0! */
#define RTPREF_HIGH 1
#define RTPREF_MEDIUM 0
@@ -115,6 +106,37 @@ VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;
#define RTPREF_RESERVED (-2)
#define RTPREF_INVALID (-3) /* internal */
+void
+defrouter_ref(struct nd_defrouter *dr)
+{
+
+ refcount_acquire(&dr->refcnt);
+}
+
+void
+defrouter_rele(struct nd_defrouter *dr)
+{
+
+ if (refcount_release(&dr->refcnt))
+ free(dr, M_IP6NDP);
+}
+
+/*
+ * Remove a router from the global list and optionally stash it in a
+ * caller-supplied queue.
+ */
+void
+defrouter_unlink(struct nd_defrouter *dr, struct nd_drhead *drq)
+{
+
+ ND6_WLOCK_ASSERT();
+
+ TAILQ_REMOVE(&V_nd6_defrouter, dr, dr_entry);
+ V_nd6_list_genid++;
+ if (drq != NULL)
+ TAILQ_INSERT_TAIL(drq, dr, dr_entry);
+}
+
/*
* Receive Router Solicitation Message - just for routers.
* Router solicitation/advertisement is mostly managed by userland program
@@ -125,14 +147,16 @@ VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;
void
nd6_rs_input(struct mbuf *m, int off, int icmp6len)
{
- struct ifnet *ifp = m->m_pkthdr.rcvif;
- struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct ifnet *ifp;
+ struct ip6_hdr *ip6;
struct nd_router_solicit *nd_rs;
- struct in6_addr saddr6 = ip6->ip6_src;
- char *lladdr = NULL;
- int lladdrlen = 0;
+ struct in6_addr saddr6;
union nd_opts ndopts;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+ char *lladdr;
+ int lladdrlen;
+
+ ifp = m->m_pkthdr.rcvif;
/*
* Accept RS only when V_ip6_forwarding=1 and the interface has
@@ -146,9 +170,10 @@ nd6_rs_input(struct mbuf *m, int off, int icmp6len)
goto freeit;
/* Sanity checks */
+ ip6 = mtod(m, struct ip6_hdr *);
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
- "nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
+ "%s: invalid hlim (%d) from %s to %s on %s\n", __func__,
ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
@@ -158,29 +183,31 @@ nd6_rs_input(struct mbuf *m, int off, int icmp6len)
* Don't update the neighbor cache, if src = ::.
* This indicates that the src has no IP address assigned yet.
*/
+ saddr6 = ip6->ip6_src;
if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
goto freeit;
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, icmp6len,);
- nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
-#else
- IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
- if (nd_rs == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return;
+ if (m->m_len < off + icmp6len) {
+ m = m_pullup(m, off + icmp6len);
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ return;
+ }
}
-#endif
+ ip6 = mtod(m, struct ip6_hdr *);
+ nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
icmp6len -= sizeof(*nd_rs);
nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
if (nd6_options(&ndopts) < 0) {
nd6log((LOG_INFO,
- "nd6_rs_input: invalid ND option, ignored\n"));
+ "%s: invalid ND option, ignored\n", __func__));
/* nd6_options have incremented stats */
goto freeit;
}
+ lladdr = NULL;
+ lladdrlen = 0;
if (ndopts.nd_opts_src_lladdr) {
lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
@@ -188,9 +215,8 @@ nd6_rs_input(struct mbuf *m, int off, int icmp6len)
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO,
- "nd6_rs_input: lladdrlen mismatch for %s "
- "(if %d, RS packet %d)\n",
- ip6_sprintf(ip6bufs, &saddr6),
+ "%s: lladdrlen mismatch for %s (if %d, RS packet %d)\n",
+ __func__, ip6_sprintf(ip6bufs, &saddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
@@ -216,22 +242,22 @@ nd6_rs_input(struct mbuf *m, int off, int icmp6len)
void
nd6_ra_input(struct mbuf *m, int off, int icmp6len)
{
- struct ifnet *ifp = m->m_pkthdr.rcvif;
- struct nd_ifinfo *ndi = ND_IFINFO(ifp);
- struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct ifnet *ifp;
+ struct nd_ifinfo *ndi;
+ struct ip6_hdr *ip6;
struct nd_router_advert *nd_ra;
- struct in6_addr saddr6 = ip6->ip6_src;
- int mcast = 0;
- union nd_opts ndopts;
+ struct in6_addr saddr6;
struct nd_defrouter *dr;
+ union nd_opts ndopts;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
-
- dr = NULL;
+ int mcast;
/*
	 * We accept RAs only when the per-interface flag
* ND6_IFF_ACCEPT_RTADV is on the receiving interface.
*/
+ ifp = m->m_pkthdr.rcvif;
+ ndi = ND_IFINFO(ifp);
if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
goto freeit;
@@ -239,41 +265,44 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
if(m->m_flags & M_FRAGMENTED)
goto freeit;
+ ip6 = mtod(m, struct ip6_hdr *);
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
- "nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
+ "%s: invalid hlim (%d) from %s to %s on %s\n", __func__,
ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
+ saddr6 = ip6->ip6_src;
if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
nd6log((LOG_ERR,
- "nd6_ra_input: src %s is not link-local\n",
+ "%s: src %s is not link-local\n", __func__,
ip6_sprintf(ip6bufs, &saddr6)));
goto bad;
}
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, icmp6len,);
- nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
-#else
- IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
- if (nd_ra == NULL) {
- ICMP6STAT_INC(icp6s_tooshort);
- return;
+ if (m->m_len < off + icmp6len) {
+ m = m_pullup(m, off + icmp6len);
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ return;
+ }
}
-#endif
+ ip6 = mtod(m, struct ip6_hdr *);
+ nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
icmp6len -= sizeof(*nd_ra);
nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
if (nd6_options(&ndopts) < 0) {
nd6log((LOG_INFO,
- "nd6_ra_input: invalid ND option, ignored\n"));
+ "%s: invalid ND option, ignored\n", __func__));
/* nd6_options have incremented stats */
goto freeit;
}
+ mcast = 0;
+ dr = NULL;
{
struct nd_defrouter dr0;
u_int32_t advreachable = nd_ra->nd_ra_reachable;
@@ -341,26 +370,25 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
if (pi->nd_opt_pi_len != 4) {
nd6log((LOG_INFO,
- "nd6_ra_input: invalid option "
- "len %d for prefix information option, "
- "ignored\n", pi->nd_opt_pi_len));
+ "%s: invalid option len %d for prefix "
+ "information option, ignored\n", __func__,
+ pi->nd_opt_pi_len));
continue;
}
if (128 < pi->nd_opt_pi_prefix_len) {
nd6log((LOG_INFO,
- "nd6_ra_input: invalid prefix "
- "len %d for prefix information option, "
- "ignored\n", pi->nd_opt_pi_prefix_len));
+ "%s: invalid prefix len %d for prefix "
+ "information option, ignored\n", __func__,
+ pi->nd_opt_pi_prefix_len));
continue;
}
if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
|| IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
nd6log((LOG_INFO,
- "nd6_ra_input: invalid prefix "
- "%s, ignored\n",
- ip6_sprintf(ip6bufs,
+ "%s: invalid prefix %s, ignored\n",
+ __func__, ip6_sprintf(ip6bufs,
&pi->nd_opt_pi_prefix)));
continue;
}
@@ -397,8 +425,8 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
/* lower bound */
if (mtu < IPV6_MMTU) {
- nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
- "mtu=%lu sent from %s, ignoring\n",
+ nd6log((LOG_INFO, "%s: bogus mtu option mtu=%lu sent "
+ "from %s, ignoring\n", __func__,
mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
goto skip;
}
@@ -416,9 +444,8 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
rt_updatemtu(ifp);
}
} else {
- nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
- "mtu=%lu sent from %s; "
- "exceeds maxmtu %lu, ignoring\n",
+ nd6log((LOG_INFO, "%s: bogus mtu=%lu sent from %s; "
+ "exceeds maxmtu %lu, ignoring\n", __func__,
mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
}
}
@@ -439,8 +466,8 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO,
- "nd6_ra_input: lladdrlen mismatch for %s "
- "(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
+ "%s: lladdrlen mismatch for %s (if %d, RA packet %d)\n",
+ __func__, ip6_sprintf(ip6bufs, &saddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
@@ -493,10 +520,71 @@ nd6_rtmsg(int cmd, struct rtentry *rt)
ifa_free(ifa);
}
-/*
- * default router list processing sub routines
- */
+/* PFXRTR */
+static struct nd_pfxrouter *
+pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
+{
+ struct nd_pfxrouter *search;
+
+ ND6_LOCK_ASSERT();
+
+ LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) {
+ if (search->router == dr)
+ break;
+ }
+ return (search);
+}
+
+static void
+pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
+{
+ struct nd_pfxrouter *new;
+ bool update;
+
+ ND6_UNLOCK_ASSERT();
+
+ ND6_RLOCK();
+ if (pfxrtr_lookup(pr, dr) != NULL) {
+ ND6_RUNLOCK();
+ return;
+ }
+ ND6_RUNLOCK();
+
+ new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
+ if (new == NULL)
+ return;
+ defrouter_ref(dr);
+ new->router = dr;
+
+ ND6_WLOCK();
+ if (pfxrtr_lookup(pr, dr) == NULL) {
+ LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
+ update = true;
+ } else {
+ /* We lost a race to add the reference. */
+ defrouter_rele(dr);
+ free(new, M_IP6NDP);
+ update = false;
+ }
+ ND6_WUNLOCK();
+
+ if (update)
+ pfxlist_onlink_check();
+}
+
+static void
+pfxrtr_del(struct nd_pfxrouter *pfr)
+{
+
+ ND6_WLOCK_ASSERT();
+
+ LIST_REMOVE(pfr, pfr_entry);
+ defrouter_rele(pfr->router);
+ free(pfr, M_IP6NDP);
+}
+
+/* Default router list processing sub routines. */
static void
defrouter_addreq(struct nd_defrouter *new)
{
@@ -524,46 +612,6 @@ defrouter_addreq(struct nd_defrouter *new)
new->installed = 1;
}
-struct nd_defrouter *
-defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp)
-{
- struct nd_defrouter *dr;
-
- ND6_LOCK_ASSERT();
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
- if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) {
- defrouter_ref(dr);
- return (dr);
- }
- return (NULL);
-}
-
-struct nd_defrouter *
-defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
-{
- struct nd_defrouter *dr;
-
- ND6_RLOCK();
- dr = defrouter_lookup_locked(addr, ifp);
- ND6_RUNLOCK();
- return (dr);
-}
-
-void
-defrouter_ref(struct nd_defrouter *dr)
-{
-
- refcount_acquire(&dr->refcnt);
-}
-
-void
-defrouter_rele(struct nd_defrouter *dr)
-{
-
- if (refcount_release(&dr->refcnt))
- free(dr, M_IP6NDP);
-}
-
/*
* Remove the default route for a given router.
* This is just a subroutine function for defrouter_select_fib(), and
@@ -595,6 +643,79 @@ defrouter_delreq(struct nd_defrouter *dr)
dr->installed = 0;
}
+void
+defrouter_del(struct nd_defrouter *dr)
+{
+ struct nd_defrouter *deldr = NULL;
+ struct nd_prefix *pr;
+ struct nd_pfxrouter *pfxrtr;
+
+ ND6_UNLOCK_ASSERT();
+
+ /*
+ * Flush all the routing table entries that use the router
+ * as a next hop.
+ */
+ if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV)
+ rt6_flush(&dr->rtaddr, dr->ifp);
+
+ if (dr->installed) {
+ deldr = dr;
+ defrouter_delreq(dr);
+ }
+
+ /*
+ * Also delete all the pointers to the router in each prefix lists.
+ */
+ ND6_WLOCK();
+ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
+ if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
+ pfxrtr_del(pfxrtr);
+ }
+ ND6_WUNLOCK();
+
+ pfxlist_onlink_check();
+
+ /*
+ * If the router is the primary one, choose a new one.
+ * Note that defrouter_select_fib() will remove the current
+ * gateway from the routing table.
+ */
+ if (deldr)
+ defrouter_select_fib(deldr->ifp->if_fib);
+
+ /*
+ * Release the list reference.
+ */
+ defrouter_rele(dr);
+}
+
+
+struct nd_defrouter *
+defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp)
+{
+ struct nd_defrouter *dr;
+
+ ND6_LOCK_ASSERT();
+ TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry)
+ if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) {
+ defrouter_ref(dr);
+ return (dr);
+ }
+ return (NULL);
+}
+
+struct nd_defrouter *
+defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
+{
+ struct nd_defrouter *dr;
+
+ ND6_RLOCK();
+ dr = defrouter_lookup_locked(addr, ifp);
+ ND6_RUNLOCK();
+ return (dr);
+}
+
/*
* Remove all default routes from default router list.
*/
@@ -611,14 +732,14 @@ defrouter_reset(void)
* current default router list and use that when deleting routes.
*/
ND6_RLOCK();
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
+ TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry)
count++;
ND6_RUNLOCK();
dra = malloc(count * sizeof(*dra), M_TEMP, M_WAITOK | M_ZERO);
ND6_RLOCK();
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
+ TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry) {
if (i == count)
break;
defrouter_ref(dr);
@@ -662,67 +783,30 @@ defrouter_remove(struct in6_addr *addr, struct ifnet *ifp)
}
/*
- * Remove a router from the global list and optionally stash it in a
- * caller-supplied queue.
- *
- * The ND lock must be held.
+ * for default router selection
+ * regards router-preference field as a 2-bit signed integer
*/
-void
-defrouter_unlink(struct nd_defrouter *dr, struct nd_drhead *drq)
-{
-
- ND6_WLOCK_ASSERT();
- TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
- V_nd6_list_genid++;
- if (drq != NULL)
- TAILQ_INSERT_TAIL(drq, dr, dr_entry);
-}
-
-void
-defrouter_del(struct nd_defrouter *dr)
+static int
+rtpref(struct nd_defrouter *dr)
{
- struct nd_defrouter *deldr = NULL;
- struct nd_prefix *pr;
- struct nd_pfxrouter *pfxrtr;
-
- ND6_UNLOCK_ASSERT();
-
- /*
- * Flush all the routing table entries that use the router
- * as a next hop.
- */
- if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV)
- rt6_flush(&dr->rtaddr, dr->ifp);
-
- if (dr->installed) {
- deldr = dr;
- defrouter_delreq(dr);
- }
-
- /*
- * Also delete all the pointers to the router in each prefix lists.
- */
- ND6_WLOCK();
- LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
- if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
- pfxrtr_del(pfxrtr);
+ switch (dr->raflags & ND_RA_FLAG_RTPREF_MASK) {
+ case ND_RA_FLAG_RTPREF_HIGH:
+ return (RTPREF_HIGH);
+ case ND_RA_FLAG_RTPREF_MEDIUM:
+ case ND_RA_FLAG_RTPREF_RSV:
+ return (RTPREF_MEDIUM);
+ case ND_RA_FLAG_RTPREF_LOW:
+ return (RTPREF_LOW);
+ default:
+ /*
+ * This case should never happen. If it did, it would mean a
+ * serious bug of kernel internal. We thus always bark here.
+ * Or, can we even panic?
+ */
+ log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->raflags);
+ return (RTPREF_INVALID);
}
- ND6_WUNLOCK();
-
- pfxlist_onlink_check();
-
- /*
- * If the router is the primary one, choose a new one.
- * Note that defrouter_select_fib() will remove the current
- * gateway from the routing table.
- */
- if (deldr)
- defrouter_select_fib(deldr->ifp->if_fib);
-
- /*
- * Release the list reference.
- */
- defrouter_rele(dr);
+ /* NOTREACHED */
}
/*
@@ -767,7 +851,7 @@ defrouter_select_fib(int fibnum)
* Let's handle easy case (3) first:
* If default router list is empty, there's nothing to be done.
*/
- if (TAILQ_EMPTY(&V_nd_defrouter)) {
+ if (TAILQ_EMPTY(&V_nd6_defrouter)) {
ND6_RUNLOCK();
return;
}
@@ -778,7 +862,7 @@ defrouter_select_fib(int fibnum)
* the ordering rule of the list described in defrtrlist_update().
*/
selected_dr = installed_dr = NULL;
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
+ TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry) {
IF_AFDATA_RLOCK(dr->ifp);
if (selected_dr == NULL && dr->ifp->if_fib == fibnum &&
(ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
@@ -817,12 +901,12 @@ defrouter_select_fib(int fibnum)
if (selected_dr == NULL) {
if (installed_dr == NULL ||
TAILQ_NEXT(installed_dr, dr_entry) == NULL)
- dr = TAILQ_FIRST(&V_nd_defrouter);
+ dr = TAILQ_FIRST(&V_nd6_defrouter);
else
dr = TAILQ_NEXT(installed_dr, dr_entry);
/* Ensure we select a router for this FIB. */
- TAILQ_FOREACH_FROM(dr, &V_nd_defrouter, dr_entry) {
+ TAILQ_FOREACH_FROM(dr, &V_nd6_defrouter, dr_entry) {
if (dr->ifp->if_fib == fibnum) {
selected_dr = dr;
defrouter_ref(selected_dr);
@@ -872,33 +956,6 @@ defrouter_select(void)
defrouter_select_fib(RT_ALL_FIBS);
}
-/*
- * for default router selection
- * regards router-preference field as a 2-bit signed integer
- */
-static int
-rtpref(struct nd_defrouter *dr)
-{
- switch (dr->raflags & ND_RA_FLAG_RTPREF_MASK) {
- case ND_RA_FLAG_RTPREF_HIGH:
- return (RTPREF_HIGH);
- case ND_RA_FLAG_RTPREF_MEDIUM:
- case ND_RA_FLAG_RTPREF_RSV:
- return (RTPREF_MEDIUM);
- case ND_RA_FLAG_RTPREF_LOW:
- return (RTPREF_LOW);
- default:
- /*
- * This case should never happen. If it did, it would mean a
- * serious bug of kernel internal. We thus always bark here.
- * Or, can we even panic?
- */
- log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->raflags);
- return (RTPREF_INVALID);
- }
- /* NOTREACHED */
-}
-
static struct nd_defrouter *
defrtrlist_update(struct nd_defrouter *new)
{
@@ -960,7 +1017,7 @@ restart:
* The preferred router may have changed, so relocate this
* router.
*/
- TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+ TAILQ_REMOVE(&V_nd6_defrouter, dr, dr_entry);
n = dr;
} else {
n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO);
@@ -981,14 +1038,14 @@ restart:
*/
/* insert at the end of the group */
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
+ TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry) {
if (rtpref(n) > rtpref(dr))
break;
}
if (dr != NULL)
TAILQ_INSERT_BEFORE(dr, n, dr_entry);
else
- TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
+ TAILQ_INSERT_TAIL(&V_nd6_defrouter, n, dr_entry);
V_nd6_list_genid++;
ND6_WUNLOCK();
@@ -997,66 +1054,154 @@ restart:
return (n);
}
-static struct nd_pfxrouter *
-pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
+static int
+in6_init_prefix_ltimes(struct nd_prefix *ndpr)
{
- struct nd_pfxrouter *search;
+ if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
+ ndpr->ndpr_preferred = 0;
+ else
+ ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime;
+ if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
+ ndpr->ndpr_expire = 0;
+ else
+ ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime;
- ND6_LOCK_ASSERT();
+ return 0;
+}
- LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) {
- if (search->router == dr)
- break;
+static void
+in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
+{
+ /* init ia6t_expire */
+ if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
+ lt6->ia6t_expire = 0;
+ else {
+ lt6->ia6t_expire = time_uptime;
+ lt6->ia6t_expire += lt6->ia6t_vltime;
+ }
+
+ /* init ia6t_preferred */
+ if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
+ lt6->ia6t_preferred = 0;
+ else {
+ lt6->ia6t_preferred = time_uptime;
+ lt6->ia6t_preferred += lt6->ia6t_pltime;
}
- return (search);
}
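
Both lifetime helpers apply the same rule: an advertised lifetime of all-ones means "infinite" and is stored as an expiry of 0, otherwise the expiry is the current uptime plus the lifetime in seconds. A worked userland sketch of that rule; the ND6_INFINITE_LIFETIME value and the monotonic-clock stand-in for time_uptime are assumptions of the sketch:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define ND6_INFINITE_LIFETIME 0xffffffffU

static time_t
uptime(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);        /* time_uptime analogue */
    return (ts.tv_sec);
}

static time_t
lifetime_to_expire(uint32_t lifetime)
{
    if (lifetime == ND6_INFINITE_LIFETIME)
        return (0);                             /* 0 means "never expires" */
    return (uptime() + (time_t)lifetime);
}

int
main(void)
{
    printf("vltime 2592000 -> expire at %jd\n",
        (intmax_t)lifetime_to_expire(2592000));         /* 30 days */
    printf("vltime infinite -> expire at %jd\n",
        (intmax_t)lifetime_to_expire(ND6_INFINITE_LIFETIME));
    return (0);
}
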
-static void
-pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
+static struct in6_ifaddr *
+in6_ifadd(struct nd_prefixctl *pr, int mcast)
{
- struct nd_pfxrouter *new;
- bool update;
+ struct ifnet *ifp = pr->ndpr_ifp;
+ struct ifaddr *ifa;
+ struct in6_aliasreq ifra;
+ struct in6_ifaddr *ia, *ib;
+ int error, plen0;
+ struct in6_addr mask;
+ int prefixlen = pr->ndpr_plen;
+ int updateflags;
+ char ip6buf[INET6_ADDRSTRLEN];
- ND6_UNLOCK_ASSERT();
+ in6_prefixlen2mask(&mask, prefixlen);
- ND6_RLOCK();
- if (pfxrtr_lookup(pr, dr) != NULL) {
- ND6_RUNLOCK();
- return;
+ /*
+ * find a link-local address (will be interface ID).
+ * Is it really mandatory? Theoretically, a global or a site-local
+ * address can be configured without a link-local address, if we
+ * have a unique interface identifier...
+ *
+ * it is not mandatory to have a link-local address, we can generate
+ * interface identifier on the fly. we do this because:
+ * (1) it should be the easiest way to find interface identifier.
+ * (2) RFC2462 5.4 suggesting the use of the same interface identifier
+ * for multiple addresses on a single interface, and possible shortcut
+ * of DAD. we omitted DAD for this reason in the past.
+ * (3) a user can prevent autoconfiguration of global address
+ * by removing link-local address by hand (this is partly because we
+ * don't have other way to control the use of IPv6 on an interface.
+ * this has been our design choice - cf. NRL's "ifconfig auto").
+ * (4) it is easier to manage when an interface has addresses
+ * with the same interface identifier, than to have multiple addresses
+ * with different interface identifiers.
+ */
+ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
+ if (ifa)
+ ib = (struct in6_ifaddr *)ifa;
+ else
+ return NULL;
+
+ /* prefixlen + ifidlen must be equal to 128 */
+ plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
+ if (prefixlen != plen0) {
+ ifa_free(ifa);
+ nd6log((LOG_INFO,
+ "%s: wrong prefixlen for %s (prefix=%d ifid=%d)\n",
+ __func__, if_name(ifp), prefixlen, 128 - plen0));
+ return NULL;
}
- ND6_RUNLOCK();
- new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
- if (new == NULL)
- return;
- defrouter_ref(dr);
- new->router = dr;
+ /* make ifaddr */
+ in6_prepare_ifra(&ifra, &pr->ndpr_prefix.sin6_addr, &mask);
- ND6_WLOCK();
- if (pfxrtr_lookup(pr, dr) == NULL) {
- LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
- update = true;
- } else {
- /* We lost a race to add the reference. */
- defrouter_rele(dr);
- free(new, M_IP6NDP);
- update = false;
- }
- ND6_WUNLOCK();
+ IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask);
+ /* interface ID */
+ ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
+ (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
+ ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
+ (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
+ ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
+ (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
+ ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
+ (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
+ ifa_free(ifa);
- if (update)
- pfxlist_onlink_check();
-}
+ /* lifetimes. */
+ ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
+ ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
-static void
-pfxrtr_del(struct nd_pfxrouter *pfr)
-{
+ /* XXX: scope zone ID? */
- ND6_WLOCK_ASSERT();
+ ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
- LIST_REMOVE(pfr, pfr_entry);
- defrouter_rele(pfr->router);
- free(pfr, M_IP6NDP);
+ /*
+ * Make sure that we do not have this address already. This should
+ * usually not happen, but we can still see this case, e.g., if we
+ * have manually configured the exact address to be configured.
+ */
+ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
+ &ifra.ifra_addr.sin6_addr);
+ if (ifa != NULL) {
+ ifa_free(ifa);
+ /* this should be rare enough to make an explicit log */
+ log(LOG_INFO, "in6_ifadd: %s is already configured\n",
+ ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
+ return (NULL);
+ }
+
+ /*
+ * Allocate ifaddr structure, link into chain, etc.
+ * If we are going to create a new address upon receiving a multicasted
+ * RA, we need to impose a random delay before starting DAD.
+ * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
+ */
+ updateflags = 0;
+ if (mcast)
+ updateflags |= IN6_IFAUPDATE_DADDELAY;
+ if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
+ nd6log((LOG_ERR,
+ "%s: failed to make ifaddr %s on %s (errno=%d)\n", __func__,
+ ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
+ if_name(ifp), error));
+ return (NULL); /* ifaddr must not have been allocated. */
+ }
+
+ ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
+ /*
+ * XXXRW: Assumption of non-NULLness here might not be true with
+ * fine-grained locking -- should we validate it? Or just return
+ * earlier ifa rather than looking it up again?
+ */
+ return (ia); /* this is always non-NULL and referenced. */
}
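
in6_ifadd() forms the autoconfigured address by masking the advertised prefix to its length and OR-ing in the interface-identifier bits taken from the link-local address. A userland sketch of the same combination, using plain byte arithmetic instead of IN6_MASK_ADDR() and the s6_addr32[] words; the addresses and helper name are made up for illustration:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

static struct in6_addr
make_autoconf_addr(const struct in6_addr *prefix,
    const struct in6_addr *linklocal, unsigned int plen)
{
    struct in6_addr mask, out;
    unsigned int i, bits;

    /* Build the prefix mask, one byte at a time. */
    memset(&mask, 0, sizeof(mask));
    for (i = 0, bits = plen; i < 16; i++, bits = bits > 8 ? bits - 8 : 0)
        mask.s6_addr[i] = bits >= 8 ? 0xff : (0xff00 >> bits) & 0xff;

    /* Prefix bits from the RA, interface-ID bits from the link-local. */
    for (i = 0; i < 16; i++)
        out.s6_addr[i] = (prefix->s6_addr[i] & mask.s6_addr[i]) |
            (linklocal->s6_addr[i] & ~mask.s6_addr[i]);
    return (out);
}

int
main(void)
{
    struct in6_addr pfx, ll, addr;
    char buf[INET6_ADDRSTRLEN];

    inet_pton(AF_INET6, "2001:db8:1:2::", &pfx);
    inet_pton(AF_INET6, "fe80::211:22ff:fe33:4455", &ll);
    addr = make_autoconf_addr(&pfx, &ll, 64);
    printf("%s\n", inet_ntop(AF_INET6, &addr, buf, sizeof(buf)));
    return (0);
}
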
static struct nd_prefix *
@@ -1146,8 +1291,8 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
if (new->ndpr_raf_onlink) {
ND6_ONLINK_LOCK();
if ((error = nd6_prefix_onlink(new)) != 0) {
- nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
- "the prefix %s/%d on-link on %s (errno=%d)\n",
+ nd6log((LOG_ERR, "%s: failed to make the prefix %s/%d "
+ "on-link on %s (errno=%d)\n", __func__,
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), error));
/* proceed anyway. XXX: is it correct? */
@@ -1203,8 +1348,8 @@ nd6_prefix_del(struct nd_prefix *pr)
ND6_ONLINK_LOCK();
if ((e = nd6_prefix_offlink(pr)) != 0) {
nd6log((LOG_ERR,
- "nd6_prefix_del: failed to make %s/%d offlink "
- "on %s, errno=%d\n",
+ "%s: failed to make the prefix %s/%d offlink on %s "
+ "(errno=%d)\n", __func__,
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
/* what should we do? */
@@ -1275,9 +1420,8 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
ND6_ONLINK_LOCK();
if ((error = nd6_prefix_onlink(pr)) != 0) {
nd6log((LOG_ERR,
- "prelist_update: failed to make "
- "the prefix %s/%d on-link on %s "
- "(errno=%d)\n",
+ "%s: failed to make the prefix %s/%d "
+ "on-link on %s (errno=%d)\n", __func__,
ip6_sprintf(ip6buf,
&pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp),
@@ -1297,8 +1441,8 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
error = nd6_prelist_add(new, dr, &pr);
if (error != 0) {
- nd6log((LOG_NOTICE, "prelist_update: "
- "nd6_prelist_add failed for %s/%d on %s errno=%d\n",
+ nd6log((LOG_NOTICE, "%s: nd6_prelist_add() failed for "
+ "the prefix %s/%d on %s (errno=%d)\n", __func__,
ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
new->ndpr_plen, if_name(new->ndpr_ifp), error));
goto end; /* we should just give up in this case. */
@@ -1498,9 +1642,8 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
}
if (ifidlen + pr->ndpr_plen != 128) {
nd6log((LOG_INFO,
- "prelist_update: invalid prefixlen "
- "%d for %s, ignored\n",
- pr->ndpr_plen, if_name(ifp)));
+ "%s: invalid prefixlen %d for %s, ignored\n",
+ __func__, pr->ndpr_plen, if_name(ifp)));
goto end;
}
@@ -1526,10 +1669,9 @@ prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
if (V_ip6_use_tempaddr) {
int e;
if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
- nd6log((LOG_NOTICE, "prelist_update: "
- "failed to create a temporary "
- "address, errno=%d\n",
- e));
+ nd6log((LOG_NOTICE, "%s: failed to "
+ "create a temporary address "
+ "(errno=%d)\n", __func__, e));
}
}
ifa_free(&ia6->ia_ifa);
@@ -1621,7 +1763,7 @@ pfxlist_onlink_check(void)
* that does not advertise any prefixes.
*/
if (pr == NULL) {
- TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
+ TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry) {
struct nd_prefix *pr0;
LIST_FOREACH(pr0, &V_nd_prefix, ndpr_entry) {
@@ -1632,7 +1774,7 @@ pfxlist_onlink_check(void)
break;
}
}
- if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
+ if (pr != NULL || (!TAILQ_EMPTY(&V_nd6_defrouter) && pfxrtr == NULL)) {
/*
* There is at least one prefix that has a reachable router,
* or at least a router which probably does not advertise
@@ -1692,16 +1834,16 @@ restart:
if ((flags & NDPRF_ONLINK) != 0 &&
(e = nd6_prefix_offlink(pr)) != 0) {
nd6log((LOG_ERR,
- "pfxlist_onlink_check: failed to "
- "make %s/%d offlink, errno=%d\n",
+ "%s: failed to make %s/%d offlink "
+ "(errno=%d)\n", __func__,
ip6_sprintf(ip6buf,
&pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, e));
} else if ((flags & NDPRF_ONLINK) == 0 &&
(e = nd6_prefix_onlink(pr)) != 0) {
nd6log((LOG_ERR,
- "pfxlist_onlink_check: failed to "
- "make %s/%d onlink, errno=%d\n",
+ "%s: failed to make %s/%d onlink "
+ "(errno=%d)\n", __func__,
ip6_sprintf(ip6buf,
&pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, e));
@@ -1834,9 +1976,9 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
struct sockaddr_in6 *sin6;
sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
- nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add "
+ nd6log((LOG_ERR, "%s: failed to add "
"route for a prefix (%s/%d) on %s, gw=%s, mask=%s, "
- "flags=%lx errno = %d\n",
+ "flags=%lx errno = %d\n", __func__,
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp),
ip6_sprintf(ip6bufg, &sin6->sin6_addr),
@@ -1927,8 +2069,8 @@ nd6_prefix_onlink(struct nd_prefix *pr)
* interface. This should, of course, be rare though.
*/
nd6log((LOG_NOTICE,
- "nd6_prefix_onlink: failed to find any ifaddr"
- " to add route for a prefix(%s/%d) on %s\n",
+ "%s: failed to find any ifaddr to add route for a "
+ "prefix(%s/%d) on %s\n", __func__,
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(ifp)));
return (0);
@@ -2027,10 +2169,9 @@ restart:
ND6_RUNLOCK();
if ((e = nd6_prefix_onlink(opr)) != 0) {
nd6log((LOG_ERR,
- "nd6_prefix_offlink: failed to "
- "recover a prefix %s/%d from %s "
- "to %s (errno = %d)\n",
- ip6_sprintf(ip6buf,
+ "%s: failed to recover a prefix "
+ "%s/%d from %s to %s (errno=%d)\n",
+ __func__, ip6_sprintf(ip6buf,
&opr->ndpr_prefix.sin6_addr),
opr->ndpr_plen, if_name(ifp),
if_name(opr->ndpr_ifp), e));
@@ -2045,10 +2186,9 @@ restart:
} else {
/* XXX: can we still set the NDPRF_ONLINK flag? */
nd6log((LOG_ERR,
- "nd6_prefix_offlink: failed to delete route: "
- "%s/%d on %s (errno = %d)\n",
- ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
- if_name(ifp), error));
+ "%s: failed to delete route: %s/%d on %s (errno=%d)\n",
+ __func__, ip6_sprintf(ip6buf, &sa6.sin6_addr),
+ pr->ndpr_plen, if_name(ifp), error));
}
if (a_failure)
@@ -2058,121 +2198,6 @@ restart:
return (error);
}
-static struct in6_ifaddr *
-in6_ifadd(struct nd_prefixctl *pr, int mcast)
-{
- struct ifnet *ifp = pr->ndpr_ifp;
- struct ifaddr *ifa;
- struct in6_aliasreq ifra;
- struct in6_ifaddr *ia, *ib;
- int error, plen0;
- struct in6_addr mask;
- int prefixlen = pr->ndpr_plen;
- int updateflags;
- char ip6buf[INET6_ADDRSTRLEN];
-
- in6_prefixlen2mask(&mask, prefixlen);
-
- /*
- * find a link-local address (will be interface ID).
- * Is it really mandatory? Theoretically, a global or a site-local
- * address can be configured without a link-local address, if we
- * have a unique interface identifier...
- *
- * it is not mandatory to have a link-local address, we can generate
- * interface identifier on the fly. we do this because:
- * (1) it should be the easiest way to find interface identifier.
- * (2) RFC2462 5.4 suggesting the use of the same interface identifier
- * for multiple addresses on a single interface, and possible shortcut
- * of DAD. we omitted DAD for this reason in the past.
- * (3) a user can prevent autoconfiguration of global address
- * by removing link-local address by hand (this is partly because we
- * don't have other way to control the use of IPv6 on an interface.
- * this has been our design choice - cf. NRL's "ifconfig auto").
- * (4) it is easier to manage when an interface has addresses
- * with the same interface identifier, than to have multiple addresses
- * with different interface identifiers.
- */
- ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
- if (ifa)
- ib = (struct in6_ifaddr *)ifa;
- else
- return NULL;
-
- /* prefixlen + ifidlen must be equal to 128 */
- plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
- if (prefixlen != plen0) {
- ifa_free(ifa);
- nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
- "(prefix=%d ifid=%d)\n",
- if_name(ifp), prefixlen, 128 - plen0));
- return NULL;
- }
-
- /* make ifaddr */
- in6_prepare_ifra(&ifra, &pr->ndpr_prefix.sin6_addr, &mask);
-
- IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask);
- /* interface ID */
- ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
- (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
- ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
- (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
- ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
- (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
- ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
- (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
- ifa_free(ifa);
-
- /* lifetimes. */
- ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
- ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
-
- /* XXX: scope zone ID? */
-
- ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
-
- /*
- * Make sure that we do not have this address already. This should
- * usually not happen, but we can still see this case, e.g., if we
- * have manually configured the exact address to be configured.
- */
- ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
- &ifra.ifra_addr.sin6_addr);
- if (ifa != NULL) {
- ifa_free(ifa);
- /* this should be rare enough to make an explicit log */
- log(LOG_INFO, "in6_ifadd: %s is already configured\n",
- ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
- return (NULL);
- }
-
- /*
- * Allocate ifaddr structure, link into chain, etc.
- * If we are going to create a new address upon receiving a multicasted
- * RA, we need to impose a random delay before starting DAD.
- * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
- */
- updateflags = 0;
- if (mcast)
- updateflags |= IN6_IFAUPDATE_DADDELAY;
- if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
- nd6log((LOG_ERR,
- "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
- ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
- if_name(ifp), error));
- return (NULL); /* ifaddr must not have been allocated. */
- }
-
- ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
- /*
- * XXXRW: Assumption of non-NULLness here might not be true with
- * fine-grained locking -- should we validate it? Or just return
- * earlier ifa rather than looking it up again?
- */
- return (ia); /* this is always non-NULL and referenced. */
-}
-
/*
* ia0 - corresponding public address
*/
@@ -2199,8 +2224,8 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
again:
if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
(const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
- nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
- "random IFID\n"));
+ nd6log((LOG_NOTICE, "%s: failed to find a good random IFID\n",
+ __func__));
return (EINVAL);
}
ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
@@ -2222,8 +2247,8 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
}
/* Give up. Something strange should have happened. */
- nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
- "find a unique random IFID\n"));
+ nd6log((LOG_NOTICE, "%s: failed to find a unique random IFID\n",
+ __func__));
return (EEXIST);
}
@@ -2276,8 +2301,8 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
if (newia == NULL) { /* XXX: can it happen? */
nd6log((LOG_ERR,
- "in6_tmpifadd: ifa update succeeded, but we got "
- "no ifaddr\n"));
+ "%s: ifa update succeeded, but we got no ifaddr\n",
+ __func__));
return (EINVAL); /* XXX */
}
newia->ia6_ndpr = ia0->ia6_ndpr;
@@ -2298,58 +2323,6 @@ in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
}
static int
-in6_init_prefix_ltimes(struct nd_prefix *ndpr)
-{
- if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
- ndpr->ndpr_preferred = 0;
- else
- ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime;
- if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
- ndpr->ndpr_expire = 0;
- else
- ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime;
-
- return 0;
-}
-
-static void
-in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
-{
- /* init ia6t_expire */
- if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
- lt6->ia6t_expire = 0;
- else {
- lt6->ia6t_expire = time_uptime;
- lt6->ia6t_expire += lt6->ia6t_vltime;
- }
-
- /* init ia6t_preferred */
- if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
- lt6->ia6t_preferred = 0;
- else {
- lt6->ia6t_preferred = time_uptime;
- lt6->ia6t_preferred += lt6->ia6t_pltime;
- }
-}
-
-/*
- * Delete all the routing table entries that use the specified gateway.
- * XXX: this function causes search through all entries of routing table, so
- * it shouldn't be called when acting as a router.
- */
-void
-rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
-{
-
- /* We'll care only link-local addresses */
- if (!IN6_IS_ADDR_LINKLOCAL(gateway))
- return;
-
- /* XXX Do we really need to walk any but the default FIB? */
- rt_foreach_fib_walk_del(AF_INET6, rt6_deleteroute, (void *)gateway);
-}
-
-static int
rt6_deleteroute(const struct rtentry *rt, void *arg)
{
#define SIN6(s) ((struct sockaddr_in6 *)s)
@@ -2381,6 +2354,23 @@ rt6_deleteroute(const struct rtentry *rt, void *arg)
#undef SIN6
}
+/*
+ * Delete all the routing table entries that use the specified gateway.
+ * XXX: this function causes search through all entries of routing table, so
+ * it shouldn't be called when acting as a router.
+ */
+void
+rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
+{
+
+ /* We'll care only link-local addresses */
+ if (!IN6_IS_ADDR_LINKLOCAL(gateway))
+ return;
+
+ /* XXX Do we really need to walk any but the default FIB? */
+ rt_foreach_fib_walk_del(AF_INET6, rt6_deleteroute, (void *)gateway);
+}
+
int
nd6_setdefaultiface(int ifindex)
{
@@ -2408,3 +2398,131 @@ nd6_setdefaultiface(int ifindex)
return (error);
}
+
+bool
+nd6_defrouter_list_empty(void)
+{
+
+ return (TAILQ_EMPTY(&V_nd6_defrouter));
+}
+
+void
+nd6_defrouter_timer(void)
+{
+ struct nd_defrouter *dr, *ndr;
+ struct nd_drhead drq;
+
+ TAILQ_INIT(&drq);
+
+ ND6_WLOCK();
+ TAILQ_FOREACH_SAFE(dr, &V_nd6_defrouter, dr_entry, ndr)
+ if (dr->expire && dr->expire < time_uptime)
+ defrouter_unlink(dr, &drq);
+ ND6_WUNLOCK();
+
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
+ }
+}
+
+/*
+ * Nuke default router list entries toward ifp.
+ * We defer removal of default router list entries that is installed in the
+ * We defer removal of default router list entries that are installed in the
+ * routing table, in order to keep additional side effects as small as possible.
+ */
+void
+nd6_defrouter_purge(struct ifnet *ifp)
+{
+ struct nd_defrouter *dr, *ndr;
+ struct nd_drhead drq;
+
+ TAILQ_INIT(&drq);
+
+ ND6_WLOCK();
+ TAILQ_FOREACH_SAFE(dr, &V_nd6_defrouter, dr_entry, ndr) {
+ if (dr->installed)
+ continue;
+ if (dr->ifp == ifp)
+ defrouter_unlink(dr, &drq);
+ }
+ TAILQ_FOREACH_SAFE(dr, &V_nd6_defrouter, dr_entry, ndr) {
+ if (!dr->installed)
+ continue;
+ if (dr->ifp == ifp)
+ defrouter_unlink(dr, &drq);
+ }
+ ND6_WUNLOCK();
+
+ /* Delete the unlinked router objects. */
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
+ }
+}
+
+void
+nd6_defrouter_flush_all(void)
+{
+ struct nd_defrouter *dr;
+ struct nd_drhead drq;
+
+ TAILQ_INIT(&drq);
+
+ ND6_WLOCK();
+ while ((dr = TAILQ_FIRST(&V_nd6_defrouter)) != NULL)
+ defrouter_unlink(dr, &drq);
+ ND6_WUNLOCK();
+
+ while ((dr = TAILQ_FIRST(&drq)) != NULL) {
+ TAILQ_REMOVE(&drq, dr, dr_entry);
+ defrouter_del(dr);
+ }
+}
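
nd6_defrouter_timer(), nd6_defrouter_purge() and nd6_defrouter_flush_all() all share one shape: entries are unlinked into a private queue while the write lock is held, and the heavyweight teardown in defrouter_del() runs only after the lock is dropped. A compact userland sketch of that shape, assuming a BSD-style <sys/queue.h> and a pthread mutex in place of the ND6 lock (all names hypothetical):

#include <pthread.h>
#include <stdlib.h>
#include <sys/queue.h>

struct entry {
    TAILQ_ENTRY(entry) link;
    int expired;
};
TAILQ_HEAD(entryq, entry);

static pthread_mutex_t list_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct entryq global_list = TAILQ_HEAD_INITIALIZER(global_list);

/* defrouter_del() analogue: may sleep or take other locks, so runs unlocked. */
static void
entry_teardown(struct entry *e)
{
    free(e);
}

static void
expire_entries(void)
{
    struct entry *e, *tmp;
    struct entryq doomed;

    TAILQ_INIT(&doomed);

    pthread_mutex_lock(&list_mtx);                      /* ND6_WLOCK() analogue */
    TAILQ_FOREACH_SAFE(e, &global_list, link, tmp) {
        if (e->expired) {
            TAILQ_REMOVE(&global_list, e, link);
            TAILQ_INSERT_TAIL(&doomed, e, link);        /* defrouter_unlink() */
        }
    }
    pthread_mutex_unlock(&list_mtx);

    while ((e = TAILQ_FIRST(&doomed)) != NULL) {
        TAILQ_REMOVE(&doomed, e, link);
        entry_teardown(e);
    }
}
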
+
+void
+nd6_defrouter_init(void)
+{
+
+ TAILQ_INIT(&V_nd6_defrouter);
+}
+
+static int
+nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
+{
+ struct in6_defrouter d;
+ struct nd_defrouter *dr;
+ int error;
+
+ if (req->newptr != NULL)
+ return (EPERM);
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+
+ bzero(&d, sizeof(d));
+ d.rtaddr.sin6_family = AF_INET6;
+ d.rtaddr.sin6_len = sizeof(d.rtaddr);
+
+ ND6_RLOCK();
+ TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry) {
+ d.rtaddr.sin6_addr = dr->rtaddr;
+ error = sa6_recoverscope(&d.rtaddr);
+ if (error != 0)
+ break;
+ d.flags = dr->raflags;
+ d.rtlifetime = dr->rtlifetime;
+ d.expire = dr->expire + (time_second - time_uptime);
+ d.if_index = dr->ifp->if_index;
+ error = SYSCTL_OUT(req, &d, sizeof(d));
+ if (error != 0)
+ break;
+ }
+ ND6_RUNLOCK();
+ return (error);
+}
+SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, nd6_sysctl_drlist, "S,in6_defrouter",
+ "NDP default router list");
diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c
index aa62b7e1..c33bca05 100644
--- a/freebsd/sys/netinet6/raw_ip6.c
+++ b/freebsd/sys/netinet6/raw_ip6.c
@@ -163,7 +163,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
struct ifnet *ifp;
struct mbuf *m = *mp;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
- struct inpcb *in6p;
+ struct inpcb *inp;
struct inpcb *last = NULL;
struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa;
@@ -176,18 +176,18 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
ifp = m->m_pkthdr.rcvif;
INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
- CK_LIST_FOREACH(in6p, &V_ripcb, inp_list) {
+ CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
/* XXX inp locking */
- if ((in6p->inp_vflag & INP_IPV6) == 0)
+ if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
- if (in6p->inp_ip_p &&
- in6p->inp_ip_p != proto)
+ if (inp->inp_ip_p &&
+ inp->inp_ip_p != proto)
continue;
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
- !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
+ !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &ip6->ip6_dst))
continue;
- if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
- !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
+ !IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &ip6->ip6_src))
continue;
if (last != NULL) {
struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
@@ -225,23 +225,23 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
INP_RUNLOCK(last);
last = NULL;
}
- INP_RLOCK(in6p);
- if (__predict_false(in6p->inp_flags2 & INP_FREED))
+ INP_RLOCK(inp);
+ if (__predict_false(inp->inp_flags2 & INP_FREED))
goto skip_2;
- if (jailed_without_vnet(in6p->inp_cred)) {
+ if (jailed_without_vnet(inp->inp_cred)) {
/*
* Allow raw socket in jail to receive multicast;
* assume process had PRIV_NETINET_RAW at attach,
* and fall through into normal filter path if so.
*/
if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
- prison_check_ip6(in6p->inp_cred,
+ prison_check_ip6(inp->inp_cred,
&ip6->ip6_dst) != 0)
goto skip_2;
}
- if (in6p->in6p_cksum != -1) {
+ if (inp->in6p_cksum != -1) {
RIP6STAT_INC(rip6s_isum);
- if (m->m_pkthdr.len - (*offp + in6p->in6p_cksum) < 2 ||
+ if (m->m_pkthdr.len - (*offp + inp->in6p_cksum) < 2 ||
in6_cksum(m, proto, *offp,
m->m_pkthdr.len - *offp)) {
RIP6STAT_INC(rip6s_badsum);
@@ -260,7 +260,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
* should receive it, as multicast filtering is now
* the responsibility of the transport layer.
*/
- if (in6p->in6p_moptions &&
+ if (inp->in6p_moptions &&
IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
/*
* If the incoming datagram is for MLD, allow it
@@ -290,7 +290,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
mcaddr.sin6_family = AF_INET6;
mcaddr.sin6_addr = ip6->ip6_dst;
- blocked = im6o_mc_filter(in6p->in6p_moptions,
+ blocked = im6o_mc_filter(inp->in6p_moptions,
ifp,
(struct sockaddr *)&mcaddr,
(struct sockaddr *)&fromsa);
@@ -300,10 +300,10 @@ rip6_input(struct mbuf **mp, int *offp, int proto)
goto skip_2;
}
}
- last = in6p;
+ last = inp;
continue;
skip_2:
- INP_RUNLOCK(in6p);
+ INP_RUNLOCK(inp);
}
INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
@@ -396,7 +396,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
struct m_tag *mtag;
struct sockaddr_in6 *dstsock;
struct ip6_hdr *ip6;
- struct inpcb *in6p;
+ struct inpcb *inp;
u_int plen = m->m_pkthdr.len;
int error = 0;
struct ip6_pktopts opt, *optp;
@@ -413,18 +413,18 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
control = va_arg(ap, struct mbuf *);
va_end(ap);
- in6p = sotoinpcb(so);
- INP_WLOCK(in6p);
+ inp = sotoinpcb(so);
+ INP_WLOCK(inp);
if (control != NULL) {
if ((error = ip6_setpktopts(control, &opt,
- in6p->in6p_outputopts, so->so_cred,
+ inp->in6p_outputopts, so->so_cred,
so->so_proto->pr_protocol)) != 0) {
goto bad;
}
optp = &opt;
} else
- optp = in6p->in6p_outputopts;
+ optp = inp->in6p_outputopts;
/*
* Check and convert scope zone ID into internal form.
@@ -467,12 +467,12 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
/*
* Source address selection.
*/
- error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred,
+ error = in6_selectsrc_socket(dstsock, optp, inp, so->so_cred,
scope_ambiguous, &in6a, &hlim);
if (error)
goto bad;
- error = prison_check_ip6(in6p->inp_cred, &in6a);
+ error = prison_check_ip6(inp->inp_cred, &in6a);
if (error != 0)
goto bad;
ip6->ip6_src = in6a;
@@ -483,18 +483,18 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
* Fill in the rest of the IPv6 header fields.
*/
ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
- (in6p->inp_flow & IPV6_FLOWINFO_MASK);
+ (inp->inp_flow & IPV6_FLOWINFO_MASK);
ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
(IPV6_VERSION & IPV6_VERSION_MASK);
/*
* ip6_plen will be filled in ip6_output, so not fill it here.
*/
- ip6->ip6_nxt = in6p->inp_ip_p;
+ ip6->ip6_nxt = inp->inp_ip_p;
ip6->ip6_hlim = hlim;
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 ||
- in6p->in6p_cksum != -1) {
+ inp->in6p_cksum != -1) {
struct mbuf *n;
int off;
u_int16_t *p;
@@ -503,7 +503,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
off = offsetof(struct icmp6_hdr, icmp6_cksum);
else
- off = in6p->in6p_cksum;
+ off = inp->in6p_cksum;
if (plen < off + 2) {
error = EINVAL;
goto bad;
@@ -539,7 +539,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
}
}
- error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p);
+ error = ip6_output(m, optp, NULL, 0, inp->in6p_moptions, &oifp, inp);
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
if (oifp)
icmp6_ifoutstat_inc(oifp, type, code);
@@ -558,7 +558,7 @@ rip6_output(struct mbuf *m, struct socket *so, ...)
ip6_clearpktopts(&opt, -1);
m_freem(control);
}
- INP_WUNLOCK(in6p);
+ INP_WUNLOCK(inp);
return (error);
}
diff --git a/freebsd/sys/netinet6/route6.c b/freebsd/sys/netinet6/route6.c
index 7014daa6..908a5479 100644
--- a/freebsd/sys/netinet6/route6.c
+++ b/freebsd/sys/netinet6/route6.c
@@ -64,12 +64,16 @@ int
route6_input(struct mbuf **mp, int *offp, int proto)
{
struct ip6_hdr *ip6;
- struct mbuf *m = *mp;
+ struct mbuf *m;
struct ip6_rthdr *rh;
int off = *offp, rhlen;
#ifdef __notyet__
struct ip6aux *ip6a;
+#endif
+
+ m = *mp;
+#ifdef __notyet__
ip6a = ip6_findaux(m);
if (ip6a) {
/* XXX reject home-address option before rthdr */
@@ -81,18 +85,16 @@ route6_input(struct mbuf **mp, int *offp, int proto)
}
#endif
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, sizeof(*rh), IPPROTO_DONE);
+ if (m->m_len < off + sizeof(*rh)) {
+ m = m_pullup(m, off + sizeof(*rh));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = NULL;
+ return (IPPROTO_DONE);
+ }
+ }
ip6 = mtod(m, struct ip6_hdr *);
rh = (struct ip6_rthdr *)((caddr_t)ip6 + off);
-#else
- ip6 = mtod(m, struct ip6_hdr *);
- IP6_EXTHDR_GET(rh, struct ip6_rthdr *, m, off, sizeof(*rh));
- if (rh == NULL) {
- IP6STAT_INC(ip6s_tooshort);
- return IPPROTO_DONE;
- }
-#endif
/*
* While this switch may look gratuitous, leave it in
@@ -108,9 +110,11 @@ route6_input(struct mbuf **mp, int *offp, int proto)
IP6STAT_INC(ip6s_badoptions);
icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
(caddr_t)&rh->ip6r_type - (caddr_t)ip6);
+ *mp = NULL;
return (IPPROTO_DONE);
}
*offp += rhlen;
+ *mp = m;
return (rh->ip6r_nxt);
}
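
The same mechanical conversion recurs in route6.c above and again in the udp6, sctp6, AH and ESP input paths below: the retired IP6_EXTHDR_CHECK()/IP6_EXTHDR_GET() macros are replaced by an explicit m_pullup() so the header about to be dereferenced is guaranteed to sit contiguously in the first mbuf. Schematically (a kernel-style fragment, not standalone code; the header type and the exact stat counter vary per caller):

    if (m->m_len < off + sizeof(struct some_hdr)) {
        m = m_pullup(m, off + sizeof(struct some_hdr));
        if (m == NULL) {
            /* m_pullup() freed the chain on failure; tell the caller. */
            IP6STAT_INC(ip6s_exthdrtoolong);
            *mp = NULL;
            return (IPPROTO_DONE);
        }
    }
    hdr = (struct some_hdr *)(mtod(m, caddr_t) + off);
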
diff --git a/freebsd/sys/netinet6/sctp6_usrreq.c b/freebsd/sys/netinet6/sctp6_usrreq.c
index 3465f3c3..d3421894 100644
--- a/freebsd/sys/netinet6/sctp6_usrreq.c
+++ b/freebsd/sys/netinet6/sctp6_usrreq.c
@@ -107,13 +107,15 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
/* Get IP, SCTP, and first chunk header together in the first mbuf. */
offset = iphlen + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
- ip6 = mtod(m, struct ip6_hdr *);
- IP6_EXTHDR_GET(sh, struct sctphdr *, m, iphlen,
- (int)(sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr)));
- if (sh == NULL) {
- SCTP_STAT_INCR(sctps_hdrops);
- return (IPPROTO_DONE);
+ if (m->m_len < offset) {
+ m = m_pullup(m, offset);
+ if (m == NULL) {
+ SCTP_STAT_INCR(sctps_hdrops);
+ return (IPPROTO_DONE);
+ }
}
+ ip6 = mtod(m, struct ip6_hdr *);
+ sh = (struct sctphdr *)(mtod(m, caddr_t) + iphlen);
ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr));
offset -= sizeof(struct sctp_chunkhdr);
memset(&src, 0, sizeof(struct sockaddr_in6));
@@ -522,7 +524,6 @@ sctp_must_try_again:
static int
sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNUSED)
{
- struct in6pcb *inp6;
int error;
struct sctp_inpcb *inp;
uint32_t vrf_id = SCTP_DEFAULT_VRFID;
@@ -544,18 +545,17 @@ sctp6_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNU
inp = (struct sctp_inpcb *)so->so_pcb;
SCTP_INP_WLOCK(inp);
inp->sctp_flags |= SCTP_PCB_FLAGS_BOUND_V6; /* I'm v6! */
- inp6 = (struct in6pcb *)inp;
- inp6->inp_vflag |= INP_IPV6;
- inp6->in6p_hops = -1; /* use kernel default */
- inp6->in6p_cksum = -1; /* just to be sure */
+ inp->ip_inp.inp.inp_vflag |= INP_IPV6;
+ inp->ip_inp.inp.in6p_hops = -1; /* use kernel default */
+ inp->ip_inp.inp.in6p_cksum = -1; /* just to be sure */
#ifdef INET
/*
* XXX: ugly!! IPv4 TTL initialization is necessary for an IPv6
* socket as well, because the socket may be bound to an IPv6
* wildcard address, which may match an IPv4-mapped IPv6 address.
*/
- inp6->inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
+ inp->ip_inp.inp.inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
#endif
SCTP_INP_WUNLOCK(inp);
return (0);
@@ -565,8 +565,8 @@ static int
sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
{
struct sctp_inpcb *inp;
- struct in6pcb *inp6;
int error;
+ u_char vflagsav;
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
@@ -597,16 +597,16 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
return (EINVAL);
}
}
- inp6 = (struct in6pcb *)inp;
- inp6->inp_vflag &= ~INP_IPV4;
- inp6->inp_vflag |= INP_IPV6;
- if ((addr != NULL) && (SCTP_IPV6_V6ONLY(inp6) == 0)) {
+ vflagsav = inp->ip_inp.inp.inp_vflag;
+ inp->ip_inp.inp.inp_vflag &= ~INP_IPV4;
+ inp->ip_inp.inp.inp_vflag |= INP_IPV6;
+ if ((addr != NULL) && (SCTP_IPV6_V6ONLY(&inp->ip_inp.inp) == 0)) {
switch (addr->sa_family) {
#ifdef INET
case AF_INET:
/* binding v4 addr to v6 socket, so reset flags */
- inp6->inp_vflag |= INP_IPV4;
- inp6->inp_vflag &= ~INP_IPV6;
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
+ inp->ip_inp.inp.inp_vflag &= ~INP_IPV6;
break;
#endif
#ifdef INET6
@@ -617,17 +617,17 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
sin6_p = (struct sockaddr_in6 *)addr;
if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) {
- inp6->inp_vflag |= INP_IPV4;
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
}
#ifdef INET
if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
struct sockaddr_in sin;
in6_sin6_2_sin(&sin, sin6_p);
- inp6->inp_vflag |= INP_IPV4;
- inp6->inp_vflag &= ~INP_IPV6;
+ inp->ip_inp.inp.inp_vflag |= INP_IPV4;
+ inp->ip_inp.inp.inp_vflag &= ~INP_IPV6;
error = sctp_inpcb_bind(so, (struct sockaddr *)&sin, NULL, p);
- return (error);
+ goto out;
}
#endif
break;
@@ -644,7 +644,8 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
if (addr->sa_family == AF_INET) {
/* can't bind v4 addr to v6 only socket! */
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
- return (EINVAL);
+ error = EINVAL;
+ goto out;
}
#endif
sin6_p = (struct sockaddr_in6 *)addr;
@@ -653,10 +654,14 @@ sctp6_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
/* can't bind v4-mapped addrs either! */
/* NOTE: we don't support SIIT */
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
- return (EINVAL);
+ error = EINVAL;
+ goto out;
}
}
error = sctp_inpcb_bind(so, addr, NULL, p);
+out:
+ if (error != 0)
+ inp->ip_inp.inp.inp_vflag = vflagsav;
return (error);
}
@@ -687,7 +692,6 @@ sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
struct mbuf *control, struct thread *p)
{
struct sctp_inpcb *inp;
- struct in6pcb *inp6;
#ifdef INET
struct sockaddr_in6 *sin6;
@@ -704,7 +708,6 @@ sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
- inp6 = (struct in6pcb *)inp;
/*
* For the TCP model we may get a NULL addr, if we are a connected
* socket thats ok.
@@ -724,7 +727,7 @@ sctp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
}
#ifdef INET
sin6 = (struct sockaddr_in6 *)addr;
- if (SCTP_IPV6_V6ONLY(inp6)) {
+ if (SCTP_IPV6_V6ONLY(inp)) {
/*
* if IPV6_V6ONLY flag, we discard datagrams destined to a
* v4 addr or v4-mapped addr
@@ -793,14 +796,10 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
struct sctp_inpcb *inp;
struct sctp_tcb *stcb;
#ifdef INET
- struct in6pcb *inp6;
struct sockaddr_in6 *sin6;
union sctp_sockstore store;
#endif
-#ifdef INET
- inp6 = (struct in6pcb *)so->so_pcb;
-#endif
inp = (struct sctp_inpcb *)so->so_pcb;
if (inp == NULL) {
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, ECONNRESET);
@@ -858,7 +857,7 @@ sctp6_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
}
#ifdef INET
sin6 = (struct sockaddr_in6 *)addr;
- if (SCTP_IPV6_V6ONLY(inp6)) {
+ if (SCTP_IPV6_V6ONLY(inp)) {
/*
* if IPV6_V6ONLY flag, ignore connections destined to a v4
* addr or v4-mapped addr
@@ -1100,10 +1099,10 @@ sctp6_peeraddr(struct socket *so, struct sockaddr **addr)
static int
sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
{
- struct in6pcb *inp6 = sotoin6pcb(so);
+ struct inpcb *inp = sotoinpcb(so);
int error;
- if (inp6 == NULL) {
+ if (inp == NULL) {
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
@@ -1136,10 +1135,10 @@ sctp6_in6getaddr(struct socket *so, struct sockaddr **nam)
static int
sctp6_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
- struct in6pcb *inp6 = sotoin6pcb(so);
+ struct inpcb *inp = sotoinpcb(so);
int error;
- if (inp6 == NULL) {
+ if (inp == NULL) {
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP6_USRREQ, EINVAL);
return (EINVAL);
}
diff --git a/freebsd/sys/netinet6/udp6_usrreq.c b/freebsd/sys/netinet6/udp6_usrreq.c
index 270b4880..845d0dc9 100644
--- a/freebsd/sys/netinet6/udp6_usrreq.c
+++ b/freebsd/sys/netinet6/udp6_usrreq.c
@@ -224,16 +224,16 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
ifp = m->m_pkthdr.rcvif;
-#ifndef PULLDOWN_TEST
- IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE);
+ if (m->m_len < off + sizeof(struct udphdr)) {
+ m = m_pullup(m, off + sizeof(struct udphdr));
+ if (m == NULL) {
+ IP6STAT_INC(ip6s_exthdrtoolong);
+ *mp = NULL;
+ return (IPPROTO_DONE);
+ }
+ }
ip6 = mtod(m, struct ip6_hdr *);
uh = (struct udphdr *)((caddr_t)ip6 + off);
-#else
- IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh));
- if (!uh)
- return (IPPROTO_DONE);
- ip6 = mtod(m, struct ip6_hdr *);
-#endif
UDPSTAT_INC(udps_ipackets);
@@ -396,8 +396,11 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
else
UDP_PROBE(receive, NULL, last,
ip6, last, uh);
- if (udp6_append(last, n, off, fromsa))
+ if (udp6_append(last, n, off, fromsa)) {
+ /* XXX-BZ do we leak m here? */
+ *mp = NULL;
goto inp_lost;
+ }
}
INP_RUNLOCK(last);
}
@@ -438,6 +441,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
INP_RUNLOCK(last);
inp_lost:
INP_INFO_RUNLOCK_ET(pcbinfo, et);
+ *mp = NULL;
return (IPPROTO_DONE);
}
/*
@@ -481,7 +485,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
m->m_pkthdr.rcvif, m);
if (inp == NULL) {
- if (udp_log_in_vain) {
+ if (V_udp_log_in_vain) {
char ip6bufs[INET6_ADDRSTRLEN];
char ip6bufd[INET6_ADDRSTRLEN];
@@ -505,6 +509,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
if (V_udp_blackhole)
goto badunlocked;
icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
+ *mp = NULL;
return (IPPROTO_DONE);
}
INP_RLOCK_ASSERT(inp);
@@ -513,6 +518,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) {
INP_RUNLOCK(inp);
m_freem(m);
+ *mp = NULL;
return (IPPROTO_DONE);
}
}
@@ -522,6 +528,7 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
if (udp6_append(inp, m, off, fromsa) == 0)
INP_RUNLOCK(inp);
+ *mp = NULL;
return (IPPROTO_DONE);
badheadlocked:
@@ -529,6 +536,7 @@ badheadlocked:
badunlocked:
if (m)
m_freem(m);
+ *mp = NULL;
return (IPPROTO_DONE);
}
@@ -1145,6 +1153,7 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
struct inpcb *inp;
struct inpcbinfo *pcbinfo;
int error;
+ u_char vflagsav;
pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
@@ -1152,6 +1161,7 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
INP_WLOCK(inp);
INP_HASH_WLOCK(pcbinfo);
+ vflagsav = inp->inp_vflag;
inp->inp_vflag &= ~INP_IPV4;
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
@@ -1179,6 +1189,8 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
#ifdef INET
out:
#endif
+ if (error != 0)
+ inp->inp_vflag = vflagsav;
INP_HASH_WUNLOCK(pcbinfo);
INP_WUNLOCK(inp);
return (error);
@@ -1225,6 +1237,7 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
struct inpcbinfo *pcbinfo;
struct sockaddr_in6 *sin6;
int error;
+ u_char vflagsav;
pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
inp = sotoinpcb(so);
@@ -1252,17 +1265,26 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
}
in6_sin6_2_sin(&sin, sin6);
- inp->inp_vflag |= INP_IPV4;
- inp->inp_vflag &= ~INP_IPV6;
error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
if (error != 0)
goto out;
+ vflagsav = inp->inp_vflag;
+ inp->inp_vflag |= INP_IPV4;
+ inp->inp_vflag &= ~INP_IPV6;
INP_HASH_WLOCK(pcbinfo);
error = in_pcbconnect(inp, (struct sockaddr *)&sin,
td->td_ucred);
INP_HASH_WUNLOCK(pcbinfo);
+ /*
+ * If connect succeeds, mark socket as connected. If
+ * connect fails and socket is unbound, reset inp_vflag
+ * field.
+ */
if (error == 0)
soisconnected(so);
+ else if (inp->inp_laddr.s_addr == INADDR_ANY &&
+ inp->inp_lport == 0)
+ inp->inp_vflag = vflagsav;
goto out;
} else {
if ((inp->inp_vflag & INP_IPV6) == 0) {
@@ -1275,16 +1297,25 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = EISCONN;
goto out;
}
- inp->inp_vflag &= ~INP_IPV4;
- inp->inp_vflag |= INP_IPV6;
error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
if (error != 0)
goto out;
+ vflagsav = inp->inp_vflag;
+ inp->inp_vflag &= ~INP_IPV4;
+ inp->inp_vflag |= INP_IPV6;
INP_HASH_WLOCK(pcbinfo);
error = in6_pcbconnect(inp, nam, td->td_ucred);
INP_HASH_WUNLOCK(pcbinfo);
+ /*
+ * If connect succeeds, mark socket as connected. If
+ * connect fails and socket is unbound, reset inp_vflag
+ * field.
+ */
if (error == 0)
soisconnected(so);
+ else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
+ inp->inp_lport == 0)
+ inp->inp_vflag = vflagsav;
out:
INP_WUNLOCK(inp);
return (error);
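
udp6_bind(), udp6_connect() and sctp6_bind() now snapshot inp_vflag before speculatively flipping the INP_IPV4/INP_IPV6 bits and restore the snapshot when the operation fails and the socket is still unbound, so a failed v4-mapped bind or connect no longer leaves the pcb mis-flagged. A minimal userland sketch of that save/restore idiom (all names and flag values are illustrative):

#include <stdbool.h>

#define VF_INET  0x1    /* illustrative stand-ins for INP_IPV4/INP_IPV6 */
#define VF_INET6 0x2

struct pcb {
    unsigned char vflag;
    bool bound;
};

/* Hypothetical helper: the actual bind, which may fail. */
static int
do_bind_ipv4(struct pcb *pcb)
{
    pcb->bound = true;
    return (0);
}

static int
bind_v4_on_v6_socket(struct pcb *pcb)
{
    unsigned char vflagsav;
    int error;

    vflagsav = pcb->vflag;              /* snapshot before flipping flags */
    pcb->vflag |= VF_INET;
    pcb->vflag &= ~VF_INET6;
    error = do_bind_ipv4(pcb);
    if (error != 0 && !pcb->bound)
        pcb->vflag = vflagsav;          /* roll back the speculative switch */
    return (error);
}
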
diff --git a/freebsd/sys/netipsec/xform_ah.c b/freebsd/sys/netipsec/xform_ah.c
index 618fbd9b..afe26445 100644
--- a/freebsd/sys/netipsec/xform_ah.c
+++ b/freebsd/sys/netipsec/xform_ah.c
@@ -577,14 +577,16 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
/* Figure out header size. */
rplen = HDRSIZE(sav);
- /* XXX don't pullup, just copy header */
- IP6_EXTHDR_GET(ah, struct newah *, m, skip, rplen);
- if (ah == NULL) {
- DPRINTF(("ah_input: cannot pullup header\n"));
- AHSTAT_INC(ahs_hdrops); /*XXX*/
- error = ENOBUFS;
- goto bad;
+ if (m->m_len < skip + rplen) {
+ m = m_pullup(m, skip + rplen);
+ if (m == NULL) {
+ DPRINTF(("ah_input: cannot pullup header\n"));
+ AHSTAT_INC(ahs_hdrops); /*XXX*/
+ error = ENOBUFS;
+ goto bad;
+ }
}
+ ah = (struct newah *)(mtod(m, caddr_t) + skip);
/* Check replay window, if applicable. */
SECASVAR_LOCK(sav);
diff --git a/freebsd/sys/netipsec/xform_esp.c b/freebsd/sys/netipsec/xform_esp.c
index f5752a96..da64655d 100644
--- a/freebsd/sys/netipsec/xform_esp.c
+++ b/freebsd/sys/netipsec/xform_esp.c
@@ -309,8 +309,17 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff)
ESPSTAT_INC(esps_badilen);
goto bad;
}
- /* XXX don't pullup, just copy header */
- IP6_EXTHDR_GET(esp, struct newesp *, m, skip, sizeof (struct newesp));
+
+ if (m->m_len < skip + sizeof(*esp)) {
+ m = m_pullup(m, skip + sizeof(*esp));
+ if (m == NULL) {
+ DPRINTF(("%s: cannot pullup header\n", __func__));
+ ESPSTAT_INC(esps_hdrops); /*XXX*/
+ error = ENOBUFS;
+ goto bad;
+ }
+ }
+ esp = (struct newesp *)(mtod(m, caddr_t) + skip);
esph = sav->tdb_authalgxform;
espx = sav->tdb_encalgxform;
@@ -609,6 +618,13 @@ esp_input_cb(struct cryptop *crp)
}
}
+ /*
+ * RFC4303 2.6:
+ * Silently drop packet if next header field is IPPROTO_NONE.
+ */
+ if (lastthree[2] == IPPROTO_NONE)
+ goto bad;
+
/* Trim the mbuf chain to remove trailing authenticator and padding */
m_adj(m, -(lastthree[1] + 2));
diff --git a/freebsd/sys/netpfil/pf/pf.c b/freebsd/sys/netpfil/pf/pf.c
index 4f9da55b..6bc1b8c8 100644
--- a/freebsd/sys/netpfil/pf/pf.c
+++ b/freebsd/sys/netpfil/pf/pf.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet6.h>
#include <rtems/bsd/local/opt_bpf.h>
#include <rtems/bsd/local/opt_pf.h>
+#include <rtems/bsd/local/opt_sctp.h>
#include <sys/param.h>
#include <sys/bus.h>
@@ -105,6 +106,10 @@ __FBSDID("$FreeBSD$");
#include <netinet6/scope6_var.h>
#endif /* INET6 */
+#ifdef SCTP
+#include <netinet/sctp_crc32.h>
+#endif
+
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
@@ -5601,7 +5606,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
}
#ifdef SCTP
if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
- sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
+ sctp_delayed_cksum(m0, (uint32_t)(ip->ip_hl << 2));
m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
}
#endif
diff --git a/freebsd/sys/opencrypto/cryptodev.c b/freebsd/sys/opencrypto/cryptodev.c
index 02a03034..d3f4ad1c 100644
--- a/freebsd/sys/opencrypto/cryptodev.c
+++ b/freebsd/sys/opencrypto/cryptodev.c
@@ -268,6 +268,7 @@ crypt_kop_to_32(const struct crypt_kop *from, struct crypt_kop32 *to)
struct csession {
TAILQ_ENTRY(csession) next;
crypto_session_t cses;
+ volatile u_int refs;
u_int32_t ses;
struct mtx lock; /* for op submission */
@@ -294,6 +295,7 @@ struct cryptop_data {
struct fcrypt {
TAILQ_HEAD(csessionlist, csession) csessions;
int sesn;
+ struct mtx lock;
};
static struct timeval warninterval = { .tv_sec = 60, .tv_usec = 0 };
@@ -330,8 +332,7 @@ static const rtems_filesystem_file_handlers_r cryptofops;
#endif /* __rtems__ */
static struct csession *csefind(struct fcrypt *, u_int);
-static int csedelete(struct fcrypt *, struct csession *);
-static struct csession *cseadd(struct fcrypt *, struct csession *);
+static bool csedelete(struct fcrypt *, u_int);
static struct csession *csecreate(struct fcrypt *, crypto_session_t, caddr_t,
u_int64_t, caddr_t, u_int64_t, u_int32_t, u_int32_t, struct enc_xform *,
struct auth_hash *);
@@ -398,8 +399,6 @@ cryptof_ioctl(
struct crypt_op copc;
struct crypt_kop kopc;
#endif
- static struct timeval arc4warn, blfwarn, castwarn, deswarn, md5warn;
- static struct timeval skipwarn, tdeswarn;
switch (cmd) {
case CIOCGSESSION:
@@ -420,28 +419,18 @@ cryptof_ioctl(
case 0:
break;
case CRYPTO_DES_CBC:
- if (ratecheck(&deswarn, &warninterval))
- gone_in(13, "DES cipher via /dev/crypto");
txform = &enc_xform_des;
break;
case CRYPTO_3DES_CBC:
- if (ratecheck(&tdeswarn, &warninterval))
- gone_in(13, "3DES cipher via /dev/crypto");
txform = &enc_xform_3des;
break;
case CRYPTO_BLF_CBC:
- if (ratecheck(&blfwarn, &warninterval))
- gone_in(13, "Blowfish cipher via /dev/crypto");
txform = &enc_xform_blf;
break;
case CRYPTO_CAST_CBC:
- if (ratecheck(&castwarn, &warninterval))
- gone_in(13, "CAST128 cipher via /dev/crypto");
txform = &enc_xform_cast5;
break;
case CRYPTO_SKIPJACK_CBC:
- if (ratecheck(&skipwarn, &warninterval))
- gone_in(13, "Skipjack cipher via /dev/crypto");
txform = &enc_xform_skipjack;
break;
case CRYPTO_AES_CBC:
@@ -454,8 +443,6 @@ cryptof_ioctl(
txform = &enc_xform_null;
break;
case CRYPTO_ARC4:
- if (ratecheck(&arc4warn, &warninterval))
- gone_in(13, "ARC4 cipher via /dev/crypto");
txform = &enc_xform_arc4;
break;
case CRYPTO_CAMELLIA_CBC:
@@ -484,9 +471,6 @@ cryptof_ioctl(
case 0:
break;
case CRYPTO_MD5_HMAC:
- if (ratecheck(&md5warn, &warninterval))
- gone_in(13,
- "MD5-HMAC authenticator via /dev/crypto");
thash = &auth_hash_hmac_md5;
break;
case CRYPTO_POLY1305:
@@ -608,8 +592,8 @@ cryptof_ioctl(
if (thash) {
cria.cri_alg = thash->type;
cria.cri_klen = sop->mackeylen * 8;
- if (thash->keysize != 0 &&
- sop->mackeylen > thash->keysize) {
+ if (sop->mackeylen > thash->keysize ||
+ sop->mackeylen < 0) {
CRYPTDEB("invalid mac key length");
error = EINVAL;
SDT_PROBE1(opencrypto, dev, ioctl, error,
@@ -692,13 +676,10 @@ bail:
break;
case CIOCFSESSION:
ses = *(u_int32_t *)data;
- cse = csefind(fcr, ses);
- if (cse == NULL) {
+ if (!csedelete(fcr, ses)) {
SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__);
return (EINVAL);
}
- csedelete(fcr, cse);
- csefree(cse);
break;
case CIOCCRYPT:
#ifdef COMPAT_FREEBSD32
@@ -715,6 +696,7 @@ bail:
return (EINVAL);
}
error = cryptodev_op(cse, cop, active_cred, td);
+ csefree(cse);
#ifdef COMPAT_FREEBSD32
if (error == 0 && cmd == CIOCCRYPT32)
crypt_op_to_32(cop, data);
@@ -781,6 +763,7 @@ bail:
return (EINVAL);
}
error = cryptodev_aead(cse, caead, active_cred, td);
+ csefree(cse);
break;
default:
error = EINVAL;
@@ -843,6 +826,47 @@ cod_free(struct cryptop_data *cod)
free(cod, M_XDATA);
}
+static void
+cryptodev_warn(struct csession *cse)
+{
+ static struct timeval arc4warn, blfwarn, castwarn, deswarn, md5warn;
+ static struct timeval skipwarn, tdeswarn;
+
+ switch (cse->cipher) {
+ case CRYPTO_DES_CBC:
+ if (ratecheck(&deswarn, &warninterval))
+ gone_in(13, "DES cipher via /dev/crypto");
+ break;
+ case CRYPTO_3DES_CBC:
+ if (ratecheck(&tdeswarn, &warninterval))
+ gone_in(13, "3DES cipher via /dev/crypto");
+ break;
+ case CRYPTO_BLF_CBC:
+ if (ratecheck(&blfwarn, &warninterval))
+ gone_in(13, "Blowfish cipher via /dev/crypto");
+ break;
+ case CRYPTO_CAST_CBC:
+ if (ratecheck(&castwarn, &warninterval))
+ gone_in(13, "CAST128 cipher via /dev/crypto");
+ break;
+ case CRYPTO_SKIPJACK_CBC:
+ if (ratecheck(&skipwarn, &warninterval))
+ gone_in(13, "Skipjack cipher via /dev/crypto");
+ break;
+ case CRYPTO_ARC4:
+ if (ratecheck(&arc4warn, &warninterval))
+ gone_in(13, "ARC4 cipher via /dev/crypto");
+ break;
+ }
+
+ switch (cse->mac) {
+ case CRYPTO_MD5_HMAC:
+ if (ratecheck(&md5warn, &warninterval))
+ gone_in(13, "MD5-HMAC authenticator via /dev/crypto");
+ break;
+ }
+}
+
static int
cryptodev_op(
struct csession *cse,
@@ -965,6 +989,7 @@ cryptodev_op(
error = EINVAL;
goto bail;
}
+ cryptodev_warn(cse);
again:
/*
@@ -1134,6 +1159,7 @@ cryptodev_aead(
SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__);
goto bail;
}
+ cryptodev_warn(cse);
again:
/*
* Let the dispatch run unlocked, then, interlock against the
@@ -1383,6 +1409,9 @@ cryptof_close(struct file *fp, struct thread *td)
while ((cse = TAILQ_FIRST(&fcr->csessions))) {
TAILQ_REMOVE(&fcr->csessions, cse, next);
+ KASSERT(cse->refs == 1,
+ ("%s: crypto session %p with %d refs", __func__, cse,
+ cse->refs));
csefree(cse);
}
free(fcr, M_XDATA);
@@ -1425,34 +1454,36 @@ csefind(struct fcrypt *fcr, u_int ses)
{
struct csession *cse;
- TAILQ_FOREACH(cse, &fcr->csessions, next)
- if (cse->ses == ses)
+ mtx_lock(&fcr->lock);
+ TAILQ_FOREACH(cse, &fcr->csessions, next) {
+ if (cse->ses == ses) {
+ refcount_acquire(&cse->refs);
+ mtx_unlock(&fcr->lock);
return (cse);
+ }
+ }
+ mtx_unlock(&fcr->lock);
return (NULL);
}
-static int
-csedelete(struct fcrypt *fcr, struct csession *cse_del)
+static bool
+csedelete(struct fcrypt *fcr, u_int ses)
{
struct csession *cse;
+ mtx_lock(&fcr->lock);
TAILQ_FOREACH(cse, &fcr->csessions, next) {
- if (cse == cse_del) {
+ if (cse->ses == ses) {
TAILQ_REMOVE(&fcr->csessions, cse, next);
- return (1);
+ mtx_unlock(&fcr->lock);
+ csefree(cse);
+ return (true);
}
}
- return (0);
+ mtx_unlock(&fcr->lock);
+ return (false);
}
-static struct csession *
-cseadd(struct fcrypt *fcr, struct csession *cse)
-{
- TAILQ_INSERT_TAIL(&fcr->csessions, cse, next);
- cse->ses = fcr->sesn++;
- return (cse);
-}
-
struct csession *
csecreate(struct fcrypt *fcr, crypto_session_t cses, caddr_t key, u_int64_t keylen,
caddr_t mackey, u_int64_t mackeylen, u_int32_t cipher, u_int32_t mac,
@@ -1464,6 +1495,7 @@ csecreate(struct fcrypt *fcr, crypto_session_t cses, caddr_t key, u_int64_t keyl
if (cse == NULL)
return NULL;
mtx_init(&cse->lock, "cryptodev", "crypto session lock", MTX_DEF);
+ refcount_init(&cse->refs, 1);
cse->key = key;
cse->keylen = keylen/8;
cse->mackey = mackey;
@@ -1473,7 +1505,10 @@ csecreate(struct fcrypt *fcr, crypto_session_t cses, caddr_t key, u_int64_t keyl
cse->mac = mac;
cse->txform = txform;
cse->thash = thash;
- cseadd(fcr, cse);
+ mtx_lock(&fcr->lock);
+ TAILQ_INSERT_TAIL(&fcr->csessions, cse, next);
+ cse->ses = fcr->sesn++;
+ mtx_unlock(&fcr->lock);
return (cse);
}
@@ -1481,6 +1516,8 @@ static void
csefree(struct csession *cse)
{
+ if (!refcount_release(&cse->refs))
+ return;
crypto_freesession(cse->cses);
mtx_destroy(&cse->lock);
if (cse->key)
@@ -1517,13 +1554,14 @@ cryptoioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread
switch (cmd) {
case CRIOGET:
- fcr = malloc(sizeof(struct fcrypt), M_XDATA, M_WAITOK);
+ fcr = malloc(sizeof(struct fcrypt), M_XDATA, M_WAITOK | M_ZERO);
TAILQ_INIT(&fcr->csessions);
- fcr->sesn = 0;
+ mtx_init(&fcr->lock, "fcrypt", NULL, MTX_DEF);
error = falloc(td, &f, &fd, 0);
if (error) {
+ mtx_destroy(&fcr->lock);
free(fcr, M_XDATA);
return (error);
}
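
The cryptodev changes above introduce a simple session lifetime rule: csefind() hands out referenced sessions under the fcrypt lock, every ioctl path drops its reference when done, and only the final release in csefree() tears the crypto state down, so CIOCFSESSION cannot free a session another thread is still using. A userland sketch of that refcount discipline with C11 atomics (names hypothetical, not the driver's API):

#include <stdatomic.h>
#include <stdlib.h>

struct session {
    atomic_uint refs;
    /* ... keys, crypto state ... */
};

static struct session *
session_create(void)
{
    struct session *s = calloc(1, sizeof(*s));

    if (s != NULL)
        atomic_init(&s->refs, 1);       /* the list's own reference */
    return (s);
}

static void
session_ref(struct session *s)
{
    atomic_fetch_add(&s->refs, 1);      /* csefind() analogue */
}

static void
session_rele(struct session *s)
{
    /* csefree() analogue: only the last reference performs the teardown. */
    if (atomic_fetch_sub(&s->refs, 1) == 1)
        free(s);
}
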
diff --git a/freebsd/sys/sys/buf.h b/freebsd/sys/sys/buf.h
index a099a972..209174b4 100644
--- a/freebsd/sys/sys/buf.h
+++ b/freebsd/sys/sys/buf.h
@@ -450,7 +450,7 @@ buf_countdeps(struct buf *bp, int i)
}
static __inline void
-buf_track(struct buf *bp, const char *location)
+buf_track(struct buf *bp __unused, const char *location __unused)
{
#if defined(FULL_BUF_TRACKING)
diff --git a/freebsd/sys/sys/bus.h b/freebsd/sys/sys/bus.h
index 1ac476a4..48babb3a 100644
--- a/freebsd/sys/sys/bus.h
+++ b/freebsd/sys/sys/bus.h
@@ -563,6 +563,7 @@ int bus_child_present(device_t child);
int bus_child_pnpinfo_str(device_t child, char *buf, size_t buflen);
int bus_child_location_str(device_t child, char *buf, size_t buflen);
void bus_enumerate_hinted_children(device_t bus);
+int bus_delayed_attach_children(device_t bus);
static __inline struct resource *
bus_alloc_resource_any(device_t dev, int type, int *rid, u_int flags)
diff --git a/freebsd/sys/sys/conf.h b/freebsd/sys/sys/conf.h
index 4ace162f..d6215ba9 100644
--- a/freebsd/sys/sys/conf.h
+++ b/freebsd/sys/sys/conf.h
@@ -66,7 +66,7 @@ struct cdev {
#define SI_ETERNAL 0x0001 /* never destroyed */
#define SI_ALIAS 0x0002 /* carrier of alias name */
#define SI_NAMED 0x0004 /* make_dev{_alias} has been called */
-#define SI_CHEAPCLONE 0x0008 /* can be removed_dev'ed when vnode reclaims */
+#define SI_UNUSED1 0x0008 /* unused */
#define SI_CHILD 0x0010 /* child of another struct cdev **/
#define SI_DUMPDEV 0x0080 /* is kernel dumpdev */
#define SI_CLONELIST 0x0200 /* on a clone list */
diff --git a/freebsd/sys/sys/kernel.h b/freebsd/sys/sys/kernel.h
index 41a5233a..fb9ad6ac 100644
--- a/freebsd/sys/sys/kernel.h
+++ b/freebsd/sys/sys/kernel.h
@@ -475,6 +475,8 @@ struct tunable_str {
#define TUNABLE_LONG_FETCH(path, var)
#define TUNABLE_ULONG(path, var)
#define TUNABLE_ULONG_FETCH(path, var)
+#define TUNABLE_UINT64(path, var)
+#define TUNABLE_UINT64_FETCH(path, var)
#define TUNABLE_QUAD(path, var)
#define TUNABLE_QUAD_FETCH(path, var)
#define TUNABLE_STR(path, var, size)
diff --git a/freebsd/sys/sys/linker.h b/freebsd/sys/sys/linker.h
index 8aae31d9..10baaa03 100644
--- a/freebsd/sys/sys/linker.h
+++ b/freebsd/sys/sys/linker.h
@@ -97,6 +97,11 @@ struct linker_file {
*/
int nenabled; /* number of enabled probes. */
int fbt_nentries; /* number of fbt entries created. */
+
+#ifdef __arm__
+ caddr_t exidx_addr; /* Unwind data index table start */
+ size_t exidx_size; /* Unwind data index table size */
+#endif
};
/*
diff --git a/freebsd/sys/sys/malloc.h b/freebsd/sys/sys/malloc.h
index 83510329..56b17f36 100644
--- a/freebsd/sys/sys/malloc.h
+++ b/freebsd/sys/sys/malloc.h
@@ -185,7 +185,11 @@ void *contigmalloc_domainset(unsigned long size, struct malloc_type *type,
unsigned long alignment, vm_paddr_t boundary)
__malloc_like __result_use_check __alloc_size(1) __alloc_align(7);
void free(void *addr, struct malloc_type *type);
+#ifndef __rtems__
void free_domain(void *addr, struct malloc_type *type);
+#else /* __rtems__ */
+#define free_domain(addr, type) free(addr, type)
+#endif /* __rtems__ */
#ifndef __rtems__
void *malloc(size_t size, struct malloc_type *type, int flags) __malloc_like
__result_use_check __alloc_size(1);
@@ -250,9 +254,13 @@ void *_bsd_malloc(size_t size, struct malloc_type *type, int flags)
_malloc_item; \
})
+#ifndef __rtems__
void *malloc_domainset(size_t size, struct malloc_type *type,
struct domainset *ds, int flags) __malloc_like __result_use_check
__alloc_size(1);
+#else /* __rtems__ */
+#define malloc_domainset(size, type, ds, flags) malloc(size, type, flags)
+#endif /* __rtems__ */
void *mallocarray(size_t nmemb, size_t size, struct malloc_type *type,
int flags) __malloc_like __result_use_check
__alloc_size2(1, 2);
diff --git a/freebsd/sys/sys/mbuf.h b/freebsd/sys/sys/mbuf.h
index 634f7d9e..badc7eef 100644
--- a/freebsd/sys/sys/mbuf.h
+++ b/freebsd/sys/sys/mbuf.h
@@ -521,6 +521,8 @@ struct mbuf {
#define CSUM_L5_VALID 0x20000000 /* checksum is correct */
#define CSUM_COALESCED 0x40000000 /* contains merged segments */
+#define CSUM_SND_TAG 0x80000000 /* Packet header has send tag */
+
/*
* CSUM flag description for use with printf(9) %b identifier.
*/
@@ -530,7 +532,7 @@ struct mbuf {
"\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \
"\16CSUM_IP6_ISCSI" \
"\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \
- "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED"
+ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED\40CSUM_SND_TAG"
/* CSUM flags compatibility mappings. */
#define CSUM_IP_CHECKED CSUM_L3_CALC
diff --git a/freebsd/sys/sys/mount.h b/freebsd/sys/sys/mount.h
index dabb506d..698716f5 100644
--- a/freebsd/sys/sys/mount.h
+++ b/freebsd/sys/sys/mount.h
@@ -396,6 +396,7 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
#define MNTK_UNMAPPED_BUFS 0x00002000
#define MNTK_USES_BCACHE 0x00004000 /* FS uses the buffer cache. */
#define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */
+#define MNTK_VMSETSIZE_BUG 0x00010000
#define MNTK_NOASYNC 0x00800000 /* disable async */
#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
diff --git a/freebsd/sys/sys/pcpu.h b/freebsd/sys/sys/pcpu.h
index 0ce30af7..7812c3d8 100644
--- a/freebsd/sys/sys/pcpu.h
+++ b/freebsd/sys/sys/pcpu.h
@@ -221,10 +221,6 @@ extern struct cpuhead cpuhead;
extern struct pcpu *cpuid_to_pcpu[];
#define curcpu PCPU_GET(cpuid)
-#define curproc (curthread->td_proc)
-#ifndef curthread
-#define curthread PCPU_GET(curthread)
-#endif
#define curvidata PCPU_GET(vidata)
#ifndef __rtems__
@@ -233,20 +229,12 @@ extern struct pcpu *cpuid_to_pcpu[];
#define UMA_PCPU_ALLOC_SIZE (PAGE_SIZE / 32)
#endif /* __rtems__ */
-#ifndef __rtems__
-#ifdef CTASSERT
-#if defined(__i386__) || defined(__amd64__)
-/* Required for counters(9) to work on x86. */
-CTASSERT(sizeof(struct pcpu) == UMA_PCPU_ALLOC_SIZE);
-#else
-/*
- * To minimize memory waste in per-cpu UMA zones, size of struct pcpu
- * should be denominator of PAGE_SIZE.
- */
-CTASSERT((PAGE_SIZE / sizeof(struct pcpu)) * sizeof(struct pcpu) == PAGE_SIZE);
-#endif /* UMA_PCPU_ALLOC_SIZE && x86 */
-#endif /* CTASSERT */
-#endif /* __rtems__ */
+#include <machine/pcpu_aux.h>
+
+#ifndef curthread
+#define curthread PCPU_GET(curthread)
+#endif
+#define curproc (curthread->td_proc)
/* Accessor to elements allocated via UMA_ZONE_PCPU zone. */
static inline void *
diff --git a/freebsd/sys/sys/proc.h b/freebsd/sys/sys/proc.h
index 01cf3963..04a0e430 100644
--- a/freebsd/sys/sys/proc.h
+++ b/freebsd/sys/sys/proc.h
@@ -1057,6 +1057,8 @@ struct fork_req {
int *fr_pd_fd;
int fr_pd_flags;
struct filecaps *fr_pd_fcaps;
+ int fr_flags2;
+#define FR2_DROPSIG_CAUGHT 0x00001 /* Drop caught non-DFL signals */
};
/*
@@ -1185,6 +1187,7 @@ void cpu_thread_swapin(struct thread *);
void cpu_thread_swapout(struct thread *);
struct thread *thread_alloc(int pages);
int thread_alloc_stack(struct thread *, int pages);
+int thread_check_susp(struct thread *td, bool sleep);
void thread_cow_get_proc(struct thread *newtd, struct proc *p);
void thread_cow_get(struct thread *newtd, struct thread *td);
void thread_cow_free(struct thread *td);
diff --git a/freebsd/sys/sys/signalvar.h b/freebsd/sys/sys/signalvar.h
index aafbc0f8..70dd8fa3 100644
--- a/freebsd/sys/sys/signalvar.h
+++ b/freebsd/sys/sys/signalvar.h
@@ -382,6 +382,7 @@ void sigacts_copy(struct sigacts *dest, struct sigacts *src);
void sigacts_free(struct sigacts *ps);
struct sigacts *sigacts_hold(struct sigacts *ps);
int sigacts_shared(struct sigacts *ps);
+void sig_drop_caught(struct proc *p);
void sigexit(struct thread *td, int sig) __dead2;
int sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **);
int sig_ffs(sigset_t *set);
diff --git a/freebsd/sys/sys/smp.h b/freebsd/sys/sys/smp.h
index aa0c3119..22b7dcd5 100644
--- a/freebsd/sys/sys/smp.h
+++ b/freebsd/sys/sys/smp.h
@@ -168,8 +168,10 @@ extern cpuset_t logical_cpus_mask;
#ifndef __rtems__
extern u_int mp_maxid;
extern int mp_maxcpus;
+extern int mp_ncores;
extern int mp_ncpus;
extern volatile int smp_started;
+extern int smp_threads_per_core;
extern cpuset_t all_cpus;
extern cpuset_t cpuset_domain[MAXMEMDOM]; /* CPUs in each NUMA domain. */
diff --git a/freebsd/sys/sys/sysctl.h b/freebsd/sys/sys/sysctl.h
index b2ae7f97..c21f19d3 100644
--- a/freebsd/sys/sys/sysctl.h
+++ b/freebsd/sys/sys/sysctl.h
@@ -888,7 +888,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
/*
* Top-level identifiers
*/
-#define CTL_UNSPEC 0 /* unused */
+#define CTL_SYSCTL 0 /* "magic" numbers */
#define CTL_KERN 1 /* "high kernel": proc, limits */
#define CTL_VM 2 /* virtual memory */
#define CTL_VFS 3 /* filesystem, mount type is next */
@@ -900,6 +900,17 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define CTL_P1003_1B 9 /* POSIX 1003.1B */
/*
+ * CTL_SYSCTL identifiers
+ */
+#define CTL_SYSCTL_DEBUG 0 /* printf all nodes */
+#define CTL_SYSCTL_NAME 1 /* string name of OID */
+#define CTL_SYSCTL_NEXT 2 /* next OID */
+#define CTL_SYSCTL_NAME2OID 3 /* int array of name */
+#define CTL_SYSCTL_OIDFMT 4 /* OID's kind and format */
+#define CTL_SYSCTL_OIDDESCR 5 /* OID's description */
+#define CTL_SYSCTL_OIDLABEL 6 /* aggregation label */
+
+/*
* CTL_KERN identifiers
*/
#define KERN_OSTYPE 1 /* string: system version */
@@ -1085,6 +1096,7 @@ SYSCTL_DECL(_hw_bus);
SYSCTL_DECL(_hw_bus_devices);
SYSCTL_DECL(_hw_bus_info);
SYSCTL_DECL(_machdep);
+SYSCTL_DECL(_machdep_mitigations);
SYSCTL_DECL(_user);
SYSCTL_DECL(_compat);
SYSCTL_DECL(_regression);
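The CTL_SYSCTL_* identifiers give names to the internal OIDs under the top-level node formerly called CTL_UNSPEC, which until now were addressed with bare magic numbers. As a rough userland illustration (not the libc implementation), name-to-OID translation goes through CTL_SYSCTL_NAME2OID:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <string.h>

    /* Resolve a sysctl name such as "kern.ostype" to its numeric OID. */
    static int
    name_to_oid(const char *name, int *oid, size_t *oidlen)
    {
            int mib[2] = { CTL_SYSCTL, CTL_SYSCTL_NAME2OID };
            size_t len = *oidlen * sizeof(int);

            if (sysctl(mib, 2, oid, &len, name, strlen(name)) == -1)
                    return (-1);
            *oidlen = len / sizeof(int);
            return (0);
    }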
diff --git a/freebsd/sys/sys/systm.h b/freebsd/sys/sys/systm.h
index a52cde01..aae31704 100644
--- a/freebsd/sys/sys/systm.h
+++ b/freebsd/sys/sys/systm.h
@@ -634,9 +634,14 @@ int poll_no_poll(int events);
void DELAY(int usec);
/* Root mount holdback API */
-struct root_hold_token;
+struct root_hold_token {
+ int flags;
+ const char *who;
+ TAILQ_ENTRY(root_hold_token) list;
+};
struct root_hold_token *root_mount_hold(const char *identifier);
+void root_mount_hold_token(const char *identifier, struct root_hold_token *h);
void root_mount_rel(struct root_hold_token *h);
int root_mounted(void);
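With struct root_hold_token now defined in the header, a driver can embed the token and register it via root_mount_hold_token() instead of having root_mount_hold() allocate one. A minimal sketch with an invented softc:

    /* Illustrative: hold back the root mount with an embedded token. */
    struct example_softc {
            struct root_hold_token sc_roothold;  /* hypothetical member */
    };

    static void
    example_attach_begin(struct example_softc *sc)
    {
            root_mount_hold_token("example", &sc->sc_roothold);
    }

    static void
    example_attach_done(struct example_softc *sc)
    {
            root_mount_rel(&sc->sc_roothold);
    }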
diff --git a/freebsd/sys/sys/taskqueue.h b/freebsd/sys/sys/taskqueue.h
index 4af1e0a3..3f7ff1f5 100644
--- a/freebsd/sys/sys/taskqueue.h
+++ b/freebsd/sys/sys/taskqueue.h
@@ -42,6 +42,7 @@
struct taskqueue;
struct taskqgroup;
+struct proc;
struct thread;
struct timeout_task {
@@ -75,7 +76,9 @@ struct taskqueue *taskqueue_create(const char *name, int mflags,
taskqueue_enqueue_fn enqueue,
void *context);
int taskqueue_start_threads(struct taskqueue **tqp, int count, int pri,
- const char *name, ...) __printflike(4, 5);
+ const char *name, ...) __printflike(4, 5);
+int taskqueue_start_threads_in_proc(struct taskqueue **tqp, int count,
+ int pri, struct proc *p, const char *name, ...) __printflike(5, 6);
int taskqueue_start_threads_cpuset(struct taskqueue **tqp, int count,
int pri, cpuset_t *mask, const char *name, ...) __printflike(5, 6);
int taskqueue_enqueue(struct taskqueue *queue, struct task *task);
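The new taskqueue_start_threads_in_proc() variant declared above lets the worker threads be created inside a caller-supplied process rather than under proc0. A hedged usage sketch (queue name and helper are illustrative), built on the usual taskqueue_thread_enqueue pattern:

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/kernel.h>
    #include <sys/malloc.h>
    #include <sys/proc.h>
    #include <sys/taskqueue.h>

    static struct taskqueue *example_tq;

    static int
    example_tq_start(struct proc *owner)
    {
            example_tq = taskqueue_create("example", M_WAITOK,
                taskqueue_thread_enqueue, &example_tq);
            /* One worker thread, parked in 'owner' instead of proc0. */
            return (taskqueue_start_threads_in_proc(&example_tq, 1, PWAIT,
                owner, "example taskq"));
    }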
diff --git a/freebsd/sys/sys/unpcb.h b/freebsd/sys/sys/unpcb.h
index 7d7a20ac..3ea20b1d 100644
--- a/freebsd/sys/sys/unpcb.h
+++ b/freebsd/sys/sys/unpcb.h
@@ -160,7 +160,7 @@ struct xunpcb {
char xu_dummy2[256];
};
struct xsocket xu_socket;
-} __aligned(8);
+} __aligned(MAX(8, sizeof(void *)));
struct xunpgen {
ksize_t xug_len;
diff --git a/freebsd/sys/sys/vnode.h b/freebsd/sys/sys/vnode.h
index c1235a79..f3cdf8a5 100644
--- a/freebsd/sys/sys/vnode.h
+++ b/freebsd/sys/sys/vnode.h
@@ -247,6 +247,7 @@ struct xvnode {
#define VV_NOSYNC 0x0004 /* unlinked, stop syncing */
#define VV_ETERNALDEV 0x0008 /* device that is never destroyed */
#define VV_CACHEDLABEL 0x0010 /* Vnode has valid cached MAC label */
+#define VV_VMSIZEVNLOCK 0x0020 /* object size check requires vnode lock */
#define VV_COPYONWRITE 0x0040 /* vnode is doing copy-on-write */
#define VV_SYSTEM 0x0080 /* vnode being used by kernel */
#define VV_PROCDEP 0x0100 /* vnode is process dependent */
@@ -577,6 +578,7 @@ typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int);
#define VN_OPEN_NOAUDIT 0x00000001
#define VN_OPEN_NOCAPCHECK 0x00000002
#define VN_OPEN_NAMECACHE 0x00000004
+#define VN_OPEN_INVFS 0x00000008
/*
* Public vnode manipulation functions.
@@ -920,6 +922,8 @@ int vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
void vn_fsid(struct vnode *vp, struct vattr *va);
+int vn_dir_check_exec(struct vnode *vp, struct componentname *cnp);
+
#endif /* _KERNEL */
#endif /* __rtems__ */
diff --git a/freebsd/sys/vm/uma_core.c b/freebsd/sys/vm/uma_core.c
index 7738c5d2..8c3a84b4 100644
--- a/freebsd/sys/vm/uma_core.c
+++ b/freebsd/sys/vm/uma_core.c
@@ -189,8 +189,14 @@ SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
#ifndef __rtems__
/* Is the VM done starting up? */
-static enum { BOOT_COLD = 0, BOOT_STRAPPED, BOOT_PAGEALLOC, BOOT_BUCKETS,
- BOOT_RUNNING } booted = BOOT_COLD;
+static enum {
+ BOOT_COLD,
+ BOOT_STRAPPED,
+ BOOT_PAGEALLOC,
+ BOOT_BUCKETS,
+ BOOT_RUNNING,
+ BOOT_SHUTDOWN,
+} booted = BOOT_COLD;
#endif /* __rtems__ */
/*
@@ -311,6 +317,9 @@ static int hash_expand(struct uma_hash *, struct uma_hash *);
static void hash_free(struct uma_hash *hash);
static void uma_timeout(void *);
static void uma_startup3(void);
+#ifndef __rtems__
+static void uma_shutdown(void);
+#endif /* __rtems__ */
static void *zone_alloc_item(uma_zone_t, void *, int, int);
static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
static void bucket_enable(void);
@@ -1255,8 +1264,7 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
case BOOT_PAGEALLOC:
if (keg->uk_ppera > 1)
break;
- case BOOT_BUCKETS:
- case BOOT_RUNNING:
+ default:
#ifdef UMA_MD_SMALL_ALLOC
keg->uk_allocf = (keg->uk_ppera > 1) ?
page_alloc : uma_small_alloc;
@@ -2259,10 +2267,6 @@ uma_startup2(void)
}
#endif /* __rtems__ */
-/*
- * Initialize our callout handle
- *
- */
static void
uma_startup3(void)
{
@@ -2278,9 +2282,21 @@ uma_startup3(void)
callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
#ifndef __rtems__
booted = BOOT_RUNNING;
+
+ EVENTHANDLER_REGISTER(shutdown_post_sync, uma_shutdown, NULL,
+ EVENTHANDLER_PRI_FIRST);
#endif /* __rtems__ */
}
+#ifndef __rtems__
+static void
+uma_shutdown(void)
+{
+
+ booted = BOOT_SHUTDOWN;
+}
+#endif /* __rtems__ */
+
static uma_keg_t
uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
int align, uint32_t flags)
@@ -2518,6 +2534,16 @@ void
uma_zdestroy(uma_zone_t zone)
{
+#ifndef __rtems__
+ /*
+ * Large slabs are expensive to reclaim, so don't bother doing
+ * unnecessary work if we're shutting down.
+ */
+ if (booted == BOOT_SHUTDOWN &&
+ zone->uz_fini == NULL &&
+ zone->uz_release == (uma_release)zone_release)
+ return;
+#endif /* __rtems__ */
sx_slock(&uma_drain_lock);
zone_free_item(zones, zone, NULL, SKIP_NONE);
sx_sunlock(&uma_drain_lock);
diff --git a/freebsd/sys/vm/vm_extern.h b/freebsd/sys/vm/vm_extern.h
index 52884357..0d17f8af 100644
--- a/freebsd/sys/vm/vm_extern.h
+++ b/freebsd/sys/vm/vm_extern.h
@@ -85,19 +85,18 @@ void kmeminit(void);
int kernacc(void *, int, int);
int useracc(void *, int, int);
-int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int);
+int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags, vm_page_t *m_hold);
void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
vm_ooffset_t *);
int vm_fault_disable_pagefaults(void);
void vm_fault_enable_pagefaults(int save);
#ifndef __rtems__
-int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
- int fault_flags, vm_page_t *m_hold);
int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
vm_prot_t prot, vm_page_t *ma, int max_count);
+int vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags, int *signo, int *ucode);
#endif /* __rtems__ */
-void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
-int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
int vm_forkproc(struct thread *, struct proc *, struct thread *,
struct vmspace *, int);
void vm_waitproc(struct proc *);