author    Sebastian Huber <sebastian.huber@embedded-brains.de>  2019-09-24 11:05:03 +0200
committer Sebastian Huber <sebastian.huber@embedded-brains.de>  2019-11-13 10:47:04 +0100
commit    a5ddb0ea69f21c16b7697a935d7a0c16bb3cffcf (patch)
tree      db091fb0f7d091804482156c9f3f55879ac93d5b /freebsd/sys/kern
parent    test/syscalls01: Fix sporadic test failures (diff)
download  rtems-libbsd-a5ddb0ea69f21c16b7697a935d7a0c16bb3cffcf.tar.bz2
Update to FreeBSD head 2019-09-24
Git mirror commit 6b0307a0a5184339393f555d5d424190d8a8277a.
Diffstat (limited to 'freebsd/sys/kern')
-rw-r--r--  freebsd/sys/kern/init_main.c          |  25
-rw-r--r--  freebsd/sys/kern/kern_conf.c          |   2
-rw-r--r--  freebsd/sys/kern/kern_event.c         |   1
-rw-r--r--  freebsd/sys/kern/kern_intr.c          |  37
-rw-r--r--  freebsd/sys/kern/kern_mbuf.c          | 470
-rw-r--r--  freebsd/sys/kern/kern_mib.c           | 101
-rw-r--r--  freebsd/sys/kern/kern_mtxpool.c       |   4
-rw-r--r--  freebsd/sys/kern/kern_synch.c         |  83
-rw-r--r--  freebsd/sys/kern/kern_sysctl.c        | 683
-rw-r--r--  freebsd/sys/kern/kern_time.c          |  12
-rw-r--r--  freebsd/sys/kern/kern_timeout.c       |  57
-rw-r--r--  freebsd/sys/kern/kern_uuid.c          |   2
-rw-r--r--  freebsd/sys/kern/subr_blist.c         | 344
-rw-r--r--  freebsd/sys/kern/subr_bus.c           | 135
-rw-r--r--  freebsd/sys/kern/subr_eventhandler.c  |   1
-rw-r--r--  freebsd/sys/kern/subr_gtaskqueue.c    | 101
-rw-r--r--  freebsd/sys/kern/subr_kobj.c          |  95
-rw-r--r--  freebsd/sys/kern/subr_lock.c          |  42
-rw-r--r--  freebsd/sys/kern/subr_pcpu.c          |  12
-rw-r--r--  freebsd/sys/kern/subr_prf.c           |  45
-rw-r--r--  freebsd/sys/kern/subr_sbuf.c          | 159
-rw-r--r--  freebsd/sys/kern/subr_sleepqueue.c    |  55
-rw-r--r--  freebsd/sys/kern/subr_taskqueue.c     |   2
-rw-r--r--  freebsd/sys/kern/sys_generic.c        |   6
-rwxr-xr-x  freebsd/sys/kern/sys_pipe.c           |  95
-rw-r--r--  freebsd/sys/kern/tty.c                |   8
-rw-r--r--  freebsd/sys/kern/uipc_mbuf.c          | 292
-rw-r--r--  freebsd/sys/kern/uipc_mbuf2.c         |   2
-rw-r--r--  freebsd/sys/kern/uipc_sockbuf.c       | 139
-rw-r--r--  freebsd/sys/kern/uipc_socket.c        | 136
-rw-r--r--  freebsd/sys/kern/uipc_syscalls.c      |  45
-rw-r--r--  freebsd/sys/kern/uipc_usrreq.c        |  67
32 files changed, 2708 insertions(+), 550 deletions(-)
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c
index c6a9e310..2265d89a 100644
--- a/freebsd/sys/kern/init_main.c
+++ b/freebsd/sys/kern/init_main.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/epoch.h>
+#include <sys/eventhandler.h>
#include <sys/exec.h>
#include <sys/file.h>
#include <sys/filedesc.h>
@@ -110,6 +111,14 @@ struct thread0_storage thread0_st __aligned(32);
struct vmspace vmspace0;
struct proc *initproc;
+int
+linux_alloc_current_noop(struct thread *td __unused, int flags __unused)
+{
+ return (0);
+}
+int (*lkpi_alloc_current)(struct thread *, int) = linux_alloc_current_noop;
+
+
#ifndef BOOTHOWTO
#define BOOTHOWTO 0
#endif
@@ -155,11 +164,6 @@ SET_DECLARE(sysinit_set, struct sysinit);
struct sysinit **sysinit, **sysinit_end;
struct sysinit **newsysinit, **newsysinit_end;
-EVENTHANDLER_LIST_DECLARE(process_init);
-EVENTHANDLER_LIST_DECLARE(thread_init);
-EVENTHANDLER_LIST_DECLARE(process_ctor);
-EVENTHANDLER_LIST_DECLARE(thread_ctor);
-
/*
* Merge a new sysinit set into the current set, reallocating it if
* necessary. This can only be called after malloc is running.
@@ -440,7 +444,6 @@ struct sysentvec null_sysvec = {
.sv_coredump = NULL,
.sv_imgact_try = NULL,
.sv_minsigstksz = 0,
- .sv_pagesize = PAGE_SIZE,
.sv_minuser = VM_MIN_ADDRESS,
.sv_maxuser = VM_MAXUSER_ADDRESS,
.sv_usrstack = USRSTACK,
@@ -482,7 +485,7 @@ proc0_init(void *dummy __unused)
GIANT_REQUIRED;
p = &proc0;
td = &thread0;
-
+
/*
* Initialize magic number and osrel.
*/
@@ -822,11 +825,9 @@ start_init(void *dummy)
}
/*
- * Like kproc_create(), but runs in its own address space.
- * We do this early to reserve pid 1.
- *
- * Note special case - do not make it runnable yet. Other work
- * in progress will change this more.
+ * Like kproc_create(), but runs in its own address space. We do this
+ * early to reserve pid 1. Note special case - do not make it
+ * runnable yet, init execution is started when userspace can be served.
*/
static void
create_init(const void *udata __unused)
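
The new lkpi_alloc_current function pointer gives the core kernel a hook that a kernel module (presumably the LinuxKPI compatibility layer, given the lkpi_/linux_ naming) can override at load time; the default linux_alloc_current_noop simply succeeds. A minimal sketch of such an override, with hypothetical names linux_alloc_current_real and linux_current_mod_load, might look like:

#include <sys/param.h>
#include <sys/proc.h>

extern int (*lkpi_alloc_current)(struct thread *, int);

/* Hypothetical module-side allocator installed over the no-op hook. */
static int
linux_alloc_current_real(struct thread *td, int flags)
{
        /* ... allocate per-thread emulation state and attach it to td ... */
        return (0);
}

static void
linux_current_mod_load(void *arg __unused)
{
        lkpi_alloc_current = linux_alloc_current_real;
}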
diff --git a/freebsd/sys/kern/kern_conf.c b/freebsd/sys/kern/kern_conf.c
index 560a450a..26718648 100644
--- a/freebsd/sys/kern/kern_conf.c
+++ b/freebsd/sys/kern/kern_conf.c
@@ -656,7 +656,7 @@ prep_cdevsw(struct cdevsw *devsw, int flags)
return (0);
}
- if (devsw->d_version != D_VERSION_03) {
+ if (devsw->d_version != D_VERSION_04) {
printf(
"WARNING: Device driver \"%s\" has wrong version %s\n",
devsw->d_name == NULL ? "???" : devsw->d_name,
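
The kern_conf.c hunk bumps the accepted character-device ABI from D_VERSION_03 to D_VERSION_04, so drivers built against the old ABI now trigger the warning. Drivers that initialize the field with the generic D_VERSION macro, which tracks the newest version, pick the bump up on recompilation; a typical declaration, sketched here with hypothetical foo_* methods, looks like:

#include <sys/conf.h>

static d_open_t foo_open;
static d_read_t foo_read;

static struct cdevsw foo_cdevsw = {
        .d_version = D_VERSION,         /* resolves to D_VERSION_04 here */
        .d_open =    foo_open,
        .d_read =    foo_read,
        .d_name =    "foo",
};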
diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c
index 5c75c657..f0700e55 100644
--- a/freebsd/sys/kern/kern_event.c
+++ b/freebsd/sys/kern/kern_event.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
diff --git a/freebsd/sys/kern/kern_intr.c b/freebsd/sys/kern/kern_intr.c
index 122e82bd..65b633f6 100644
--- a/freebsd/sys/kern/kern_intr.c
+++ b/freebsd/sys/kern/kern_intr.c
@@ -101,7 +101,7 @@ struct proc *intrproc;
static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads");
-static int intr_storm_threshold = 1000;
+static int intr_storm_threshold = 0;
SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RWTUN,
&intr_storm_threshold, 0,
"Number of consecutive interrupts before storm protection is enabled");
@@ -231,10 +231,20 @@ intr_event_update(struct intr_event *ie)
}
/*
- * If the handler names were too long, add +'s to indicate missing
- * names. If we run out of room and still have +'s to add, change
- * the last character from a + to a *.
+ * If there is only one handler and its name is too long, just copy in
+ * as much of the end of the name (includes the unit number) as will
+ * fit. Otherwise, we have multiple handlers and not all of the names
+ * will fit. Add +'s to indicate missing names. If we run out of room
+ * and still have +'s to add, change the last character from a + to a *.
*/
+ if (missed == 1 && space == 1) {
+ ih = CK_SLIST_FIRST(&ie->ie_handlers);
+ missed = strlen(ie->ie_fullname) + strlen(ih->ih_name) + 2 -
+ sizeof(ie->ie_fullname);
+ strcat(ie->ie_fullname, (missed == 0) ? " " : "-");
+ strcat(ie->ie_fullname, &ih->ih_name[missed]);
+ missed = 0;
+ }
last = &ie->ie_fullname[sizeof(ie->ie_fullname) - 2];
while (missed-- > 0) {
if (strlen(ie->ie_fullname) + 1 == sizeof(ie->ie_fullname)) {
@@ -393,6 +403,25 @@ intr_event_bind_ithread(struct intr_event *ie, int cpu)
return (_intr_event_bind(ie, cpu, false, true));
}
+/*
+ * Bind an interrupt event's ithread to the specified cpuset.
+ */
+int
+intr_event_bind_ithread_cpuset(struct intr_event *ie, cpuset_t *cs)
+{
+ lwpid_t id;
+
+ mtx_lock(&ie->ie_lock);
+ if (ie->ie_thread != NULL) {
+ id = ie->ie_thread->it_thread->td_tid;
+ mtx_unlock(&ie->ie_lock);
+ return (cpuset_setthread(id, cs));
+ } else {
+ mtx_unlock(&ie->ie_lock);
+ }
+ return (ENODEV);
+}
+
static struct intr_event *
intr_lookup(int irq)
{
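
The new intr_event_bind_ithread_cpuset() complements the existing single-CPU binding by accepting an arbitrary CPU set, and returns ENODEV when the event has no ithread. A sketch of a hypothetical caller restricting an ithread to CPUs 0 and 1:

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/interrupt.h>

static int
foo_bind_ithread(struct intr_event *ie)
{
        cpuset_t cs;

        CPU_ZERO(&cs);
        CPU_SET(0, &cs);
        CPU_SET(1, &cs);
        return (intr_event_bind_ithread_cpuset(ie, &cs));
}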
diff --git a/freebsd/sys/kern/kern_mbuf.c b/freebsd/sys/kern/kern_mbuf.c
index f94eda5b..85846acd 100644
--- a/freebsd/sys/kern/kern_mbuf.c
+++ b/freebsd/sys/kern/kern_mbuf.c
@@ -33,6 +33,7 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_param.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <sys/param.h>
#include <sys/conf.h>
@@ -43,13 +44,20 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
+#include <sys/ktls.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
+#include <sys/refcount.h>
+#include <sys/sf_buf.h>
#include <sys/smp.h>
+#include <sys/socket.h>
#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
@@ -112,11 +120,20 @@ int nmbjumbop; /* limits number of page size jumbo clusters */
int nmbjumbo9; /* limits number of 9k jumbo clusters */
int nmbjumbo16; /* limits number of 16k jumbo clusters */
+bool mb_use_ext_pgs; /* use EXT_PGS mbufs for sendfile & TLS */
+SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN,
+ &mb_use_ext_pgs, 0,
+ "Use unmapped mbufs for sendfile(2) and TLS offload");
+
static quad_t maxmbufmem; /* overall real memory limit for all mbufs */
SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
"Maximum real memory allocatable to various mbuf types");
+static counter_u64_t snd_tag_count;
+SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW,
+ &snd_tag_count, "# of active mbuf send tags");
+
/*
* tunable_mbinit() has to be run before any mbuf allocations are done.
*/
@@ -285,6 +302,7 @@ uma_zone_t zone_pack;
uma_zone_t zone_jumbop;
uma_zone_t zone_jumbo9;
uma_zone_t zone_jumbo16;
+uma_zone_t zone_extpgs;
/*
* Local prototypes.
@@ -302,6 +320,9 @@ static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
+_Static_assert(sizeof(struct mbuf_ext_pgs) == 256,
+ "mbuf_ext_pgs size mismatch");
+
/*
* Initialize FreeBSD Network buffer allocation.
*/
@@ -383,6 +404,15 @@ mbuf_init(void *dummy)
uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);
+ zone_extpgs = uma_zcreate(MBUF_EXTPGS_MEM_NAME,
+ sizeof(struct mbuf_ext_pgs),
+#ifdef INVARIANTS
+ trash_ctor, trash_dtor, trash_init, trash_fini,
+#else
+ NULL, NULL, NULL, NULL,
+#endif
+ UMA_ALIGN_CACHE, 0);
+
/*
* Hook event handler for low-memory situation, used to
* drain protocols and push data back to the caches (UMA
@@ -392,6 +422,8 @@ mbuf_init(void *dummy)
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
#endif /* __rtems__ */
+
+ snd_tag_count = counter_u64_alloc(M_WAITOK);
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
@@ -697,14 +729,14 @@ mb_dtor_pack(void *mem, int size, void *arg)
#endif
/*
* If there are processes blocked on zone_clust, waiting for pages
- * to be freed up, * cause them to be woken up by draining the
- * packet zone. We are exposed to a race here * (in the check for
+ * to be freed up, cause them to be woken up by draining the
+ * packet zone. We are exposed to a race here (in the check for
* the UMA_ZFLAG_FULL) where we might miss the flag set, but that
* is deliberate. We don't want to acquire the zone lock for every
* mbuf free.
*/
if (uma_zone_exhausted_nolock(zone_clust))
- zone_drain(zone_pack);
+ uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN);
}
/*
@@ -832,6 +864,384 @@ mb_reclaim(uma_zone_t zone __unused, int pending __unused)
}
/*
+ * Free "count" units of I/O from an mbuf chain. They could be held
+ * in EXT_PGS or just as a normal mbuf. This code is intended to be
+ * called in an error path (I/O error, closed connection, etc).
+ */
+void
+mb_free_notready(struct mbuf *m, int count)
+{
+ int i;
+
+ for (i = 0; i < count && m != NULL; i++) {
+#ifndef __rtems__
+ if ((m->m_flags & M_EXT) != 0 &&
+ m->m_ext.ext_type == EXT_PGS) {
+ m->m_ext.ext_pgs->nrdy--;
+ if (m->m_ext.ext_pgs->nrdy != 0)
+ continue;
+ }
+#endif /* __rtems__ */
+ m = m_free(m);
+ }
+ KASSERT(i == count, ("Removed only %d items from %p", i, m));
+}
+
+#ifndef __rtems__
+/*
+ * Compress an unmapped mbuf into a simple mbuf when it holds a small
+ * amount of data. This is used as a DOS defense to avoid having
+ * small packets tie up wired pages, an ext_pgs structure, and an
+ * mbuf. Since this converts the existing mbuf in place, it can only
+ * be used if there are no other references to 'm'.
+ */
+int
+mb_unmapped_compress(struct mbuf *m)
+{
+ volatile u_int *refcnt;
+ struct mbuf m_temp;
+
+ /*
+ * Assert that 'm' does not have a packet header. If 'm' had
+ * a packet header, it would only be able to hold MHLEN bytes
+ * and m_data would have to be initialized differently.
+ */
+ KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXT) &&
+ m->m_ext.ext_type == EXT_PGS,
+ ("%s: m %p !M_EXT or !EXT_PGS or M_PKTHDR", __func__, m));
+ KASSERT(m->m_len <= MLEN, ("m_len too large %p", m));
+
+ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+ refcnt = &m->m_ext.ext_count;
+ } else {
+ KASSERT(m->m_ext.ext_cnt != NULL,
+ ("%s: no refcounting pointer on %p", __func__, m));
+ refcnt = m->m_ext.ext_cnt;
+ }
+
+ if (*refcnt != 1)
+ return (EBUSY);
+
+ /*
+ * Copy mbuf header and m_ext portion of 'm' to 'm_temp' to
+ * create a "fake" EXT_PGS mbuf that can be used with
+ * m_copydata() as well as the ext_free callback.
+ */
+ memcpy(&m_temp, m, offsetof(struct mbuf, m_ext) + sizeof (m->m_ext));
+ m_temp.m_next = NULL;
+ m_temp.m_nextpkt = NULL;
+
+ /* Turn 'm' into a "normal" mbuf. */
+ m->m_flags &= ~(M_EXT | M_RDONLY | M_NOMAP);
+ m->m_data = m->m_dat;
+
+ /* Copy data from template's ext_pgs. */
+ m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, caddr_t));
+
+ /* Free the backing pages. */
+ m_temp.m_ext.ext_free(&m_temp);
+
+ /* Finally, free the ext_pgs struct. */
+ uma_zfree(zone_extpgs, m_temp.m_ext.ext_pgs);
+ return (0);
+}
+
+/*
+ * These next few routines are used to permit downgrading an unmapped
+ * mbuf to a chain of mapped mbufs. This is used when an interface
+ * doesn't supported unmapped mbufs or if checksums need to be
+ * computed in software.
+ *
+ * Each unmapped mbuf is converted to a chain of mbufs. First, any
+ * TLS header data is stored in a regular mbuf. Second, each page of
+ * unmapped data is stored in an mbuf with an EXT_SFBUF external
+ * cluster. These mbufs use an sf_buf to provide a valid KVA for the
+ * associated physical page. They also hold a reference on the
+ * original EXT_PGS mbuf to ensure the physical page doesn't go away.
+ * Finally, any TLS trailer data is stored in a regular mbuf.
+ *
+ * mb_unmapped_free_mext() is the ext_free handler for the EXT_SFBUF
+ * mbufs. It frees the associated sf_buf and releases its reference
+ * on the original EXT_PGS mbuf.
+ *
+ * _mb_unmapped_to_ext() is a helper function that converts a single
+ * unmapped mbuf into a chain of mbufs.
+ *
+ * mb_unmapped_to_ext() is the public function that walks an mbuf
+ * chain converting any unmapped mbufs to mapped mbufs. It returns
+ * the new chain of mapped mbufs on success. On failure it frees
+ * the original mbuf chain and returns NULL.
+ */
+static void
+mb_unmapped_free_mext(struct mbuf *m)
+{
+ struct sf_buf *sf;
+ struct mbuf *old_m;
+
+ sf = m->m_ext.ext_arg1;
+ sf_buf_free(sf);
+
+ /* Drop the reference on the backing EXT_PGS mbuf. */
+ old_m = m->m_ext.ext_arg2;
+ mb_free_ext(old_m);
+}
+
+static struct mbuf *
+_mb_unmapped_to_ext(struct mbuf *m)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ struct mbuf *m_new, *top, *prev, *mref;
+ struct sf_buf *sf;
+ vm_page_t pg;
+ int i, len, off, pglen, pgoff, seglen, segoff;
+ volatile u_int *refcnt;
+ u_int ref_inc = 0;
+
+ MBUF_EXT_PGS_ASSERT(m);
+ ext_pgs = m->m_ext.ext_pgs;
+ len = m->m_len;
+ KASSERT(ext_pgs->tls == NULL, ("%s: can't convert TLS mbuf %p",
+ __func__, m));
+
+ /* See if this is the mbuf that holds the embedded refcount. */
+ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+ refcnt = &m->m_ext.ext_count;
+ mref = m;
+ } else {
+ KASSERT(m->m_ext.ext_cnt != NULL,
+ ("%s: no refcounting pointer on %p", __func__, m));
+ refcnt = m->m_ext.ext_cnt;
+ mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
+ }
+
+ /* Skip over any data removed from the front. */
+ off = mtod(m, vm_offset_t);
+
+ top = NULL;
+ if (ext_pgs->hdr_len != 0) {
+ if (off >= ext_pgs->hdr_len) {
+ off -= ext_pgs->hdr_len;
+ } else {
+ seglen = ext_pgs->hdr_len - off;
+ segoff = off;
+ seglen = min(seglen, len);
+ off = 0;
+ len -= seglen;
+ m_new = m_get(M_NOWAIT, MT_DATA);
+ if (m_new == NULL)
+ goto fail;
+ m_new->m_len = seglen;
+ prev = top = m_new;
+ memcpy(mtod(m_new, void *), &ext_pgs->hdr[segoff],
+ seglen);
+ }
+ }
+ pgoff = ext_pgs->first_pg_off;
+ for (i = 0; i < ext_pgs->npgs && len > 0; i++) {
+ pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff);
+ if (off >= pglen) {
+ off -= pglen;
+ pgoff = 0;
+ continue;
+ }
+ seglen = pglen - off;
+ segoff = pgoff + off;
+ off = 0;
+ seglen = min(seglen, len);
+ len -= seglen;
+
+ pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+ m_new = m_get(M_NOWAIT, MT_DATA);
+ if (m_new == NULL)
+ goto fail;
+ if (top == NULL) {
+ top = prev = m_new;
+ } else {
+ prev->m_next = m_new;
+ prev = m_new;
+ }
+ sf = sf_buf_alloc(pg, SFB_NOWAIT);
+ if (sf == NULL)
+ goto fail;
+
+ ref_inc++;
+ m_extadd(m_new, (char *)sf_buf_kva(sf), PAGE_SIZE,
+ mb_unmapped_free_mext, sf, mref, M_RDONLY, EXT_SFBUF);
+ m_new->m_data += segoff;
+ m_new->m_len = seglen;
+
+ pgoff = 0;
+ };
+ if (len != 0) {
+ KASSERT((off + len) <= ext_pgs->trail_len,
+ ("off + len > trail (%d + %d > %d)", off, len,
+ ext_pgs->trail_len));
+ m_new = m_get(M_NOWAIT, MT_DATA);
+ if (m_new == NULL)
+ goto fail;
+ if (top == NULL)
+ top = m_new;
+ else
+ prev->m_next = m_new;
+ m_new->m_len = len;
+ memcpy(mtod(m_new, void *), &ext_pgs->trail[off], len);
+ }
+
+ if (ref_inc != 0) {
+ /*
+ * Obtain an additional reference on the old mbuf for
+ * each created EXT_SFBUF mbuf. They will be dropped
+ * in mb_unmapped_free_mext().
+ */
+ if (*refcnt == 1)
+ *refcnt += ref_inc;
+ else
+ atomic_add_int(refcnt, ref_inc);
+ }
+ m_free(m);
+ return (top);
+
+fail:
+ if (ref_inc != 0) {
+ /*
+ * Obtain an additional reference on the old mbuf for
+ * each created EXT_SFBUF mbuf. They will be
+ * immediately dropped when these mbufs are freed
+ * below.
+ */
+ if (*refcnt == 1)
+ *refcnt += ref_inc;
+ else
+ atomic_add_int(refcnt, ref_inc);
+ }
+ m_free(m);
+ m_freem(top);
+ return (NULL);
+}
+
+struct mbuf *
+mb_unmapped_to_ext(struct mbuf *top)
+{
+ struct mbuf *m, *next, *prev = NULL;
+
+ prev = NULL;
+ for (m = top; m != NULL; m = next) {
+ /* m might be freed, so cache the next pointer. */
+ next = m->m_next;
+ if (m->m_flags & M_NOMAP) {
+ if (prev != NULL) {
+ /*
+ * Remove 'm' from the new chain so
+ * that the 'top' chain terminates
+ * before 'm' in case 'top' is freed
+ * due to an error.
+ */
+ prev->m_next = NULL;
+ }
+ m = _mb_unmapped_to_ext(m);
+ if (m == NULL) {
+ m_freem(top);
+ m_freem(next);
+ return (NULL);
+ }
+ if (prev == NULL) {
+ top = m;
+ } else {
+ prev->m_next = m;
+ }
+
+ /*
+ * Replaced one mbuf with a chain, so we must
+ * find the end of chain.
+ */
+ prev = m_last(m);
+ } else {
+ if (prev != NULL) {
+ prev->m_next = m;
+ }
+ prev = m;
+ }
+ }
+ return (top);
+}
+
+/*
+ * Allocate an empty EXT_PGS mbuf. The ext_free routine is
+ * responsible for freeing any pages backing this mbuf when it is
+ * freed.
+ */
+struct mbuf *
+mb_alloc_ext_pgs(int how, bool pkthdr, m_ext_free_t ext_free)
+{
+ struct mbuf *m;
+ struct mbuf_ext_pgs *ext_pgs;
+
+ if (pkthdr)
+ m = m_gethdr(how, MT_DATA);
+ else
+ m = m_get(how, MT_DATA);
+ if (m == NULL)
+ return (NULL);
+
+ ext_pgs = uma_zalloc(zone_extpgs, how);
+ if (ext_pgs == NULL) {
+ m_free(m);
+ return (NULL);
+ }
+ ext_pgs->npgs = 0;
+ ext_pgs->nrdy = 0;
+ ext_pgs->first_pg_off = 0;
+ ext_pgs->last_pg_len = 0;
+ ext_pgs->hdr_len = 0;
+ ext_pgs->trail_len = 0;
+ ext_pgs->tls = NULL;
+ ext_pgs->so = NULL;
+ m->m_data = NULL;
+ m->m_flags |= (M_EXT | M_RDONLY | M_NOMAP);
+ m->m_ext.ext_type = EXT_PGS;
+ m->m_ext.ext_flags = EXT_FLAG_EMBREF;
+ m->m_ext.ext_count = 1;
+ m->m_ext.ext_pgs = ext_pgs;
+ m->m_ext.ext_size = 0;
+ m->m_ext.ext_free = ext_free;
+ return (m);
+}
+
+#ifdef INVARIANT_SUPPORT
+void
+mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs)
+{
+
+ /*
+ * NB: This expects a non-empty buffer (npgs > 0 and
+ * last_pg_len > 0).
+ */
+ KASSERT(ext_pgs->npgs > 0,
+ ("ext_pgs with no valid pages: %p", ext_pgs));
+ KASSERT(ext_pgs->npgs <= nitems(ext_pgs->pa),
+ ("ext_pgs with too many pages: %p", ext_pgs));
+ KASSERT(ext_pgs->nrdy <= ext_pgs->npgs,
+ ("ext_pgs with too many ready pages: %p", ext_pgs));
+ KASSERT(ext_pgs->first_pg_off < PAGE_SIZE,
+ ("ext_pgs with too large page offset: %p", ext_pgs));
+ KASSERT(ext_pgs->last_pg_len > 0,
+ ("ext_pgs with zero last page length: %p", ext_pgs));
+ KASSERT(ext_pgs->last_pg_len <= PAGE_SIZE,
+ ("ext_pgs with too large last page length: %p", ext_pgs));
+ if (ext_pgs->npgs == 1) {
+ KASSERT(ext_pgs->first_pg_off + ext_pgs->last_pg_len <=
+ PAGE_SIZE, ("ext_pgs with single page too large: %p",
+ ext_pgs));
+ }
+ KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->hdr),
+ ("ext_pgs with too large header length: %p", ext_pgs));
+ KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->trail),
+ ("ext_pgs with too large trailer length: %p", ext_pgs));
+}
+#endif
+#endif /* __rtems__ */
+
+/*
* Clean up after mbufs with M_EXT storage attached to them if the
* reference count hits 1.
*/
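
Per the block comment above, mb_unmapped_to_ext() consumes the passed chain on failure, which keeps caller error handling simple. A sketch of a hypothetical transmit path for hardware that cannot address unmapped (M_NOMAP) mbufs:

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/mbuf.h>
#include <net/if.h>
#include <net/if_var.h>

static int
foo_if_transmit(struct ifnet *ifp, struct mbuf *m)
{
        /*
         * Downgrade any unmapped mbufs in the chain; the original
         * chain has already been freed if this fails.
         */
        m = mb_unmapped_to_ext(m);
        if (m == NULL)
                return (ENOBUFS);
        /* ... enqueue the fully mapped chain on the hardware ring ... */
        return (0);
}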
@@ -865,7 +1275,8 @@ mb_free_ext(struct mbuf *m)
*/
if (m->m_flags & M_NOFREE) {
freembuf = 0;
- KASSERT(m->m_ext.ext_type == EXT_EXTREF,
+ KASSERT(m->m_ext.ext_type == EXT_EXTREF ||
+ m->m_ext.ext_type == EXT_RXRING,
("%s: no-free mbuf %p has wrong type", __func__, m));
} else
freembuf = 1;
@@ -896,6 +1307,27 @@ mb_free_ext(struct mbuf *m)
uma_zfree(zone_mbuf, mref);
break;
#ifndef __rtems__
+ case EXT_PGS: {
+#ifdef KERN_TLS
+ struct mbuf_ext_pgs *pgs;
+ struct ktls_session *tls;
+#endif
+
+ KASSERT(mref->m_ext.ext_free != NULL,
+ ("%s: ext_free not set", __func__));
+ mref->m_ext.ext_free(mref);
+#ifdef KERN_TLS
+ pgs = mref->m_ext.ext_pgs;
+ tls = pgs->tls;
+ if (tls != NULL &&
+ !refcount_release_if_not_last(&tls->refcount))
+ ktls_enqueue_to_free(pgs);
+ else
+#endif
+ uma_zfree(zone_extpgs, mref->m_ext.ext_pgs);
+ uma_zfree(zone_mbuf, mref);
+ break;
+ }
case EXT_SFBUF:
#endif /* __rtems__ */
case EXT_NET_DRV:
@@ -911,6 +1343,10 @@ mb_free_ext(struct mbuf *m)
("%s: ext_free not set", __func__));
m->m_ext.ext_free(m);
break;
+ case EXT_RXRING:
+ KASSERT(m->m_ext.ext_free == NULL,
+ ("%s: ext_free is set", __func__));
+ break;
default:
KASSERT(m->m_ext.ext_type == 0,
("%s: unknown ext_type", __func__));
@@ -950,7 +1386,7 @@ m_clget(struct mbuf *m, int how)
* we might be able to loosen a few clusters up on the drain.
*/
if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
- zone_drain(zone_pack);
+ uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN);
uma_zalloc_arg(zone_clust, m, how);
}
MBUF_PROBE2(m__clget, m, how);
@@ -1051,8 +1487,7 @@ m_getjcl(int how, short type, int flags, int size)
* Allocate a given length worth of mbufs and/or clusters (whatever fits
* best) and return a pointer to the top of the allocated chain. If an
* existing mbuf chain is provided, then we will append the new chain
- * to the existing one but still return the top of the newly allocated
- * chain.
+ * to the existing one and return a pointer to the provided mbuf.
*/
struct mbuf *
m_getm2(struct mbuf *m, int len, int how, short type, int flags)
@@ -1165,3 +1600,24 @@ m_freem(struct mbuf *mb)
while (mb != NULL)
mb = m_free(mb);
}
+
+void
+m_snd_tag_init(struct m_snd_tag *mst, struct ifnet *ifp)
+{
+
+ if_ref(ifp);
+ mst->ifp = ifp;
+ refcount_init(&mst->refcount, 1);
+ counter_u64_add(snd_tag_count, 1);
+}
+
+void
+m_snd_tag_destroy(struct m_snd_tag *mst)
+{
+ struct ifnet *ifp;
+
+ ifp = mst->ifp;
+ ifp->if_snd_tag_free(mst);
+ if_rele(ifp);
+ counter_u64_add(snd_tag_count, -1);
+}
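
m_snd_tag_init() starts a tag with one reference and takes an ifnet reference, while m_snd_tag_destroy() undoes both via if_snd_tag_free() and if_rele(). A release helper built on these, sketched with a hypothetical name:

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/refcount.h>

static inline void
foo_snd_tag_rele(struct m_snd_tag *mst)
{
        /* Destroy the tag when the last reference is dropped. */
        if (refcount_release(&mst->refcount))
                m_snd_tag_destroy(mst);
}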
diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c
index 52aa32fb..cd9e6285 100644
--- a/freebsd/sys/kern/kern_mib.c
+++ b/freebsd/sys/kern/kern_mib.c
@@ -46,8 +46,10 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_config.h>
#include <sys/param.h>
+#include <sys/boot.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
@@ -147,7 +149,7 @@ SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD,
SYSCTL_NULL_INT_PTR, 0, "Whether saved set-group/user ID is available");
#endif
-char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */
+char kernelname[MAXPATHLEN] = PATH_KERNEL; /* XXX bloat */
SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW | CTLFLAG_MPSAFE,
kernelname, sizeof kernelname, "Name of kernel file booted");
@@ -170,15 +172,8 @@ sysctl_kern_arnd(SYSCTL_HANDLER_ARGS)
char buf[256];
size_t len;
- /*-
- * This is one of the very few legitimate uses of read_random(9).
- * Use of arc4random(9) is not recommended as that will ignore
- * an unsafe (i.e. unseeded) random(4).
- *
- * If random(4) is not seeded, then this returns 0, so the
- * sysctl will return a zero-length buffer.
- */
- len = read_random(buf, MIN(req->oldlen, sizeof(buf)));
+ len = MIN(req->oldlen, sizeof(buf));
+ read_random(buf, len);
return (SYSCTL_OUT(req, buf, len));
}
@@ -189,37 +184,51 @@ SYSCTL_PROC(_kern, KERN_ARND, arandom,
static int
sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
{
- u_long val;
+ u_long val, p;
- val = ctob(physmem);
+ p = SIZE_T_MAX >> PAGE_SHIFT;
+ if (physmem < p)
+ p = physmem;
+ val = ctob(p);
return (sysctl_handle_long(oidp, &val, 0, req));
}
-
SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_ULONG | CTLFLAG_RD,
- 0, 0, sysctl_hw_physmem, "LU", "");
+ 0, 0, sysctl_hw_physmem, "LU",
+ "Amount of physical memory (in bytes)");
static int
sysctl_hw_realmem(SYSCTL_HANDLER_ARGS)
{
- u_long val;
- val = ctob(realmem);
+ u_long val, p;
+
+ p = SIZE_T_MAX >> PAGE_SHIFT;
+ if (realmem < p)
+ p = realmem;
+ val = ctob(p);
return (sysctl_handle_long(oidp, &val, 0, req));
}
SYSCTL_PROC(_hw, HW_REALMEM, realmem, CTLTYPE_ULONG | CTLFLAG_RD,
- 0, 0, sysctl_hw_realmem, "LU", "");
+ 0, 0, sysctl_hw_realmem, "LU",
+ "Amount of memory (in bytes) reported by the firmware");
+
static int
sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
{
- u_long val;
+ u_long val, p, p1;
- val = ctob(physmem - vm_wire_count());
+ p1 = physmem - vm_wire_count();
+ p = SIZE_T_MAX >> PAGE_SHIFT;
+ if (p1 < p)
+ p = p1;
+ val = ctob(p);
return (sysctl_handle_long(oidp, &val, 0, req));
}
-
SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_ULONG | CTLFLAG_RD,
- 0, 0, sysctl_hw_usermem, "LU", "");
+ 0, 0, sysctl_hw_usermem, "LU",
+ "Amount of memory (in bytes) which is not wired");
-SYSCTL_LONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, "");
+SYSCTL_LONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0,
+ "Amount of physical memory (in pages)");
u_long pagesizes[MAXPAGESIZES] = { PAGE_SIZE };
@@ -501,6 +510,54 @@ sysctl_osreldate(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern, KERN_OSRELDATE, osreldate,
CTLTYPE_INT | CTLFLAG_CAPRD | CTLFLAG_RD | CTLFLAG_MPSAFE,
NULL, 0, sysctl_osreldate, "I", "Kernel release date");
+
+/*
+ * The build-id is copied from the ELF section .note.gnu.build-id. The linker
+ * script defines two variables to expose the beginning and end. LLVM
+ * currently uses a SHA-1 hash, but other formats can be supported by checking
+ * the length of the section.
+ */
+
+extern char __build_id_start[];
+extern char __build_id_end[];
+
+#define BUILD_ID_HEADER_LEN 0x10
+#define BUILD_ID_HASH_MAXLEN 0x14
+
+static int
+sysctl_build_id(SYSCTL_HANDLER_ARGS)
+{
+ uintptr_t sectionlen = (uintptr_t)(__build_id_end - __build_id_start);
+ int hashlen;
+ char buf[2*BUILD_ID_HASH_MAXLEN+1];
+
+ /*
+ * The ELF note section has a four byte length for the vendor name,
+ * four byte length for the value, and a four byte vendor specific
+ * type. The name for the build id is "GNU\0". We skip the first 16
+ * bytes to read the build hash. We will return the remaining bytes up
+ * to 20 (SHA-1) hash size. If the hash happens to be a custom number
+ * of bytes we will pad the value with zeros, as the section should be
+ * four byte aligned.
+ */
+ if (sectionlen <= BUILD_ID_HEADER_LEN ||
+ sectionlen > (BUILD_ID_HEADER_LEN + BUILD_ID_HASH_MAXLEN)) {
+ return (ENOENT);
+ }
+
+
+ hashlen = sectionlen - BUILD_ID_HEADER_LEN;
+ for (int i = 0; i < hashlen; i++) {
+ uint8_t c = __build_id_start[i+BUILD_ID_HEADER_LEN];
+ snprintf(&buf[2*i], 3, "%02x", c);
+ }
+
+ return (SYSCTL_OUT(req, buf, strlen(buf) + 1));
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, build_id,
+ CTLTYPE_STRING | CTLFLAG_CAPRD | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_build_id, "A", "Operating system build-id");
#endif /* __rtems__ */
SYSCTL_NODE(_kern, OID_AUTO, features, CTLFLAG_RD, 0, "Kernel Features");
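
The build-id handler emits the hex-encoded hash as a NUL-terminated string, so userland can fetch it with sysctlbyname(3); a minimal sketch:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        char buf[41];   /* up to 20 hash bytes, hex-encoded, plus NUL */
        size_t len = sizeof(buf);

        if (sysctlbyname("kern.build_id", buf, &len, NULL, 0) == 0)
                printf("build-id: %s\n", buf);
        return (0);
}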
diff --git a/freebsd/sys/kern/kern_mtxpool.c b/freebsd/sys/kern/kern_mtxpool.c
index 7f6c4dce..bc47d826 100644
--- a/freebsd/sys/kern/kern_mtxpool.c
+++ b/freebsd/sys/kern/kern_mtxpool.c
@@ -64,14 +64,14 @@ static MALLOC_DEFINE(M_MTXPOOL, "mtx_pool", "mutex pool");
/* Pool sizes must be a power of two */
#ifndef MTX_POOL_SLEEP_SIZE
-#define MTX_POOL_SLEEP_SIZE 128
+#define MTX_POOL_SLEEP_SIZE 1024
#endif
struct mtxpool_header {
int mtxpool_size;
int mtxpool_mask;
int mtxpool_shift;
- int mtxpool_next;
+ int mtxpool_next __aligned(CACHE_LINE_SIZE);
};
struct mtx_pool {
diff --git a/freebsd/sys/kern/kern_synch.c b/freebsd/sys/kern/kern_synch.c
index 2597f91d..7d24c248 100644
--- a/freebsd/sys/kern/kern_synch.c
+++ b/freebsd/sys/kern/kern_synch.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/refcount.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
@@ -366,6 +367,75 @@ pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
}
/*
+ * Potentially release the last reference for refcount. Check for
+ * unlikely conditions and signal the caller as to whether it was
+ * the final ref.
+ */
+bool
+refcount_release_last(volatile u_int *count, u_int n, u_int old)
+{
+ u_int waiter;
+
+ waiter = old & REFCOUNT_WAITER;
+ old = REFCOUNT_COUNT(old);
+ if (__predict_false(n > old || REFCOUNT_SATURATED(old))) {
+ /*
+ * Avoid multiple destructor invocations if underflow occurred.
+ * This is not perfect since the memory backing the containing
+ * object may already have been reallocated.
+ */
+ _refcount_update_saturated(count);
+ return (false);
+ }
+
+ /*
+ * Attempt to atomically clear the waiter bit. Wakeup waiters
+ * if we are successful.
+ */
+ if (waiter != 0 && atomic_cmpset_int(count, REFCOUNT_WAITER, 0))
+ wakeup(__DEVOLATILE(u_int *, count));
+
+ /*
+ * Last reference. Signal the user to call the destructor.
+ *
+ * Ensure that the destructor sees all updates. The fence_rel
+ * at the start of refcount_releasen synchronizes with this fence.
+ */
+ atomic_thread_fence_acq();
+ return (true);
+}
+
+/*
+ * Wait for a refcount wakeup. This does not guarantee that the ref is still
+ * zero on return and may be subject to transient wakeups. Callers wanting
+ * a precise answer should use refcount_wait().
+ */
+void
+refcount_sleep(volatile u_int *count, const char *wmesg, int pri)
+{
+ void *wchan;
+ u_int old;
+
+ if (REFCOUNT_COUNT(*count) == 0)
+ return;
+ wchan = __DEVOLATILE(void *, count);
+ sleepq_lock(wchan);
+ old = *count;
+ for (;;) {
+ if (REFCOUNT_COUNT(old) == 0) {
+ sleepq_release(wchan);
+ return;
+ }
+ if (old & REFCOUNT_WAITER)
+ break;
+ if (atomic_fcmpset_int(count, &old, old | REFCOUNT_WAITER))
+ break;
+ }
+ sleepq_add(wchan, NULL, wmesg, 0, 0);
+ sleepq_wait(wchan, pri);
+}
+
+/*
* Make all threads sleeping on the specified identifier runnable.
*/
void
@@ -402,6 +472,19 @@ wakeup_one(void *ident)
kick_proc0();
}
+void
+wakeup_any(void *ident)
+{
+ int wakeup_swapper;
+
+ sleepq_lock(ident);
+ wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_UNFAIR,
+ 0, 0);
+ sleepq_release(ident);
+ if (wakeup_swapper)
+ kick_proc0();
+}
+
#ifndef __rtems__
static void
kdb_switch(void)
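
refcount_release_last() and refcount_sleep() are the slow-path halves of the refcount(9) wait support; the comment above points callers at refcount_wait() for a precise zero check. A sketch of the intended teardown pattern, with a hypothetical struct foo, member foo_ref, and malloc type M_FOO, and assuming no new references are taken once teardown starts:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/refcount.h>

static void
foo_drain_and_free(struct foo *fp)
{
        /* Drop our reference; if other holders remain, wait for zero. */
        if (!refcount_release(&fp->foo_ref))
                refcount_wait(&fp->foo_ref, "foodrn", PWAIT);
        free(fp, M_FOO);
}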
diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c
index dc7c4c72..1135d7f3 100644
--- a/freebsd/sys/kern/kern_sysctl.c
+++ b/freebsd/sys/kern/kern_sysctl.c
@@ -43,6 +43,7 @@
__FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_capsicum.h>
+#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/local/opt_ktrace.h>
#include <sys/param.h>
@@ -50,11 +51,13 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
+#include <sys/kdb.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rmlock.h>
@@ -66,6 +69,11 @@ __FBSDID("$FreeBSD$");
#include <sys/ktrace.h>
#endif
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_lex.h>
+#endif
+
#include <net/vnet.h>
#include <security/mac/mac_framework.h>
@@ -326,13 +334,6 @@ sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
}
#endif /* __rtems__ */
-static int
-sbuf_printf_drain(void *arg __unused, const char *data, int len)
-{
-
- return (printf("%.*s", len, data));
-}
-
/*
* Locate the path to a given oid. Returns the length of the resulting path,
* or -1 if the oid was not found. nodes must have room for CTL_MAXNAME
@@ -940,13 +941,18 @@ SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL);
* (be aware though, that the proper interface isn't as obvious as it
* may seem, there are various conflicting requirements.
*
- * {0,0} printf the entire MIB-tree.
- * {0,1,...} return the name of the "..." OID.
- * {0,2,...} return the next OID.
- * {0,3} return the OID of the name in "new"
- * {0,4,...} return the kind & format info for the "..." OID.
- * {0,5,...} return the description of the "..." OID.
- * {0,6,...} return the aggregation label of the "..." OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_DEBUG} printf the entire MIB-tree.
+ * {CTL_SYSCTL, CTL_SYSCTL_NAME, ...} return the name of the "..."
+ * OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_NEXT, ...} return the next OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID} return the OID of the name in
+ * "new"
+ * {CTL_SYSCTL, CTL_SYSCTL_OIDFMT, ...} return the kind & format info
+ * for the "..." OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_OIDDESCR, ...} return the description of the
+ * "..." OID.
+ * {CTL_SYSCTL, CTL_SYSCTL_OIDLABEL, ...} return the aggregation label of
+ * the "..." OID.
*/
#ifdef SYSCTL_DEBUG
@@ -1014,8 +1020,8 @@ sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
return (ENOENT);
}
-SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE,
- 0, 0, sysctl_sysctl_debug, "-", "");
+SYSCTL_PROC(_sysctl, CTL_SYSCTL_DEBUG, debug, CTLTYPE_STRING | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, 0, 0, sysctl_sysctl_debug, "-", "");
#endif
static int
@@ -1080,8 +1086,8 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return name data for nodes that we don't permit in
* capability mode.
*/
-static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
- sysctl_sysctl_name, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NAME, name, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_name, "");
static int
sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
@@ -1167,8 +1173,8 @@ sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return next data for nodes that we don't permit in
* capability mode.
*/
-static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD,
- sysctl_sysctl_next, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXT, next, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, "");
static int
name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
@@ -1254,9 +1260,9 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
* XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in
* capability mode.
*/
-SYSCTL_PROC(_sysctl, 3, name2oid,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE
- | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", "");
+SYSCTL_PROC(_sysctl, CTL_SYSCTL_NAME2OID, name2oid, CTLTYPE_INT | CTLFLAG_RW |
+ CTLFLAG_ANYBODY | CTLFLAG_MPSAFE | CTLFLAG_CAPRW, 0, 0,
+ sysctl_sysctl_name2oid, "I", "");
static int
sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
@@ -1284,8 +1290,8 @@ sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
}
-static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
- sysctl_sysctl_oidfmt, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDFMT, oidfmt, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, "");
static int
sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
@@ -1309,8 +1315,8 @@ sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
return (error);
}
-static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD,
- sysctl_sysctl_oiddescr, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDDESCR, oiddescr, CTLFLAG_RD |
+ CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, "");
static int
sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS)
@@ -1334,8 +1340,8 @@ sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS)
return (error);
}
-static SYSCTL_NODE(_sysctl, 6, oidlabel,
- CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, "");
+static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDLABEL, oidlabel, CTLFLAG_RD |
+ CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, "");
/*
* Default "handler" functions.
@@ -1622,9 +1628,10 @@ sysctl_handle_string(SYSCTL_HANDLER_ARGS)
/*
* A zero-length buffer indicates a fixed size read-only
- * string:
+ * string. In ddb, don't worry about trying to make a malloced
+ * snapshot.
*/
- if (arg2 == 0) {
+ if (arg2 == 0 || kdb_active) {
arg2 = strlen((char *)arg1) + 1;
ro_string = 1;
}
@@ -1751,6 +1758,29 @@ sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS)
return (0);
}
+/*
+ * Convert seconds to a struct timeval. Intended for use with
+ * intervals and thus does not permit negative seconds.
+ */
+int
+sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS)
+{
+ struct timeval *tv;
+ int error, secs;
+
+ tv = arg1;
+ secs = tv->tv_sec;
+
+ error = sysctl_handle_int(oidp, &secs, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+
+ if (secs < 0)
+ return (EINVAL);
+ tv->tv_sec = secs;
+
+ return (0);
+}
/*
* Transfer functions to/from kernel space.
@@ -1853,8 +1883,8 @@ kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
size_t oidlen, plen;
int error;
- oid[0] = 0; /* sysctl internal magic */
- oid[1] = 3; /* name2oid */
+ oid[0] = CTL_SYSCTL;
+ oid[1] = CTL_SYSCTL_NAME2OID;
oidlen = sizeof(oid);
error = kernel_sysctl(td, oid, 2, oid, &oidlen,
@@ -2149,6 +2179,68 @@ sys___sysctl(struct thread *td, struct sysctl_args *uap)
return (error);
}
+int
+kern___sysctlbyname(struct thread *td, const char *oname, size_t namelen,
+ void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval,
+ int flags, bool inkernel)
+{
+ int oid[CTL_MAXNAME];
+ char namebuf[16];
+ char *name;
+ size_t oidlen;
+ int error;
+
+ if (namelen > MAXPATHLEN || namelen == 0)
+ return (EINVAL);
+ name = namebuf;
+ if (namelen > sizeof(namebuf))
+ name = malloc(namelen, M_SYSCTL, M_WAITOK);
+ error = copyin(oname, name, namelen);
+ if (error != 0)
+ goto out;
+
+ oid[0] = CTL_SYSCTL;
+ oid[1] = CTL_SYSCTL_NAME2OID;
+ oidlen = sizeof(oid);
+ error = kernel_sysctl(td, oid, 2, oid, &oidlen, (void *)name, namelen,
+ retval, flags);
+ if (error != 0)
+ goto out;
+ error = userland_sysctl(td, oid, *retval / sizeof(int), old, oldlenp,
+ inkernel, new, newlen, retval, flags);
+
+out:
+ if (namelen > sizeof(namebuf))
+ free(name, M_SYSCTL);
+ return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct __sysctlbyname_args {
+ const char *name;
+ size_t namelen;
+ void *old;
+ size_t *oldlenp;
+ void *new;
+ size_t newlen;
+};
+#endif
+int
+sys___sysctlbyname(struct thread *td, struct __sysctlbyname_args *uap)
+{
+ size_t rv;
+ int error;
+
+ error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old,
+ uap->oldlenp, uap->new, uap->newlen, &rv, 0, 0);
+ if (error != 0)
+ return (error);
+ if (uap->oldlenp != NULL)
+ error = copyout(&rv, uap->oldlenp, sizeof(rv));
+
+ return (error);
+}
+
/*
* This is used from various compatibility syscalls too. That's why name
* must be in kernel space.
@@ -2254,3 +2346,528 @@ sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length,
sbuf_set_drain(s, sbuf_sysctl_drain, req);
return (s);
}
+
+#ifdef DDB
+
+/* The current OID the debugger is working with */
+static struct sysctl_oid *g_ddb_oid;
+
+/* The current flags specified by the user */
+static int g_ddb_sysctl_flags;
+
+/* Check to see if the last sysctl printed */
+static int g_ddb_sysctl_printed;
+
+static const int ctl_sign[CTLTYPE+1] = {
+ [CTLTYPE_INT] = 1,
+ [CTLTYPE_LONG] = 1,
+ [CTLTYPE_S8] = 1,
+ [CTLTYPE_S16] = 1,
+ [CTLTYPE_S32] = 1,
+ [CTLTYPE_S64] = 1,
+};
+
+static const int ctl_size[CTLTYPE+1] = {
+ [CTLTYPE_INT] = sizeof(int),
+ [CTLTYPE_UINT] = sizeof(u_int),
+ [CTLTYPE_LONG] = sizeof(long),
+ [CTLTYPE_ULONG] = sizeof(u_long),
+ [CTLTYPE_S8] = sizeof(int8_t),
+ [CTLTYPE_S16] = sizeof(int16_t),
+ [CTLTYPE_S32] = sizeof(int32_t),
+ [CTLTYPE_S64] = sizeof(int64_t),
+ [CTLTYPE_U8] = sizeof(uint8_t),
+ [CTLTYPE_U16] = sizeof(uint16_t),
+ [CTLTYPE_U32] = sizeof(uint32_t),
+ [CTLTYPE_U64] = sizeof(uint64_t),
+};
+
+#define DB_SYSCTL_NAME_ONLY 0x001 /* Compare with -N */
+#define DB_SYSCTL_VALUE_ONLY 0x002 /* Compare with -n */
+#define DB_SYSCTL_OPAQUE 0x004 /* Compare with -o */
+#define DB_SYSCTL_HEX 0x008 /* Compare with -x */
+
+#define DB_SYSCTL_SAFE_ONLY 0x100 /* Only simple types */
+
+static const char db_sysctl_modifs[] = {
+ 'N', 'n', 'o', 'x',
+};
+
+static const int db_sysctl_modif_values[] = {
+ DB_SYSCTL_NAME_ONLY, DB_SYSCTL_VALUE_ONLY,
+ DB_SYSCTL_OPAQUE, DB_SYSCTL_HEX,
+};
+
+/* Handlers considered safe to print while recursing */
+static int (* const db_safe_handlers[])(SYSCTL_HANDLER_ARGS) = {
+ sysctl_handle_bool,
+ sysctl_handle_8,
+ sysctl_handle_16,
+ sysctl_handle_32,
+ sysctl_handle_64,
+ sysctl_handle_int,
+ sysctl_handle_long,
+ sysctl_handle_string,
+ sysctl_handle_opaque,
+};
+
+/*
+ * Use in place of sysctl_old_kernel to print sysctl values.
+ *
+ * Compare to the output handling in show_var from sbin/sysctl/sysctl.c
+ */
+static int
+sysctl_old_ddb(struct sysctl_req *req, const void *ptr, size_t len)
+{
+ const u_char *val, *p;
+ const char *sep1;
+ size_t intlen, slen;
+ uintmax_t umv;
+ intmax_t mv;
+ int sign, ctltype, hexlen, xflag, error;
+
+ /* Suppress false-positive GCC uninitialized variable warnings */
+ mv = 0;
+ umv = 0;
+
+ slen = len;
+ val = p = ptr;
+
+ if (ptr == NULL) {
+ error = 0;
+ goto out;
+ }
+
+ /* We are going to print */
+ g_ddb_sysctl_printed = 1;
+
+ xflag = g_ddb_sysctl_flags & DB_SYSCTL_HEX;
+
+ ctltype = (g_ddb_oid->oid_kind & CTLTYPE);
+ sign = ctl_sign[ctltype];
+ intlen = ctl_size[ctltype];
+
+ switch (ctltype) {
+ case CTLTYPE_NODE:
+ case CTLTYPE_STRING:
+ db_printf("%.*s", (int) len, (const char *) p);
+ error = 0;
+ goto out;
+
+ case CTLTYPE_INT:
+ case CTLTYPE_UINT:
+ case CTLTYPE_LONG:
+ case CTLTYPE_ULONG:
+ case CTLTYPE_S8:
+ case CTLTYPE_S16:
+ case CTLTYPE_S32:
+ case CTLTYPE_S64:
+ case CTLTYPE_U8:
+ case CTLTYPE_U16:
+ case CTLTYPE_U32:
+ case CTLTYPE_U64:
+ hexlen = 2 + (intlen * CHAR_BIT + 3) / 4;
+ sep1 = "";
+ while (len >= intlen) {
+ switch (ctltype) {
+ case CTLTYPE_INT:
+ case CTLTYPE_UINT:
+ umv = *(const u_int *)p;
+ mv = *(const int *)p;
+ break;
+ case CTLTYPE_LONG:
+ case CTLTYPE_ULONG:
+ umv = *(const u_long *)p;
+ mv = *(const long *)p;
+ break;
+ case CTLTYPE_S8:
+ case CTLTYPE_U8:
+ umv = *(const uint8_t *)p;
+ mv = *(const int8_t *)p;
+ break;
+ case CTLTYPE_S16:
+ case CTLTYPE_U16:
+ umv = *(const uint16_t *)p;
+ mv = *(const int16_t *)p;
+ break;
+ case CTLTYPE_S32:
+ case CTLTYPE_U32:
+ umv = *(const uint32_t *)p;
+ mv = *(const int32_t *)p;
+ break;
+ case CTLTYPE_S64:
+ case CTLTYPE_U64:
+ umv = *(const uint64_t *)p;
+ mv = *(const int64_t *)p;
+ break;
+ }
+
+ db_printf("%s", sep1);
+ if (xflag)
+ db_printf("%#0*jx", hexlen, umv);
+ else if (!sign)
+ db_printf("%ju", umv);
+ else if (g_ddb_oid->oid_fmt[1] == 'K') {
+ /* Kelvins are currently unsupported. */
+ error = EOPNOTSUPP;
+ goto out;
+ } else
+ db_printf("%jd", mv);
+
+ sep1 = " ";
+ len -= intlen;
+ p += intlen;
+ }
+ error = 0;
+ goto out;
+
+ case CTLTYPE_OPAQUE:
+ /* TODO: Support struct functions. */
+
+ /* FALLTHROUGH */
+ default:
+ db_printf("Format:%s Length:%zu Dump:0x",
+ g_ddb_oid->oid_fmt, len);
+ while (len-- && (xflag || p < val + 16))
+ db_printf("%02x", *p++);
+ if (!xflag && len > 16)
+ db_printf("...");
+ error = 0;
+ goto out;
+ }
+
+out:
+ req->oldidx += slen;
+ return (error);
+}
+
+/*
+ * Avoid setting new sysctl values from the debugger
+ */
+static int
+sysctl_new_ddb(struct sysctl_req *req, void *p, size_t l)
+{
+
+ if (!req->newptr)
+ return (0);
+
+ /* Changing sysctls from the debugger is currently unsupported */
+ return (EPERM);
+}
+
+/*
+ * Run a sysctl handler with the DDB oldfunc and newfunc attached.
+ * Instead of copying any output to a buffer we'll dump it right to
+ * the console.
+ */
+static int
+db_sysctl(struct sysctl_oid *oidp, int *name, u_int namelen,
+ void *old, size_t *oldlenp, size_t *retval, int flags)
+{
+ struct sysctl_req req;
+ int error;
+
+ /* Setup the request */
+ bzero(&req, sizeof req);
+ req.td = kdb_thread;
+ req.oldfunc = sysctl_old_ddb;
+ req.newfunc = sysctl_new_ddb;
+ req.lock = REQ_UNWIRED;
+ if (oldlenp) {
+ req.oldlen = *oldlenp;
+ }
+ req.validlen = req.oldlen;
+ if (old) {
+ req.oldptr = old;
+ }
+
+ /* Setup our globals for sysctl_old_ddb */
+ g_ddb_oid = oidp;
+ g_ddb_sysctl_flags = flags;
+ g_ddb_sysctl_printed = 0;
+
+ error = sysctl_root(0, name, namelen, &req);
+
+ /* Reset globals */
+ g_ddb_oid = NULL;
+ g_ddb_sysctl_flags = 0;
+
+ if (retval) {
+ if (req.oldptr && req.oldidx > req.validlen)
+ *retval = req.validlen;
+ else
+ *retval = req.oldidx;
+ }
+ return (error);
+}
+
+/*
+ * Show a sysctl's name
+ */
+static void
+db_show_oid_name(int *oid, size_t nlen)
+{
+ struct sysctl_oid *oidp;
+ int qoid[CTL_MAXNAME+2];
+ int error;
+
+ qoid[0] = 0;
+ memcpy(qoid + 2, oid, nlen * sizeof(int));
+ qoid[1] = 1;
+
+ error = sysctl_find_oid(qoid, nlen + 2, &oidp, NULL, NULL);
+ if (error)
+ db_error("sysctl name oid");
+
+ error = db_sysctl(oidp, qoid, nlen + 2, NULL, NULL, NULL, 0);
+ if (error)
+ db_error("sysctl name");
+}
+
+/*
+ * Check to see if an OID is safe to print from ddb.
+ */
+static bool
+db_oid_safe(const struct sysctl_oid *oidp)
+{
+ for (unsigned int i = 0; i < nitems(db_safe_handlers); ++i) {
+ if (oidp->oid_handler == db_safe_handlers[i])
+ return (true);
+ }
+
+ return (false);
+}
+
+/*
+ * Show a sysctl at a specific OID
+ * Compare to the input handling in show_var from sbin/sysctl/sysctl.c
+ */
+static int
+db_show_oid(struct sysctl_oid *oidp, int *oid, size_t nlen, int flags)
+{
+ int error, xflag, oflag, Nflag, nflag;
+ size_t len;
+
+ xflag = flags & DB_SYSCTL_HEX;
+ oflag = flags & DB_SYSCTL_OPAQUE;
+ nflag = flags & DB_SYSCTL_VALUE_ONLY;
+ Nflag = flags & DB_SYSCTL_NAME_ONLY;
+
+ if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_OPAQUE &&
+ (!xflag && !oflag))
+ return (0);
+
+ if (Nflag) {
+ db_show_oid_name(oid, nlen);
+ error = 0;
+ goto out;
+ }
+
+ if (!nflag) {
+ db_show_oid_name(oid, nlen);
+ db_printf(": ");
+ }
+
+ if ((flags & DB_SYSCTL_SAFE_ONLY) && !db_oid_safe(oidp)) {
+ db_printf("Skipping, unsafe to print while recursing.");
+ error = 0;
+ goto out;
+ }
+
+ /* Try once, and ask about the size */
+ len = 0;
+ error = db_sysctl(oidp, oid, nlen,
+ NULL, NULL, &len, flags);
+ if (error)
+ goto out;
+
+ if (!g_ddb_sysctl_printed)
+ /* Lie about the size */
+ error = db_sysctl(oidp, oid, nlen,
+ (void *) 1, &len, NULL, flags);
+
+out:
+ db_printf("\n");
+ return (error);
+}
+
+/*
+ * Show all sysctls under a specific OID
+ * Compare to sysctl_all from sbin/sysctl/sysctl.c
+ */
+static int
+db_show_sysctl_all(int *oid, size_t len, int flags)
+{
+ struct sysctl_oid *oidp;
+ int name1[CTL_MAXNAME + 2], name2[CTL_MAXNAME + 2];
+ size_t l1, l2;
+
+ name1[0] = CTL_SYSCTL;
+ name1[1] = CTL_SYSCTL_NEXT;
+ l1 = 2;
+ if (len) {
+ memcpy(name1+2, oid, len * sizeof(int));
+ l1 += len;
+ } else {
+ name1[2] = 1;
+ l1++;
+ }
+ for (;;) {
+ int i, error;
+
+ l2 = sizeof(name2);
+ error = kernel_sysctl(kdb_thread, name1, l1,
+ name2, &l2, NULL, 0, &l2, 0);
+ if (error != 0) {
+ if (error == ENOENT)
+ return (0);
+ else
+ db_error("sysctl(getnext)");
+ }
+
+ l2 /= sizeof(int);
+
+ if (l2 < (unsigned int)len)
+ return (0);
+
+ for (i = 0; i < len; i++)
+ if (name2[i] != oid[i])
+ return (0);
+
+ /* Find the OID in question */
+ error = sysctl_find_oid(name2, l2, &oidp, NULL, NULL);
+ if (error)
+ return (error);
+
+ i = db_show_oid(oidp, name2, l2, flags | DB_SYSCTL_SAFE_ONLY);
+
+ if (db_pager_quit)
+ return (0);
+
+ memcpy(name1+2, name2, l2 * sizeof(int));
+ l1 = 2 + l2;
+ }
+}
+
+/*
+ * Show a sysctl by its user facing string
+ */
+static int
+db_sysctlbyname(char *name, int flags)
+{
+ struct sysctl_oid *oidp;
+ int oid[CTL_MAXNAME];
+ int error, nlen;
+
+ error = name2oid(name, oid, &nlen, &oidp);
+ if (error) {
+ return (error);
+ }
+
+ if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
+ db_show_sysctl_all(oid, nlen, flags);
+ } else {
+ error = db_show_oid(oidp, oid, nlen, flags);
+ }
+
+ return (error);
+}
+
+static void
+db_sysctl_cmd_usage(void)
+{
+ db_printf(
+ " sysctl [/Nnox] <sysctl> \n"
+ " \n"
+ " <sysctl> The name of the sysctl to show. \n"
+ " \n"
+ " Show a sysctl by hooking into SYSCTL_IN and SYSCTL_OUT. \n"
+ " This will work for most sysctls, but should not be used \n"
+ " with sysctls that are known to malloc. \n"
+ " \n"
+ " While recursing any \"unsafe\" sysctls will be skipped. \n"
+ " Call sysctl directly on the sysctl to try printing the \n"
+ " skipped sysctl. This is unsafe and may make the ddb \n"
+ " session unusable. \n"
+ " \n"
+ " Arguments: \n"
+ " /N Display only the name of the sysctl. \n"
+ " /n Display only the value of the sysctl. \n"
+ " /o Display opaque values. \n"
+ " /x Display the sysctl in hex. \n"
+ " \n"
+ "For example: \n"
+ "sysctl vm.v_free_min \n"
+ "vm.v_free_min: 12669 \n"
+ );
+}
+
+/*
+ * Show a specific sysctl similar to sysctl (8).
+ */
+DB_FUNC(sysctl, db_sysctl_cmd, db_cmd_table, CS_OWN, NULL)
+{
+ char name[TOK_STRING_SIZE];
+ int error, i, t, flags;
+
+ /* Parse the modifiers */
+ t = db_read_token();
+ if (t == tSLASH || t == tMINUS) {
+ t = db_read_token();
+ if (t != tIDENT) {
+ db_printf("Bad modifier\n");
+ error = EINVAL;
+ goto out;
+ }
+ db_strcpy(modif, db_tok_string);
+ }
+ else {
+ db_unread_token(t);
+ modif[0] = '\0';
+ }
+
+ flags = 0;
+ for (i = 0; i < nitems(db_sysctl_modifs); i++) {
+ if (strchr(modif, db_sysctl_modifs[i])) {
+ flags |= db_sysctl_modif_values[i];
+ }
+ }
+
+ /* Parse the sysctl names */
+ t = db_read_token();
+ if (t != tIDENT) {
+ db_printf("Need sysctl name\n");
+ error = EINVAL;
+ goto out;
+ }
+
+ /* Copy the name into a temporary buffer */
+ db_strcpy(name, db_tok_string);
+
+ /* Ensure there is no trailing cruft */
+ t = db_read_token();
+ if (t != tEOL) {
+ db_printf("Unexpected sysctl argument\n");
+ error = EINVAL;
+ goto out;
+ }
+
+ error = db_sysctlbyname(name, flags);
+ if (error == ENOENT) {
+ db_printf("unknown oid: '%s'\n", db_tok_string);
+ goto out;
+ } else if (error) {
+ db_printf("%s: error: %d\n", db_tok_string, error);
+ goto out;
+ }
+
+out:
+ /* Ensure we eat all of our text */
+ db_flush_lex();
+
+ if (error == EINVAL) {
+ db_sysctl_cmd_usage();
+ }
+}
+
+#endif /* DDB */
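
The new sysctl_sec_to_timeval() handler lets a struct timeval interval be exported as whole seconds while rejecting negative input. A registration sketch with a hypothetical variable and OID name:

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static struct timeval foo_interval = { .tv_sec = 30 };

SYSCTL_PROC(_kern, OID_AUTO, foo_interval,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &foo_interval, 0, sysctl_sec_to_timeval, "I",
    "Hypothetical interval in seconds");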
diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c
index 74b144cb..47eb9032 100644
--- a/freebsd/sys/kern/kern_time.c
+++ b/freebsd/sys/kern/kern_time.c
@@ -422,7 +422,9 @@ kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats)
if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000 ||
ats->tv_sec < 0)
return (EINVAL);
- if (!allow_insane_settime && ats->tv_sec > 8000ULL * 365 * 24 * 60 * 60)
+ if (!allow_insane_settime &&
+ (ats->tv_sec > 8000ULL * 365 * 24 * 60 * 60 ||
+ ats->tv_sec < utc_offset()))
return (EINVAL);
/* XXX Don't convert nsec->usec and back */
TIMESPEC_TO_TIMEVAL(&atv, ats);
@@ -673,8 +675,8 @@ sys_gettimeofday(struct thread *td, struct gettimeofday_args *uap)
error = copyout(&atv, uap->tp, sizeof (atv));
}
if (error == 0 && uap->tzp != NULL) {
- rtz.tz_minuteswest = tz_minuteswest;
- rtz.tz_dsttime = tz_dsttime;
+ rtz.tz_minuteswest = 0;
+ rtz.tz_dsttime = 0;
error = copyout(&rtz, uap->tzp, sizeof (rtz));
}
return (error);
@@ -726,10 +728,6 @@ kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp)
return (EINVAL);
error = settime(td, tv);
}
- if (tzp && error == 0) {
- tz_minuteswest = tzp->tz_minuteswest;
- tz_dsttime = tzp->tz_dsttime;
- }
return (error);
}
diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c
index 2f478afc..983abba2 100644
--- a/freebsd/sys/kern/kern_timeout.c
+++ b/freebsd/sys/kern/kern_timeout.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/callout.h>
+#include <sys/domainset.h>
#include <sys/file.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
@@ -67,6 +68,7 @@ __FBSDID("$FreeBSD$");
#ifdef DDB
#include <ddb/ddb.h>
+#include <ddb/db_sym.h>
#include <machine/_inttypes.h>
#endif
@@ -154,12 +156,16 @@ u_int callwheelsize, callwheelmask;
struct cc_exec {
struct callout *cc_curr;
void (*cc_drain)(void *);
+#ifndef __rtems__
+ void *cc_last_func;
+ void *cc_last_arg;
+#endif /* __rtems__ */
#ifdef SMP
void (*ce_migration_func)(void *);
void *ce_migration_arg;
- int ce_migration_cpu;
sbintime_t ce_migration_time;
sbintime_t ce_migration_prec;
+ int ce_migration_cpu;
#endif
bool cc_cancel;
bool cc_waiting;
@@ -200,6 +206,8 @@ struct callout_cpu {
#ifndef __rtems__
#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr
+#define cc_exec_last_func(cc, dir) cc->cc_exec_entity[dir].cc_last_func
+#define cc_exec_last_arg(cc, dir) cc->cc_exec_entity[dir].cc_last_arg
#define cc_exec_drain(cc, dir) cc->cc_exec_entity[dir].cc_drain
#else /* __rtems__ */
#define cc_exec_curr(cc, dir) cc->cc_exec_entity.cc_curr
@@ -426,8 +434,9 @@ callout_cpu_init(struct callout_cpu *cc, int cpu)
SLIST_INIT(&cc->cc_callfree);
cc->cc_inited = 1;
#ifndef __rtems__
- cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
- M_CALLOUT, M_WAITOK);
+ cc->cc_callwheel = malloc_domainset(sizeof(struct callout_list) *
+ callwheelsize, M_CALLOUT,
+ DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK);
#endif /* __rtems__ */
for (i = 0; i < callwheelsize; i++)
LIST_INIT(&cc->cc_callwheel[i]);
@@ -821,6 +830,10 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
c->c_iflags &= ~CALLOUT_PENDING;
cc_exec_curr(cc, direct) = c;
+#ifndef __rtems__
+ cc_exec_last_func(cc, direct) = c_func;
+ cc_exec_last_arg(cc, direct) = c_arg;
+#endif /* __rtems__ */
cc_exec_cancel(cc, direct) = false;
cc_exec_drain(cc, direct) = NULL;
CC_UNLOCK(cc);
@@ -1876,4 +1889,42 @@ DB_SHOW_COMMAND(callout, db_show_callout)
_show_callout((struct callout *)addr);
}
+
+static void
+_show_last_callout(int cpu, int direct, const char *dirstr)
+{
+ struct callout_cpu *cc;
+ void *func, *arg;
+
+ cc = CC_CPU(cpu);
+ func = cc_exec_last_func(cc, direct);
+ arg = cc_exec_last_arg(cc, direct);
+ db_printf("cpu %d last%s callout function: %p ", cpu, dirstr, func);
+ db_printsym((db_expr_t)func, DB_STGY_ANY);
+ db_printf("\ncpu %d last%s callout argument: %p\n", cpu, dirstr, arg);
+}
+
+DB_SHOW_COMMAND(callout_last, db_show_callout_last)
+{
+ int cpu, last;
+
+ if (have_addr) {
+ if (addr < 0 || addr > mp_maxid || CPU_ABSENT(addr)) {
+ db_printf("no such cpu: %d\n", (int)addr);
+ return;
+ }
+ cpu = last = addr;
+ } else {
+ cpu = 0;
+ last = mp_maxid;
+ }
+
+ while (cpu <= last) {
+ if (!CPU_ABSENT(cpu)) {
+ _show_last_callout(cpu, 0, "");
+ _show_last_callout(cpu, 1, " direct");
+ }
+ cpu++;
+ }
+}
#endif /* DDB */
diff --git a/freebsd/sys/kern/kern_uuid.c b/freebsd/sys/kern/kern_uuid.c
index a2316b16..c2a5986a 100644
--- a/freebsd/sys/kern/kern_uuid.c
+++ b/freebsd/sys/kern/kern_uuid.c
@@ -301,7 +301,7 @@ sbuf_printf_uuid(struct sbuf *sb, struct uuid *uuid)
char buf[38];
snprintf_uuid(buf, sizeof(buf), uuid);
- return (sbuf_printf(sb, "%s", buf));
+ return (sbuf_cat(sb, buf));
}
/*
diff --git a/freebsd/sys/kern/subr_blist.c b/freebsd/sys/kern/subr_blist.c
index 807a7f3c..8b073bf8 100644
--- a/freebsd/sys/kern/subr_blist.c
+++ b/freebsd/sys/kern/subr_blist.c
@@ -111,6 +111,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/sbuf.h>
+#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stddef.h>
@@ -122,19 +123,20 @@ __FBSDID("$FreeBSD$");
#define malloc(a,b,c) calloc(a, 1)
#define free(a,b) free(a)
#define ummin(a,b) ((a) < (b) ? (a) : (b))
+#define imin(a,b) ((a) < (b) ? (a) : (b))
+#define KASSERT(a,b) assert(a)
#include <sys/blist.h>
-void panic(const char *ctl, ...);
-
#endif
/*
* static support functions
*/
-static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
-static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count,
- u_daddr_t radix);
+static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk,
+ int *count, int maxcount);
+static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
+ int maxcount, u_daddr_t radix);
static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
u_daddr_t radix);
@@ -194,30 +196,40 @@ bitrange(int n, int count)
/*
- * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
- * Assumes that the argument has only one bit set.
+ * Find the first bit set in a u_daddr_t.
*/
static inline int
-bitpos(u_daddr_t mask)
+generic_bitpos(u_daddr_t mask)
{
int hi, lo, mid;
+ lo = 0;
+ hi = BLIST_BMAP_RADIX;
+ while (lo + 1 < hi) {
+ mid = (lo + hi) >> 1;
+ if (mask & bitrange(0, mid))
+ hi = mid;
+ else
+ lo = mid;
+ }
+ return (lo);
+}
+
+static inline int
+bitpos(u_daddr_t mask)
+{
+
switch (sizeof(mask)) {
#ifdef HAVE_INLINE_FFSLL
case sizeof(long long):
return (ffsll(mask) - 1);
#endif
+#ifdef HAVE_INLINE_FFS
+ case sizeof(int):
+ return (ffs(mask) - 1);
+#endif
default:
- lo = 0;
- hi = BLIST_BMAP_RADIX;
- while (lo + 1 < hi) {
- mid = (lo + hi) >> 1;
- if ((mask >> mid) != 0)
- lo = mid;
- else
- hi = mid;
- }
- return (lo);
+ return (generic_bitpos(mask));
}
}
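
generic_bitpos() now probes the low bits with bitrange(0, mid) instead of shifting the mask down, and acts as the fallback when neither ffsll() nor ffs() matches the word size. A standalone userland sketch of the same binary search, assuming a 64-bit u_daddr_t (not part of the patch):

#include <assert.h>
#include <stdint.h>

#define BLIST_BMAP_RADIX 64
typedef uint64_t u_daddr_t;

/* Mask with bits [n, n + count) set, as in subr_blist.c. */
static u_daddr_t
bitrange(int n, int count)
{
        return (((u_daddr_t)-1 << n) &
            ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count))));
}

static int
generic_bitpos(u_daddr_t mask)
{
        int hi, lo, mid;

        lo = 0;
        hi = BLIST_BMAP_RADIX;
        while (lo + 1 < hi) {
                mid = (lo + hi) >> 1;
                if (mask & bitrange(0, mid))
                        hi = mid;       /* lowest set bit is below mid */
                else
                        lo = mid;
        }
        return (lo);
}

int
main(void)
{
        int i;

        /* The lowest set bit is found even with higher bits set. */
        for (i = 0; i < BLIST_BMAP_RADIX; i++)
                assert(generic_bitpos(~(u_daddr_t)0 << i) == i);
        return (0);
}
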
@@ -237,8 +249,7 @@ blist_create(daddr_t blocks, int flags)
blist_t bl;
u_daddr_t nodes, radix;
- if (blocks == 0)
- panic("invalid block count");
+ KASSERT(blocks > 0, ("invalid block count"));
/*
* Calculate the radix and node count used for scanning.
@@ -286,12 +297,14 @@ blist_destroy(blist_t bl)
* not be allocated.
*/
daddr_t
-blist_alloc(blist_t bl, daddr_t count)
+blist_alloc(blist_t bl, int *count, int maxcount)
{
- daddr_t blk;
+ daddr_t blk, cursor;
- if (count > BLIST_MAX_ALLOC)
- panic("allocation too large");
+ KASSERT(*count <= maxcount,
+ ("invalid parameters %d > %d", *count, maxcount));
+ KASSERT(*count <= BLIST_MAX_ALLOC,
+ ("minimum allocation too large: %d", *count));
/*
* This loop iterates at most twice. An allocation failure in the
@@ -299,18 +312,18 @@ blist_alloc(blist_t bl, daddr_t count)
* non-zero. When the cursor is zero, an allocation failure will
* stop further iterations.
*/
- for (;;) {
- blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count,
+ for (cursor = bl->bl_cursor;; cursor = 0) {
+ blk = blst_meta_alloc(bl->bl_root, cursor, count, maxcount,
bl->bl_radix);
if (blk != SWAPBLK_NONE) {
- bl->bl_avail -= count;
- bl->bl_cursor = blk + count;
+ bl->bl_avail -= *count;
+ bl->bl_cursor = blk + *count;
if (bl->bl_cursor == bl->bl_blocks)
bl->bl_cursor = 0;
return (blk);
- } else if (bl->bl_cursor == 0)
+ }
+ if (cursor == 0)
return (SWAPBLK_NONE);
- bl->bl_cursor = 0;
}
}
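
blist_alloc() now takes the minimum acceptable size via *count and a separate maxcount; on success *count reports what was actually granted. A hedged kernel-side sketch of the new interface, with the sizes chosen purely for illustration (not part of the patch):

static void
blist_alloc_example(void)
{
        blist_t bl;
        daddr_t blk;
        int count;

        bl = blist_create(1024, M_WAITOK);
        count = 4;                              /* minimum acceptable */
        blk = blist_alloc(bl, &count, 16);      /* maximum wanted */
        if (blk != SWAPBLK_NONE) {
                /* 'count' now holds the granted size, 4..16. */
                blist_free(bl, blk, count);
        }
        blist_destroy(bl);
}
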
@@ -326,15 +339,15 @@ blist_avail(blist_t bl)
/*
* blist_free() - free up space in the block bitmap. Return the base
- * of a contiguous region. Panic if an inconsistancy is
- * found.
+ * of a contiguous region.
*/
void
blist_free(blist_t bl, daddr_t blkno, daddr_t count)
{
- if (blkno < 0 || blkno + count > bl->bl_blocks)
- panic("freeing invalid range");
+ KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
+ ("freeing invalid range: blkno %jx, count %d, blocks %jd",
+ (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
bl->bl_avail += count;
}
@@ -350,8 +363,9 @@ blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
{
daddr_t filled;
- if (blkno < 0 || blkno + count > bl->bl_blocks)
- panic("filling invalid range");
+ KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
+ ("filling invalid range: blkno %jx, count %d, blocks %jd",
+ (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix);
bl->bl_avail -= filled;
return (filled);
@@ -533,7 +547,8 @@ blist_stats(blist_t bl, struct sbuf *s)
struct gap_stats gstats;
struct gap_stats *stats = &gstats;
daddr_t i, nodes, radix;
- u_daddr_t bit, diff, mask;
+ u_daddr_t diff, mask;
+ int digit;
init_gap_stats(stats);
nodes = 0;
@@ -571,9 +586,9 @@ blist_stats(blist_t bl, struct sbuf *s)
if (gap_stats_counting(stats))
diff ^= 1;
while (diff != 0) {
- bit = diff & -diff;
- update_gap_stats(stats, i + bitpos(bit));
- diff ^= bit;
+ digit = bitpos(diff);
+ update_gap_stats(stats, i + digit);
+ diff ^= bitrange(digit, 1);
}
}
nodes += radix_to_skip(radix);
@@ -594,53 +609,104 @@ blist_stats(blist_t bl, struct sbuf *s)
*/
/*
- * BLST_NEXT_LEAF_ALLOC() - allocate the first few blocks in the next leaf.
+ * BLST_NEXT_LEAF_ALLOC() - allocate the blocks starting with the next leaf.
*
- * 'scan' is a leaf node, associated with a block containing 'blk'.
- * The next leaf node could be adjacent, or several nodes away if the
- * least common ancestor of 'scan' and its neighbor is several levels
- * up. Use 'blk' to determine how many meta-nodes lie between the
- * leaves. If the next leaf has enough initial bits set, clear them
- * and clear the bits in the meta nodes on the path up to the least
- * common ancestor to mark any subtrees made completely empty.
+ * 'scan' is a leaf node, and its first block is at address 'start'. The
+ * next leaf node could be adjacent, or several nodes away if the least
+ * common ancestor of 'scan' and its neighbor is several levels up. Use
+ * addresses to determine how many meta-nodes lie between the leaves. If
+ * the sequence of leaves starting with the next one has enough initial bits
+ * set, clear them and clear the bits in the meta nodes on the path up to
+ * the least common ancestor to mark any subtrees made completely empty.
*/
static int
-blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
+blst_next_leaf_alloc(blmeta_t *scan, daddr_t start, int count, int maxcount)
{
- blmeta_t *next;
- daddr_t skip;
u_daddr_t radix;
- int digit;
+ daddr_t blk;
+ int avail, digit;
- next = scan + 1;
- blk += BLIST_BMAP_RADIX;
- radix = BLIST_BMAP_RADIX;
- while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0 &&
- (next->bm_bitmap & 1) == 1) {
- next++;
- radix *= BLIST_META_RADIX;
- }
- if (((next->bm_bitmap + 1) & ~((u_daddr_t)-1 << count)) != 0) {
- /*
- * The next leaf doesn't have enough free blocks at the
- * beginning to complete the spanning allocation.
- */
- return (ENOMEM);
+ start += BLIST_BMAP_RADIX;
+ for (blk = start; blk - start < maxcount; blk += BLIST_BMAP_RADIX) {
+ /* Skip meta-nodes, as long as they promise more free blocks. */
+ radix = BLIST_BMAP_RADIX;
+ while (((++scan)->bm_bitmap & 1) == 1 &&
+ ((blk / radix) & BLIST_META_MASK) == 0)
+ radix *= BLIST_META_RADIX;
+ if (~scan->bm_bitmap != 0) {
+ /*
+ * Either there is no next leaf with any free blocks,
+ * or we've reached the next leaf and found that some
+ * of its blocks are not free. In the first case,
+ * bitpos() returns zero here.
+ */
+ avail = blk - start + bitpos(~scan->bm_bitmap);
+ if (avail < count || avail == 0) {
+ /*
+ * There isn't a next leaf with enough free
+ * blocks at its beginning to bother
+ * allocating.
+ */
+ return (avail);
+ }
+ maxcount = imin(avail, maxcount);
+ if (maxcount % BLIST_BMAP_RADIX == 0) {
+ /*
+ * There was no next leaf.  Back the scan up
+ * to the last leaf.
+ */
+ --scan;
+ while (radix != BLIST_BMAP_RADIX) {
+ radix /= BLIST_META_RADIX;
+ --scan;
+ }
+ blk -= BLIST_BMAP_RADIX;
+ }
+ }
}
- /* Clear the first 'count' bits in the next leaf to allocate. */
- next->bm_bitmap &= (u_daddr_t)-1 << count;
-
+
/*
- * Update bitmaps of next-ancestors, up to least common ancestor.
+ * 'scan' is the last leaf that provides blocks. Clear from 1 to
+ * BLIST_BMAP_RADIX bits to represent the allocation of those last
+ * blocks.
*/
- skip = radix_to_skip(radix);
- while (radix != BLIST_BMAP_RADIX && next->bm_bitmap == 0) {
- (--next)->bm_bitmap ^= 1;
- radix /= BLIST_META_RADIX;
+ if (maxcount % BLIST_BMAP_RADIX != 0)
+ scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_BMAP_RADIX);
+ else
+ scan->bm_bitmap = 0;
+
+ for (;;) {
+ /* Back up over meta-nodes, clearing bits if necessary. */
+ blk -= BLIST_BMAP_RADIX;
+ radix = BLIST_BMAP_RADIX;
+ while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0) {
+ if ((scan--)->bm_bitmap == 0)
+ scan->bm_bitmap ^= 1;
+ radix *= BLIST_META_RADIX;
+ }
+ if ((scan--)->bm_bitmap == 0)
+ scan[-digit * radix_to_skip(radix)].bm_bitmap ^=
+ (u_daddr_t)1 << digit;
+
+ if (blk == start)
+ break;
+ /* Clear all the bits of this leaf. */
+ scan->bm_bitmap = 0;
}
- if (next->bm_bitmap == 0)
- scan[-digit * skip].bm_bitmap ^= (u_daddr_t)1 << digit;
- return (0);
+ return (maxcount);
+}
+
+/*
+ * Given a bitmask, flip all the bits from the least-significant 1-bit to the
+ * most significant bit. If the result is non-zero, then the least-significant
+ * 1-bit of the result is in the same position as the least-significant 0-bit
+ * in mask that is followed by a 1-bit.
+ */
+static inline u_daddr_t
+flip_hibits(u_daddr_t mask)
+{
+
+ return (-mask & ~mask);
}
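
A worked example of the identity behind flip_hibits(), shown with 8-bit values for brevity:

/*
 *      mask          = 00110100
 *      ~mask         = 11001011
 *      -mask         = 11001100   (two's complement)
 *      -mask & ~mask = 11001000
 *
 * Every bit from the least-significant 1-bit of 'mask' (bit 2) upward is
 * flipped; the least-significant 1-bit of the result (bit 3) marks the
 * first 0-bit of 'mask' that follows a 1-bit.
 */
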
/*
@@ -651,16 +717,16 @@ blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
* crosses a leaf boundary.
*/
static daddr_t
-blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
+blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int *count, int maxcount)
{
u_daddr_t cursor_mask, mask;
int count1, hi, lo, num_shifts, range1, range_ext;
range1 = 0;
- count1 = count - 1;
+ count1 = *count - 1;
num_shifts = fls(count1);
mask = scan->bm_bitmap;
- while ((-mask & ~mask) != 0 && num_shifts > 0) {
+ while (flip_hibits(mask) != 0 && num_shifts > 0) {
/*
* If bit i is set in mask, then bits in [i, i+range1] are set
* in scan->bm_bitmap. The value of range1 is equal to count1
@@ -712,40 +778,50 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
/*
* The least significant set bit in mask marks the start of the first
- * available range of sufficient size. Clear all the bits but that one,
- * and then find its position.
+ * available range of sufficient size. Find its position.
*/
- mask &= -mask;
lo = bitpos(mask);
- hi = lo + count;
- if (hi > BLIST_BMAP_RADIX) {
- /*
- * An allocation within this leaf is impossible, so a successful
- * allocation depends on the next leaf providing some of the blocks.
- */
- if (blst_next_leaf_alloc(scan, blk, hi - BLIST_BMAP_RADIX) != 0)
+ /*
+ * Find how much space is available starting at that position.
+ */
+ if (flip_hibits(mask) != 0) {
+ /* Count the 1 bits starting at position lo. */
+ hi = bitpos(flip_hibits(mask)) + count1;
+ if (maxcount < hi - lo)
+ hi = lo + maxcount;
+ *count = hi - lo;
+ mask = bitrange(lo, *count);
+ } else if (maxcount <= BLIST_BMAP_RADIX - lo) {
+ /* All the blocks we can use are available here. */
+ hi = lo + maxcount;
+ *count = maxcount;
+ mask = bitrange(lo, *count);
+ } else {
+ /* Check next leaf for some of the blocks we want or need. */
+ count1 = *count - (BLIST_BMAP_RADIX - lo);
+ maxcount -= BLIST_BMAP_RADIX - lo;
+ hi = blst_next_leaf_alloc(scan, blk, count1, maxcount);
+ if (hi < count1)
/*
- * The hint cannot be updated, because the same
- * allocation request could be satisfied later, by this
- * leaf, if the state of the next leaf changes, and
- * without any changes to this leaf.
+ * The next leaf cannot supply enough blocks to reach
+ * the minimum required allocation. The hint cannot be
+ * updated, because the same allocation request could
+ * be satisfied later, by this leaf, if the state of
+ * the next leaf changes, and without any changes to
+ * this leaf.
*/
return (SWAPBLK_NONE);
+ *count = BLIST_BMAP_RADIX - lo + hi;
hi = BLIST_BMAP_RADIX;
}
- /* Set the bits of mask at position 'lo' and higher. */
- mask = -mask;
if (hi == BLIST_BMAP_RADIX) {
/*
* Update bighint. There is no allocation bigger than range1
* available in this leaf after this allocation completes.
*/
scan->bm_bighint = range1;
- } else {
- /* Clear the bits of mask at position 'hi' and higher. */
- mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi);
}
/* Clear the allocated bits from this leaf. */
scan->bm_bitmap &= ~mask;
@@ -761,15 +837,16 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
* and we have a few optimizations strewn in as well.
*/
static daddr_t
-blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
+blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
+ int maxcount, u_daddr_t radix)
{
daddr_t blk, i, r, skip;
- u_daddr_t bit, mask;
+ u_daddr_t mask;
bool scan_from_start;
int digit;
if (radix == BLIST_BMAP_RADIX)
- return (blst_leaf_alloc(scan, cursor, count));
+ return (blst_leaf_alloc(scan, cursor, count, maxcount));
blk = cursor & -radix;
scan_from_start = (cursor == blk);
radix /= BLIST_META_RADIX;
@@ -796,23 +873,22 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
* Examine the nonempty subtree associated with each bit set in mask.
*/
do {
- bit = mask & -mask;
- digit = bitpos(bit);
+ digit = bitpos(mask);
i = 1 + digit * skip;
- if (count <= scan[i].bm_bighint) {
+ if (*count <= scan[i].bm_bighint) {
/*
* The allocation might fit beginning in the i'th subtree.
*/
r = blst_meta_alloc(&scan[i], cursor + digit * radix,
- count, radix);
+ count, maxcount, radix);
if (r != SWAPBLK_NONE) {
if (scan[i].bm_bitmap == 0)
- scan->bm_bitmap ^= bit;
+ scan->bm_bitmap ^= bitrange(digit, 1);
return (r);
}
}
cursor = blk;
- } while ((mask ^= bit) != 0);
+ } while ((mask ^= bitrange(digit, 1)) != 0);
/*
* We couldn't allocate count in this subtree. If the whole tree was
@@ -820,7 +896,7 @@ blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
*/
if (scan_from_start && !(digit == BLIST_META_RADIX - 1 &&
scan[i].bm_bighint == BLIST_MAX_ALLOC))
- scan->bm_bighint = count - 1;
+ scan->bm_bighint = *count - 1;
return (SWAPBLK_NONE);
}
@@ -841,8 +917,9 @@ blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
* count n
*/
mask = bitrange(blk & BLIST_BMAP_MASK, count);
- if (scan->bm_bitmap & mask)
- panic("freeing free block");
+ KASSERT((scan->bm_bitmap & mask) == 0,
+ ("freeing free block: %jx, size %d, mask %jx",
+ (uintmax_t)blk, count, (uintmax_t)scan->bm_bitmap & mask));
scan->bm_bitmap |= mask;
}
@@ -1006,7 +1083,7 @@ static void
blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
{
daddr_t skip;
- u_daddr_t bit, mask;
+ u_daddr_t mask;
int digit;
if (radix == BLIST_BMAP_RADIX) {
@@ -1038,11 +1115,10 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
mask = scan->bm_bitmap;
/* Examine the nonempty subtree associated with each bit set in mask */
do {
- bit = mask & -mask;
- digit = bitpos(bit);
+ digit = bitpos(mask);
blst_radix_print(&scan[1 + digit * skip], blk + digit * radix,
radix, tab);
- } while ((mask ^= bit) != 0);
+ } while ((mask ^= bitrange(digit, 1)) != 0);
tab -= 4;
printf(
@@ -1079,7 +1155,7 @@ main(int ac, char **av)
for (;;) {
char buf[1024];
long long da = 0;
- long long count = 0;
+ int count = 0, maxcount = 0;
printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
(long long)size, (long long)bl->bl_radix);
@@ -1088,7 +1164,7 @@ main(int ac, char **av)
break;
switch(buf[0]) {
case 'r':
- if (sscanf(buf + 1, "%lld", &count) == 1) {
+ if (sscanf(buf + 1, "%d", &count) == 1) {
blist_resize(&bl, count, 1, M_WAITOK);
} else {
printf("?\n");
@@ -1104,22 +1180,23 @@ main(int ac, char **av)
sbuf_delete(s);
break;
case 'a':
- if (sscanf(buf + 1, "%lld", &count) == 1) {
- daddr_t blk = blist_alloc(bl, count);
- printf(" R=%08llx\n", (long long)blk);
+ if (sscanf(buf + 1, "%d%d", &count, &maxcount) == 2) {
+ daddr_t blk = blist_alloc(bl, &count, maxcount);
+ printf(" R=%08llx, c=%08d\n",
+ (long long)blk, count);
} else {
printf("?\n");
}
break;
case 'f':
- if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
+ if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
blist_free(bl, da, count);
} else {
printf("?\n");
}
break;
case 'l':
- if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
+ if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
printf(" n=%jd\n",
(intmax_t)blist_fill(bl, da, count));
} else {
@@ -1131,31 +1208,24 @@ main(int ac, char **av)
puts(
"p -print\n"
"s -stats\n"
- "a %d -allocate\n"
+ "a %d %d -allocate\n"
"f %x %d -free\n"
"l %x %d -fill\n"
"r %d -resize\n"
- "h/? -help"
+ "h/? -help\n"
+ "q -quit"
);
break;
+ case 'q':
+ break;
default:
printf("?\n");
break;
}
+ if (buf[0] == 'q')
+ break;
}
- return(0);
-}
-
-void
-panic(const char *ctl, ...)
-{
- va_list va;
-
- va_start(va, ctl);
- vfprintf(stderr, ctl, va);
- fprintf(stderr, "\n");
- va_end(va);
- exit(1);
+ return (0);
}
#endif
diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c
index a87c02a5..244f1af3 100644
--- a/freebsd/sys/kern/subr_bus.c
+++ b/freebsd/sys/kern/subr_bus.c
@@ -2472,13 +2472,31 @@ device_print_prettyname(device_t dev)
int
device_printf(device_t dev, const char * fmt, ...)
{
+ char buf[128];
+ struct sbuf sb;
+ const char *name;
va_list ap;
- int retval;
+ size_t retval;
+
+ retval = 0;
+
+ sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
+ sbuf_set_drain(&sb, sbuf_printf_drain, &retval);
+
+ name = device_get_name(dev);
+
+ if (name == NULL)
+ sbuf_cat(&sb, "unknown: ");
+ else
+ sbuf_printf(&sb, "%s%d: ", name, device_get_unit(dev));
- retval = device_print_prettyname(dev);
va_start(ap, fmt);
- retval += vprintf(fmt, ap);
+ sbuf_vprintf(&sb, fmt, ap);
va_end(ap);
+
+ sbuf_finish(&sb);
+ sbuf_delete(&sb);
+
return (retval);
}
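
device_printf() now formats into a small on-stack buffer and lets sbuf_printf_drain() flush and count the output, so long lines no longer require a heap allocation. A minimal sketch of the same fixed-buffer-plus-drain pattern; example_log() is a hypothetical caller (not part of the patch):

static size_t
example_log(const char *tag, int value)
{
        char buf[128];
        struct sbuf sb;
        size_t retval = 0;

        sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
        sbuf_set_drain(&sb, sbuf_printf_drain, &retval);
        sbuf_printf(&sb, "%s: %d\n", tag, value);
        sbuf_finish(&sb);       /* drains whatever is still buffered */
        sbuf_delete(&sb);
        return (retval);        /* bytes written, counted by the drain */
}
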
@@ -3050,6 +3068,10 @@ device_detach(device_t dev)
PDEBUG(("%s", DEVICENAME(dev)));
if (dev->state == DS_BUSY)
return (EBUSY);
+ if (dev->state == DS_ATTACHING) {
+ device_printf(dev, "device in attaching state! Deferring detach.\n");
+ return (EBUSY);
+ }
if (dev->state != DS_ATTACHED)
return (0);
@@ -3914,6 +3936,95 @@ bus_generic_resume(device_t dev)
return (0);
}
+
+/**
+ * @brief Helper function for implementing BUS_RESET_POST
+ *
+ * A bus can use this function to implement common operations of
+ * re-attaching or resuming the children after the bus itself was
+ * reset, and after restoring bus-unique state of children.
+ *
+ * @param dev The bus
+ * @param flags DEVF_RESET_*
+ */
+int
+bus_helper_reset_post(device_t dev, int flags)
+{
+ device_t child;
+ int error, error1;
+
+ error = 0;
+ TAILQ_FOREACH(child, &dev->children, link) {
+ BUS_RESET_POST(dev, child);
+ error1 = (flags & DEVF_RESET_DETACH) != 0 ?
+ device_probe_and_attach(child) :
+ BUS_RESUME_CHILD(dev, child);
+ if (error == 0 && error1 != 0)
+ error = error1;
+ }
+ return (error);
+}
+
+static void
+bus_helper_reset_prepare_rollback(device_t dev, device_t child, int flags)
+{
+
+ child = TAILQ_NEXT(child, link);
+ if (child == NULL)
+ return;
+ TAILQ_FOREACH_FROM(child, &dev->children, link) {
+ BUS_RESET_POST(dev, child);
+ if ((flags & DEVF_RESET_DETACH) != 0)
+ device_probe_and_attach(child);
+ else
+ BUS_RESUME_CHILD(dev, child);
+ }
+}
+
+/**
+ * @brief Helper function for implementing BUS_RESET_PREPARE
+ *
+ * A bus can use this function to implement common operations of
+ * detaching or suspending the children before the bus itself is
+ * reset, and then save bus-unique state of children that must
+ * persist across the reset.
+ *
+ * @param dev The bus
+ * @param flags DEVF_RESET_*
+ */
+int
+bus_helper_reset_prepare(device_t dev, int flags)
+{
+ device_t child;
+ int error;
+
+ if (dev->state != DS_ATTACHED)
+ return (EBUSY);
+
+ TAILQ_FOREACH_REVERSE(child, &dev->children, device_list, link) {
+ if ((flags & DEVF_RESET_DETACH) != 0) {
+ error = device_get_state(child) == DS_ATTACHED ?
+ device_detach(child) : 0;
+ } else {
+ error = BUS_SUSPEND_CHILD(dev, child);
+ }
+ if (error == 0) {
+ error = BUS_RESET_PREPARE(dev, child);
+ if (error != 0) {
+ if ((flags & DEVF_RESET_DETACH) != 0)
+ device_probe_and_attach(child);
+ else
+ BUS_RESUME_CHILD(dev, child);
+ }
+ }
+ if (error != 0) {
+ bus_helper_reset_prepare_rollback(dev, child, flags);
+ return (error);
+ }
+ }
+ return (0);
+}
+
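
The two helpers bracket a bus driver's hardware reset. A hedged sketch of a reset method built on them, where foo_hw_reset() is a hypothetical device-specific routine (not part of the patch):

static int
foo_bus_reset(device_t bus, int flags)
{
        int error, error1;

        error = bus_helper_reset_prepare(bus, flags);
        if (error != 0)
                return (error);
        error = foo_hw_reset(bus);              /* hypothetical */
        /* Re-attach or resume the children even if the reset failed. */
        error1 = bus_helper_reset_post(bus, flags);
        return (error != 0 ? error : error1);
}
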
/**
* @brief Helper function for implementing BUS_PRINT_CHILD().
*
@@ -5613,6 +5724,7 @@ devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
case DEV_CLEAR_DRIVER:
case DEV_RESCAN:
case DEV_DELETE:
+ case DEV_RESET:
error = priv_check(td, PRIV_DRIVER);
if (error == 0)
error = find_device(req, &dev);
@@ -5839,6 +5951,14 @@ devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
device_frozen = false;
}
break;
+ case DEV_RESET:
+ if ((req->dr_flags & ~(DEVF_RESET_DETACH)) != 0) {
+ error = EINVAL;
+ break;
+ }
+ error = BUS_RESET_CHILD(device_get_parent(dev), dev,
+ req->dr_flags);
+ break;
}
#endif /* __rtems__ */
mtx_unlock(&Giant);
@@ -5864,8 +5984,9 @@ devctl2_init(void)
*/
static int obsolete_panic = 0;
SYSCTL_INT(_debug, OID_AUTO, obsolete_panic, CTLFLAG_RWTUN, &obsolete_panic, 0,
- "Bus debug level");
-/* 0 - don't panic, 1 - panic if already obsolete, 2 - panic if deprecated */
+ "Panic when obsolete features are used (0 = never, 1 = if osbolete, "
+ "2 = if deprecated)");
+
static void
gone_panic(int major, int running, const char *msg)
{
@@ -5890,7 +6011,7 @@ _gone_in(int major, const char *msg)
gone_panic(major, P_OSREL_MAJOR(__FreeBSD_version), msg);
if (P_OSREL_MAJOR(__FreeBSD_version) >= major)
printf("Obsolete code will removed soon: %s\n", msg);
- else if (P_OSREL_MAJOR(__FreeBSD_version) + 1 == major)
+ else
printf("Deprecated code (to be removed in FreeBSD %d): %s\n",
major, msg);
}
@@ -5903,7 +6024,7 @@ _gone_in_dev(device_t dev, int major, const char *msg)
if (P_OSREL_MAJOR(__FreeBSD_version) >= major)
device_printf(dev,
"Obsolete code will removed soon: %s\n", msg);
- else if (P_OSREL_MAJOR(__FreeBSD_version) + 1 == major)
+ else
device_printf(dev,
"Deprecated code (to be removed in FreeBSD %d): %s\n",
major, msg);
diff --git a/freebsd/sys/kern/subr_eventhandler.c b/freebsd/sys/kern/subr_eventhandler.c
index e07248bf..6d36653d 100644
--- a/freebsd/sys/kern/subr_eventhandler.c
+++ b/freebsd/sys/kern/subr_eventhandler.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
diff --git a/freebsd/sys/kern/subr_gtaskqueue.c b/freebsd/sys/kern/subr_gtaskqueue.c
index 3f80cd2c..af9b65d4 100644
--- a/freebsd/sys/kern/subr_gtaskqueue.c
+++ b/freebsd/sys/kern/subr_gtaskqueue.c
@@ -35,7 +35,6 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
-#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
@@ -69,6 +68,8 @@ struct gtaskqueue_busy {
static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
+typedef void (*gtaskqueue_enqueue_fn)(void *context);
+
struct gtaskqueue {
STAILQ_HEAD(, gtask) tq_queue;
gtaskqueue_enqueue_fn tq_enqueue;
@@ -697,7 +698,7 @@ taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
}
}
if (idx == -1)
- panic("taskqgroup_find: Failed to pick a qid.");
+ panic("%s: failed to pick a qid.", __func__);
return (idx);
}
@@ -733,36 +734,36 @@ SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
void
taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
- void *uniq, int irq, const char *name)
+ void *uniq, device_t dev, struct resource *irq, const char *name)
{
#ifndef __rtems__
- cpuset_t mask;
- int qid, error;
+ int cpu, qid, error;
#else /* __rtems__ */
int qid;
#endif /* __rtems__ */
gtask->gt_uniq = uniq;
snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
+#ifndef __rtems__
+ gtask->gt_dev = dev;
gtask->gt_irq = irq;
gtask->gt_cpu = -1;
+#endif /* __rtems__ */
mtx_lock(&qgroup->tqg_lock);
qid = taskqgroup_find(qgroup, uniq);
qgroup->tqg_queue[qid].tgc_cnt++;
LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
#ifndef __rtems__
- if (irq != -1 && tqg_smp_started) {
- gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
- CPU_ZERO(&mask);
- CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
+ if (dev != NULL && irq != NULL && tqg_smp_started) {
+ cpu = qgroup->tqg_queue[qid].tgc_cpu;
+ gtask->gt_cpu = cpu;
mtx_unlock(&qgroup->tqg_lock);
- error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ error = bus_bind_intr(dev, irq, cpu);
if (error)
- printf("%s: setaffinity failed for %s: %d\n", __func__, gtask->gt_name, error);
+ printf("%s: binding interrupt failed for %s: %d\n",
+ __func__, gtask->gt_name, error);
} else
-#else /* __rtems__ */
- BSD_ASSERT(irq == -1);
#endif /* __rtems__ */
mtx_unlock(&qgroup->tqg_lock);
}
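
Callers now pass taskqgroup_attach() the device and the interrupt resource instead of a raw vector number, so the task can be bound with bus_bind_intr(). A sketch of an updated caller; the softc layout, handler, and queue group name are illustrative assumptions (not part of the patch):

static void
example_attach_rxq(struct example_softc *sc)
{
        GROUPTASK_INIT(&sc->rx_task, 0, example_rx_intr_task, sc);
        taskqgroup_attach(qgroup_example, &sc->rx_task, sc,
            sc->dev, sc->irq_res, "example rxq");
}
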
@@ -771,7 +772,6 @@ static void
taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
#ifndef __rtems__
- cpuset_t mask;
int qid, cpu, error;
#else /* __rtems__ */
int qid;
@@ -781,24 +781,18 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
qid = taskqgroup_find(qgroup, gtask->gt_uniq);
#ifndef __rtems__
cpu = qgroup->tqg_queue[qid].tgc_cpu;
- if (gtask->gt_irq != -1) {
+ if (gtask->gt_dev != NULL && gtask->gt_irq != NULL) {
mtx_unlock(&qgroup->tqg_lock);
-
- CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
- error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
+ error = bus_bind_intr(gtask->gt_dev, gtask->gt_irq, cpu);
mtx_lock(&qgroup->tqg_lock);
if (error)
- printf("%s: %s setaffinity failed: %d\n", __func__, gtask->gt_name, error);
+ printf("%s: binding interrupt failed for %s: %d\n",
+ __func__, gtask->gt_name, error);
}
-#else /* __rtems__ */
- BSD_ASSERT(gtask->gt_irq == -1);
#endif /* __rtems__ */
qgroup->tqg_queue[qid].tgc_cnt++;
-
- LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
- gt_list);
+ LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
mtx_unlock(&qgroup->tqg_lock);
@@ -806,10 +800,9 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
int
taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
- void *uniq, int cpu, int irq, const char *name)
+ void *uniq, int cpu, device_t dev, struct resource *irq, const char *name)
{
#ifndef __rtems__
- cpuset_t mask;
int i, qid, error;
#else /* __rtems__ */
int i, qid;
@@ -818,8 +811,11 @@ taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
qid = -1;
gtask->gt_uniq = uniq;
snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
+#ifndef __rtems__
+ gtask->gt_dev = dev;
gtask->gt_irq = irq;
gtask->gt_cpu = cpu;
+#endif /* __rtems__ */
mtx_lock(&qgroup->tqg_lock);
if (tqg_smp_started) {
for (i = 0; i < qgroup->tqg_cnt; i++)
@@ -843,30 +839,28 @@ taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
mtx_unlock(&qgroup->tqg_lock);
#ifndef __rtems__
- CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
- if (irq != -1 && tqg_smp_started) {
- error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ if (dev != NULL && irq != NULL && tqg_smp_started) {
+ error = bus_bind_intr(dev, irq, cpu);
if (error)
- printf("%s: setaffinity failed: %d\n", __func__, error);
+ printf("%s: binding interrupt failed for %s: %d\n",
+ __func__, gtask->gt_name, error);
}
#else /* __rtems__ */
- BSD_ASSERT(irq == -1);
+ BSD_ASSERT(irq == NULL);
#endif /* __rtems__ */
return (0);
}
+#ifndef __rtems__
static int
taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
-#ifndef __rtems__
- cpuset_t mask;
- int i, qid, irq, cpu, error;
-#else /* __rtems__ */
- int i, qid, irq, cpu;
-#endif /* __rtems__ */
+ device_t dev;
+ struct resource *irq;
+ int cpu, error, i, qid;
qid = -1;
+ dev = gtask->gt_dev;
irq = gtask->gt_irq;
cpu = gtask->gt_cpu;
MPASS(tqg_smp_started);
@@ -887,20 +881,15 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtas
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
mtx_unlock(&qgroup->tqg_lock);
-#ifndef __rtems__
- CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
-
- if (irq != -1) {
- error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
+ if (dev != NULL && irq != NULL) {
+ error = bus_bind_intr(dev, irq, cpu);
if (error)
- printf("%s: setaffinity failed: %d\n", __func__, error);
+ printf("%s: binding interrupt failed for %s: %d\n",
+ __func__, gtask->gt_name, error);
}
-#else /* __rtems__ */
- BSD_ASSERT(irq == -1);
-#endif /* __rtems__ */
return (0);
}
+#endif /* __rtems__ */
void
taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
@@ -913,7 +902,7 @@ taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
break;
if (i == qgroup->tqg_cnt)
- panic("taskqgroup_detach: task %s not in group\n", gtask->gt_name);
+ panic("%s: task %s not in group", __func__, gtask->gt_name);
qgroup->tqg_queue[i].tgc_cnt--;
LIST_REMOVE(gtask, gt_list);
mtx_unlock(&qgroup->tqg_lock);
@@ -941,8 +930,7 @@ taskqgroup_binder(void *ctx)
thread_unlock(curthread);
if (error)
- printf("%s: setaffinity failed: %d\n", __func__,
- error);
+ printf("%s: binding curthread failed: %d\n", __func__, error);
#else /* __rtems__ */
sc = rtems_task_set_affinity(RTEMS_SELF, sizeof(mask), &mask);
if (sc != RTEMS_SUCCESSFUL)
@@ -1053,10 +1041,14 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
while ((gtask = LIST_FIRST(&gtask_head))) {
LIST_REMOVE(gtask, gt_list);
+#ifndef __rtems__
if (gtask->gt_cpu == -1)
+#endif /* __rtems__ */
taskqgroup_attach_deferred(qgroup, gtask);
+#ifndef __rtems__
else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
taskqgroup_attach_deferred(qgroup, gtask);
+#endif /* __rtems__ */
}
#ifdef INVARIANTS
@@ -1115,15 +1107,16 @@ taskqgroup_destroy(struct taskqgroup *qgroup)
void
taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
- const char *name)
+ const char *name)
{
GROUPTASK_INIT(gtask, 0, fn, ctx);
- taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
+ taskqgroup_attach(qgroup_config, gtask, gtask, NULL, NULL, name);
}
void
taskqgroup_config_gtask_deinit(struct grouptask *gtask)
{
+
taskqgroup_detach(qgroup_config, gtask);
}
diff --git a/freebsd/sys/kern/subr_kobj.c b/freebsd/sys/kern/subr_kobj.c
index a6a888d5..3736f64c 100644
--- a/freebsd/sys/kern/subr_kobj.c
+++ b/freebsd/sys/kern/subr_kobj.c
@@ -127,35 +127,40 @@ kobj_class_compile_common(kobj_class_t cls, kobj_ops_t ops)
cls->ops = ops;
}
-void
-kobj_class_compile(kobj_class_t cls)
+static int
+kobj_class_compile1(kobj_class_t cls, int mflags)
{
kobj_ops_t ops;
KOBJ_ASSERT(MA_NOTOWNED);
- /*
- * Allocate space for the compiled ops table.
- */
- ops = malloc(sizeof(struct kobj_ops), M_KOBJ, M_NOWAIT);
- if (!ops)
- panic("%s: out of memory", __func__);
+ ops = malloc(sizeof(struct kobj_ops), M_KOBJ, mflags);
+ if (ops == NULL)
+ return (ENOMEM);
- KOBJ_LOCK();
-
/*
* We may have lost a race for kobj_class_compile here - check
* to make sure someone else hasn't already compiled this
* class.
*/
+ KOBJ_LOCK();
if (cls->ops) {
KOBJ_UNLOCK();
free(ops, M_KOBJ);
- return;
+ return (0);
}
-
kobj_class_compile_common(cls, ops);
KOBJ_UNLOCK();
+ return (0);
+}
+
+void
+kobj_class_compile(kobj_class_t cls)
+{
+ int error;
+
+ error = kobj_class_compile1(cls, M_WAITOK);
+ KASSERT(error == 0, ("kobj_class_compile1 returned %d", error));
}
void
@@ -256,24 +261,6 @@ kobj_class_free(kobj_class_t cls)
free(ops, M_KOBJ);
}
-kobj_t
-kobj_create(kobj_class_t cls,
- struct malloc_type *mtype,
- int mflags)
-{
- kobj_t obj;
-
- /*
- * Allocate and initialise the new object.
- */
- obj = malloc(cls->size, mtype, mflags | M_ZERO);
- if (!obj)
- return NULL;
- kobj_init(obj, cls);
-
- return obj;
-}
-
static void
kobj_init_common(kobj_t obj, kobj_class_t cls)
{
@@ -282,30 +269,52 @@ kobj_init_common(kobj_t obj, kobj_class_t cls)
cls->refs++;
}
-void
-kobj_init(kobj_t obj, kobj_class_t cls)
+static int
+kobj_init1(kobj_t obj, kobj_class_t cls, int mflags)
{
- KOBJ_ASSERT(MA_NOTOWNED);
- retry:
- KOBJ_LOCK();
+ int error;
- /*
- * Consider compiling the class' method table.
- */
- if (!cls->ops) {
+ KOBJ_LOCK();
+ while (cls->ops == NULL) {
/*
* kobj_class_compile doesn't want the lock held
* because of the call to malloc - we drop the lock
* and re-try.
*/
KOBJ_UNLOCK();
- kobj_class_compile(cls);
- goto retry;
+ error = kobj_class_compile1(cls, mflags);
+ if (error != 0)
+ return (error);
+ KOBJ_LOCK();
}
-
kobj_init_common(obj, cls);
-
KOBJ_UNLOCK();
+ return (0);
+}
+
+kobj_t
+kobj_create(kobj_class_t cls, struct malloc_type *mtype, int mflags)
+{
+ kobj_t obj;
+
+ obj = malloc(cls->size, mtype, mflags | M_ZERO);
+ if (obj == NULL)
+ return (NULL);
+ if (kobj_init1(obj, cls, mflags) != 0) {
+ free(obj, mtype);
+ return (NULL);
+ }
+ return (obj);
+}
+
+void
+kobj_init(kobj_t obj, kobj_class_t cls)
+{
+ int error;
+
+ error = kobj_init1(obj, cls, M_NOWAIT);
+ if (error != 0)
+ panic("kobj_init1 failed: error %d", error);
}
void
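
With the allocation flag plumbed through kobj_class_compile1() and kobj_init1(), kobj_create() with M_NOWAIT now fails cleanly instead of panicking when the ops table cannot be allocated. A sketch; example_class and M_EXAMPLE are hypothetical (not part of the patch):

static int
example_new_object(kobj_t *objp)
{
        kobj_t obj;

        obj = kobj_create(&example_class, M_EXAMPLE, M_NOWAIT);
        if (obj == NULL)
                return (ENOMEM);  /* object or ops table allocation failed */
        *objp = obj;
        return (0);
}
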
diff --git a/freebsd/sys/kern/subr_lock.c b/freebsd/sys/kern/subr_lock.c
index c2587cd0..53d99743 100644
--- a/freebsd/sys/kern/subr_lock.c
+++ b/freebsd/sys/kern/subr_lock.c
@@ -4,7 +4,6 @@
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
- * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -258,7 +257,9 @@ struct lock_prof_cpu {
struct lock_prof_type lpc_types[2]; /* One for spin one for other. */
};
-struct lock_prof_cpu *lp_cpu[MAXCPU];
+DPCPU_DEFINE_STATIC(struct lock_prof_cpu, lp);
+#define LP_CPU_SELF (DPCPU_PTR(lp))
+#define LP_CPU(cpu) (DPCPU_ID_PTR((cpu), lp))
volatile int __read_mostly lock_prof_enable;
static volatile int lock_prof_resetting;
@@ -304,11 +305,9 @@ lock_prof_init(void *arg)
{
int cpu;
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
- lp_cpu[cpu] = malloc(sizeof(*lp_cpu[cpu]), M_DEVBUF,
- M_WAITOK | M_ZERO);
- lock_prof_init_type(&lp_cpu[cpu]->lpc_types[0]);
- lock_prof_init_type(&lp_cpu[cpu]->lpc_types[1]);
+ CPU_FOREACH(cpu) {
+ lock_prof_init_type(&LP_CPU(cpu)->lpc_types[0]);
+ lock_prof_init_type(&LP_CPU(cpu)->lpc_types[1]);
}
}
SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL);
@@ -347,15 +346,15 @@ lock_prof_reset(void)
* before we zero the structures. Some items may still be linked
* into per-thread lists as well.
*/
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
- lpc = lp_cpu[cpu];
+ CPU_FOREACH(cpu) {
+ lpc = LP_CPU(cpu);
for (i = 0; i < LPROF_CACHE_SIZE; i++) {
LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link);
LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link);
}
}
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
- lpc = lp_cpu[cpu];
+ CPU_FOREACH(cpu) {
+ lpc = LP_CPU(cpu);
bzero(lpc, sizeof(*lpc));
lock_prof_init_type(&lpc->lpc_types[0]);
lock_prof_init_type(&lpc->lpc_types[1]);
@@ -395,10 +394,8 @@ lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash,
dst->class = match->class;
dst->name = match->name;
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
- if (lp_cpu[cpu] == NULL)
- continue;
- type = &lp_cpu[cpu]->lpc_types[spin];
+ CPU_FOREACH(cpu) {
+ type = &LP_CPU(cpu)->lpc_types[spin];
SLIST_FOREACH(l, &type->lpt_hash[hash], link) {
if (l->ticks == t)
continue;
@@ -416,7 +413,6 @@ lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash,
dst->cnt_contest_locking += l->cnt_contest_locking;
}
}
-
}
static void
@@ -455,11 +451,9 @@ dump_lock_prof_stats(SYSCTL_HANDLER_ARGS)
lock_prof_enable = 0;
quiesce_all_cpus("profstat", 0);
t = ticks;
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
- if (lp_cpu[cpu] == NULL)
- continue;
- lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[0], sb, 0, t);
- lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[1], sb, 1, t);
+ CPU_FOREACH(cpu) {
+ lock_prof_type_stats(&LP_CPU(cpu)->lpc_types[0], sb, 0, t);
+ lock_prof_type_stats(&LP_CPU(cpu)->lpc_types[1], sb, 1, t);
}
lock_prof_enable = enabled;
@@ -525,7 +519,7 @@ lock_profile_lookup(struct lock_object *lo, int spin, const char *file,
p = unknown;
hash = (uintptr_t)lo->lo_name * 31 + (uintptr_t)p * 31 + line;
hash &= LPROF_HASH_MASK;
- type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
+ type = &LP_CPU_SELF->lpc_types[spin];
head = &type->lpt_hash[hash];
SLIST_FOREACH(lp, head, link) {
if (lp->line == line && lp->file == p &&
@@ -560,7 +554,7 @@ lock_profile_object_lookup(struct lock_object *lo, int spin, const char *file,
if (l->lpo_obj == lo && l->lpo_file == file &&
l->lpo_line == line)
return (l);
- type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
+ type = &LP_CPU_SELF->lpc_types[spin];
l = LIST_FIRST(&type->lpt_lpoalloc);
if (l == NULL) {
lock_prof_rejected++;
@@ -696,7 +690,7 @@ lock_profile_release_lock(struct lock_object *lo)
lp->cnt_cur += l->lpo_cnt;
release:
LIST_REMOVE(l, lpo_link);
- type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
+ type = &LP_CPU_SELF->lpc_types[spin];
LIST_INSERT_HEAD(&type->lpt_lpoalloc, l, lpo_link);
out:
critical_exit();
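
Replacing the malloc'ed lp_cpu[MAXCPU] array with DPCPU_DEFINE_STATIC removes both the SYSINIT-time allocation and the NULL checks, since the per-CPU storage exists from boot. A minimal sketch of the same idiom with a hypothetical counter (not part of the patch):

DPCPU_DEFINE_STATIC(uint64_t, example_count);

static void
example_increment(void)
{
        critical_enter();               /* stay on one CPU */
        (*DPCPU_PTR(example_count))++;
        critical_exit();
}

static uint64_t
example_sum(void)
{
        uint64_t total = 0;
        int cpu;

        CPU_FOREACH(cpu)
                total += *DPCPU_ID_PTR(cpu, example_count);
        return (total);
}
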
diff --git a/freebsd/sys/kern/subr_pcpu.c b/freebsd/sys/kern/subr_pcpu.c
index 0ab77996..a3a06c78 100644
--- a/freebsd/sys/kern/subr_pcpu.c
+++ b/freebsd/sys/kern/subr_pcpu.c
@@ -136,24 +136,20 @@ SYSINIT(dpcpu, SI_SUB_KLD, SI_ORDER_FIRST, dpcpu_startup, NULL);
/*
* UMA_PCPU_ZONE zones, that are available for all kernel
* consumers. Right now 64 bit zone is used for counter(9)
- * and pointer zone is used by flowtable.
+ * and int zone is used for mount point counters.
*/
+uma_zone_t pcpu_zone_int;
uma_zone_t pcpu_zone_64;
-uma_zone_t pcpu_zone_ptr;
static void
pcpu_zones_startup(void)
{
+ pcpu_zone_int = uma_zcreate("int pcpu", sizeof(int),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
pcpu_zone_64 = uma_zcreate("64 pcpu", sizeof(uint64_t),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
-
- if (sizeof(uint64_t) == sizeof(void *))
- pcpu_zone_ptr = pcpu_zone_64;
- else
- pcpu_zone_ptr = uma_zcreate("ptr pcpu", sizeof(void *),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
}
SYSINIT(pcpu_zones, SI_SUB_VM, SI_ORDER_ANY, pcpu_zones_startup, NULL);
diff --git a/freebsd/sys/kern/subr_prf.c b/freebsd/sys/kern/subr_prf.c
index 2b45c13e..ed3c8498 100644
--- a/freebsd/sys/kern/subr_prf.c
+++ b/freebsd/sys/kern/subr_prf.c
@@ -70,6 +70,8 @@ __FBSDID("$FreeBSD$");
#include <sys/cons.h>
#endif /* __rtems__ */
#include <sys/uio.h>
+#else /* !_KERNEL */
+#include <errno.h>
#endif
#include <sys/ctype.h>
#include <sys/sbuf.h>
@@ -1300,3 +1302,46 @@ sbuf_putbuf(struct sbuf *sb)
printf("%s", sbuf_data(sb));
}
#endif /* __rtems__ */
+
+int
+sbuf_printf_drain(void *arg, const char *data, int len)
+{
+#ifndef __rtems__
+ size_t *retvalptr;
+ int r;
+#ifdef _KERNEL
+ char *dataptr;
+ char oldchr;
+
+ /*
+ * This is allowed as an extra byte is always reserved for
+ * the terminating NUL byte. Save and restore the byte because
+ * we might be flushing a record, and there may be valid
+ * data after the buffer.
+ */
+ oldchr = data[len];
+ dataptr = __DECONST(char *, data);
+ dataptr[len] = '\0';
+
+ prf_putbuf(dataptr, TOLOG | TOCONS, -1);
+ r = len;
+
+ dataptr[len] = oldchr;
+
+#else /* !_KERNEL */
+
+ r = printf("%.*s", len, data);
+ if (r < 0)
+ return (-errno);
+
+#endif
+
+ retvalptr = arg;
+ if (retvalptr != NULL)
+ *retvalptr += r;
+
+ return (r);
+#else /* __rtems__ */
+ return (printf("%.*s", len, data));
+#endif /* __rtems__ */
+}
diff --git a/freebsd/sys/kern/subr_sbuf.c b/freebsd/sys/kern/subr_sbuf.c
index b51ed52c..42e6f8f0 100644
--- a/freebsd/sys/kern/subr_sbuf.c
+++ b/freebsd/sys/kern/subr_sbuf.c
@@ -58,11 +58,11 @@ __FBSDID("$FreeBSD$");
#ifdef _KERNEL
static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
-#define SBMALLOC(size) malloc(size, M_SBUF, M_WAITOK|M_ZERO)
+#define SBMALLOC(size, flags) malloc(size, M_SBUF, (flags) | M_ZERO)
#define SBFREE(buf) free(buf, M_SBUF)
#else /* _KERNEL */
#define KASSERT(e, m)
-#define SBMALLOC(size) calloc(1, size)
+#define SBMALLOC(size, flags) calloc(1, size)
#define SBFREE(buf) free(buf)
#endif /* _KERNEL */
@@ -72,6 +72,7 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define SBUF_ISDYNAMIC(s) ((s)->s_flags & SBUF_DYNAMIC)
#define SBUF_ISDYNSTRUCT(s) ((s)->s_flags & SBUF_DYNSTRUCT)
#define SBUF_ISFINISHED(s) ((s)->s_flags & SBUF_FINISHED)
+#define SBUF_ISDRAINATEOL(s) ((s)->s_flags & SBUF_DRAINATEOL)
#define SBUF_HASROOM(s) ((s)->s_len < (s)->s_size - 1)
#define SBUF_FREESPACE(s) ((s)->s_size - ((s)->s_len + 1))
#define SBUF_CANEXTEND(s) ((s)->s_flags & SBUF_AUTOEXTEND)
@@ -79,6 +80,8 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define SBUF_NULINCLUDED(s) ((s)->s_flags & SBUF_INCLUDENUL)
#define SBUF_ISDRAINTOEOR(s) ((s)->s_flags & SBUF_DRAINTOEOR)
#define SBUF_DODRAINTOEOR(s) (SBUF_ISSECTION(s) && SBUF_ISDRAINTOEOR(s))
+#define SBUF_MALLOCFLAG(s) \
+ (((s)->s_flags & SBUF_NOWAIT) ? M_NOWAIT : M_WAITOK)
/*
* Set / clear flags
@@ -173,7 +176,7 @@ sbuf_extend(struct sbuf *s, int addlen)
if (!SBUF_CANEXTEND(s))
return (-1);
newsize = sbuf_extendsize(s->s_size + addlen);
- newbuf = SBMALLOC(newsize);
+ newbuf = SBMALLOC(newsize, SBUF_MALLOCFLAG(s));
if (newbuf == NULL)
return (-1);
memcpy(newbuf, s->s_buf, s->s_size);
@@ -187,39 +190,6 @@ sbuf_extend(struct sbuf *s, int addlen)
}
/*
- * Initialize the internals of an sbuf.
- * If buf is non-NULL, it points to a static or already-allocated string
- * big enough to hold at least length characters.
- */
-static struct sbuf *
-sbuf_newbuf(struct sbuf *s, char *buf, int length, int flags)
-{
-
- memset(s, 0, sizeof(*s));
- s->s_flags = flags;
- s->s_size = length;
- s->s_buf = buf;
-
- if ((s->s_flags & SBUF_AUTOEXTEND) == 0) {
- KASSERT(s->s_size >= SBUF_MINSIZE,
- ("attempt to create an sbuf smaller than %d bytes",
- SBUF_MINSIZE));
- }
-
- if (s->s_buf != NULL)
- return (s);
-
- if ((flags & SBUF_AUTOEXTEND) != 0)
- s->s_size = sbuf_extendsize(s->s_size);
-
- s->s_buf = SBMALLOC(s->s_size);
- if (s->s_buf == NULL)
- return (NULL);
- SBUF_SETFLAG(s, SBUF_DYNAMIC);
- return (s);
-}
-
-/*
* Initialize an sbuf.
* If buf is non-NULL, it points to a static or already-allocated string
* big enough to hold at least length characters.
@@ -232,19 +202,56 @@ sbuf_new(struct sbuf *s, char *buf, int length, int flags)
("attempt to create an sbuf of negative length (%d)", length));
KASSERT((flags & ~SBUF_USRFLAGMSK) == 0,
("%s called with invalid flags", __func__));
+ KASSERT((flags & SBUF_AUTOEXTEND) || length >= SBUF_MINSIZE,
+ ("sbuf buffer %d smaller than minimum %d bytes", length,
+ SBUF_MINSIZE));
flags &= SBUF_USRFLAGMSK;
- if (s != NULL)
- return (sbuf_newbuf(s, buf, length, flags));
- s = SBMALLOC(sizeof(*s));
- if (s == NULL)
- return (NULL);
- if (sbuf_newbuf(s, buf, length, flags) == NULL) {
- SBFREE(s);
- return (NULL);
+ /*
+ * Allocate a 'DYNSTRUCT' sbuf from the heap if a NULL 's' was provided.
+ */
+ if (s == NULL) {
+ s = SBMALLOC(sizeof(*s),
+ (flags & SBUF_NOWAIT) ? M_NOWAIT : M_WAITOK);
+ if (s == NULL)
+ goto out;
+ SBUF_SETFLAG(s, SBUF_DYNSTRUCT);
+ } else {
+ /*
+ * DYNSTRUCT SBMALLOC sbufs are allocated with M_ZERO, but
+ * user-provided sbuf objects must be initialized.
+ */
+ memset(s, 0, sizeof(*s));
+ }
+
+ s->s_flags |= flags;
+ s->s_size = length;
+ s->s_buf = buf;
+ /*
+ * Never-written sbufs do not need \n termination.
+ */
+ SBUF_SETFLAG(s, SBUF_DRAINATEOL);
+
+ /*
+ * Allocate a DYNAMIC, i.e., heap-allocated, data buffer backing the
+ * sbuf, if no buffer was provided.
+ */
+ if (s->s_buf == NULL) {
+ if (SBUF_CANEXTEND(s))
+ s->s_size = sbuf_extendsize(s->s_size);
+ s->s_buf = SBMALLOC(s->s_size, SBUF_MALLOCFLAG(s));
+ if (s->s_buf == NULL)
+ goto out;
+ SBUF_SETFLAG(s, SBUF_DYNAMIC);
+ }
+
+out:
+ if (s != NULL && s->s_buf == NULL) {
+ if (SBUF_ISDYNSTRUCT(s))
+ SBFREE(s);
+ s = NULL;
}
- SBUF_SETFLAG(s, SBUF_DYNSTRUCT);
return (s);
}
@@ -310,6 +317,8 @@ sbuf_clear(struct sbuf *s)
assert_sbuf_integrity(s);
/* don't care if it's finished or not */
+ KASSERT(s->s_drain_func == NULL,
+ ("%s makes no sense on sbuf %p with drain", __func__, s));
SBUF_CLEARFLAG(s, SBUF_FINISHED);
s->s_error = 0;
@@ -344,6 +353,21 @@ sbuf_setpos(struct sbuf *s, ssize_t pos)
}
/*
+ * Drain into a counter. Counts the amount of data without producing
+ * output. Useful for cases like sysctl, where the user may first request
+ * only the size. This avoids pointless allocation and freeing of large
+ * buffers.
+ */
+int
+sbuf_count_drain(void *arg, const char *data __unused, int len)
+{
+ size_t *sizep;
+
+ sizep = (size_t *)arg;
+ *sizep += len;
+ return (len);
+}
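
A sketch of the sizing pass this enables; example_format() stands in for whatever routine renders the data (not part of the patch):

static size_t
example_report_size(void)
{
        struct sbuf sb;
        size_t len = 0;

        sbuf_new(&sb, NULL, 128, SBUF_FIXEDLEN);
        sbuf_set_drain(&sb, sbuf_count_drain, &len);
        example_format(&sb);    /* hypothetical formatter */
        sbuf_finish(&sb);       /* flushes the tail into 'len' */
        sbuf_delete(&sb);
        return (len);           /* size a real run would produce */
}
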
+
+/*
* Set up a drain function and argument on an sbuf to flush data to
* when the sbuf buffer overflows.
*/
@@ -369,6 +393,7 @@ sbuf_drain(struct sbuf *s)
KASSERT(s->s_len > 0, ("Shouldn't drain empty sbuf %p", s));
KASSERT(s->s_error == 0, ("Called %s with error on %p", __func__, s));
+
if (SBUF_DODRAINTOEOR(s) && s->s_rec_off == 0)
return (s->s_error = EDEADLK);
len = s->s_drain_func(s->s_drain_arg, s->s_buf,
@@ -385,8 +410,18 @@ sbuf_drain(struct sbuf *s)
* Fast path for the expected case where all the data was
* drained.
*/
- if (s->s_len == 0)
+ if (s->s_len == 0) {
+ /*
+ * When the s_buf is entirely drained, we need to remember if
+ * the last character was a '\n' or not for
+ * sbuf_nl_terminate().
+ */
+ if (s->s_buf[len - 1] == '\n')
+ SBUF_SETFLAG(s, SBUF_DRAINATEOL);
+ else
+ SBUF_CLEARFLAG(s, SBUF_DRAINATEOL);
return (0);
+ }
/*
* Move the remaining characters to the beginning of the
* string.
@@ -702,6 +737,38 @@ sbuf_putc(struct sbuf *s, int c)
}
/*
+ * Append a trailing newline to a non-empty sbuf, if one is not already
+ * present. Handles sbufs with drain functions correctly.
+ */
+int
+sbuf_nl_terminate(struct sbuf *s)
+{
+
+ assert_sbuf_integrity(s);
+ assert_sbuf_state(s, 0);
+
+ /*
+ * If the s_buf isn't empty, the last byte is simply s_buf[s_len - 1].
+ *
+ * If the s_buf is empty because a drain function drained it, we
+ * remember if the last byte was a \n with the SBUF_DRAINATEOL flag in
+ * sbuf_drain().
+ *
+ * In either case, we only append a \n if the previous character was
+ * something else.
+ */
+ if (s->s_len == 0) {
+ if (!SBUF_ISDRAINATEOL(s))
+ sbuf_put_byte(s, '\n');
+ } else if (s->s_buf[s->s_len - 1] != '\n')
+ sbuf_put_byte(s, '\n');
+
+ if (s->s_error != 0)
+ return (-1);
+ return (0);
+}
+
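
A short usage sketch; the point is that the check still works when a drain has already emptied s_buf, thanks to the SBUF_DRAINATEOL flag (not part of the patch):

static void
example_emit_record(struct sbuf *sb, const char *msg)
{
        sbuf_cat(sb, msg);      /* may or may not end in '\n' */
        sbuf_nl_terminate(sb);  /* appends '\n' only if missing */
}
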
+/*
* Trim whitespace characters from end of an sbuf.
*/
int
diff --git a/freebsd/sys/kern/subr_sleepqueue.c b/freebsd/sys/kern/subr_sleepqueue.c
index 57681cce..9665c02f 100644
--- a/freebsd/sys/kern/subr_sleepqueue.c
+++ b/freebsd/sys/kern/subr_sleepqueue.c
@@ -5,7 +5,6 @@
*
* Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
* Copyright (c) 2015 embedded brains GmbH <rtems@embedded-brains.de>
- * All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -132,7 +131,7 @@ CTASSERT(powerof2(SC_TABLESIZE));
* c - sleep queue chain lock
*/
struct sleepqueue {
- TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
+ struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
u_int sq_blockedcnt[NR_SLEEPQS]; /* (c) N. of blocked threads. */
LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */
LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */
@@ -593,6 +592,19 @@ sleepq_catch_signals(void *wchan, int pri)
} else {
mtx_unlock(&ps->ps_mtx);
}
+
+ /*
+ * Do not go to sleep if this thread was the
+ * ptrace(2) attach leader. cursig() consumed
+ * SIGSTOP from PT_ATTACH, but we usually act
+ * on the signal by interrupting sleep, and
+ * should do that here as well.
+ */
+ if ((td->td_dbgflags & TDB_FSTP) != 0) {
+ if (ret == 0)
+ ret = EINTR;
+ td->td_dbgflags &= ~TDB_FSTP;
+ }
}
/*
* Lock the per-process spinlock prior to dropping the PROC_LOCK
@@ -1127,13 +1139,15 @@ sleepq_init(void *mem, int size, int flags)
}
/*
- * Find the highest priority thread sleeping on a wait channel and resume it.
+ * Find a thread sleeping on a wait channel and resume it.
*/
int
sleepq_signal(void *wchan, int flags, int pri, int queue)
{
+ struct sleepqueue_chain *sc;
struct sleepqueue *sq;
#ifndef __rtems__
+ struct threadqueue *head;
struct thread *td, *besttd;
#else /* __rtems__ */
struct thread *besttd;
@@ -1150,16 +1164,33 @@ sleepq_signal(void *wchan, int flags, int pri, int queue)
("%s: mismatch between sleep/wakeup and cv_*", __func__));
#ifndef __rtems__
- /*
- * Find the highest priority thread on the queue. If there is a
- * tie, use the thread that first appears in the queue as it has
- * been sleeping the longest since threads are always added to
- * the tail of sleep queues.
- */
- besttd = TAILQ_FIRST(&sq->sq_blocked[queue]);
- TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
- if (td->td_priority < besttd->td_priority)
+ head = &sq->sq_blocked[queue];
+ if (flags & SLEEPQ_UNFAIR) {
+ /*
+ * Find the most recently sleeping thread, but try to
+ * skip threads still in the process of a context switch to
+ * avoid spinning on the thread lock.
+ */
+ sc = SC_LOOKUP(wchan);
+ besttd = TAILQ_LAST_FAST(head, thread, td_slpq);
+ while (besttd->td_lock != &sc->sc_lock) {
+ td = TAILQ_PREV_FAST(besttd, head, thread, td_slpq);
+ if (td == NULL)
+ break;
besttd = td;
+ }
+ } else {
+ /*
+ * Find the highest priority thread on the queue. If there
+ * is a tie, use the thread that first appears in the queue
+ * as it has been sleeping the longest since threads are
+ * always added to the tail of sleep queues.
+ */
+ besttd = td = TAILQ_FIRST(head);
+ while ((td = TAILQ_NEXT(td, td_slpq)) != NULL) {
+ if (td->td_priority < besttd->td_priority)
+ besttd = td;
+ }
}
#else /* __rtems__ */
besttd = TAILQ_FIRST(&sq->sq_blocked[queue]);
diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c
index 39d9f939..67e62fc8 100644
--- a/freebsd/sys/kern/subr_taskqueue.c
+++ b/freebsd/sys/kern/subr_taskqueue.c
@@ -841,7 +841,7 @@ taskqueue_thread_enqueue(void *context)
tqp = context;
tq = *tqp;
- wakeup_one(tq);
+ wakeup_any(tq);
}
TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL,
diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c
index cc208d6e..1bc4fa6b 100644
--- a/freebsd/sys/kern/sys_generic.c
+++ b/freebsd/sys/kern/sys_generic.c
@@ -772,7 +772,11 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
fp = NULL; /* fhold() was not called yet */
goto out;
}
- fhold(fp);
+ if (!fhold(fp)) {
+ error = EBADF;
+ fp = NULL;
+ goto out;
+ }
if (locked == LA_SLOCKED) {
FILEDESC_SUNLOCK(fdp);
locked = LA_UNLOCKED;
diff --git a/freebsd/sys/kern/sys_pipe.c b/freebsd/sys/kern/sys_pipe.c
index 050d63a4..d9b502f0 100755
--- a/freebsd/sys/kern/sys_pipe.c
+++ b/freebsd/sys/kern/sys_pipe.c
@@ -177,7 +177,6 @@ struct fileops pipeops = {
};
#else /* __rtems__ */
#define PIPE_NODIRECT
-#define PRIBIO (0)
static int rtems_bsd_pipe_open(rtems_libio_t *iop, const char *path,
int oflag, mode_t mode);
@@ -433,9 +432,7 @@ void
pipe_dtor(struct pipe *dpipe)
{
struct pipe *peer;
- ino_t ino;
- ino = dpipe->pipe_ino;
peer = (dpipe->pipe_state & PIPE_NAMED) != 0 ? dpipe->pipe_peer : NULL;
funsetown(&dpipe->pipe_sigio);
pipeclose(dpipe);
@@ -802,11 +799,9 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
/*
* Direct copy, bypassing a kernel buffer.
*/
- } else if ((size = rpipe->pipe_map.cnt) &&
- (rpipe->pipe_state & PIPE_DIRECTW)) {
+ } else if ((size = rpipe->pipe_map.cnt) != 0) {
if (size > uio->uio_resid)
size = (u_int) uio->uio_resid;
-
PIPE_UNLOCK(rpipe);
error = uiomove_fromphys(rpipe->pipe_map.ms,
rpipe->pipe_map.pos, size, uio);
@@ -817,7 +812,7 @@ pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
rpipe->pipe_map.pos += size;
rpipe->pipe_map.cnt -= size;
if (rpipe->pipe_map.cnt == 0) {
- rpipe->pipe_state &= ~(PIPE_DIRECTW|PIPE_WANTW);
+ rpipe->pipe_state &= ~PIPE_WANTW;
wakeup(rpipe);
}
#endif
@@ -984,32 +979,33 @@ pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
u_int size;
int i;
- PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
- KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
- ("Clone attempt on non-direct write pipe!"));
+ PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
+ KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0,
+ ("%s: PIPE_DIRECTW set on %p", __func__, wpipe));
+ KASSERT(wpipe->pipe_map.cnt == 0,
+ ("%s: pipe map for %p contains residual data", __func__, wpipe));
if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size)
size = wpipe->pipe_buffer.size;
else
size = uio->uio_iov->iov_len;
- if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+ wpipe->pipe_state |= PIPE_DIRECTW;
+ PIPE_UNLOCK(wpipe);
+ i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
(vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
- wpipe->pipe_map.ms, PIPENPAGES)) < 0)
+ wpipe->pipe_map.ms, PIPENPAGES);
+ PIPE_LOCK(wpipe);
+ if (i < 0) {
+ wpipe->pipe_state &= ~PIPE_DIRECTW;
return (EFAULT);
+ }
-/*
- * set up the control block
- */
wpipe->pipe_map.npages = i;
wpipe->pipe_map.pos =
((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
wpipe->pipe_map.cnt = size;
-/*
- * and update the uio data
- */
-
uio->uio_iov->iov_len -= size;
uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
if (uio->uio_iov->iov_len == 0)
@@ -1020,13 +1016,19 @@ pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
}
/*
- * unmap and unwire the process buffer
+ * Unwire the process buffer.
*/
static void
pipe_destroy_write_buffer(struct pipe *wpipe)
{
PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
+ KASSERT((wpipe->pipe_state & PIPE_DIRECTW) != 0,
+ ("%s: PIPE_DIRECTW not set on %p", __func__, wpipe));
+ KASSERT(wpipe->pipe_map.cnt == 0,
+ ("%s: pipe map for %p contains residual data", __func__, wpipe));
+
+ wpipe->pipe_state &= ~PIPE_DIRECTW;
vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages);
wpipe->pipe_map.npages = 0;
}
@@ -1045,13 +1047,16 @@ pipe_clone_write_buffer(struct pipe *wpipe)
int pos;
PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
+ KASSERT((wpipe->pipe_state & PIPE_DIRECTW) != 0,
+ ("%s: PIPE_DIRECTW not set on %p", __func__, wpipe));
+
size = wpipe->pipe_map.cnt;
pos = wpipe->pipe_map.pos;
+ wpipe->pipe_map.cnt = 0;
wpipe->pipe_buffer.in = size;
wpipe->pipe_buffer.out = 0;
wpipe->pipe_buffer.cnt = size;
- wpipe->pipe_state &= ~PIPE_DIRECTW;
PIPE_UNLOCK(wpipe);
iov.iov_base = wpipe->pipe_buffer.buffer;
@@ -1090,7 +1095,7 @@ retry:
pipeunlock(wpipe);
goto error1;
}
- while (wpipe->pipe_state & PIPE_DIRECTW) {
+ if (wpipe->pipe_state & PIPE_DIRECTW) {
if (wpipe->pipe_state & PIPE_WANTR) {
wpipe->pipe_state &= ~PIPE_WANTR;
wakeup(wpipe);
@@ -1105,7 +1110,6 @@ retry:
else
goto retry;
}
- wpipe->pipe_map.cnt = 0; /* transfer not ready yet */
if (wpipe->pipe_buffer.cnt > 0) {
if (wpipe->pipe_state & PIPE_WANTR) {
wpipe->pipe_state &= ~PIPE_WANTR;
@@ -1122,20 +1126,15 @@ retry:
goto retry;
}
- wpipe->pipe_state |= PIPE_DIRECTW;
-
- PIPE_UNLOCK(wpipe);
error = pipe_build_write_buffer(wpipe, uio);
- PIPE_LOCK(wpipe);
if (error) {
- wpipe->pipe_state &= ~PIPE_DIRECTW;
pipeunlock(wpipe);
goto error1;
}
- error = 0;
- while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
+ while (wpipe->pipe_map.cnt != 0) {
if (wpipe->pipe_state & PIPE_EOF) {
+ wpipe->pipe_map.cnt = 0;
pipe_destroy_write_buffer(wpipe);
pipeselwakeup(wpipe);
pipeunlock(wpipe);
@@ -1152,20 +1151,19 @@ retry:
error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
"pipdwt", 0);
pipelock(wpipe, 0);
+ if (error != 0)
+ break;
}
if (wpipe->pipe_state & PIPE_EOF)
error = EPIPE;
- if (wpipe->pipe_state & PIPE_DIRECTW) {
- /*
- * this bit of trickery substitutes a kernel buffer for
- * the process that might be going away.
- */
+ if (error == EINTR || error == ERESTART)
pipe_clone_write_buffer(wpipe);
- } else {
+ else
pipe_destroy_write_buffer(wpipe);
- }
pipeunlock(wpipe);
+ KASSERT((wpipe->pipe_state & PIPE_DIRECTW) == 0,
+ ("pipe %p leaked PIPE_DIRECTW", wpipe));
return (error);
error1:
@@ -1290,7 +1288,7 @@ pipe_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
* pipe buffer. We break out if a signal occurs or the
* reader goes away.
*/
- if (wpipe->pipe_state & PIPE_DIRECTW) {
+ if (wpipe->pipe_map.cnt != 0) {
if (wpipe->pipe_state & PIPE_WANTR) {
wpipe->pipe_state &= ~PIPE_WANTR;
wakeup(wpipe);
@@ -1586,7 +1584,7 @@ pipe_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
PIPE_UNLOCK(mpipe);
return (0);
}
- if (mpipe->pipe_state & PIPE_DIRECTW)
+ if (mpipe->pipe_map.cnt != 0)
*(int *)data = mpipe->pipe_map.cnt;
else
*(int *)data = mpipe->pipe_buffer.cnt;
@@ -1663,8 +1661,7 @@ pipe_poll(struct file *fp, int events, struct ucred *active_cred,
#else /* __rtems__ */
if (rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD && events & (POLLIN | POLLRDNORM))
#endif /* __rtems__ */
- if ((rpipe->pipe_state & PIPE_DIRECTW) ||
- (rpipe->pipe_buffer.cnt > 0))
+ if (rpipe->pipe_map.cnt > 0 || rpipe->pipe_buffer.cnt > 0)
revents |= events & (POLLIN | POLLRDNORM);
#ifndef __rtems__
@@ -1674,7 +1671,7 @@ pipe_poll(struct file *fp, int events, struct ucred *active_cred,
#endif /* __rtems__ */
if (wpipe->pipe_present != PIPE_ACTIVE ||
(wpipe->pipe_state & PIPE_EOF) ||
- (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
+ ((wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF ||
wpipe->pipe_buffer.size == 0)))
revents |= events & (POLLOUT | POLLWRNORM);
@@ -1683,7 +1680,7 @@ pipe_poll(struct file *fp, int events, struct ucred *active_cred,
(POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
#ifndef __rtems__
if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents &&
- fp->f_seqcount == rpipe->pipe_wgen)
+ fp->f_pipegen == rpipe->pipe_wgen)
#else /* __rtems__ */
if (rpipe->pipe_state & PIPE_NAMED && rtems_bsd_libio_flags_to_fflag(fp->f_io.flags) & FREAD && levents)
#endif /* __rtems__ */
@@ -1792,7 +1789,7 @@ pipe_stat(struct pipe *pipe, struct stat *ub)
#endif /* __rtems__ */
ub->st_mode = S_IFIFO;
ub->st_blksize = PAGE_SIZE;
- if (pipe->pipe_state & PIPE_DIRECTW)
+ if (pipe->pipe_map.cnt != 0)
ub->st_size = pipe->pipe_map.cnt;
else
ub->st_size = pipe->pipe_buffer.cnt;
@@ -2081,7 +2078,7 @@ filt_piperead(struct knote *kn, long hint)
PIPE_LOCK_ASSERT(rpipe, MA_OWNED);
kn->kn_data = rpipe->pipe_buffer.cnt;
- if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
+ if (kn->kn_data == 0)
kn->kn_data = rpipe->pipe_map.cnt;
if ((rpipe->pipe_state & PIPE_EOF) ||
@@ -2099,15 +2096,19 @@ static int
filt_pipewrite(struct knote *kn, long hint)
{
struct pipe *wpipe;
-
+
+ /*
+ * If this end of the pipe is closed, the knote was removed from the
+ * knlist and the list lock (i.e., the pipe lock) is therefore not held.
+ */
wpipe = kn->kn_hook;
- PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
if (wpipe->pipe_present != PIPE_ACTIVE ||
(wpipe->pipe_state & PIPE_EOF)) {
kn->kn_data = 0;
kn->kn_flags |= EV_EOF;
return (1);
}
+ PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
kn->kn_data = (wpipe->pipe_buffer.size > 0) ?
(wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) : PIPE_BUF;
if (wpipe->pipe_state & PIPE_DIRECTW)
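The sys_pipe.c hunks above retire PIPE_DIRECTW as the "data pending" indicator: readers, poll(), stat(), and the kqueue filters now test pipe_map.cnt directly, and the flag only marks that a direct write has wired its pages. A minimal sketch of the resulting availability check, assuming kernel context with the pipe lock held; pipe_data_ready is a hypothetical helper, not part of this patch:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pipe.h>

/* Hypothetical helper mirroring the reworked checks above. */
static int
pipe_data_ready(struct pipe *rpipe)
{
	PIPE_LOCK_ASSERT(rpipe, MA_OWNED);
	/* Direct-write pages or the in-kernel buffer may hold data. */
	return (rpipe->pipe_map.cnt > 0 || rpipe->pipe_buffer.cnt > 0);
}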
diff --git a/freebsd/sys/kern/tty.c b/freebsd/sys/kern/tty.c
index 5d9c8a57..ee46a44f 100644
--- a/freebsd/sys/kern/tty.c
+++ b/freebsd/sys/kern/tty.c
@@ -95,7 +95,7 @@ static const char *dev_console_filename;
FLUSHO|NOKERNINFO|NOFLSH)
#define TTYSUP_CFLAG (CIGNORE|CSIZE|CSTOPB|CREAD|PARENB|PARODD|\
HUPCL|CLOCAL|CCTS_OFLOW|CRTS_IFLOW|CDTR_IFLOW|\
- CDSR_OFLOW|CCAR_OFLOW)
+ CDSR_OFLOW|CCAR_OFLOW|CNO_RTSDTR)
#define TTY_CALLOUT(tp,d) (dev2unit(d) & TTYUNIT_CALLOUT)
@@ -336,7 +336,8 @@ ttydev_open(struct cdev *dev, int oflags, int devtype __unused,
if (TTY_CALLOUT(tp, dev) || dev == dev_console)
tp->t_termios.c_cflag |= CLOCAL;
- ttydevsw_modem(tp, SER_DTR|SER_RTS, 0);
+ if ((tp->t_termios.c_cflag & CNO_RTSDTR) == 0)
+ ttydevsw_modem(tp, SER_DTR|SER_RTS, 0);
error = ttydevsw_open(tp);
if (error != 0)
@@ -1147,6 +1148,9 @@ tty_rel_free(struct tty *tp)
return;
}
+ /* Stop asynchronous I/O. */
+ funsetown(&tp->t_sigio);
+
/* TTY can be deallocated. */
dev = tp->t_dev;
tp->t_dev = NULL;
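The new CNO_RTSDTR cflag lets an opener suppress the automatic assertion of DTR and RTS in ttydev_open(). A hedged userspace sketch, assuming a FreeBSD libc that exposes the flag; the device path and the use of the .init initial-state device are illustrative, not mandated by this patch:

/* Sketch: request that subsequent opens not raise DTR/RTS. */
#include <fcntl.h>
#include <termios.h>
#include <unistd.h>

int
main(void)
{
	struct termios t;
	int fd;

	fd = open("/dev/ttyu0.init", O_RDWR);	/* example initial-state device */
	if (fd < 0)
		return (1);
	if (tcgetattr(fd, &t) == 0) {
		t.c_cflag |= CNO_RTSDTR;	/* skip ttydevsw_modem() on open */
		(void)tcsetattr(fd, TCSANOW, &t);
	}
	(void)close(fd);
	return (0);
}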
diff --git a/freebsd/sys/kern/uipc_mbuf.c b/freebsd/sys/kern/uipc_mbuf.c
index 185d14a0..2f1768da 100644
--- a/freebsd/sys/kern/uipc_mbuf.c
+++ b/freebsd/sys/kern/uipc_mbuf.c
@@ -51,7 +51,11 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/uio.h>
+#include <sys/vmmeter.h>
#include <sys/sdt.h>
+#include <vm/vm.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_page.h>
SDT_PROBE_DEFINE5_XLATE(sdt, , , m__init,
"struct mbuf *", "mbufinfo_t *",
@@ -204,7 +208,7 @@ mb_dupcl(struct mbuf *n, struct mbuf *m)
else
bcopy(&m->m_ext, &n->m_ext, m_ext_copylen);
n->m_flags |= M_EXT;
- n->m_flags |= m->m_flags & M_RDONLY;
+ n->m_flags |= m->m_flags & (M_RDONLY | M_NOMAP);
/* See if this is the mbuf that holds the embedded refcount. */
if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
@@ -248,7 +252,8 @@ m_demote(struct mbuf *m0, int all, int flags)
__func__, m, m0));
if (m->m_flags & M_PKTHDR)
m_demote_pkthdr(m);
- m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags);
+ m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE |
+ M_NOMAP | flags);
}
}
@@ -343,6 +348,9 @@ m_pkthdr_init(struct mbuf *m, int how)
#endif
m->m_data = m->m_pktdat;
bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
+#ifdef NUMA
+ m->m_pkthdr.numa_domain = M_NODOM;
+#endif
#ifdef MAC
/* If the label init fails, fail the alloc */
error = mac_mbuf_init(m, how);
@@ -375,12 +383,17 @@ m_move_pkthdr(struct mbuf *to, struct mbuf *from)
if (to->m_flags & M_PKTHDR)
m_tag_delete_chain(to, NULL);
#endif
- to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
+ to->m_flags = (from->m_flags & M_COPYFLAGS) |
+ (to->m_flags & (M_EXT | M_NOMAP));
if ((to->m_flags & M_EXT) == 0)
to->m_data = to->m_pktdat;
to->m_pkthdr = from->m_pkthdr; /* especially tags */
SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */
from->m_flags &= ~M_PKTHDR;
+ if (from->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ from->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
+ from->m_pkthdr.snd_tag = NULL;
+ }
}
/*
@@ -409,10 +422,13 @@ m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
if (to->m_flags & M_PKTHDR)
m_tag_delete_chain(to, NULL);
#endif
- to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
+ to->m_flags = (from->m_flags & M_COPYFLAGS) |
+ (to->m_flags & (M_EXT | M_NOMAP));
if ((to->m_flags & M_EXT) == 0)
to->m_data = to->m_pktdat;
to->m_pkthdr = from->m_pkthdr;
+ if (from->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ m_snd_tag_ref(from->m_pkthdr.snd_tag);
SLIST_INIT(&to->m_pkthdr.tags);
return (m_tag_copy_chain(to, from, how));
}
@@ -572,6 +588,32 @@ nospace:
return (NULL);
}
+#ifndef __rtems__
+static void
+m_copyfromunmapped(const struct mbuf *m, int off, int len, caddr_t cp)
+{
+ struct iovec iov;
+ struct uio uio;
+ int error;
+
+ KASSERT(off >= 0, ("m_copyfromunmapped: negative off %d", off));
+ KASSERT(len >= 0, ("m_copyfromunmapped: negative len %d", len));
+ KASSERT(off < m->m_len,
+ ("m_copyfromunmapped: off exceeds mbuf length"));
+ iov.iov_base = cp;
+ iov.iov_len = len;
+ uio.uio_resid = len;
+ uio.uio_iov = &iov;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = 0;
+ uio.uio_rw = UIO_READ;
+ error = m_unmappedtouio(m, off, &uio, len);
+ KASSERT(error == 0, ("m_unmappedtouio failed: off %d, len %d", off,
+ len));
+}
+#endif /* __rtems__ */
+
/*
* Copy data from an mbuf chain starting "off" bytes from the beginning,
* continuing for "len" bytes, into the indicated buffer.
@@ -593,7 +635,12 @@ m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
while (len > 0) {
KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
count = min(m->m_len - off, len);
- bcopy(mtod(m, caddr_t) + off, cp, count);
+#ifndef __rtems__
+ if ((m->m_flags & M_NOMAP) != 0)
+ m_copyfromunmapped(m, off, count, cp);
+ else
+#endif /* __rtems__ */
+ bcopy(mtod(m, caddr_t) + off, cp, count);
len -= count;
cp += count;
off = 0;
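With the M_NOMAP branch above, m_copydata() transparently handles unmapped mbufs by routing the copy through m_copyfromunmapped(), so existing callers need no changes. A small sketch, assuming kernel context and a packet-header mbuf chain:

#include <sys/param.h>
#include <sys/mbuf.h>

/* Copy the first bytes of a chain that may contain unmapped mbufs. */
static void
peek_header(struct mbuf *m)
{
	char hdr[64];

	if (m->m_pkthdr.len < (int)sizeof(hdr))
		return;
	/* Works for cluster-backed and page-backed (M_NOMAP) data alike. */
	m_copydata(m, 0, sizeof(hdr), hdr);
}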
@@ -688,6 +735,7 @@ m_cat(struct mbuf *m, struct mbuf *n)
m = m->m_next;
while (n) {
if (!M_WRITABLE(m) ||
+ (n->m_flags & M_NOMAP) != 0 ||
M_TRAILINGSPACE(m) < n->m_len) {
/* just join the two chains */
m->m_next = n;
@@ -805,6 +853,9 @@ m_pullup(struct mbuf *n, int len)
int count;
int space;
+ KASSERT((n->m_flags & M_NOMAP) == 0,
+ ("%s: unmapped mbuf %p", __func__, n));
+
/*
* If first mbuf has no cluster, and has room for len bytes
* without shifting current data, pullup into it,
@@ -923,7 +974,12 @@ m_split(struct mbuf *m0, int len0, int wait)
return (NULL);
n->m_next = m->m_next;
m->m_next = NULL;
- n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ n->m_pkthdr.snd_tag =
+ m_snd_tag_ref(m0->m_pkthdr.snd_tag);
+ n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ } else
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
return (n);
@@ -931,7 +987,12 @@ m_split(struct mbuf *m0, int len0, int wait)
n = m_gethdr(wait, m0->m_type);
if (n == NULL)
return (NULL);
- n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ n->m_pkthdr.snd_tag =
+ m_snd_tag_ref(m0->m_pkthdr.snd_tag);
+ n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ } else
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
if (m->m_flags & M_EXT)
@@ -1348,6 +1409,41 @@ nospace:
}
/*
+ * Return the number of fragments an mbuf will use. This is usually
+ * used as a proxy for the number of scatter/gather elements needed by
+ * a DMA engine to access an mbuf. In general mapped mbufs are
+ * assumed to be backed by physically contiguous buffers that only
+ * need a single fragment. Unmapped mbufs, on the other hand, can
+ * span disjoint physical pages.
+ */
+static int
+frags_per_mbuf(struct mbuf *m)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ int frags;
+
+ if ((m->m_flags & M_NOMAP) == 0)
+ return (1);
+
+ /*
+ * The header and trailer are counted as a single fragment
+ * each when present.
+ *
+ * XXX: This overestimates the number of fragments by assuming
+ * all the backing physical pages are disjoint.
+ */
+ ext_pgs = m->m_ext.ext_pgs;
+ frags = 0;
+ if (ext_pgs->hdr_len != 0)
+ frags++;
+ frags += ext_pgs->npgs;
+ if (ext_pgs->trail_len != 0)
+ frags++;
+
+ return (frags);
+}
+
+/*
* Defragment an mbuf chain, returning at most maxfrags separate
* mbufs+clusters. If this is not possible NULL is returned and
* the original mbuf chain is left in its present (potentially
@@ -1367,7 +1463,7 @@ m_collapse(struct mbuf *m0, int how, int maxfrags)
*/
curfrags = 0;
for (m = m0; m != NULL; m = m->m_next)
- curfrags++;
+ curfrags += frags_per_mbuf(m);
/*
* First, try to collapse mbufs. Note that we always collapse
* towards the front so we don't need to deal with moving the
@@ -1382,12 +1478,13 @@ again:
break;
if (M_WRITABLE(m) &&
n->m_len < M_TRAILINGSPACE(m)) {
- bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
- n->m_len);
+ m_copydata(n, 0, n->m_len,
+ mtod(m, char *) + m->m_len);
m->m_len += n->m_len;
m->m_next = n->m_next;
+ curfrags -= frags_per_mbuf(n);
m_free(n);
- if (--curfrags <= maxfrags)
+ if (curfrags <= maxfrags)
return m0;
} else
m = n;
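frags_per_mbuf() makes m_collapse() count scatter/gather elements rather than mbufs, since a single unmapped mbuf can span many disjoint pages. A sketch of the driver-side pattern this accounting serves; NIC_MAX_SEGS is a hypothetical hardware limit:

#include <sys/param.h>
#include <sys/mbuf.h>

#define	NIC_MAX_SEGS	16	/* example S/G limit of a DMA engine */

/* Returns a chain fitting the limit, or NULL with the original intact. */
static struct mbuf *
nic_fit_chain(struct mbuf *m)
{
	return (m_collapse(m, M_NOWAIT, NIC_MAX_SEGS));
}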
@@ -1404,15 +1501,18 @@ again:
m = m_getcl(how, MT_DATA, 0);
if (m == NULL)
goto bad;
- bcopy(mtod(n, void *), mtod(m, void *), n->m_len);
- bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len,
- n2->m_len);
+ m_copydata(n, 0, n->m_len, mtod(m, char *));
+ m_copydata(n2, 0, n2->m_len,
+ mtod(m, char *) + n->m_len);
m->m_len = n->m_len + n2->m_len;
m->m_next = n2->m_next;
*prev = m;
+ curfrags += 1; /* For the new cluster */
+ curfrags -= frags_per_mbuf(n);
+ curfrags -= frags_per_mbuf(n2);
m_free(n);
m_free(n2);
- if (--curfrags <= maxfrags) /* +1 cl -2 mbufs */
+ if (curfrags <= maxfrags)
return m0;
/*
* Still not there, try the normal collapse
@@ -1512,6 +1612,100 @@ nospace:
#endif
+#ifndef __rtems__
+/*
+ * Free pages from mbuf_ext_pgs, assuming they were allocated via
+ * vm_page_alloc() and aren't associated with any object. Complement
+ * to allocator from m_uiotombuf_nomap().
+ */
+void
+mb_free_mext_pgs(struct mbuf *m)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ vm_page_t pg;
+
+ MBUF_EXT_PGS_ASSERT(m);
+ ext_pgs = m->m_ext.ext_pgs;
+ for (int i = 0; i < ext_pgs->npgs; i++) {
+ pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+ vm_page_unwire_noq(pg);
+ vm_page_free(pg);
+ }
+}
+
+static struct mbuf *
+m_uiotombuf_nomap(struct uio *uio, int how, int len, int maxseg, int flags)
+{
+ struct mbuf *m, *mb, *prev;
+ struct mbuf_ext_pgs *pgs;
+ vm_page_t pg_array[MBUF_PEXT_MAX_PGS];
+ int error, length, i, needed;
+ ssize_t total;
+ int pflags = malloc2vm_flags(how) | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP |
+ VM_ALLOC_WIRED;
+
+ /*
+ * len can be zero or an arbitrarily large value bounded by
+ * the total data supplied by the uio.
+ */
+ if (len > 0)
+ total = MIN(uio->uio_resid, len);
+ else
+ total = uio->uio_resid;
+
+ if (maxseg == 0)
+ maxseg = MBUF_PEXT_MAX_PGS * PAGE_SIZE;
+
+ /*
+ * Allocate the pages
+ */
+ m = NULL;
+ while (total > 0) {
+ mb = mb_alloc_ext_pgs(how, (flags & M_PKTHDR),
+ mb_free_mext_pgs);
+ if (mb == NULL)
+ goto failed;
+ if (m == NULL)
+ m = mb;
+ else
+ prev->m_next = mb;
+ prev = mb;
+ pgs = mb->m_ext.ext_pgs;
+ needed = length = MIN(maxseg, total);
+ for (i = 0; needed > 0; i++, needed -= PAGE_SIZE) {
+retry_page:
+ pg_array[i] = vm_page_alloc(NULL, 0, pflags);
+ if (pg_array[i] == NULL) {
+ if (how & M_NOWAIT) {
+ goto failed;
+ } else {
+ vm_wait(NULL);
+ goto retry_page;
+ }
+ }
+ pg_array[i]->flags &= ~PG_ZERO;
+ pgs->pa[i] = VM_PAGE_TO_PHYS(pg_array[i]);
+ pgs->npgs++;
+ }
+ pgs->last_pg_len = length - PAGE_SIZE * (pgs->npgs - 1);
+ MBUF_EXT_PGS_ASSERT_SANITY(pgs);
+ total -= length;
+ error = uiomove_fromphys(pg_array, 0, length, uio);
+ if (error != 0)
+ goto failed;
+ mb->m_len = length;
+ mb->m_ext.ext_size += PAGE_SIZE * pgs->npgs;
+ if (flags & M_PKTHDR)
+ m->m_pkthdr.len += length;
+ }
+ return (m);
+
+failed:
+ m_freem(m);
+ return (NULL);
+}
+#endif /* __rtems__ */
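Passing M_NOMAP to m_uiotombuf() now diverts the copy into m_uiotombuf_nomap(), which builds EXT_PGS mbufs backed by freshly wired pages and registers mb_free_mext_pgs() as their free routine; note that the align argument is reinterpreted as the maximum segment size on this path. A short caller sketch, kernel context assumed:

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/uio.h>

/* Build an unmapped packet-header chain from the data a uio describes. */
static struct mbuf *
copyin_unmapped(struct uio *uio, int maxseg)
{
	/* len 0 means "copy everything the uio supplies", per the comment. */
	return (m_uiotombuf(uio, M_WAITOK, 0, maxseg, M_PKTHDR | M_NOMAP));
}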
+
/*
* Copy the contents of uio into a properly sized mbuf chain.
*/
@@ -1523,6 +1717,11 @@ m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
ssize_t total;
int progress = 0;
+#ifndef __rtems__
+ if (flags & M_NOMAP)
+ return (m_uiotombuf_nomap(uio, how, len, align, flags));
+#endif /* __rtems__ */
+
/*
* len can be zero or an arbitrarily large value bounded by
* the total data supplied by the uio.
@@ -1569,6 +1768,62 @@ m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
}
/*
+ * Copy data from an unmapped mbuf into a uio limited by len if set.
+ */
+int
+m_unmappedtouio(const struct mbuf *m, int m_off, struct uio *uio, int len)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ vm_page_t pg;
+ int error, i, off, pglen, pgoff, seglen, segoff;
+
+ MBUF_EXT_PGS_ASSERT(m);
+ ext_pgs = m->m_ext.ext_pgs;
+ error = 0;
+
+ /* Skip over any data removed from the front. */
+ off = mtod(m, vm_offset_t);
+
+ off += m_off;
+ if (ext_pgs->hdr_len != 0) {
+ if (off >= ext_pgs->hdr_len) {
+ off -= ext_pgs->hdr_len;
+ } else {
+ seglen = ext_pgs->hdr_len - off;
+ segoff = off;
+ seglen = min(seglen, len);
+ off = 0;
+ len -= seglen;
+ error = uiomove(&ext_pgs->hdr[segoff], seglen, uio);
+ }
+ }
+ pgoff = ext_pgs->first_pg_off;
+ for (i = 0; i < ext_pgs->npgs && error == 0 && len > 0; i++) {
+ pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff);
+ if (off >= pglen) {
+ off -= pglen;
+ pgoff = 0;
+ continue;
+ }
+ seglen = pglen - off;
+ segoff = pgoff + off;
+ off = 0;
+ seglen = min(seglen, len);
+ len -= seglen;
+ pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+ error = uiomove_fromphys(&pg, segoff, seglen, uio);
+ pgoff = 0;
+ }
+ if (len != 0 && error == 0) {
+ KASSERT((off + len) <= ext_pgs->trail_len,
+ ("off + len > trail (%d + %d > %d, m_off = %d)", off, len,
+ ext_pgs->trail_len, m_off));
+ error = uiomove(&ext_pgs->trail[off], len, uio);
+ }
+ return (error);
+}
+
+/*
* Copy an mbuf chain into a uio limited by len if set.
*/
int
@@ -1586,7 +1841,12 @@ m_mbuftouio(struct uio *uio, const struct mbuf *m, int len)
for (; m != NULL; m = m->m_next) {
length = min(m->m_len, total - progress);
- error = uiomove(mtod(m, void *), length, uio);
+#ifndef __rtems__
+ if ((m->m_flags & M_NOMAP) != 0)
+ error = m_unmappedtouio(m, 0, uio, length);
+ else
+#endif /* __rtems__ */
+ error = uiomove(mtod(m, void *), length, uio);
if (error)
return (error);
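An EXT_PGS mbuf stores its payload in up to three regions, an optional inline header, npgs wired pages (the first possibly starting at an offset), and an optional inline trailer, and m_unmappedtouio() walks them in that order. A sketch computing the total payload from those fields, kernel context assumed and names taken from the patch:

#include <sys/param.h>
#include <sys/mbuf.h>

static int
ext_pgs_total_len(const struct mbuf *m)
{
	struct mbuf_ext_pgs *ext_pgs;
	int i, len, pgoff;

	MBUF_EXT_PGS_ASSERT(m);
	ext_pgs = m->m_ext.ext_pgs;
	len = ext_pgs->hdr_len;
	pgoff = ext_pgs->first_pg_off;
	for (i = 0; i < ext_pgs->npgs; i++) {
		len += mbuf_ext_pg_len(ext_pgs, i, pgoff);
		pgoff = 0;	/* only the first page carries an offset */
	}
	return (len + ext_pgs->trail_len);
}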
diff --git a/freebsd/sys/kern/uipc_mbuf2.c b/freebsd/sys/kern/uipc_mbuf2.c
index 7dd2840c..6f98b0a2 100644
--- a/freebsd/sys/kern/uipc_mbuf2.c
+++ b/freebsd/sys/kern/uipc_mbuf2.c
@@ -218,7 +218,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp)
goto ok;
}
if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen
- && writable) {
+ && writable && n->m_next->m_len >= tlen) {
n->m_next->m_data -= hlen;
n->m_next->m_len += hlen;
bcopy(mtod(n, caddr_t) + off, mtod(n->m_next, caddr_t), hlen);
diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c
index 0830206a..2305b333 100644
--- a/freebsd/sys/kern/uipc_sockbuf.c
+++ b/freebsd/sys/kern/uipc_sockbuf.c
@@ -36,11 +36,13 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_param.h>
#include <sys/param.h>
#include <sys/aio.h> /* for aio_swake proto */
#include <sys/kernel.h>
+#include <sys/ktls.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -91,28 +93,135 @@ sbm_clrprotoflags(struct mbuf *m, int flags)
}
/*
- * Mark ready "count" mbufs starting with "m".
+ * Compress M_NOTREADY mbufs after they have been readied by sbready().
+ *
+ * sbcompress() skips M_NOTREADY mbufs since the data is not available to
+ * be copied at the time of sbcompress(). This function combines small
+ * mbufs, as sbcompress() does, once the mbufs are ready. 'm0' is the first
+ * mbuf sbready() marked ready, and 'end' is the first mbuf still not
+ * ready.
+ */
+static void
+sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end)
+{
+ struct mbuf *m, *n;
+ int ext_size;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ if ((sb->sb_flags & SB_NOCOALESCE) != 0)
+ return;
+
+ for (m = m0; m != end; m = m->m_next) {
+ MPASS((m->m_flags & M_NOTREADY) == 0);
+
+ /* Compress small unmapped mbufs into plain mbufs. */
+ if ((m->m_flags & M_NOMAP) && m->m_len <= MLEN &&
+ !mbuf_has_tls_session(m)) {
+ MPASS(m->m_flags & M_EXT);
+ ext_size = m->m_ext.ext_size;
+ if (mb_unmapped_compress(m) == 0) {
+ sb->sb_mbcnt -= ext_size;
+ sb->sb_ccnt -= 1;
+ }
+ }
+
+ /*
+ * NB: In sbcompress(), 'n' is the last mbuf in the
+ * socket buffer and 'm' is the new mbuf being copied
+ * into the trailing space of 'n'. Here, the roles
+ * are reversed and 'n' is the next mbuf after 'm'
+ * that is being copied into the trailing space of
+ * 'm'.
+ */
+ n = m->m_next;
+ while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
+ M_WRITABLE(m) &&
+ (m->m_flags & M_NOMAP) == 0 &&
+ !mbuf_has_tls_session(n) &&
+ !mbuf_has_tls_session(m) &&
+ n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
+ n->m_len <= M_TRAILINGSPACE(m) &&
+ m->m_type == n->m_type) {
+ KASSERT(sb->sb_lastrecord != n,
+ ("%s: merging start of record (%p) into previous mbuf (%p)",
+ __func__, n, m));
+ m_copydata(n, 0, n->m_len, mtodo(m, m->m_len));
+ m->m_len += n->m_len;
+ m->m_next = n->m_next;
+ m->m_flags |= n->m_flags & M_EOR;
+ if (sb->sb_mbtail == n)
+ sb->sb_mbtail = m;
+
+ sb->sb_mbcnt -= MSIZE;
+ sb->sb_mcnt -= 1;
+ if (n->m_flags & M_EXT) {
+ sb->sb_mbcnt -= n->m_ext.ext_size;
+ sb->sb_ccnt -= 1;
+ }
+ m_free(n);
+ n = m->m_next;
+ }
+ }
+ SBLASTRECORDCHK(sb);
+ SBLASTMBUFCHK(sb);
+}
+
+/*
+ * Mark ready "count" units of I/O starting with "m". Most mbufs
+ * count as a single unit of I/O except for EXT_PGS-backed mbufs which
+ * can be backed by multiple pages.
*/
int
-sbready(struct sockbuf *sb, struct mbuf *m, int count)
+sbready(struct sockbuf *sb, struct mbuf *m0, int count)
{
+ struct mbuf *m;
u_int blocker;
SOCKBUF_LOCK_ASSERT(sb);
KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
+ KASSERT(count > 0, ("%s: invalid count %d", __func__, count));
+ m = m0;
blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;
- for (int i = 0; i < count; i++, m = m->m_next) {
+ while (count > 0) {
KASSERT(m->m_flags & M_NOTREADY,
("%s: m %p !M_NOTREADY", __func__, m));
+#ifndef __rtems__
+ if ((m->m_flags & M_EXT) != 0 &&
+ m->m_ext.ext_type == EXT_PGS) {
+ if (count < m->m_ext.ext_pgs->nrdy) {
+ m->m_ext.ext_pgs->nrdy -= count;
+ count = 0;
+ break;
+ }
+ count -= m->m_ext.ext_pgs->nrdy;
+ m->m_ext.ext_pgs->nrdy = 0;
+ } else
+#endif /* __rtems__ */
+ count--;
+
m->m_flags &= ~(M_NOTREADY | blocker);
if (blocker)
sb->sb_acc += m->m_len;
+ m = m->m_next;
+ }
+
+ /*
+ * If the first mbuf is still not fully ready because only
+ * some of its backing pages were readied, no further progress
+ * can be made.
+ */
+ if (m0 == m) {
+ MPASS(m->m_flags & M_NOTREADY);
+ return (EINPROGRESS);
}
- if (!blocker)
+ if (!blocker) {
+ sbready_compress(sb, m0, m);
return (EINPROGRESS);
+ }
/* This one was blocking all the queue. */
for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
@@ -123,6 +232,7 @@ sbready(struct sockbuf *sb, struct mbuf *m, int count)
}
sb->sb_fnrdy = m;
+ sbready_compress(sb, m0, m);
return (0);
}
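sbready() now counts units of I/O instead of mbufs: a regular mbuf is one unit, while an EXT_PGS mbuf contributes one unit per not-yet-ready page (its nrdy count), and a partially readied head mbuf simply stays blocked. A producer-side sketch, kernel context assumed and values illustrative:

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socketvar.h>

/* Mark "count" I/O units starting at m0 ready, e.g. after encryption. */
static int
producer_ready(struct sockbuf *sb, struct mbuf *m0, int count)
{
	int error;

	SOCKBUF_LOCK(sb);
	/* 0: the queue head became ready; EINPROGRESS: blocked earlier. */
	error = sbready(sb, m0, count);
	SOCKBUF_UNLOCK(sb);
	return (error);
}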
@@ -571,6 +681,11 @@ sbdestroy(struct sockbuf *sb, struct socket *so)
{
sbrelease_internal(sb, so);
+#ifdef KERN_TLS
+ if (sb->sb_tls_info != NULL)
+ ktls_free(sb->sb_tls_info);
+ sb->sb_tls_info = NULL;
+#endif
}
/*
@@ -734,6 +849,11 @@ sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
SBLASTMBUFCHK(sb);
+#ifdef KERN_TLS
+ if (sb->sb_tls_info != NULL)
+ ktls_seq(sb, m);
+#endif
+
/* Remove all packet headers and mbuf tags to get a pure data chain. */
m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
@@ -1036,12 +1156,13 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
M_WRITABLE(n) &&
((sb->sb_flags & SB_NOCOALESCE) == 0) &&
!(m->m_flags & M_NOTREADY) &&
- !(n->m_flags & M_NOTREADY) &&
+ !(n->m_flags & (M_NOTREADY | M_NOMAP)) &&
+ !mbuf_has_tls_session(m) &&
+ !mbuf_has_tls_session(n) &&
m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
m->m_len <= M_TRAILINGSPACE(n) &&
n->m_type == m->m_type) {
- bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
- (unsigned)m->m_len);
+ m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
n->m_len += m->m_len;
sb->sb_ccc += m->m_len;
if (sb->sb_fnrdy == NULL)
@@ -1052,6 +1173,10 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
m = m_free(m);
continue;
}
+ if (m->m_len <= MLEN && (m->m_flags & M_NOMAP) &&
+ (m->m_flags & M_NOTREADY) == 0 &&
+ !mbuf_has_tls_session(m))
+ (void)mb_unmapped_compress(m);
if (n)
n->m_next = m;
else
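The KERN_TLS pieces in this file rely on reference counting of the per-buffer TLS session: sbdestroy() drops the sb_tls_info reference with ktls_free() when the socket buffer dies, while senders take their own reference with ktls_hold() (see sosend_generic() below). A minimal pairing sketch, kernel context assumed:

#include <sys/param.h>
#include <sys/ktls.h>
#include <sys/socketvar.h>

static void
tls_session_ref_pair(struct sockbuf *sb)
{
	struct ktls_session *tls;

	tls = ktls_hold(sb->sb_tls_info);	/* NULL-safe, as used below */
	if (tls != NULL)
		ktls_free(tls);			/* drop the reference again */
}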
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c
index 380c97dd..c01535c4 100644
--- a/freebsd/sys/kern/uipc_socket.c
+++ b/freebsd/sys/kern/uipc_socket.c
@@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_inet.h>
#include <rtems/bsd/local/opt_inet6.h>
+#include <rtems/bsd/local/opt_kern_tls.h>
#include <rtems/bsd/local/opt_sctp.h>
#include <sys/param.h>
@@ -125,6 +126,7 @@ __FBSDID("$FreeBSD$");
#include <sys/hhook.h>
#include <sys/kernel.h>
#include <sys/khelp.h>
+#include <sys/ktls.h>
#include <sys/event.h>
#include <sys/eventhandler.h>
#include <sys/poll.h>
@@ -143,6 +145,7 @@ __FBSDID("$FreeBSD$");
#include <sys/jail.h>
#include <sys/syslog.h>
#include <netinet/in.h>
+#include <netinet/tcp.h>
#include <net/vnet.h>
@@ -911,6 +914,8 @@ solisten_wakeup(struct socket *sol)
}
SOLISTEN_UNLOCK(sol);
wakeup_one(&sol->sol_comp);
+ if ((sol->so_state & SS_ASYNC) && sol->so_sigio != NULL)
+ pgsigio(&sol->so_sigio, SIGIO, 0);
}
/*
@@ -1067,7 +1072,7 @@ sofree(struct socket *so)
*
* We used to do a lot of socket buffer and socket locking here, as
* well as invoke sorflush() and perform wakeups. The direct call to
- * dom_dispose() and sbrelease_internal() are an inlining of what was
+ * dom_dispose() and sbdestroy() is an inlining of what was
* necessary from sorflush().
*
* Notice that the socket buffer and kqueue state are torn down
@@ -1154,9 +1159,9 @@ drop:
so->so_state |= SS_NOFDREF;
sorele(so);
if (listening) {
- struct socket *sp;
+ struct socket *sp, *tsp;
- TAILQ_FOREACH(sp, &lqueue, so_list) {
+ TAILQ_FOREACH_SAFE(sp, &lqueue, so_list, tsp) {
SOCK_LOCK(sp);
if (sp->so_count == 0) {
SOCK_UNLOCK(sp);
@@ -1197,7 +1202,6 @@ soabort(struct socket *so)
KASSERT(so->so_count == 0, ("soabort: so_count"));
KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
- KASSERT(so->so_qstate == SQ_NONE, ("soabort: !SQ_NONE"));
VNET_SO_ASSERT(so);
if (so->so_proto->pr_usrreqs->pru_abort != NULL)
@@ -1468,7 +1472,15 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
ssize_t resid;
int clen = 0, error, dontroute;
int atomic = sosendallatonce(so) || top;
-
+ int pru_flag;
+#ifdef KERN_TLS
+ struct ktls_session *tls;
+ int tls_enq_cnt, tls_pruflag;
+ uint8_t tls_rtype;
+
+ tls = NULL;
+ tls_rtype = TLS_RLTYPE_APP;
+#endif
if (uio != NULL)
resid = uio->uio_resid;
else
@@ -1502,6 +1514,28 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
if (error)
goto out;
+#ifdef KERN_TLS
+ tls_pruflag = 0;
+ tls = ktls_hold(so->so_snd.sb_tls_info);
+ if (tls != NULL) {
+ if (tls->sw_encrypt != NULL)
+ tls_pruflag = PRUS_NOTREADY;
+
+ if (control != NULL) {
+ struct cmsghdr *cm = mtod(control, struct cmsghdr *);
+
+ if (clen >= sizeof(*cm) &&
+ cm->cmsg_type == TLS_SET_RECORD_TYPE) {
+ tls_rtype = *((uint8_t *)CMSG_DATA(cm));
+ clen = 0;
+ m_freem(control);
+ control = NULL;
+ atomic = 1;
+ }
+ }
+ }
+#endif
+
restart:
do {
SOCKBUF_LOCK(&so->so_snd);
@@ -1551,7 +1585,8 @@ restart:
}
if (space < resid + clen &&
(atomic || space < so->so_snd.sb_lowat || space < clen)) {
- if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) {
+ if ((so->so_state & SS_NBIO) ||
+ (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
SOCKBUF_UNLOCK(&so->so_snd);
error = EWOULDBLOCK;
goto release;
@@ -1578,10 +1613,27 @@ restart:
* is a workaround to prevent protocol send
* methods from panicking.
*/
- top = m_uiotombuf(uio, M_WAITOK, space,
- (atomic ? max_hdr : 0),
- (atomic ? M_PKTHDR : 0) |
- ((flags & MSG_EOR) ? M_EOR : 0));
+#ifdef KERN_TLS
+ if (tls != NULL) {
+ top = m_uiotombuf(uio, M_WAITOK, space,
+ tls->params.max_frame_len,
+ M_NOMAP |
+ ((flags & MSG_EOR) ? M_EOR : 0));
+ if (top != NULL) {
+ error = ktls_frame(top, tls,
+ &tls_enq_cnt, tls_rtype);
+ if (error) {
+ m_freem(top);
+ goto release;
+ }
+ }
+ tls_rtype = TLS_RLTYPE_APP;
+ } else
+#endif
+ top = m_uiotombuf(uio, M_WAITOK, space,
+ (atomic ? max_hdr : 0),
+ (atomic ? M_PKTHDR : 0) |
+ ((flags & MSG_EOR) ? M_EOR : 0));
if (top == NULL) {
error = EFAULT; /* only possible error */
goto release;
@@ -1605,8 +1657,8 @@ restart:
* this.
*/
VNET_SO_ASSERT(so);
- error = (*so->so_proto->pr_usrreqs->pru_send)(so,
- (flags & MSG_OOB) ? PRUS_OOB :
+
+ pru_flag = (flags & MSG_OOB) ? PRUS_OOB :
/*
* If the user set MSG_EOF, the protocol understands
* this flag and nothing left to send then use
@@ -1618,13 +1670,37 @@ restart:
PRUS_EOF :
/* If there is more to send set PRUS_MORETOCOME. */
(flags & MSG_MORETOCOME) ||
- (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
- top, addr, control, td);
+ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;
+
+#ifdef KERN_TLS
+ pru_flag |= tls_pruflag;
+#endif
+
+ error = (*so->so_proto->pr_usrreqs->pru_send)(so,
+ pru_flag, top, addr, control, td);
+
if (dontroute) {
SOCK_LOCK(so);
so->so_options &= ~SO_DONTROUTE;
SOCK_UNLOCK(so);
}
+
+#ifdef KERN_TLS
+ if (tls != NULL && tls->sw_encrypt != NULL) {
+ /*
+ * Note that error is intentionally
+ * ignored.
+ *
+ * Like sendfile(), we rely on the
+ * completion routine (pru_ready())
+ * to free the mbufs in the event that
+ * pru_send() encountered an error and
+ * did not append them to the sockbuf.
+ */
+ soref(so);
+ ktls_enqueue(top, so, tls_enq_cnt);
+ }
+#endif
clen = 0;
control = NULL;
top = NULL;
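In outline, the software-KTLS branch added to sosend_generic() above frames unmapped records, queues them as not ready, and defers readiness to the encryption worker; the names below come from the hunk and error handling is elided:

/*
 * 1. tls = ktls_hold(so->so_snd.sb_tls_info);   take a session reference
 * 2. top = m_uiotombuf(uio, M_WAITOK, space,
 *        tls->params.max_frame_len, M_NOMAP);   unmapped record mbufs
 * 3. ktls_frame(top, tls, &tls_enq_cnt, tls_rtype);  prepend record headers
 * 4. pru_send(so, pru_flag | PRUS_NOTREADY, top, ...);  queue, not sendable
 * 5. soref(so); ktls_enqueue(top, so, tls_enq_cnt);
 *    a worker encrypts in place and pru_ready() later marks the data ready
 */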
@@ -1636,6 +1712,10 @@ restart:
release:
sbunlock(&so->so_snd);
out:
+#ifdef KERN_TLS
+ if (tls != NULL)
+ ktls_free(tls);
+#endif
if (top != NULL)
m_freem(top);
if (control != NULL)
@@ -2011,7 +2091,13 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
SOCKBUF_UNLOCK(&so->so_rcv);
- error = uiomove(mtod(m, char *) + moff, (int)len, uio);
+#ifndef __rtems__
+ if ((m->m_flags & M_NOMAP) != 0)
+ error = m_unmappedtouio(m, moff, uio, (int)len);
+ else
+#endif /* __rtems__ */
+ error = uiomove(mtod(m, char *) + moff,
+ (int)len, uio);
SOCKBUF_LOCK(&so->so_rcv);
if (error) {
/*
@@ -2225,7 +2311,7 @@ soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
/* Prevent other readers from entering the socket. */
error = sblock(sb, SBLOCKWAIT(flags));
if (error)
- goto out;
+ return (error);
SOCKBUF_LOCK(sb);
/* Easy one, no space to copyout anything. */
@@ -2793,12 +2879,10 @@ sosetopt(struct socket *so, struct sockopt *sopt)
CURVNET_SET(so->so_vnet);
error = 0;
if (sopt->sopt_level != SOL_SOCKET) {
- if (so->so_proto->pr_ctloutput != NULL) {
+ if (so->so_proto->pr_ctloutput != NULL)
error = (*so->so_proto->pr_ctloutput)(so, sopt);
- CURVNET_RESTORE();
- return (error);
- }
- error = ENOPROTOOPT;
+ else
+ error = ENOPROTOOPT;
} else {
switch (sopt->sopt_name) {
case SO_ACCEPTFILTER:
@@ -2811,7 +2895,12 @@ sosetopt(struct socket *so, struct sockopt *sopt)
error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
if (error)
goto bad;
-
+ if (l.l_linger < 0 ||
+ l.l_linger > USHRT_MAX ||
+ l.l_linger > (INT_MAX / hz)) {
+ error = EDOM;
+ goto bad;
+ }
SOCK_LOCK(so);
so->so_linger = l.l_linger;
if (l.l_onoff)
@@ -4162,6 +4251,9 @@ void
so_linger_set(struct socket *so, int val)
{
+ KASSERT(val >= 0 && val <= USHRT_MAX && val <= (INT_MAX / hz),
+ ("%s: val %d out of range", __func__, val));
+
so->so_linger = val;
}
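With the EDOM check above, out-of-range SO_LINGER values now fail loudly instead of being silently truncated when stored into the short-sized so_linger field: l_linger must be non-negative, fit in a u_short, and survive multiplication by hz. A runnable userspace sketch; the socket and timeout are illustrative:

#include <sys/socket.h>

/* Returns 0 on success; seconds beyond USHRT_MAX or INT_MAX / hz
 * now yield EDOM instead of a quietly wrapped linger time. */
static int
set_linger(int s, int seconds)
{
	struct linger l = { .l_onoff = 1, .l_linger = seconds };

	return (setsockopt(s, SOL_SOCKET, SO_LINGER, &l, sizeof(l)));
}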
diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c
index 529268a9..39e96abe 100644
--- a/freebsd/sys/kern/uipc_syscalls.c
+++ b/freebsd/sys/kern/uipc_syscalls.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syscallsubr.h>
+#include <sys/sysent.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/unpcb.h>
@@ -451,7 +452,8 @@ accept1(td, s, uname, anamelen, flags)
if (error == 0 && uname != NULL) {
#ifdef COMPAT_OLDSOCK
- if (flags & ACCEPT4_COMPAT)
+ if (SV_PROC_FLAG(td->td_proc, SV_AOUT) &&
+ (flags & ACCEPT4_COMPAT) != 0)
((struct osockaddr *)name)->sa_family =
name->sa_family;
#endif
@@ -968,7 +970,8 @@ sendit(struct thread *td, int s, struct msghdr *mp, int flags)
if (mp->msg_control) {
if (mp->msg_controllen < sizeof(struct cmsghdr)
#ifdef COMPAT_OLDSOCK
- && mp->msg_flags != MSG_COMPAT
+ && (mp->msg_flags != MSG_COMPAT ||
+ !SV_PROC_FLAG(td->td_proc, SV_AOUT))
#endif
) {
error = EINVAL;
@@ -979,7 +982,8 @@ sendit(struct thread *td, int s, struct msghdr *mp, int flags)
if (error != 0)
goto bad;
#ifdef COMPAT_OLDSOCK
- if (mp->msg_flags == MSG_COMPAT) {
+ if (mp->msg_flags == MSG_COMPAT &&
+ SV_PROC_FLAG(td->td_proc, SV_AOUT)) {
struct cmsghdr *cm;
M_PREPEND(control, sizeof(*cm), M_WAITOK);
@@ -1120,7 +1124,8 @@ sys_sendto(struct thread *td, struct sendto_args *uap)
msg.msg_iovlen = 1;
msg.msg_control = 0;
#ifdef COMPAT_OLDSOCK
- msg.msg_flags = 0;
+ if (SV_PROC_FLAG(td->td_proc, SV_AOUT))
+ msg.msg_flags = 0;
#endif
aiov.iov_base = __DECONST(void *, uap->buf);
aiov.iov_len = uap->len;
@@ -1239,7 +1244,8 @@ sys_sendmsg(struct thread *td, struct sendmsg_args *uap)
return (error);
msg.msg_iov = iov;
#ifdef COMPAT_OLDSOCK
- msg.msg_flags = 0;
+ if (SV_PROC_FLAG(td->td_proc, SV_AOUT))
+ msg.msg_flags = 0;
#endif
error = sendit(td, uap->s, &msg, uap->flags);
free(iov, M_IOV);
@@ -1356,7 +1362,8 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
/* save sa_len before it is destroyed by MSG_COMPAT */
len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
- if (mp->msg_flags & MSG_COMPAT)
+ if ((mp->msg_flags & MSG_COMPAT) != 0 &&
+ SV_PROC_FLAG(td->td_proc, SV_AOUT))
((struct osockaddr *)fromsa)->sa_family =
fromsa->sa_family;
#endif
@@ -1379,7 +1386,8 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
* If we receive rights, trim the cmsghdr; anything else
* is tossed.
*/
- if (control && mp->msg_flags & MSG_COMPAT) {
+ if (control && (mp->msg_flags & MSG_COMPAT) != 0 &&
+ SV_PROC_FLAG(td->td_proc, SV_AOUT)) {
if (mtod(control, struct cmsghdr *)->cmsg_level !=
SOL_SOCKET ||
mtod(control, struct cmsghdr *)->cmsg_type !=
@@ -1438,7 +1446,8 @@ recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp)
if (namelenp != NULL) {
error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
#ifdef COMPAT_OLDSOCK
- if (mp->msg_flags & MSG_COMPAT)
+ if ((mp->msg_flags & MSG_COMPAT) != 0 &&
+ SV_PROC_FLAG(td->td_proc, SV_AOUT))
error = 0; /* old recvfrom didn't check */
#endif
}
@@ -1581,7 +1590,8 @@ sys_recvmsg(struct thread *td, struct recvmsg_args *uap)
return (error);
msg.msg_flags = uap->flags;
#ifdef COMPAT_OLDSOCK
- msg.msg_flags &= ~MSG_COMPAT;
+ if (SV_PROC_FLAG(td->td_proc, SV_AOUT))
+ msg.msg_flags &= ~MSG_COMPAT;
#endif
uiov = msg.msg_iov;
msg.msg_iov = iov;
@@ -1863,7 +1873,7 @@ getsockname1(struct thread *td, struct getsockname_args *uap, int compat)
if (len != 0) {
#ifdef COMPAT_OLDSOCK
- if (compat)
+ if (compat && SV_PROC_FLAG(td->td_proc, SV_AOUT))
((struct osockaddr *)sa)->sa_family = sa->sa_family;
#endif
error = copyout(sa, uap->asa, (u_int)len);
@@ -1978,7 +1988,7 @@ getpeername1(struct thread *td, struct getpeername_args *uap, int compat)
if (len != 0) {
#ifdef COMPAT_OLDSOCK
- if (compat)
+ if (compat && SV_PROC_FLAG(td->td_proc, SV_AOUT))
((struct osockaddr *)sa)->sa_family = sa->sa_family;
#endif
error = copyout(sa, uap->asa, (u_int)len);
@@ -2083,7 +2093,8 @@ sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type)
if (buflen > MLEN) {
#ifdef COMPAT_OLDSOCK
- if (type == MT_SONAME && buflen <= 112)
+ if (type == MT_SONAME && buflen <= 112 &&
+ SV_CURPROC_FLAG(SV_AOUT))
buflen = MLEN; /* unix domain compat. hack */
else
#endif
@@ -2101,7 +2112,8 @@ sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type)
sa = mtod(m, struct sockaddr *);
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
- if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
+ if (sa->sa_family == 0 && sa->sa_len < AF_MAX &&
+ SV_CURPROC_FLAG(SV_AOUT))
sa->sa_family = sa->sa_len;
#endif
sa->sa_len = buflen;
@@ -2129,7 +2141,8 @@ getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, size_t len)
free(sa, M_SONAME);
} else {
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
- if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
+ if (sa->sa_family == 0 && sa->sa_len < AF_MAX &&
+ SV_CURPROC_FLAG(SV_AOUT))
sa->sa_family = sa->sa_len;
#endif
sa->sa_len = len;
@@ -2180,8 +2193,10 @@ m_dispose_extcontrolm(struct mbuf *m)
fd = *fds++;
error = fget(td, fd, &cap_no_rights,
&fp);
- if (error == 0)
+ if (error == 0) {
fdclose(td, fp, fd);
+ fdrop(fp, td);
+ }
}
}
clen -= datalen;
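The m_dispose_extcontrolm() hunk above fixes a struct file leak: fget() returns a held reference that fdclose() does not consume, so the close must be paired with fdrop(). A sketch of the corrected discipline, kernel context assumed:

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/file.h>
#include <sys/filedesc.h>

/* Close descriptor "fd" without leaking the reference fget() takes. */
static void
close_held_fd(struct thread *td, int fd)
{
	struct file *fp;

	if (fget(td, fd, &cap_no_rights, &fp) == 0) {
		fdclose(td, fp, fd);	/* release the descriptor slot */
		fdrop(fp, td);		/* drop fget()'s reference */
	}
}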
diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c
index 6b34dcb8..39f28b4b 100644
--- a/freebsd/sys/kern/uipc_usrreq.c
+++ b/freebsd/sys/kern/uipc_usrreq.c
@@ -1032,7 +1032,7 @@ uipc_listen(struct socket *so, int backlog, struct thread *td)
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error == 0) {
- cru2x(td->td_ucred, &unp->unp_peercred);
+ cru2xt(td, &unp->unp_peercred);
solisten_proto(so, backlog);
}
SOCK_UNLOCK(so);
@@ -1837,7 +1837,7 @@ void
unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
struct unpcb *server_unp, struct unpcb *listen_unp)
{
- cru2x(td->td_ucred, &client_unp->unp_peercred);
+ cru2xt(td, &client_unp->unp_peercred);
client_unp->unp_flags |= UNP_HAVEPC;
memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
@@ -2306,30 +2306,53 @@ unp_init(void)
}
#ifndef __rtems__
+static void
+unp_internalize_cleanup_rights(struct mbuf *control)
+{
+ struct cmsghdr *cp;
+ struct mbuf *m;
+ void *data;
+ socklen_t datalen;
+
+ for (m = control; m != NULL; m = m->m_next) {
+ cp = mtod(m, struct cmsghdr *);
+ if (cp->cmsg_level != SOL_SOCKET ||
+ cp->cmsg_type != SCM_RIGHTS)
+ continue;
+ data = CMSG_DATA(cp);
+ datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data;
+ unp_freerights(data, datalen / sizeof(struct filedesc *));
+ }
+}
+
static int
unp_internalize(struct mbuf **controlp, struct thread *td)
{
- struct mbuf *control = *controlp;
- struct proc *p = td->td_proc;
- struct filedesc *fdesc = p->p_fd;
+ struct mbuf *control, **initial_controlp;
+ struct proc *p;
+ struct filedesc *fdesc;
struct bintime *bt;
- struct cmsghdr *cm = mtod(control, struct cmsghdr *);
+ struct cmsghdr *cm;
struct cmsgcred *cmcred;
struct filedescent *fde, **fdep, *fdev;
struct file *fp;
struct timeval *tv;
struct timespec *ts;
- int i, *fdp;
void *data;
- socklen_t clen = control->m_len, datalen;
- int error, oldfds;
+ socklen_t clen, datalen;
+ int i, j, error, *fdp, oldfds;
u_int newlen;
UNP_LINK_UNLOCK_ASSERT();
+ p = td->td_proc;
+ fdesc = p->p_fd;
error = 0;
+ control = *controlp;
+ clen = control->m_len;
*controlp = NULL;
- while (cm != NULL) {
+ initial_controlp = controlp;
+ for (cm = mtod(control, struct cmsghdr *); cm != NULL;) {
if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
|| cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
error = EINVAL;
@@ -2400,6 +2423,19 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
goto out;
}
fdp = data;
+ for (i = 0; i < oldfds; i++, fdp++) {
+ if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) {
+ fdp = data;
+ for (j = 0; j < i; j++, fdp++) {
+ fdrop(fdesc->fd_ofiles[*fdp].
+ fde_file, td);
+ }
+ FILEDESC_SUNLOCK(fdesc);
+ error = EBADF;
+ goto out;
+ }
+ }
+ fdp = data;
fdep = (struct filedescent **)
CMSG_DATA(mtod(*controlp, struct cmsghdr *));
fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
@@ -2480,6 +2516,8 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
}
out:
+ if (error != 0 && initial_controlp != NULL)
+ unp_internalize_cleanup_rights(*initial_controlp);
m_freem(control);
return (error);
}
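Taken together, the unp_internalize() changes above close two error-path leaks now that fhold() can fail; in outline:

/*
 * 1. While holding the filedesc lock, fhold() each file named in the
 *    SCM_RIGHTS payload; if one hold fails (the reference count would
 *    saturate), fdrop() every hold taken so far and fail with EBADF.
 * 2. If internalization fails after some control messages were built,
 *    unp_internalize_cleanup_rights() walks the partial chain and
 *    releases the SCM_RIGHTS references via unp_freerights().
 */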
@@ -2601,7 +2639,6 @@ unp_internalize_fp(struct file *fp)
unp->unp_file = fp;
unp->unp_msgcount++;
}
- fhold(fp);
unp_rights++;
UNP_LINK_WUNLOCK();
}
@@ -2762,10 +2799,10 @@ unp_gc(__unused void *arg, int pending)
if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
f = unp->unp_file;
if (unp->unp_msgcount == 0 || f == NULL ||
- f->f_count != unp->unp_msgcount)
+ f->f_count != unp->unp_msgcount ||
+ !fhold(f))
continue;
unref[total++] = f;
- fhold(f);
KASSERT(total <= unp_unreachable,
("unp_gc: incorrect unreachable count."));
}
@@ -2942,8 +2979,8 @@ db_print_xucred(int indent, struct xucred *xu)
int comma, i;
db_print_indent(indent);
- db_printf("cr_version: %u cr_uid: %u cr_ngroups: %d\n",
- xu->cr_version, xu->cr_uid, xu->cr_ngroups);
+ db_printf("cr_version: %u cr_uid: %u cr_pid: %d cr_ngroups: %d\n",
+ xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups);
db_print_indent(indent);
db_printf("cr_groups: ");
comma = 0;