Diffstat (limited to 'freebsd/sys/kern/uipc_mbuf.c')
-rw-r--r--  freebsd/sys/kern/uipc_mbuf.c  292
1 file changed, 276 insertions(+), 16 deletions(-)
diff --git a/freebsd/sys/kern/uipc_mbuf.c b/freebsd/sys/kern/uipc_mbuf.c
index 185d14a0..2f1768da 100644
--- a/freebsd/sys/kern/uipc_mbuf.c
+++ b/freebsd/sys/kern/uipc_mbuf.c
@@ -51,7 +51,11 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/uio.h>
+#include <sys/vmmeter.h>
#include <sys/sdt.h>
+#include <vm/vm.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_page.h>
SDT_PROBE_DEFINE5_XLATE(sdt, , , m__init,
"struct mbuf *", "mbufinfo_t *",
@@ -204,7 +208,7 @@ mb_dupcl(struct mbuf *n, struct mbuf *m)
else
bcopy(&m->m_ext, &n->m_ext, m_ext_copylen);
n->m_flags |= M_EXT;
- n->m_flags |= m->m_flags & M_RDONLY;
+ n->m_flags |= m->m_flags & (M_RDONLY | M_NOMAP);
/* See if this is the mbuf that holds the embedded refcount. */
if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
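Because mb_dupcl() now propagates M_NOMAP alongside M_RDONLY, a reference-sharing copy of an unmapped mbuf is itself unmapped, so its payload must not be reached through mtod(). A minimal caller-side sketch (hypothetical names; assumes a chain m holding at least 64 valid bytes):

        struct mbuf *n;
        char buf[64];

        n = m_copym(m, 0, sizeof(buf), M_NOWAIT);
        if (n != NULL && (n->m_flags & M_NOMAP) != 0) {
                /* The copy shares unmapped pages; stage the bytes. */
                m_copydata(n, 0, sizeof(buf), buf);
        }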
@@ -248,7 +252,8 @@ m_demote(struct mbuf *m0, int all, int flags)
__func__, m, m0));
if (m->m_flags & M_PKTHDR)
m_demote_pkthdr(m);
- m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags);
+ m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE |
+ M_NOMAP | flags);
}
}
@@ -343,6 +348,9 @@ m_pkthdr_init(struct mbuf *m, int how)
#endif
m->m_data = m->m_pktdat;
bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
+#ifdef NUMA
+ m->m_pkthdr.numa_domain = M_NODOM;
+#endif
#ifdef MAC
/* If the label init fails, fail the alloc */
error = mac_mbuf_init(m, how);
@@ -375,12 +383,17 @@ m_move_pkthdr(struct mbuf *to, struct mbuf *from)
if (to->m_flags & M_PKTHDR)
m_tag_delete_chain(to, NULL);
#endif
- to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
+ to->m_flags = (from->m_flags & M_COPYFLAGS) |
+ (to->m_flags & (M_EXT | M_NOMAP));
if ((to->m_flags & M_EXT) == 0)
to->m_data = to->m_pktdat;
to->m_pkthdr = from->m_pkthdr; /* especially tags */
SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */
from->m_flags &= ~M_PKTHDR;
+ if (from->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ from->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
+ from->m_pkthdr.snd_tag = NULL;
+ }
}
/*
@@ -409,10 +422,13 @@ m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
if (to->m_flags & M_PKTHDR)
m_tag_delete_chain(to, NULL);
#endif
- to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
+ to->m_flags = (from->m_flags & M_COPYFLAGS) |
+ (to->m_flags & (M_EXT | M_NOMAP));
if ((to->m_flags & M_EXT) == 0)
to->m_data = to->m_pktdat;
to->m_pkthdr = from->m_pkthdr;
+ if (from->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ m_snd_tag_ref(from->m_pkthdr.snd_tag);
SLIST_INIT(&to->m_pkthdr.tags);
return (m_tag_copy_chain(to, from, how));
}
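The two packet-header hunks above encode the send-tag reference contract: m_move_pkthdr() transfers the existing reference to the destination, so the source clears CSUM_SND_TAG without releasing anything, while m_dup_pkthdr() leaves the source intact and takes an additional reference via m_snd_tag_ref(). A sketch of the resulting ownership (hypothetical mbufs a and b; the two calls are alternatives, not a sequence):

        /* Move: the single tag reference now belongs to b. */
        m_move_pkthdr(b, a);

        /*
         * Dup: a and b each hold a reference; each chain releases
         * its own when freed.
         */
        (void)m_dup_pkthdr(b, a, M_NOWAIT);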
@@ -572,6 +588,32 @@ nospace:
return (NULL);
}
+#ifndef __rtems__
+static void
+m_copyfromunmapped(const struct mbuf *m, int off, int len, caddr_t cp)
+{
+ struct iovec iov;
+ struct uio uio;
+ int error;
+
+ KASSERT(off >= 0, ("m_copyfromunmapped: negative off %d", off));
+ KASSERT(len >= 0, ("m_copyfromunmapped: negative len %d", len));
+ KASSERT(off < m->m_len,
+ ("m_copyfromunmapped: len exceeds mbuf length"));
+ iov.iov_base = cp;
+ iov.iov_len = len;
+ uio.uio_resid = len;
+ uio.uio_iov = &iov;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = 0;
+ uio.uio_rw = UIO_READ;
+ error = m_unmappedtouio(m, off, &uio, len);
+ KASSERT(error == 0, ("m_unmappedtouio failed: off %d, len %d", off,
+ len));
+}
+#endif /* __rtems__ */
+
/*
* Copy data from an mbuf chain starting "off" bytes from the beginning,
* continuing for "len" bytes, into the indicated buffer.
@@ -593,7 +635,12 @@ m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
while (len > 0) {
KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
count = min(m->m_len - off, len);
- bcopy(mtod(m, caddr_t) + off, cp, count);
+#ifndef __rtems__
+ if ((m->m_flags & M_NOMAP) != 0)
+ m_copyfromunmapped(m, off, count, cp);
+ else
+#endif /* __rtems__ */
+ bcopy(mtod(m, caddr_t) + off, cp, count);
len -= count;
cp += count;
off = 0;
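With this dispatch in place, m_copydata() stays the safe way to extract bytes from a chain that may contain unmapped mbufs; only the direct mtod() path requires mapped data. For example (a sketch; m and off are assumed to cover at least 32 bytes):

        char buf[32];

        /* Valid regardless of M_NOMAP anywhere in the chain. */
        m_copydata(m, off, sizeof(buf), buf);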
@@ -688,6 +735,7 @@ m_cat(struct mbuf *m, struct mbuf *n)
m = m->m_next;
while (n) {
if (!M_WRITABLE(m) ||
+ (n->m_flags & M_NOMAP) != 0 ||
M_TRAILINGSPACE(m) < n->m_len) {
/* just join the two chains */
m->m_next = n;
@@ -805,6 +853,9 @@ m_pullup(struct mbuf *n, int len)
int count;
int space;
+ KASSERT((n->m_flags & M_NOMAP) == 0,
+ ("%s: unmapped mbuf %p", __func__, n));
+
/*
* If first mbuf has no cluster, and has room for len bytes
* without shifting current data, pullup into it,
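The new assertion makes explicit that m_pullup() operates only on mapped data, so callers must never hand it an unmapped chain head. The usual idiom it protects looks like this (a sketch; assumes the netinet headers for struct ip):

        struct ip *ip;

        if (m->m_len < sizeof(struct ip) &&
            (m = m_pullup(m, sizeof(struct ip))) == NULL)
                return;         /* m_pullup() freed the chain */
        ip = mtod(m, struct ip *);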
@@ -923,7 +974,12 @@ m_split(struct mbuf *m0, int len0, int wait)
return (NULL);
n->m_next = m->m_next;
m->m_next = NULL;
- n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ n->m_pkthdr.snd_tag =
+ m_snd_tag_ref(m0->m_pkthdr.snd_tag);
+ n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ } else
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
return (n);
@@ -931,7 +987,12 @@ m_split(struct mbuf *m0, int len0, int wait)
n = m_gethdr(wait, m0->m_type);
if (n == NULL)
return (NULL);
- n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
+ if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
+ n->m_pkthdr.snd_tag =
+ m_snd_tag_ref(m0->m_pkthdr.snd_tag);
+ n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
+ } else
+ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
if (m->m_flags & M_EXT)
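With both m_split() hunks applied, each half of a tagged packet owns an independent send-tag reference, so the halves can be transmitted or freed in any order. Sketch (hypothetical split point; m0 is assumed to carry CSUM_SND_TAG):

        struct mbuf *tail;

        tail = m_split(m0, 100, M_NOWAIT);
        /*
         * On success, m0 and tail each hold a tag reference;
         * freeing one chain leaves the other's tag valid.
         */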
@@ -1348,6 +1409,41 @@ nospace:
}
/*
+ * Return the number of fragments an mbuf will use. This is usually
+ * used as a proxy for the number of scatter/gather elements needed by
+ * a DMA engine to access an mbuf. In general mapped mbufs are
+ * assumed to be backed by physically contiguous buffers that only
+ * need a single fragment. Unmapped mbufs, on the other hand, can
+ * span disjoint physical pages.
+ */
+static int
+frags_per_mbuf(struct mbuf *m)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ int frags;
+
+ if ((m->m_flags & M_NOMAP) == 0)
+ return (1);
+
+ /*
+ * The header and trailer are counted as a single fragment
+ * each when present.
+ *
+ * XXX: This overestimates the number of fragments by assuming
+ * all the backing physical pages are disjoint.
+ */
+ ext_pgs = m->m_ext.ext_pgs;
+ frags = 0;
+ if (ext_pgs->hdr_len != 0)
+ frags++;
+ frags += ext_pgs->npgs;
+ if (ext_pgs->trail_len != 0)
+ frags++;
+
+ return (frags);
+}
+
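To make the accounting concrete, a worked example under assumed field values:

        /*
         * Hypothetical unmapped mbuf: hdr_len = 13, npgs = 3,
         * trail_len = 16.  frags_per_mbuf() returns 1 + 3 + 1 = 5,
         * while the same bytes in a single mapped cluster count as 1.
         */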
+/*
* Defragment an mbuf chain, returning at most maxfrags separate
* mbufs+clusters. If this is not possible NULL is returned and
* the original mbuf chain is left in its present (potentially
@@ -1367,7 +1463,7 @@ m_collapse(struct mbuf *m0, int how, int maxfrags)
*/
curfrags = 0;
for (m = m0; m != NULL; m = m->m_next)
- curfrags++;
+ curfrags += frags_per_mbuf(m);
/*
* First, try to collapse mbufs. Note that we always collapse
* towards the front so we don't need to deal with moving the
@@ -1382,12 +1478,13 @@ again:
break;
if (M_WRITABLE(m) &&
n->m_len < M_TRAILINGSPACE(m)) {
- bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
- n->m_len);
+ m_copydata(n, 0, n->m_len,
+ mtod(m, char *) + m->m_len);
m->m_len += n->m_len;
m->m_next = n->m_next;
+ curfrags -= frags_per_mbuf(n);
m_free(n);
- if (--curfrags <= maxfrags)
+ if (curfrags <= maxfrags)
return m0;
} else
m = n;
@@ -1404,15 +1501,18 @@ again:
m = m_getcl(how, MT_DATA, 0);
if (m == NULL)
goto bad;
- bcopy(mtod(n, void *), mtod(m, void *), n->m_len);
- bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len,
- n2->m_len);
+ m_copydata(n, 0, n->m_len, mtod(m, char *));
+ m_copydata(n2, 0, n2->m_len,
+ mtod(m, char *) + n->m_len);
m->m_len = n->m_len + n2->m_len;
m->m_next = n2->m_next;
*prev = m;
+ curfrags += 1; /* For the new cluster */
+ curfrags -= frags_per_mbuf(n);
+ curfrags -= frags_per_mbuf(n2);
m_free(n);
m_free(n2);
- if (--curfrags <= maxfrags) /* +1 cl -2 mbufs */
+ if (curfrags <= maxfrags)
return m0;
/*
* Still not there, try the normal collapse
@@ -1512,6 +1612,100 @@ nospace:
#endif
+#ifndef __rtems__
+/*
+ * Free pages from mbuf_ext_pgs, assuming they were allocated via
+ * vm_page_alloc() and aren't associated with any object. This is the
+ * complement to the allocator in m_uiotombuf_nomap().
+ */
+void
+mb_free_mext_pgs(struct mbuf *m)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ vm_page_t pg;
+
+ MBUF_EXT_PGS_ASSERT(m);
+ ext_pgs = m->m_ext.ext_pgs;
+ for (int i = 0; i < ext_pgs->npgs; i++) {
+ pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+ vm_page_unwire_noq(pg);
+ vm_page_free(pg);
+ }
+}
+
+static struct mbuf *
+m_uiotombuf_nomap(struct uio *uio, int how, int len, int maxseg, int flags)
+{
+ struct mbuf *m, *mb, *prev;
+ struct mbuf_ext_pgs *pgs;
+ vm_page_t pg_array[MBUF_PEXT_MAX_PGS];
+ int error, length, i, needed;
+ ssize_t total;
+ int pflags = malloc2vm_flags(how) | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP |
+ VM_ALLOC_WIRED;
+
+ /*
+ * len can be zero or an arbitrarily large value bounded by
+ * the total data supplied by the uio.
+ */
+ if (len > 0)
+ total = MIN(uio->uio_resid, len);
+ else
+ total = uio->uio_resid;
+
+ if (maxseg == 0)
+ maxseg = MBUF_PEXT_MAX_PGS * PAGE_SIZE;
+
+ /*
+ * Allocate the pages
+ */
+ m = NULL;
+ while (total > 0) {
+ mb = mb_alloc_ext_pgs(how, (flags & M_PKTHDR),
+ mb_free_mext_pgs);
+ if (mb == NULL)
+ goto failed;
+ if (m == NULL)
+ m = mb;
+ else
+ prev->m_next = mb;
+ prev = mb;
+ pgs = mb->m_ext.ext_pgs;
+ needed = length = MIN(maxseg, total);
+ for (i = 0; needed > 0; i++, needed -= PAGE_SIZE) {
+retry_page:
+ pg_array[i] = vm_page_alloc(NULL, 0, pflags);
+ if (pg_array[i] == NULL) {
+ if (how & M_NOWAIT) {
+ goto failed;
+ } else {
+ vm_wait(NULL);
+ goto retry_page;
+ }
+ }
+ pg_array[i]->flags &= ~PG_ZERO;
+ pgs->pa[i] = VM_PAGE_TO_PHYS(pg_array[i]);
+ pgs->npgs++;
+ }
+ pgs->last_pg_len = length - PAGE_SIZE * (pgs->npgs - 1);
+ MBUF_EXT_PGS_ASSERT_SANITY(pgs);
+ total -= length;
+ error = uiomove_fromphys(pg_array, 0, length, uio);
+ if (error != 0)
+ goto failed;
+ mb->m_len = length;
+ mb->m_ext.ext_size += PAGE_SIZE * pgs->npgs;
+ if (flags & M_PKTHDR)
+ m->m_pkthdr.len += length;
+ }
+ return (m);
+
+failed:
+ m_freem(m);
+ return (NULL);
+}
+#endif /* __rtems__ */
+
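Callers request an unmapped chain simply by passing M_NOMAP to m_uiotombuf(), whose dispatch appears below; note that the align argument is forwarded as maxseg, with 0 selecting the default. A construction sketch (hypothetical buf/buflen; a system-space uio built the same way as in m_copyfromunmapped() above):

        struct iovec iov;
        struct uio uio;
        struct mbuf *m;

        iov.iov_base = buf;
        iov.iov_len = buflen;
        uio.uio_iov = &iov;
        uio.uio_iovcnt = 1;
        uio.uio_offset = 0;
        uio.uio_resid = buflen;
        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_rw = UIO_WRITE;
        m = m_uiotombuf(&uio, M_WAITOK, 0, 0, M_PKTHDR | M_NOMAP);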
/*
* Copy the contents of uio into a properly sized mbuf chain.
*/
@@ -1523,6 +1717,11 @@ m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
ssize_t total;
int progress = 0;
+#ifndef __rtems__
+ if (flags & M_NOMAP)
+ return (m_uiotombuf_nomap(uio, how, len, align, flags));
+#endif /* __rtems__ */
+
/*
* len can be zero or an arbitrarily large value bounded by
* the total data supplied by the uio.
@@ -1569,6 +1768,62 @@ m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
}
/*
+ * Copy data from an unmapped mbuf into a uio limited by len if set.
+ */
+int
+m_unmappedtouio(const struct mbuf *m, int m_off, struct uio *uio, int len)
+{
+ struct mbuf_ext_pgs *ext_pgs;
+ vm_page_t pg;
+ int error, i, off, pglen, pgoff, seglen, segoff;
+
+ MBUF_EXT_PGS_ASSERT(m);
+ ext_pgs = m->m_ext.ext_pgs;
+ error = 0;
+
+ /* Skip over any data removed from the front. */
+ off = mtod(m, vm_offset_t);
+
+ off += m_off;
+ if (ext_pgs->hdr_len != 0) {
+ if (off >= ext_pgs->hdr_len) {
+ off -= ext_pgs->hdr_len;
+ } else {
+ seglen = ext_pgs->hdr_len - off;
+ segoff = off;
+ seglen = min(seglen, len);
+ off = 0;
+ len -= seglen;
+ error = uiomove(&ext_pgs->hdr[segoff], seglen, uio);
+ }
+ }
+ pgoff = ext_pgs->first_pg_off;
+ for (i = 0; i < ext_pgs->npgs && error == 0 && len > 0; i++) {
+ pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff);
+ if (off >= pglen) {
+ off -= pglen;
+ pgoff = 0;
+ continue;
+ }
+ seglen = pglen - off;
+ segoff = pgoff + off;
+ off = 0;
+ seglen = min(seglen, len);
+ len -= seglen;
+ pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
+ error = uiomove_fromphys(&pg, segoff, seglen, uio);
+ pgoff = 0;
+ }
+ if (len != 0 && error == 0) {
+ KASSERT((off + len) <= ext_pgs->trail_len,
+ ("off + len > trail (%d + %d > %d, m_off = %d)", off, len,
+ ext_pgs->trail_len, m_off));
+ error = uiomove(&ext_pgs->trail[off], len, uio);
+ }
+ return (error);
+}
+
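The traversal order above implies the following layout for an unmapped mbuf's external storage (a sketch inferred from the code, not a normative diagram):

        /*
         * [ hdr (hdr_len) ][ pg 0 | ... | pg npgs-1 ][ trail (trail_len) ]
         *
         * first_pg_off offsets into pg 0, last_pg_len bounds the final
         * page, and hdr[]/trail[] live inline in struct mbuf_ext_pgs.
         */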
+/*
* Copy an mbuf chain into a uio limited by len if set.
*/
int
@@ -1586,7 +1841,12 @@ m_mbuftouio(struct uio *uio, const struct mbuf *m, int len)
for (; m != NULL; m = m->m_next) {
length = min(m->m_len, total - progress);
- error = uiomove(mtod(m, void *), length, uio);
+#ifndef __rtems__
+ if ((m->m_flags & M_NOMAP) != 0)
+ error = m_unmappedtouio(m, 0, uio, length);
+ else
+#endif /* __rtems__ */
+ error = uiomove(mtod(m, void *), length, uio);
if (error)
return (error);