summaryrefslogtreecommitdiffstats
path: root/freebsd/sys/sys/mbuf.h
diff options
context:
space:
mode:
Diffstat (limited to 'freebsd/sys/sys/mbuf.h')
-rw-r--r--freebsd/sys/sys/mbuf.h257
1 files changed, 206 insertions, 51 deletions
diff --git a/freebsd/sys/sys/mbuf.h b/freebsd/sys/sys/mbuf.h
index 634f7d9e..ba2e1873 100644
--- a/freebsd/sys/sys/mbuf.h
+++ b/freebsd/sys/sys/mbuf.h
@@ -40,6 +40,7 @@
#include <sys/queue.h>
#ifdef _KERNEL
#include <sys/systm.h>
+#include <sys/refcount.h>
#include <vm/uma.h>
#ifdef WITNESS
#include <sys/lock.h>
@@ -98,6 +99,7 @@ struct mbuf;
#define MLEN ((int)(MSIZE - MHSIZE))
#define MHLEN ((int)(MSIZE - MPKTHSIZE))
#define MINCLSIZE (MHLEN + 1)
+#define M_NODOM 255
#ifdef _KERNEL
/*-
@@ -137,6 +139,7 @@ struct m_tag {
*/
struct m_snd_tag {
struct ifnet *ifp; /* network interface tag belongs to */
+ volatile u_int refcount;
};
/*
@@ -158,7 +161,7 @@ struct pkthdr {
uint32_t flowid; /* packet's 4-tuple system */
uint32_t csum_flags; /* checksum and offload features */
uint16_t fibnum; /* this packet should use this fib */
- uint8_t cosqos; /* class/quality of service */
+ uint8_t numa_domain; /* NUMA domain of recvd pkt */
uint8_t rsstype; /* hash type */
union {
uint64_t rcv_tstmp; /* timestamp in ns */
@@ -196,6 +199,8 @@ struct pkthdr {
#define lro_nsegs tso_segsz
#define csum_phsum PH_per.sixteen[2]
#define csum_data PH_per.thirtytwo[1]
+#define lro_len PH_per.sixteen[0] /* inbound during LRO */
+#define lro_csum PH_per.sixteen[1] /* inbound during LRO */
#define pace_thoff PH_loc.sixteen[0]
#define pace_tlen PH_loc.sixteen[1]
#define pace_drphdrlen PH_loc.sixteen[2]
@@ -224,7 +229,15 @@ struct m_ext {
volatile u_int ext_count;
volatile u_int *ext_cnt;
};
- char *ext_buf; /* start of buffer */
+ union {
+ /*
+ * If ext_type == EXT_PGS, 'ext_pgs' points to a
+ * structure describing the buffer. Otherwise,
+ * 'ext_buf' points to the start of the buffer.
+ */
+ struct mbuf_ext_pgs *ext_pgs;
+ char *ext_buf;
+ };
uint32_t ext_size; /* size of buffer, for ext_free */
uint32_t ext_type:8, /* type of external storage */
ext_flags:24; /* external storage mbuf flags */
@@ -290,10 +303,98 @@ struct mbuf {
};
};
+struct ktls_session;
+struct socket;
+
+/*
+ * TLS records for TLS 1.0-1.2 can have the following header lengths:
+ * - 5 (AES-CBC with implicit IV)
+ * - 21 (AES-CBC with explicit IV)
+ * - 13 (AES-GCM with 8 byte explicit IV)
+ */
+#define MBUF_PEXT_HDR_LEN 24
+
+/*
+ * TLS records for TLS 1.0-1.2 can have the following maximum trailer
+ * lengths:
+ * - 16 (AES-GCM)
+ * - 36 (AES-CBC with SHA1 and up to 16 bytes of padding)
+ * - 48 (AES-CBC with SHA2-256 and up to 16 bytes of padding)
+ * - 64 (AES-CBC with SHA2-384 and up to 16 bytes of padding)
+ */
+#define MBUF_PEXT_TRAIL_LEN 64
+
+#ifdef __LP64__
+#define MBUF_PEXT_MAX_PGS (152 / sizeof(vm_paddr_t))
+#else
+#define MBUF_PEXT_MAX_PGS (156 / sizeof(vm_paddr_t))
+#endif
+
+#define MBUF_PEXT_MAX_BYTES \
+ (MBUF_PEXT_MAX_PGS * PAGE_SIZE + MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN)
+
+/*
+ * This struct is 256 bytes in size and is arranged so that the most
+ * common case (accessing the first 4 pages of a 16KB TLS record) will
+ * fit in a single 64 byte cacheline.
+ */
+struct mbuf_ext_pgs {
+ uint8_t npgs; /* Number of attached pages */
+ uint8_t nrdy; /* Pages with I/O pending */
+ uint8_t hdr_len; /* TLS header length */
+ uint8_t trail_len; /* TLS trailer length */
+ uint16_t first_pg_off; /* Offset into 1st page */
+ uint16_t last_pg_len; /* Length of last page */
+ vm_paddr_t pa[MBUF_PEXT_MAX_PGS]; /* phys addrs of pages */
+ char hdr[MBUF_PEXT_HDR_LEN]; /* TLS header */
+ struct ktls_session *tls; /* TLS session */
+#if defined(__i386__) || \
+ (defined(__powerpc__) && !defined(__powerpc64__) && defined(BOOKE))
+ /*
+ * i386 and Book-E PowerPC have 64-bit vm_paddr_t, so there is
+ * a 4 byte remainder from the space allocated for pa[].
+ */
+ uint32_t pad;
+#endif
+ union {
+ char trail[MBUF_PEXT_TRAIL_LEN]; /* TLS trailer */
+ struct {
+ struct socket *so;
+ struct mbuf *mbuf;
+ uint64_t seqno;
+ STAILQ_ENTRY(mbuf_ext_pgs) stailq;
+ int enc_cnt;
+ };
+ };
+};
+
+#ifdef _KERNEL
+static inline int
+mbuf_ext_pg_len(struct mbuf_ext_pgs *ext_pgs, int pidx, int pgoff)
+{
+ KASSERT(pgoff == 0 || pidx == 0,
+ ("page %d with non-zero offset %d in %p", pidx, pgoff, ext_pgs));
+ if (pidx == ext_pgs->npgs - 1) {
+ return (ext_pgs->last_pg_len);
+ } else {
+ return (PAGE_SIZE - pgoff);
+ }
+}
+
+#ifdef INVARIANT_SUPPORT
+void mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs);
+#endif
+#ifdef INVARIANTS
+#define MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs) mb_ext_pgs_check((ext_pgs))
+#else
+#define MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs)
+#endif
+#endif
+
/*
* mbuf flags of global significance and layer crossing.
* Those of only protocol/layer specific significance are to be mapped
- * to M_PROTO[1-12] and cleared at layer handoff boundaries.
+ * to M_PROTO[1-11] and cleared at layer handoff boundaries.
* NB: Limited to the lower 24 bits.
*/
#define M_EXT 0x00000001 /* has associated external storage */
@@ -304,25 +405,29 @@ struct mbuf {
#define M_MCAST 0x00000020 /* send/received as link-level multicast */
#define M_PROMISC 0x00000040 /* packet was not for us */
#define M_VLANTAG 0x00000080 /* ether_vtag is valid */
-#define M_NOMAP 0x00000100 /* mbuf data is unmapped (soon from Drew) */
+#ifndef __rtems__
+#define M_NOMAP 0x00000100 /* mbuf data is unmapped */
+#else /* __rtems__ */
+#define M_NOMAP 0x00000000 /* disable unmapped mbuf data */
+#endif /* __rtems__ */
#define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */
#define M_TSTMP 0x00000400 /* rcv_tstmp field is valid */
#define M_TSTMP_HPREC 0x00000800 /* rcv_tstmp is high-prec, typically
hw-stamped on port (useful for IEEE 1588
and 802.1AS) */
-
-#define M_PROTO1 0x00001000 /* protocol-specific */
-#define M_PROTO2 0x00002000 /* protocol-specific */
-#define M_PROTO3 0x00004000 /* protocol-specific */
-#define M_PROTO4 0x00008000 /* protocol-specific */
-#define M_PROTO5 0x00010000 /* protocol-specific */
-#define M_PROTO6 0x00020000 /* protocol-specific */
-#define M_PROTO7 0x00040000 /* protocol-specific */
-#define M_PROTO8 0x00080000 /* protocol-specific */
-#define M_PROTO9 0x00100000 /* protocol-specific */
-#define M_PROTO10 0x00200000 /* protocol-specific */
-#define M_PROTO11 0x00400000 /* protocol-specific */
-#define M_PROTO12 0x00800000 /* protocol-specific */
+#define M_TSTMP_LRO 0x00001000 /* Time LRO pushed in pkt is valid in (PH_loc) */
+
+#define M_PROTO1 0x00002000 /* protocol-specific */
+#define M_PROTO2 0x00004000 /* protocol-specific */
+#define M_PROTO3 0x00008000 /* protocol-specific */
+#define M_PROTO4 0x00010000 /* protocol-specific */
+#define M_PROTO5 0x00020000 /* protocol-specific */
+#define M_PROTO6 0x00040000 /* protocol-specific */
+#define M_PROTO7 0x00080000 /* protocol-specific */
+#define M_PROTO8 0x00100000 /* protocol-specific */
+#define M_PROTO9 0x00200000 /* protocol-specific */
+#define M_PROTO10 0x00400000 /* protocol-specific */
+#define M_PROTO11 0x00800000 /* protocol-specific */
#define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */
@@ -331,7 +436,7 @@ struct mbuf {
*/
#define M_PROTOFLAGS \
(M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\
- M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12)
+ M_PROTO9|M_PROTO10|M_PROTO11)
/*
* Flags preserved when copying m_pkthdr.
@@ -345,11 +450,11 @@ struct mbuf {
*/
#define M_FLAG_BITS \
"\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \
- "\7M_PROMISC\10M_VLANTAG\13M_TSTMP\14M_TSTMP_HPREC"
+ "\7M_PROMISC\10M_VLANTAG\11M_NOMAP\12M_NOFREE\13M_TSTMP\14M_TSTMP_HPREC"
#define M_FLAG_PROTOBITS \
"\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \
"\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \
- "\27M_PROTO11\30M_PROTO12"
+ "\27M_PROTO11"
#define M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS)
/*
@@ -407,33 +512,6 @@ struct mbuf {
#define M_HASHTYPE_ISHASH(m) (M_HASHTYPE_GET(m) & M_HASHTYPE_HASHPROP)
/*
- * COS/QOS class and quality of service tags.
- * It uses DSCP code points as base.
- */
-#define QOS_DSCP_CS0 0x00
-#define QOS_DSCP_DEF QOS_DSCP_CS0
-#define QOS_DSCP_CS1 0x20
-#define QOS_DSCP_AF11 0x28
-#define QOS_DSCP_AF12 0x30
-#define QOS_DSCP_AF13 0x38
-#define QOS_DSCP_CS2 0x40
-#define QOS_DSCP_AF21 0x48
-#define QOS_DSCP_AF22 0x50
-#define QOS_DSCP_AF23 0x58
-#define QOS_DSCP_CS3 0x60
-#define QOS_DSCP_AF31 0x68
-#define QOS_DSCP_AF32 0x70
-#define QOS_DSCP_AF33 0x78
-#define QOS_DSCP_CS4 0x80
-#define QOS_DSCP_AF41 0x88
-#define QOS_DSCP_AF42 0x90
-#define QOS_DSCP_AF43 0x98
-#define QOS_DSCP_CS5 0xa0
-#define QOS_DSCP_EF 0xb8
-#define QOS_DSCP_CS6 0xc0
-#define QOS_DSCP_CS7 0xe0
-
-/*
* External mbuf storage buffer types.
*/
#define EXT_CLUSTER 1 /* mbuf cluster */
@@ -445,6 +523,10 @@ struct mbuf {
#define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */
#define EXT_PACKET 6 /* mbuf+cluster from packet zone */
#define EXT_MBUF 7 /* external mbuf reference */
+#define EXT_RXRING 8 /* data in NIC receive ring */
+#ifndef __rtems__
+#define EXT_PGS 9 /* array of unmapped pages */
+#endif /* __rtems__ */
#define EXT_VENDOR1 224 /* for vendor-internal use */
#define EXT_VENDOR2 225 /* for vendor-internal use */
@@ -489,6 +571,11 @@ struct mbuf {
"\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \
"\30EXT_FLAG_EXP4"
+#define MBUF_EXT_PGS_ASSERT(m) \
+ KASSERT((((m)->m_flags & M_EXT) != 0) && \
+ ((m)->m_ext.ext_type == EXT_PGS), \
+ ("%s: m %p !M_EXT or !EXT_PGS", __func__, m))
+
/*
* Flags indicating checksum, segmentation and other offload work to be
* done, or already done, by hardware or lower layers. It is split into
@@ -521,6 +608,8 @@ struct mbuf {
#define CSUM_L5_VALID 0x20000000 /* checksum is correct */
#define CSUM_COALESCED 0x40000000 /* contains merged segments */
+#define CSUM_SND_TAG 0x80000000 /* Packet header has send tag */
+
/*
* CSUM flag description for use with printf(9) %b identifier.
*/
@@ -530,7 +619,7 @@ struct mbuf {
"\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \
"\16CSUM_IP6_ISCSI" \
"\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \
- "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED"
+ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED\40CSUM_SND_TAG"
/* CSUM flags compatibility mappings. */
#define CSUM_IP_CHECKED CSUM_L3_CALC
@@ -589,6 +678,7 @@ struct mbuf {
#define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k"
#define MBUF_TAG_MEM_NAME "mbuf_tag"
#define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt"
+#define MBUF_EXTPGS_MEM_NAME "mbuf_extpgs"
#ifdef _KERNEL
@@ -613,9 +703,25 @@ extern uma_zone_t zone_pack;
extern uma_zone_t zone_jumbop;
extern uma_zone_t zone_jumbo9;
extern uma_zone_t zone_jumbo16;
+extern uma_zone_t zone_extpgs;
void mb_dupcl(struct mbuf *, struct mbuf *);
void mb_free_ext(struct mbuf *);
+void mb_free_mext_pgs(struct mbuf *);
+struct mbuf *mb_alloc_ext_pgs(int, bool, m_ext_free_t);
+int mb_unmapped_compress(struct mbuf *m);
+#ifndef __rtems__
+struct mbuf *mb_unmapped_to_ext(struct mbuf *m);
+#else /* __rtems__ */
+static __inline struct mbuf *
+mb_unmapped_to_ext(struct mbuf *m)
+{
+
+ return (m);
+}
+
+#endif /* __rtems__ */
+void mb_free_notready(struct mbuf *m, int count);
void m_adj(struct mbuf *, int);
int m_apply(struct mbuf *, int, int,
int (*)(void *, void *, u_int), void *);
@@ -650,6 +756,7 @@ struct mbuf *m_getm2(struct mbuf *, int, int, short, int);
struct mbuf *m_getptr(struct mbuf *, int, int *);
u_int m_length(struct mbuf *, struct mbuf **);
int m_mbuftouio(struct uio *, const struct mbuf *, int);
+int m_unmappedtouio(const struct mbuf *, int, struct uio *, int);
void m_move_pkthdr(struct mbuf *, struct mbuf *);
int m_pkthdr_init(struct mbuf *, int);
struct mbuf *m_prepend(struct mbuf *, int, int);
@@ -660,6 +767,8 @@ int m_sanity(struct mbuf *, int);
struct mbuf *m_split(struct mbuf *, int, int);
struct mbuf *m_uiotombuf(struct uio *, int, int, int, int);
struct mbuf *m_unshare(struct mbuf *, int);
+void m_snd_tag_init(struct m_snd_tag *, struct ifnet *);
+void m_snd_tag_destroy(struct m_snd_tag *);
static __inline int
m_gettype(int size)
@@ -903,7 +1012,7 @@ m_extrefcnt(struct mbuf *m)
* be both the local data payload, or an external buffer area, depending on
* whether M_EXT is set).
*/
-#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \
+#define M_WRITABLE(m) (((m)->m_flags & (M_RDONLY | M_NOMAP)) == 0 && \
(!(((m)->m_flags & M_EXT)) || \
(m_extrefcnt(m) == 1)))
@@ -926,7 +1035,8 @@ m_extrefcnt(struct mbuf *m)
* handling external storage, packet-header mbufs, and regular data mbufs.
*/
#define M_START(m) \
- (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \
+ (((m)->m_flags & M_NOMAP) ? NULL : \
+ ((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \
((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \
&(m)->m_dat[0])
@@ -1023,6 +1133,17 @@ m_align(struct mbuf *m, int len)
*/
#define MCHTYPE(m, t) m_chtype((m), (t))
+/* Return the rcvif of a packet header. */
+static __inline struct ifnet *
+m_rcvif(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
+ if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ return (NULL);
+ return (m->m_pkthdr.rcvif);
+}
+
/* Length to m_copy to copy all. */
#define M_COPYALL 1000000000
@@ -1031,6 +1152,7 @@ extern int max_hdr; /* Largest link + protocol header */
extern int max_linkhdr; /* Largest link-level header */
extern int max_protohdr; /* Largest protocol header */
extern int nmbclusters; /* Maximum number of clusters */
+extern bool mb_use_ext_pgs; /* Use ext_pgs for sendfile */
/*-
* Network packets may have annotations attached by affixing a list of
@@ -1213,6 +1335,22 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start)
m_tag_locate(m, MTAG_ABI_COMPAT, type, start));
}
+static inline struct m_snd_tag *
+m_snd_tag_ref(struct m_snd_tag *mst)
+{
+
+ refcount_acquire(&mst->refcount);
+ return (mst);
+}
+
+static inline void
+m_snd_tag_rele(struct m_snd_tag *mst)
+{
+
+ if (refcount_release(&mst->refcount))
+ m_snd_tag_destroy(mst);
+}
+
static __inline struct mbuf *
m_free(struct mbuf *m)
{
@@ -1221,6 +1359,8 @@ m_free(struct mbuf *m)
MBUF_PROBE1(m__free, m);
if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
m_tag_delete_chain(m, NULL);
+ if (m->m_flags & M_PKTHDR && m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+ m_snd_tag_rele(m->m_pkthdr.snd_tag);
if (m->m_flags & M_EXT)
mb_free_ext(m);
else if ((m->m_flags & M_NOFREE) == 0)
@@ -1314,7 +1454,7 @@ static inline int
mbufq_full(const struct mbufq *mq)
{
- return (mq->mq_len >= mq->mq_maxlen);
+ return (mq->mq_maxlen > 0 && mq->mq_len >= mq->mq_maxlen);
}
static inline int
@@ -1388,5 +1528,20 @@ void netdump_mbuf_dump(void);
void netdump_mbuf_reinit(int nmbuf, int nclust, int clsize);
#endif
+static inline bool
+mbuf_has_tls_session(struct mbuf *m)
+{
+
+#ifndef __rtems__
+ if (m->m_flags & M_NOMAP) {
+ MBUF_EXT_PGS_ASSERT(m);
+ if (m->m_ext.ext_pgs->tls != NULL) {
+ return (true);
+ }
+ }
+#endif /* __rtems__ */
+ return (false);
+}
+
#endif /* _KERNEL */
#endif /* !_SYS_MBUF_H_ */