summary refs log tree commit diff stats
path: root/freebsd/sys/sys/mbuf.h
diff options
context:
space:
mode:
Diffstat (limited to 'freebsd/sys/sys/mbuf.h')
-rw-r--r-- freebsd/sys/sys/mbuf.h 1077
1 files changed, 643 insertions, 434 deletions
diff --git a/freebsd/sys/sys/mbuf.h b/freebsd/sys/sys/mbuf.h
index b6d58a25..95194e0b 100644
--- a/freebsd/sys/sys/mbuf.h
+++ b/freebsd/sys/sys/mbuf.h
@@ -44,6 +44,32 @@
#endif
#endif
+#ifdef _KERNEL
+#include <sys/sdt.h>
+
+#define MBUF_PROBE1(probe, arg0) \
+ SDT_PROBE1(sdt, , , probe, arg0)
+#define MBUF_PROBE2(probe, arg0, arg1) \
+ SDT_PROBE2(sdt, , , probe, arg0, arg1)
+#define MBUF_PROBE3(probe, arg0, arg1, arg2) \
+ SDT_PROBE3(sdt, , , probe, arg0, arg1, arg2)
+#define MBUF_PROBE4(probe, arg0, arg1, arg2, arg3) \
+ SDT_PROBE4(sdt, , , probe, arg0, arg1, arg2, arg3)
+#define MBUF_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \
+ SDT_PROBE5(sdt, , , probe, arg0, arg1, arg2, arg3, arg4)
+
+SDT_PROBE_DECLARE(sdt, , , m__init);
+SDT_PROBE_DECLARE(sdt, , , m__gethdr);
+SDT_PROBE_DECLARE(sdt, , , m__get);
+SDT_PROBE_DECLARE(sdt, , , m__getcl);
+SDT_PROBE_DECLARE(sdt, , , m__clget);
+SDT_PROBE_DECLARE(sdt, , , m__cljget);
+SDT_PROBE_DECLARE(sdt, , , m__cljset);
+SDT_PROBE_DECLARE(sdt, , , m__free);
+SDT_PROBE_DECLARE(sdt, , , m__freem);
+
+#endif /* _KERNEL */
+
/*
* Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead.
* An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in
@@ -52,11 +78,24 @@
* stored. Additionally, it is possible to allocate a separate buffer
* externally and attach it to the mbuf in a way similar to that of mbuf
* clusters.
+ *
+ * NB: These calculations do not take actual compiler-induced alignment and
+ * padding inside the complete struct mbuf into account. Appropriate
+ * attention is required when changing members of struct mbuf.
+ *
+ * MLEN is data length in a normal mbuf.
+ * MHLEN is data length in an mbuf with pktheader.
+ * MINCLSIZE is the smallest amount of data that should be put into a cluster.
+ *
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure that
+ * they are sensible.
*/
-#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */
-#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */
-#define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */
-#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */
+struct mbuf;
+#define MHSIZE offsetof(struct mbuf, m_dat)
+#define MPKTHSIZE offsetof(struct mbuf, m_pktdat)
+#define MLEN ((int)(MSIZE - MHSIZE))
+#define MHLEN ((int)(MSIZE - MPKTHSIZE))
+#define MINCLSIZE (MHLEN + 1)
#ifdef _KERNEL
/*-
@@ -64,8 +103,10 @@
* type:
*
* mtod(m, t) -- Convert mbuf pointer to data pointer of correct type.
+ * mtodo(m, o) -- Same as above but with offset 'o' into data.
*/
#define mtod(m, t) ((t)((m)->m_data))
+#define mtodo(m, o) ((void *)(((m)->m_data) + (o)))
/*
* Argument structure passed to UMA routines during mbuf and packet
@@ -77,25 +118,6 @@ struct mb_args {
};
#endif /* _KERNEL */
-#if defined(__LP64__)
-#define M_HDR_PAD 6
-#else
-#define M_HDR_PAD 2
-#endif
-
-/*
- * Header present at the beginning of every mbuf.
- */
-struct m_hdr {
- struct mbuf *mh_next; /* next buffer in chain */
- struct mbuf *mh_nextpkt; /* next chain in queue/record */
- caddr_t mh_data; /* location of data */
- int mh_len; /* amount of data in this mbuf */
- int mh_flags; /* flags; see below */
- short mh_type; /* type of data in this mbuf */
- uint8_t pad[M_HDR_PAD];/* word align */
-};
-
/*
* Packet tag structure (see below for details).
*/
@@ -109,40 +131,73 @@ struct m_tag {
/*
* Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
+ * Size ILP32: 48
+ * LP64: 56
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure that
+ * they are correct.
*/
struct pkthdr {
struct ifnet *rcvif; /* rcv interface */
- /* variables for ip and tcp reassembly */
- void *header; /* pointer to packet header */
- int len; /* total packet length */
- uint32_t flowid; /* packet's 4-tuple system
- * flow identifier
- */
- /* variables for hardware checksum */
- int csum_flags; /* flags regarding checksum */
- int csum_data; /* data field used by csum routines */
- u_int16_t tso_segsz; /* TSO segment size */
- union {
- u_int16_t vt_vtag; /* Ethernet 802.1p+q vlan tag */
- u_int16_t vt_nrecs; /* # of IGMPv3 records in this chain */
- } PH_vt;
SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
+ int32_t len; /* total packet length */
+
+ /* Layer crossing persistent information. */
+ uint32_t flowid; /* packet's 4-tuple system flow identifier */
+ uint64_t csum_flags; /* checksum and offload features */
+ uint16_t fibnum; /* this packet should use this fib */
+ uint8_t cosqos; /* class/quality of service */
+ uint8_t rsstype; /* hash type */
+ uint8_t l2hlen; /* layer 2 header length */
+ uint8_t l3hlen; /* layer 3 header length */
+ uint8_t l4hlen; /* layer 4 header length */
+ uint8_t l5hlen; /* layer 5 header length */
+ union {
+ uint8_t eight[8];
+ uint16_t sixteen[4];
+ uint32_t thirtytwo[2];
+ uint64_t sixtyfour[1];
+ uintptr_t unintptr[1];
+ void *ptr;
+ } PH_per;
+
+ /* Layer specific non-persistent local storage for reassembly, etc. */
+ union {
+ uint8_t eight[8];
+ uint16_t sixteen[4];
+ uint32_t thirtytwo[2];
+ uint64_t sixtyfour[1];
+ uintptr_t unintptr[1];
+ void *ptr;
+ } PH_loc;
};
-#define ether_vtag PH_vt.vt_vtag
+#define ether_vtag PH_per.sixteen[0]
+#define PH_vt PH_per
+#define vt_nrecs sixteen[0]
+#define tso_segsz PH_per.sixteen[1]
+#define csum_phsum PH_per.sixteen[2]
+#define csum_data PH_per.thirtytwo[1]
/*
* Description of external storage mapped into mbuf; valid only if M_EXT is
* set.
+ * Size ILP32: 28
+ * LP64: 48
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure that
+ * they are correct.
*/
struct m_ext {
+ union {
+ volatile u_int ext_count; /* value of ref count info */
+ volatile u_int *ext_cnt; /* pointer to ref count info */
+ };
caddr_t ext_buf; /* start of buffer */
+ uint32_t ext_size; /* size of buffer, for ext_free */
+ uint32_t ext_type:8, /* type of external storage */
+ ext_flags:24; /* external storage mbuf flags */
void (*ext_free) /* free routine if not the usual */
- (void *, void *);
+ (struct mbuf *, void *, void *);
void *ext_arg1; /* optional argument pointer */
void *ext_arg2; /* optional argument pointer */
- u_int ext_size; /* size of buffer, for ext_free */
- volatile u_int *ref_cnt; /* pointer to ref count info */
- int ext_type; /* type of external storage */
};
/*
@@ -150,71 +205,107 @@ struct m_ext {
* purposes.
*/
struct mbuf {
- struct m_hdr m_hdr;
+ /*
+ * Header present at the beginning of every mbuf.
+ * Size ILP32: 24
+ * LP64: 32
+ * Compile-time assertions in uipc_mbuf.c test these values to ensure
+ * that they are correct.
+ */
+ union { /* next buffer in chain */
+ struct mbuf *m_next;
+ SLIST_ENTRY(mbuf) m_slist;
+ STAILQ_ENTRY(mbuf) m_stailq;
+ };
+ union { /* next chain in queue/record */
+ struct mbuf *m_nextpkt;
+ SLIST_ENTRY(mbuf) m_slistpkt;
+ STAILQ_ENTRY(mbuf) m_stailqpkt;
+ };
+ caddr_t m_data; /* location of data */
+ int32_t m_len; /* amount of data in this mbuf */
+ uint32_t m_type:8, /* type of data in this mbuf */
+ m_flags:24; /* flags; see below */
+#if !defined(__LP64__)
+ uint32_t m_pad; /* pad for 64bit alignment */
+#endif
+
+ /*
+ * A set of optional headers (packet header, external storage header)
+ * and internal data storage. Historically, these arrays were sized
+ * to MHLEN (space left after a packet header) and MLEN (space left
+ * after only a regular mbuf header); they are now variable size in
+ * order to support future work on variable-size mbufs.
+ */
union {
struct {
- struct pkthdr MH_pkthdr; /* M_PKTHDR set */
+ struct pkthdr m_pkthdr; /* M_PKTHDR set */
union {
- struct m_ext MH_ext; /* M_EXT set */
- char MH_databuf[MHLEN];
- } MH_dat;
- } MH;
- char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */
- } M_dat;
+ struct m_ext m_ext; /* M_EXT set */
+ char m_pktdat[0];
+ };
+ };
+ char m_dat[0]; /* !M_PKTHDR, !M_EXT */
+ };
};
-#define m_next m_hdr.mh_next
-#define m_len m_hdr.mh_len
-#define m_data m_hdr.mh_data
-#define m_type m_hdr.mh_type
-#define m_flags m_hdr.mh_flags
-#define m_nextpkt m_hdr.mh_nextpkt
-#define m_act m_nextpkt
-#define m_pkthdr M_dat.MH.MH_pkthdr
-#define m_ext M_dat.MH.MH_dat.MH_ext
-#define m_pktdat M_dat.MH.MH_dat.MH_databuf
-#define m_dat M_dat.M_databuf
/*
- * mbuf flags.
+ * mbuf flags of global significance and layer crossing.
+ * Those of only protocol/layer specific significance are to be mapped
+ * to M_PROTO[1-12] and cleared at layer handoff boundaries.
+ * NB: Limited to the lower 24 bits.
*/
#define M_EXT 0x00000001 /* has associated external storage */
#define M_PKTHDR 0x00000002 /* start of record */
#define M_EOR 0x00000004 /* end of record */
#define M_RDONLY 0x00000008 /* associated data is marked read-only */
-#define M_PROTO1 0x00000010 /* protocol-specific */
-#define M_PROTO2 0x00000020 /* protocol-specific */
-#define M_PROTO3 0x00000040 /* protocol-specific */
-#define M_PROTO4 0x00000080 /* protocol-specific */
-#define M_PROTO5 0x00000100 /* protocol-specific */
-#define M_BCAST 0x00000200 /* send/received as link-level broadcast */
-#define M_MCAST 0x00000400 /* send/received as link-level multicast */
-#define M_FRAG 0x00000800 /* packet is a fragment of a larger packet */
-#define M_FIRSTFRAG 0x00001000 /* packet is first fragment */
-#define M_LASTFRAG 0x00002000 /* packet is last fragment */
-#define M_SKIP_FIREWALL 0x00004000 /* skip firewall processing */
-#define M_FREELIST 0x00008000 /* mbuf is on the free list */
-#define M_VLANTAG 0x00010000 /* ether_vtag is valid */
-#define M_PROMISC 0x00020000 /* packet was not for us */
-#define M_NOFREE 0x00040000 /* do not free mbuf, embedded in cluster */
-#define M_PROTO6 0x00080000 /* protocol-specific */
-#define M_PROTO7 0x00100000 /* protocol-specific */
-#define M_PROTO8 0x00200000 /* protocol-specific */
-#define M_FLOWID 0x00400000 /* deprecated: flowid is valid */
-#define M_HASHTYPEBITS 0x0F000000 /* mask of bits holding flowid hash type */
+#define M_BCAST 0x00000010 /* send/received as link-level broadcast */
+#define M_MCAST 0x00000020 /* send/received as link-level multicast */
+#define M_PROMISC 0x00000040 /* packet was not for us */
+#define M_VLANTAG 0x00000080 /* ether_vtag is valid */
+#define M_UNUSED_8 0x00000100 /* --available-- */
+#define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */
+
+#define M_PROTO1 0x00001000 /* protocol-specific */
+#define M_PROTO2 0x00002000 /* protocol-specific */
+#define M_PROTO3 0x00004000 /* protocol-specific */
+#define M_PROTO4 0x00008000 /* protocol-specific */
+#define M_PROTO5 0x00010000 /* protocol-specific */
+#define M_PROTO6 0x00020000 /* protocol-specific */
+#define M_PROTO7 0x00040000 /* protocol-specific */
+#define M_PROTO8 0x00080000 /* protocol-specific */
+#define M_PROTO9 0x00100000 /* protocol-specific */
+#define M_PROTO10 0x00200000 /* protocol-specific */
+#define M_PROTO11 0x00400000 /* protocol-specific */
+#define M_PROTO12 0x00800000 /* protocol-specific */
+
+#define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */
/*
- * For RELENG_{6,7} steal these flags for limited multiple routing table
- * support. In RELENG_8 and beyond, use just one flag and a tag.
+ * Flags to purge when crossing layers.
*/
-#define M_FIB 0xF0000000 /* steal some bits to store fib number. */
+#define M_PROTOFLAGS \
+ (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\
+ M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12)
-#define M_NOTIFICATION M_PROTO5 /* SCTP notification */
+/*
+ * Flags preserved when copying m_pkthdr.
+ */
+#define M_COPYFLAGS \
+ (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG| \
+ M_PROTOFLAGS)
/*
- * Flags to purge when crossing layers.
+ * Mbuf flag description for use with printf(9) %b identifier.
*/
-#define M_PROTOFLAGS \
- (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8)
+#define M_FLAG_BITS \
+ "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \
+ "\7M_PROMISC\10M_VLANTAG"
+#define M_FLAG_PROTOBITS \
+ "\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \
+ "\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \
+ "\27M_PROTO11\30M_PROTO12"
+#define M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS)
/*
* Network interface cards are able to hash protocol fields (such as IPv4
@@ -227,88 +318,214 @@ struct mbuf {
*
* Most NICs support RSS, which provides ordering and explicit affinity, and
* use the hash m_flag bits to indicate what header fields were covered by
- * the hash. M_HASHTYPE_OPAQUE can be set by non-RSS cards or configurations
- * that provide an opaque flow identifier, allowing for ordering and
- * distribution without explicit affinity.
+ * the hash. M_HASHTYPE_OPAQUE and M_HASHTYPE_OPAQUE_HASH can be set by non-
+ * RSS cards or configurations that provide an opaque flow identifier, allowing
+ * for ordering and distribution without explicit affinity. Additionally,
+ * M_HASHTYPE_OPAQUE_HASH indicates that the flow identifier has hash
+ * properties.
*/
-#define M_HASHTYPE_SHIFT 24
-#define M_HASHTYPE_NONE 0x0
-#define M_HASHTYPE_RSS_IPV4 0x1 /* IPv4 2-tuple */
-#define M_HASHTYPE_RSS_TCP_IPV4 0x2 /* TCPv4 4-tuple */
-#define M_HASHTYPE_RSS_IPV6 0x3 /* IPv6 2-tuple */
-#define M_HASHTYPE_RSS_TCP_IPV6 0x4 /* TCPv6 4-tuple */
-#define M_HASHTYPE_RSS_IPV6_EX 0x5 /* IPv6 2-tuple + ext hdrs */
-#define M_HASHTYPE_RSS_TCP_IPV6_EX 0x6 /* TCPv6 4-tiple + ext hdrs */
-#define M_HASHTYPE_OPAQUE 0xf /* ordering, not affinity */
-
-#define M_HASHTYPE_CLEAR(m) (m)->m_flags &= ~(M_HASHTYPEBITS)
-#define M_HASHTYPE_GET(m) (((m)->m_flags & M_HASHTYPEBITS) >> \
- M_HASHTYPE_SHIFT)
-#define M_HASHTYPE_SET(m, v) do { \
- (m)->m_flags &= ~M_HASHTYPEBITS; \
- (m)->m_flags |= ((v) << M_HASHTYPE_SHIFT); \
-} while (0)
+#define M_HASHTYPE_HASHPROP 0x80 /* has hash properties */
+#define M_HASHTYPE_HASH(t) (M_HASHTYPE_HASHPROP | (t))
+/* Microsoft RSS standard hash types */
+#define M_HASHTYPE_NONE 0
+#define M_HASHTYPE_RSS_IPV4 M_HASHTYPE_HASH(1) /* IPv4 2-tuple */
+#define M_HASHTYPE_RSS_TCP_IPV4 M_HASHTYPE_HASH(2) /* TCPv4 4-tuple */
+#define M_HASHTYPE_RSS_IPV6 M_HASHTYPE_HASH(3) /* IPv6 2-tuple */
+#define M_HASHTYPE_RSS_TCP_IPV6 M_HASHTYPE_HASH(4) /* TCPv6 4-tuple */
+#define M_HASHTYPE_RSS_IPV6_EX M_HASHTYPE_HASH(5) /* IPv6 2-tuple +
+ * ext hdrs */
+#define M_HASHTYPE_RSS_TCP_IPV6_EX M_HASHTYPE_HASH(6) /* TCPv6 4-tuple +
+ * ext hdrs */
+/* Non-standard RSS hash types */
+#define M_HASHTYPE_RSS_UDP_IPV4 M_HASHTYPE_HASH(7) /* IPv4 UDP 4-tuple*/
+#define M_HASHTYPE_RSS_UDP_IPV4_EX M_HASHTYPE_HASH(8) /* IPv4 UDP 4-tuple +
+ * ext hdrs */
+#define M_HASHTYPE_RSS_UDP_IPV6 M_HASHTYPE_HASH(9) /* IPv6 UDP 4-tuple*/
+#define M_HASHTYPE_RSS_UDP_IPV6_EX M_HASHTYPE_HASH(10)/* IPv6 UDP 4-tuple +
+ * ext hdrs */
+
+#define M_HASHTYPE_OPAQUE 63 /* ordering, not affinity */
+#define M_HASHTYPE_OPAQUE_HASH M_HASHTYPE_HASH(M_HASHTYPE_OPAQUE)
+ /* ordering+hash, not affinity*/
+
+#define M_HASHTYPE_CLEAR(m) ((m)->m_pkthdr.rsstype = 0)
+#define M_HASHTYPE_GET(m) ((m)->m_pkthdr.rsstype)
+#define M_HASHTYPE_SET(m, v) ((m)->m_pkthdr.rsstype = (v))
#define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v))
+#define M_HASHTYPE_ISHASH(m) (M_HASHTYPE_GET(m) & M_HASHTYPE_HASHPROP)
/*
- * Flags preserved when copying m_pkthdr.
+ * COS/QOS class and quality of service tags.
+ * It uses DSCP code points as base.
*/
-#define M_COPYFLAGS \
- (M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\
- M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB|M_HASHTYPEBITS)
+#define QOS_DSCP_CS0 0x00
+#define QOS_DSCP_DEF QOS_DSCP_CS0
+#define QOS_DSCP_CS1 0x20
+#define QOS_DSCP_AF11 0x28
+#define QOS_DSCP_AF12 0x30
+#define QOS_DSCP_AF13 0x38
+#define QOS_DSCP_CS2 0x40
+#define QOS_DSCP_AF21 0x48
+#define QOS_DSCP_AF22 0x50
+#define QOS_DSCP_AF23 0x58
+#define QOS_DSCP_CS3 0x60
+#define QOS_DSCP_AF31 0x68
+#define QOS_DSCP_AF32 0x70
+#define QOS_DSCP_AF33 0x78
+#define QOS_DSCP_CS4 0x80
+#define QOS_DSCP_AF41 0x88
+#define QOS_DSCP_AF42 0x90
+#define QOS_DSCP_AF43 0x98
+#define QOS_DSCP_CS5 0xa0
+#define QOS_DSCP_EF 0xb8
+#define QOS_DSCP_CS6 0xc0
+#define QOS_DSCP_CS7 0xe0
/*
- * External buffer types: identify ext_buf type.
+ * External mbuf storage buffer types.
*/
#define EXT_CLUSTER 1 /* mbuf cluster */
-#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */
-#define EXT_JUMBOP 3 /* jumbo cluster 4096 bytes */
+#ifndef __rtems__
+#define EXT_SFBUF 2 /* sendfile(2)'s sf_buf */
+#endif /* __rtems__ */
+#define EXT_JUMBOP 3 /* jumbo cluster page sized */
#define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */
#define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */
#define EXT_PACKET 6 /* mbuf+cluster from packet zone */
#define EXT_MBUF 7 /* external mbuf reference (M_IOVEC) */
-#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */
-#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */
-#define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */
-#define EXT_EXTREF 400 /* has externally maintained ref_cnt ptr */
+#ifndef __rtems__
+#define EXT_SFBUF_NOCACHE 8 /* sendfile(2)'s sf_buf not to be cached */
+#endif /* __rtems__ */
+
+#define EXT_VENDOR1 224 /* for vendor-internal use */
+#define EXT_VENDOR2 225 /* for vendor-internal use */
+#define EXT_VENDOR3 226 /* for vendor-internal use */
+#define EXT_VENDOR4 227 /* for vendor-internal use */
+
+#define EXT_EXP1 244 /* for experimental use */
+#define EXT_EXP2 245 /* for experimental use */
+#define EXT_EXP3 246 /* for experimental use */
+#define EXT_EXP4 247 /* for experimental use */
+
+#define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */
+#define EXT_MOD_TYPE 253 /* custom module's ext_buf type */
+#define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */
+#define EXT_EXTREF 255 /* has externally maintained ext_cnt ptr */
/*
- * Flags indicating hw checksum support and sw checksum requirements. This
- * field can be directly tested against if_data.ifi_hwassist.
+ * Flags for external mbuf buffer types.
+ * NB: limited to the lower 24 bits.
*/
-#define CSUM_IP 0x0001 /* will csum IP */
-#define CSUM_TCP 0x0002 /* will csum TCP */
-#define CSUM_UDP 0x0004 /* will csum UDP */
-#define CSUM_IP_FRAGS 0x0008 /* removed, left for compat */
-#define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */
-#define CSUM_TSO 0x0020 /* will do TSO */
-#define CSUM_SCTP 0x0040 /* will csum SCTP */
-#define CSUM_SCTP_IPV6 0x0080 /* will csum IPv6/SCTP */
-
-#define CSUM_IP_CHECKED 0x0100 /* did csum IP */
-#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */
-#define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */
-#define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */
-#define CSUM_SCTP_VALID 0x1000 /* SCTP checksum is valid */
-#define CSUM_UDP_IPV6 0x2000 /* will csum IPv6/UDP */
-#define CSUM_TCP_IPV6 0x4000 /* will csum IPv6/TCP */
-/* CSUM_TSO_IPV6 0x8000 will do IPv6/TSO */
-
-/* CSUM_FRAGMENT_IPV6 0x10000 will do IPv6 fragementation */
-
-#define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6)
-#define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID
+#define EXT_FLAG_EMBREF 0x000001 /* embedded ext_count */
+#define EXT_FLAG_EXTREF 0x000002 /* external ext_cnt, notyet */
+
+#define EXT_FLAG_NOFREE 0x000010 /* don't free mbuf to pool, notyet */
-#define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP)
-#define CSUM_DELAY_IP (CSUM_IP) /* Only v4, no v6 IP hdr csum */
+#define EXT_FLAG_VENDOR1 0x010000 /* for vendor-internal use */
+#define EXT_FLAG_VENDOR2 0x020000 /* for vendor-internal use */
+#define EXT_FLAG_VENDOR3 0x040000 /* for vendor-internal use */
+#define EXT_FLAG_VENDOR4 0x080000 /* for vendor-internal use */
+
+#define EXT_FLAG_EXP1 0x100000 /* for experimental use */
+#define EXT_FLAG_EXP2 0x200000 /* for experimental use */
+#define EXT_FLAG_EXP3 0x400000 /* for experimental use */
+#define EXT_FLAG_EXP4 0x800000 /* for experimental use */
+
+/*
+ * EXT flag description for use with printf(9) %b identifier.
+ */
+#define EXT_FLAG_BITS \
+ "\20\1EXT_FLAG_EMBREF\2EXT_FLAG_EXTREF\5EXT_FLAG_NOFREE" \
+ "\21EXT_FLAG_VENDOR1\22EXT_FLAG_VENDOR2\23EXT_FLAG_VENDOR3" \
+ "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \
+ "\30EXT_FLAG_EXP4"
+
+/*
+ * External reference/free functions.
+ */
+void sf_ext_free(void *, void *);
+void sf_ext_free_nocache(void *, void *);
/*
- * mbuf types.
+ * Flags indicating checksum, segmentation and other offload work to be
+ * done, or already done, by hardware or lower layers. It is split into
+ * separate inbound and outbound flags.
+ *
+ * Outbound flags that are set by upper protocol layers requesting lower
+ * layers, or ideally the hardware, to perform these offloading tasks.
+ * For outbound packets this field and its flags can be directly tested
+ * against ifnet if_hwassist.
+ */
+#define CSUM_IP 0x00000001 /* IP header checksum offload */
+#define CSUM_IP_UDP 0x00000002 /* UDP checksum offload */
+#define CSUM_IP_TCP 0x00000004 /* TCP checksum offload */
+#define CSUM_IP_SCTP 0x00000008 /* SCTP checksum offload */
+#define CSUM_IP_TSO 0x00000010 /* TCP segmentation offload */
+#define CSUM_IP_ISCSI 0x00000020 /* iSCSI checksum offload */
+
+#define CSUM_IP6_UDP 0x00000200 /* UDP checksum offload */
+#define CSUM_IP6_TCP 0x00000400 /* TCP checksum offload */
+#define CSUM_IP6_SCTP 0x00000800 /* SCTP checksum offload */
+#define CSUM_IP6_TSO 0x00001000 /* TCP segmentation offload */
+#define CSUM_IP6_ISCSI 0x00002000 /* iSCSI checksum offload */
+
+/* Inbound checksum support where the checksum was verified by hardware. */
+#define CSUM_L3_CALC 0x01000000 /* calculated layer 3 csum */
+#define CSUM_L3_VALID 0x02000000 /* checksum is correct */
+#define CSUM_L4_CALC 0x04000000 /* calculated layer 4 csum */
+#define CSUM_L4_VALID 0x08000000 /* checksum is correct */
+#define CSUM_L5_CALC 0x10000000 /* calculated layer 5 csum */
+#define CSUM_L5_VALID 0x20000000 /* checksum is correct */
+#define CSUM_COALESED 0x40000000 /* contains merged segments */
+
+/*
+ * CSUM flag description for use with printf(9) %b identifier.
+ */
+#define CSUM_BITS \
+ "\20\1CSUM_IP\2CSUM_IP_UDP\3CSUM_IP_TCP\4CSUM_IP_SCTP\5CSUM_IP_TSO" \
+ "\6CSUM_IP_ISCSI" \
+ "\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \
+ "\16CSUM_IP6_ISCSI" \
+ "\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \
+ "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESED"
+
+/* CSUM flags compatibility mappings. */
+#define CSUM_IP_CHECKED CSUM_L3_CALC
+#define CSUM_IP_VALID CSUM_L3_VALID
+#define CSUM_DATA_VALID CSUM_L4_VALID
+#define CSUM_PSEUDO_HDR CSUM_L4_CALC
+#define CSUM_SCTP_VALID CSUM_L4_VALID
+#define CSUM_DELAY_DATA (CSUM_TCP|CSUM_UDP)
+#define CSUM_DELAY_IP CSUM_IP /* Only v4, no v6 IP hdr csum */
+#define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6)
+#define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID
+#define CSUM_TCP CSUM_IP_TCP
+#define CSUM_UDP CSUM_IP_UDP
+#define CSUM_SCTP CSUM_IP_SCTP
+#define CSUM_TSO (CSUM_IP_TSO|CSUM_IP6_TSO)
+#define CSUM_UDP_IPV6 CSUM_IP6_UDP
+#define CSUM_TCP_IPV6 CSUM_IP6_TCP
+#define CSUM_SCTP_IPV6 CSUM_IP6_SCTP
+
+/*
+ * mbuf types describing the content of the mbuf (including external storage).
*/
#define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */
#define MT_DATA 1 /* dynamic (data) allocation */
#define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */
+
+#define MT_VENDOR1 4 /* for vendor-internal use */
+#define MT_VENDOR2 5 /* for vendor-internal use */
+#define MT_VENDOR3 6 /* for vendor-internal use */
+#define MT_VENDOR4 7 /* for vendor-internal use */
+
#define MT_SONAME 8 /* socket name */
+
+#define MT_EXP1 9 /* for experimental use */
+#define MT_EXP2 10 /* for experimental use */
+#define MT_EXP3 11 /* for experimental use */
+#define MT_EXP4 12 /* for experimental use */
+
#define MT_CONTROL 14 /* extra-data protocol message */
#define MT_OOBDATA 15 /* expedited data */
#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */
@@ -316,55 +533,6 @@ struct mbuf {
#define MT_NOINIT 255 /* Not a type but a flag to allocate
a non-initialized mbuf */
-#define MB_NOTAGS 0x1UL /* no tags attached to mbuf */
-
-/*
- * General mbuf allocator statistics structure.
- *
- * Many of these statistics are no longer used; we instead track many
- * allocator statistics through UMA's built in statistics mechanism.
- */
-struct mbstat {
- u_long m_mbufs; /* XXX */
- u_long m_mclusts; /* XXX */
-
- u_long m_drain; /* times drained protocols for space */
- u_long m_mcfail; /* XXX: times m_copym failed */
- u_long m_mpfail; /* XXX: times m_pullup failed */
- u_long m_msize; /* length of an mbuf */
- u_long m_mclbytes; /* length of an mbuf cluster */
- u_long m_minclsize; /* min length of data to allocate a cluster */
- u_long m_mlen; /* length of data in an mbuf */
- u_long m_mhlen; /* length of data in a header mbuf */
-
- /* Number of mbtypes (gives # elems in mbtypes[] array) */
- short m_numtypes;
-
- /* XXX: Sendfile stats should eventually move to their own struct */
- u_long sf_iocnt; /* times sendfile had to do disk I/O */
- u_long sf_allocfail; /* times sfbuf allocation failed */
- u_long sf_allocwait; /* times sfbuf allocation had to wait */
-};
-
-/*
- * Flags specifying how an allocation should be made.
- *
- * The flag to use is as follows:
- * - M_NOWAIT (M_DONTWAIT) from an interrupt handler to not block allocation.
- * - M_WAITOK (M_WAIT) from wherever it is safe to block.
- *
- * M_DONTWAIT/M_NOWAIT means that we will not block the thread explicitly and
- * if we cannot allocate immediately we may return NULL, whereas
- * M_WAIT/M_WAITOK means that if we cannot allocate resources we
- * will block until they are available, and thus never return NULL.
- *
- * XXX Eventually just phase this out to use M_WAITOK/M_NOWAIT.
- */
-#define MBTOM(how) (how)
-#define M_DONTWAIT M_NOWAIT
-#define M_TRYWAIT M_WAITOK
-#define M_WAIT M_WAITOK
-
/*
* String names of mbuf-related UMA(9) and malloc(9) types. Exposed to
* !_KERNEL so that monitoring tools can look up the zones with
@@ -402,23 +570,53 @@ extern uma_zone_t zone_pack;
extern uma_zone_t zone_jumbop;
extern uma_zone_t zone_jumbo9;
extern uma_zone_t zone_jumbo16;
-extern uma_zone_t zone_ext_refcnt;
-
-static __inline struct mbuf *m_getcl(int how, short type, int flags);
-static __inline struct mbuf *m_get(int how, short type);
-static __inline struct mbuf *m_gethdr(int how, short type);
-static __inline struct mbuf *m_getjcl(int how, short type, int flags,
- int size);
-static __inline struct mbuf *m_getclr(int how, short type); /* XXX */
-static __inline int m_init(struct mbuf *m, uma_zone_t zone,
- int size, int how, short type, int flags);
-static __inline struct mbuf *m_free(struct mbuf *m);
-static __inline void m_clget(struct mbuf *m, int how);
-static __inline void *m_cljget(struct mbuf *m, int how, int size);
-static __inline void m_chtype(struct mbuf *m, short new_type);
-void mb_free_ext(struct mbuf *);
-static __inline struct mbuf *m_last(struct mbuf *m);
-int m_pkthdr_init(struct mbuf *m, int how);
+
+void mb_dupcl(struct mbuf *, struct mbuf *);
+void mb_free_ext(struct mbuf *);
+void m_adj(struct mbuf *, int);
+int m_apply(struct mbuf *, int, int,
+ int (*)(void *, void *, u_int), void *);
+int m_append(struct mbuf *, int, c_caddr_t);
+void m_cat(struct mbuf *, struct mbuf *);
+void m_catpkt(struct mbuf *, struct mbuf *);
+int m_clget(struct mbuf *m, int how);
+void *m_cljget(struct mbuf *m, int how, int size);
+struct mbuf *m_collapse(struct mbuf *, int, int);
+void m_copyback(struct mbuf *, int, int, c_caddr_t);
+void m_copydata(const struct mbuf *, int, int, caddr_t);
+struct mbuf *m_copym(struct mbuf *, int, int, int);
+struct mbuf *m_copypacket(struct mbuf *, int);
+void m_copy_pkthdr(struct mbuf *, struct mbuf *);
+struct mbuf *m_copyup(struct mbuf *, int, int);
+struct mbuf *m_defrag(struct mbuf *, int);
+void m_demote_pkthdr(struct mbuf *);
+void m_demote(struct mbuf *, int, int);
+struct mbuf *m_devget(char *, int, int, struct ifnet *,
+ void (*)(char *, caddr_t, u_int));
+struct mbuf *m_dup(const struct mbuf *, int);
+int m_dup_pkthdr(struct mbuf *, const struct mbuf *, int);
+void m_extadd(struct mbuf *, caddr_t, u_int,
+ void (*)(struct mbuf *, void *, void *), void *, void *,
+ int, int);
+u_int m_fixhdr(struct mbuf *);
+struct mbuf *m_fragment(struct mbuf *, int, int);
+void m_freem(struct mbuf *);
+struct mbuf *m_get2(int, int, short, int);
+struct mbuf *m_getjcl(int, short, int, int);
+struct mbuf *m_getm2(struct mbuf *, int, int, short, int);
+struct mbuf *m_getptr(struct mbuf *, int, int *);
+u_int m_length(struct mbuf *, struct mbuf **);
+int m_mbuftouio(struct uio *, struct mbuf *, int);
+void m_move_pkthdr(struct mbuf *, struct mbuf *);
+int m_pkthdr_init(struct mbuf *, int);
+struct mbuf *m_prepend(struct mbuf *, int, int);
+void m_print(const struct mbuf *, int);
+struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
+struct mbuf *m_pullup(struct mbuf *, int);
+int m_sanity(struct mbuf *, int);
+struct mbuf *m_split(struct mbuf *, int, int);
+struct mbuf *m_uiotombuf(struct uio *, int, int, int, int);
+struct mbuf *m_unshare(struct mbuf *, int);
static __inline int
m_gettype(int size)
@@ -444,7 +642,7 @@ m_gettype(int size)
type = EXT_JUMBO16;
break;
default:
- panic("%s: m_getjcl: invalid cluster size", __func__);
+ panic("%s: invalid cluster size %d", __func__, size);
}
return (type);
@@ -455,7 +653,7 @@ m_gettype(int size)
*/
static __inline void
m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt,
- void (*freef)(void *, void *), void *arg1, void *arg2)
+ void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2)
{
KASSERT(ref_cnt != NULL, ("%s: ref_cnt not provided", __func__));
@@ -463,13 +661,14 @@ m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt,
atomic_add_int((int*)ref_cnt, 1);
m->m_flags |= M_EXT;
m->m_ext.ext_buf = buf;
- m->m_ext.ref_cnt = ref_cnt;
+ m->m_ext.ext_cnt = ref_cnt;
m->m_data = m->m_ext.ext_buf;
m->m_ext.ext_size = size;
m->m_ext.ext_free = freef;
m->m_ext.ext_arg1 = arg1;
m->m_ext.ext_arg2 = arg2;
m->m_ext.ext_type = EXT_EXTREF;
+ m->m_ext.ext_flags = 0;
}
static __inline uma_zone_t
@@ -478,9 +677,6 @@ m_getzone(int size)
uma_zone_t zone;
switch (size) {
- case MSIZE:
- zone = zone_mbuf;
- break;
case MCLBYTES:
zone = zone_clust;
break;
@@ -496,7 +692,7 @@ m_getzone(int size)
zone = zone_jumbo16;
break;
default:
- panic("%s: m_getjcl: invalid cluster type", __func__);
+ panic("%s: invalid cluster size %d", __func__, size);
}
return (zone);
@@ -510,8 +706,7 @@ m_getzone(int size)
* should go away with constant propagation for !MGETHDR.
*/
static __inline int
-m_init(struct mbuf *m, uma_zone_t zone, int size, int how, short type,
- int flags)
+m_init(struct mbuf *m, int how, short type, int flags)
{
int error;
@@ -521,182 +716,81 @@ m_init(struct mbuf *m, uma_zone_t zone, int size, int how, short type,
m->m_len = 0;
m->m_flags = flags;
m->m_type = type;
- if (flags & M_PKTHDR) {
- if ((error = m_pkthdr_init(m, how)) != 0)
- return (error);
- }
+ if (flags & M_PKTHDR)
+ error = m_pkthdr_init(m, how);
+ else
+ error = 0;
- return (0);
+ MBUF_PROBE5(m__init, m, how, type, flags, error);
+ return (error);
}
static __inline struct mbuf *
m_get(int how, short type)
{
- struct mb_args args;
-
- args.flags = 0;
- args.type = type;
- return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how)));
-}
-
-/*
- * XXX This should be deprecated, very little use.
- */
-static __inline struct mbuf *
-m_getclr(int how, short type)
-{
struct mbuf *m;
struct mb_args args;
args.flags = 0;
args.type = type;
- m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, how);
- if (m != NULL)
- bzero(m->m_data, MLEN);
+ m = uma_zalloc_arg(zone_mbuf, &args, how);
+ MBUF_PROBE3(m__get, how, type, m);
return (m);
}
static __inline struct mbuf *
m_gethdr(int how, short type)
{
+ struct mbuf *m;
struct mb_args args;
args.flags = M_PKTHDR;
args.type = type;
- return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how)));
+ m = uma_zalloc_arg(zone_mbuf, &args, how);
+ MBUF_PROBE3(m__gethdr, how, type, m);
+ return (m);
}
static __inline struct mbuf *
m_getcl(int how, short type, int flags)
{
+ struct mbuf *m;
struct mb_args args;
args.flags = flags;
args.type = type;
- return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how)));
-}
-
-/*
- * m_getjcl() returns an mbuf with a cluster of the specified size attached.
- * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
- *
- * XXX: This is rather large, should be real function maybe.
- */
-static __inline struct mbuf *
-m_getjcl(int how, short type, int flags, int size)
-{
- struct mb_args args;
- struct mbuf *m, *n;
- uma_zone_t zone;
-
- if (size == MCLBYTES)
- return m_getcl(how, type, flags);
-
- args.flags = flags;
- args.type = type;
-
- m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, how);
- if (m == NULL)
- return (NULL);
-
- zone = m_getzone(size);
- n = (struct mbuf *)uma_zalloc_arg(zone, m, how);
- if (n == NULL) {
- uma_zfree(zone_mbuf, m);
- return (NULL);
- }
+ m = uma_zalloc_arg(zone_pack, &args, how);
+ MBUF_PROBE4(m__getcl, how, type, flags, m);
return (m);
}
-static __inline void
-m_free_fast(struct mbuf *m)
-{
-#ifdef INVARIANTS
- if (m->m_flags & M_PKTHDR)
- KASSERT(SLIST_EMPTY(&m->m_pkthdr.tags), ("doing fast free of mbuf with tags"));
-#endif
-
- uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS);
-}
-
-static __inline struct mbuf *
-m_free(struct mbuf *m)
-{
- struct mbuf *n = m->m_next;
-
- if (m->m_flags & M_EXT)
- mb_free_ext(m);
- else if ((m->m_flags & M_NOFREE) == 0)
- uma_zfree(zone_mbuf, m);
- return (n);
-}
-
-static __inline void
-m_clget(struct mbuf *m, int how)
-{
-
- if (m->m_flags & M_EXT)
- printf("%s: %p mbuf already has cluster\n", __func__, m);
- m->m_ext.ext_buf = (char *)NULL;
- uma_zalloc_arg(zone_clust, m, how);
- /*
- * On a cluster allocation failure, drain the packet zone and retry,
- * we might be able to loosen a few clusters up on the drain.
- */
- if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
- zone_drain(zone_pack);
- uma_zalloc_arg(zone_clust, m, how);
- }
-}
-
/*
- * m_cljget() is different from m_clget() as it can allocate clusters without
- * attaching them to an mbuf. In that case the return value is the pointer
- * to the cluster of the requested size. If an mbuf was specified, it gets
- * the cluster attached to it and the return value can be safely ignored.
- * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
+ * XXX: m_cljset() is a dangerous API. One must attach only a new,
+ * unreferenced cluster to an mbuf(9). It is not possible to assert
+ * that, so the responsibility rests solely with users of the API.
*/
-static __inline void *
-m_cljget(struct mbuf *m, int how, int size)
-{
- uma_zone_t zone;
-
- if (m && m->m_flags & M_EXT)
- printf("%s: %p mbuf already has cluster\n", __func__, m);
- if (m != NULL)
- m->m_ext.ext_buf = NULL;
-
- zone = m_getzone(size);
- return (uma_zalloc_arg(zone, m, how));
-}
-
static __inline void
m_cljset(struct mbuf *m, void *cl, int type)
{
- uma_zone_t zone;
int size;
switch (type) {
case EXT_CLUSTER:
size = MCLBYTES;
- zone = zone_clust;
break;
#if MJUMPAGESIZE != MCLBYTES
case EXT_JUMBOP:
size = MJUMPAGESIZE;
- zone = zone_jumbop;
break;
#endif
case EXT_JUMBO9:
size = MJUM9BYTES;
- zone = zone_jumbo9;
break;
case EXT_JUMBO16:
size = MJUM16BYTES;
- zone = zone_jumbo16;
break;
default:
- panic("unknown cluster type");
+ panic("%s: unknown cluster type %d", __func__, type);
break;
}
@@ -705,9 +799,10 @@ m_cljset(struct mbuf *m, void *cl, int type)
m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL;
m->m_ext.ext_size = size;
m->m_ext.ext_type = type;
- m->m_ext.ref_cnt = (volatile u_int *) uma_find_refcnt(zone, cl);
+ m->m_ext.ext_flags = EXT_FLAG_EMBREF;
+ m->m_ext.ext_count = 1;
m->m_flags |= M_EXT;
-
+ MBUF_PROBE3(m__cljset, m, cl, type);
}
static __inline void
@@ -717,6 +812,16 @@ m_chtype(struct mbuf *m, short new_type)
m->m_type = new_type;
}
+static __inline void
+m_clrprotoflags(struct mbuf *m)
+{
+
+ while (m) {
+ m->m_flags &= ~M_PROTOFLAGS;
+ m = m->m_next;
+ }
+}
+
static __inline struct mbuf *
m_last(struct mbuf *m)
{
@@ -726,14 +831,14 @@ m_last(struct mbuf *m)
return (m);
}
-extern void (*m_addr_chg_pf_p)(struct mbuf *m);
-
-static __inline void
-m_addr_changed(struct mbuf *m)
+static inline u_int
+m_extrefcnt(struct mbuf *m)
{
- if (m_addr_chg_pf_p)
- m_addr_chg_pf_p(m);
+ KASSERT(m->m_flags & M_EXT, ("%s: M_EXT missing", __func__));
+
+ return ((m->m_ext.ext_flags & EXT_FLAG_EMBREF) ? m->m_ext.ext_count :
+ *m->m_ext.ext_cnt);
}
/*
@@ -745,7 +850,8 @@ m_addr_changed(struct mbuf *m)
#define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type)))
#define MCLGET(m, how) m_clget((m), (how))
#define MEXTADD(m, buf, size, free, arg1, arg2, flags, type) \
- m_extadd((m), (caddr_t)(buf), (size), (free),(arg1),(arg2),(flags), (type))
+ m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2), \
+ (flags), (type))
#define m_getm(m, len, how, type) \
m_getm2((m), (len), (how), (type), M_PKTHDR)
@@ -756,7 +862,7 @@ m_addr_changed(struct mbuf *m)
*/
#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \
(!(((m)->m_flags & M_EXT)) || \
- (*((m)->m_ext.ref_cnt) == 1)) ) \
+ (m_extrefcnt(m) == 1)))
/* Check if the supplied mbuf has a packet header, or else panic. */
#define M_ASSERTPKTHDR(m) \
@@ -773,28 +879,50 @@ m_addr_changed(struct mbuf *m)
("%s: attempted use of a free mbuf!", __func__))
/*
- * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place an
- * object of the specified size at the end of the mbuf, longword aligned.
+ * Return the address of the start of the buffer associated with an mbuf,
+ * handling external storage, packet-header mbufs, and regular data mbufs.
*/
-#define M_ALIGN(m, len) do { \
- KASSERT(!((m)->m_flags & (M_PKTHDR|M_EXT)), \
- ("%s: M_ALIGN not normal mbuf", __func__)); \
- KASSERT((m)->m_data == (m)->m_dat, \
- ("%s: M_ALIGN not a virgin mbuf", __func__)); \
- (m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \
-} while (0)
+#define M_START(m) \
+ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \
+ ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \
+ &(m)->m_dat[0])
/*
- * As above, for mbufs allocated with m_gethdr/MGETHDR or initialized by
- * M_DUP/MOVE_PKTHDR.
+ * Return the size of the buffer associated with an mbuf, handling external
+ * storage, packet-header mbufs, and regular data mbufs.
*/
-#define MH_ALIGN(m, len) do { \
- KASSERT((m)->m_flags & M_PKTHDR && !((m)->m_flags & M_EXT), \
- ("%s: MH_ALIGN not PKTHDR mbuf", __func__)); \
- KASSERT((m)->m_data == (m)->m_pktdat, \
- ("%s: MH_ALIGN not a virgin mbuf", __func__)); \
- (m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \
-} while (0)
+#define M_SIZE(m) \
+ (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size : \
+ ((m)->m_flags & M_PKTHDR) ? MHLEN : \
+ MLEN)
+
+/*
+ * Set the m_data pointer of a newly allocated mbuf to place an object of the
+ * specified size at the end of the mbuf, longword aligned.
+ *
+ * NB: Historically, we had M_ALIGN(), MH_ALIGN(), and MEXT_ALIGN() as
+ * separate macros, each asserting that it was called at the proper moment.
+ * This required callers to test the storage type themselves and call the
+ * right one. Rather than require callers to be aware of those layout
+ * decisions, we centralize the choice here.
+ */
+static __inline void
+m_align(struct mbuf *m, int len)
+{
+#ifdef INVARIANTS
+ const char *msg = "%s: not a virgin mbuf";
+#endif
+ int adjust;
+
+ KASSERT(m->m_data == M_START(m), (msg, __func__));
+
+ adjust = M_SIZE(m) - len;
+ m->m_data += adjust &~ (sizeof(long)-1);
+}
+
+#define M_ALIGN(m, len) m_align(m, len)
+#define MH_ALIGN(m, len) m_align(m, len)
+#define MEXT_ALIGN(m, len) m_align(m, len)
/*
* Compute the amount of space available before the current start of data in
@@ -802,24 +930,27 @@ m_addr_changed(struct mbuf *m)
*
* The M_WRITABLE() is a temporary, conservative safety measure: the burden
* of checking writability of the mbuf data area rests solely with the caller.
+ *
+ * NB: In previous versions, M_LEADINGSPACE() would only check M_WRITABLE()
+ * for mbufs with external storage. We now allow mbuf-embedded data to be
+ * read-only as well.
*/
#define M_LEADINGSPACE(m) \
- ((m)->m_flags & M_EXT ? \
- (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0): \
- (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \
- (m)->m_data - (m)->m_dat)
+ (M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0)
/*
* Compute the amount of space available after the end of data in an mbuf.
*
* The M_WRITABLE() is a temporary, conservative safety measure: the burden
* of checking writability of the mbuf data area rests solely with the caller.
+ *
+ * NB: In previous versions, M_TRAILINGSPACE() would only check M_WRITABLE()
+ * for mbufs with external storage. We now allow mbuf-embedded data to be
+ * read-only as well.
*/
#define M_TRAILINGSPACE(m) \
- ((m)->m_flags & M_EXT ? \
- (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size \
- - ((m)->m_data + (m)->m_len) : 0) : \
- &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len))
+ (M_WRITABLE(m) ? \
+ ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0)
/*
* Arrange to prepend space of size plen to mbuf m. If a new mbuf must be
@@ -853,57 +984,14 @@ m_addr_changed(struct mbuf *m)
#define M_COPYALL 1000000000
/* Compatibility with 4.3. */
-#define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT)
+#define m_copy(m, o, l) m_copym((m), (o), (l), M_NOWAIT)
extern int max_datalen; /* MHLEN - max_hdr */
extern int max_hdr; /* Largest link + protocol header */
extern int max_linkhdr; /* Largest link-level header */
extern int max_protohdr; /* Largest protocol header */
-extern struct mbstat mbstat; /* General mbuf stats/infos */
extern int nmbclusters; /* Maximum number of clusters */
-struct uio;
-
-void m_adj(struct mbuf *, int);
-void m_align(struct mbuf *, int);
-int m_apply(struct mbuf *, int, int,
- int (*)(void *, void *, u_int), void *);
-int m_append(struct mbuf *, int, c_caddr_t);
-void m_cat(struct mbuf *, struct mbuf *);
-void m_extadd(struct mbuf *, caddr_t, u_int,
- void (*)(void *, void *), void *, void *, int, int);
-struct mbuf *m_collapse(struct mbuf *, int, int);
-void m_copyback(struct mbuf *, int, int, c_caddr_t);
-void m_copydata(const struct mbuf *, int, int, caddr_t);
-struct mbuf *m_copym(struct mbuf *, int, int, int);
-struct mbuf *m_copymdata(struct mbuf *, struct mbuf *,
- int, int, int, int);
-struct mbuf *m_copypacket(struct mbuf *, int);
-void m_copy_pkthdr(struct mbuf *, struct mbuf *);
-struct mbuf *m_copyup(struct mbuf *n, int len, int dstoff);
-struct mbuf *m_defrag(struct mbuf *, int);
-void m_demote(struct mbuf *, int);
-struct mbuf *m_devget(char *, int, int, struct ifnet *,
- void (*)(char *, caddr_t, u_int));
-struct mbuf *m_dup(struct mbuf *, int);
-int m_dup_pkthdr(struct mbuf *, struct mbuf *, int);
-u_int m_fixhdr(struct mbuf *);
-struct mbuf *m_fragment(struct mbuf *, int, int);
-void m_freem(struct mbuf *);
-struct mbuf *m_getm2(struct mbuf *, int, int, short, int);
-struct mbuf *m_getptr(struct mbuf *, int, int *);
-u_int m_length(struct mbuf *, struct mbuf **);
-int m_mbuftouio(struct uio *, struct mbuf *, int);
-void m_move_pkthdr(struct mbuf *, struct mbuf *);
-struct mbuf *m_prepend(struct mbuf *, int, int);
-void m_print(const struct mbuf *, int);
-struct mbuf *m_pulldown(struct mbuf *, int, int, int *);
-struct mbuf *m_pullup(struct mbuf *, int);
-int m_sanity(struct mbuf *, int);
-struct mbuf *m_split(struct mbuf *, int, int);
-struct mbuf *m_uiotombuf(struct uio *, int, int, int, int);
-struct mbuf *m_unshare(struct mbuf *, int how);
-
/*-
* Network packets may have annotations attached by affixing a list of
* "packet tags" to the pkthdr structure. Packet tags are dynamically
@@ -975,7 +1063,7 @@ struct mbuf *m_unshare(struct mbuf *, int how);
#define PACKET_TAG_DIVERT 17 /* divert info */
#define PACKET_TAG_IPFORWARD 18 /* ipforward info */
#define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */
-#define PACKET_TAG_PF 21 /* PF + ALTQ information */
+#define PACKET_TAG_PF (21 | MTAG_PERSISTENT) /* PF/ALTQ information */
#define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */
#define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */
#define PACKET_TAG_CARP 28 /* CARP info */
@@ -991,7 +1079,7 @@ void m_tag_delete_chain(struct mbuf *, struct m_tag *);
void m_tag_free_default(struct m_tag *);
struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *);
struct m_tag *m_tag_copy(struct m_tag *, int);
-int m_tag_copy_chain(struct mbuf *, struct mbuf *, int);
+int m_tag_copy_chain(struct mbuf *, const struct mbuf *, int);
void m_tag_delete_nonpersistent(struct mbuf *);
/*
@@ -1043,7 +1131,7 @@ m_tag_first(struct mbuf *m)
* Return the next tag in the list of tags associated with an mbuf.
*/
static __inline struct m_tag *
-m_tag_next(struct mbuf *m, struct m_tag *t)
+m_tag_next(struct mbuf *m __unused, struct m_tag *t)
{
return (SLIST_NEXT(t, m_tag_link));
@@ -1085,20 +1173,43 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start)
m_tag_locate(m, MTAG_ABI_COMPAT, type, start));
}
-/* XXX temporary FIB methods probably eventually use tags.*/
-#define M_FIBSHIFT 28
-#define M_FIBMASK 0x0F
+static __inline struct mbuf *
+m_free(struct mbuf *m)
+{
+ struct mbuf *n = m->m_next;
-/* get the fib from an mbuf and if it is not set, return the default */
-#define M_GETFIB(_m) \
- ((((_m)->m_flags & M_FIB) >> M_FIBSHIFT) & M_FIBMASK)
+ MBUF_PROBE1(m__free, m);
+ if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
+ m_tag_delete_chain(m, NULL);
+ if (m->m_flags & M_EXT)
+ mb_free_ext(m);
+ else if ((m->m_flags & M_NOFREE) == 0)
+ uma_zfree(zone_mbuf, m);
+ return (n);
+}
+
+static __inline int
+rt_m_getfib(struct mbuf *m)
+{
+ KASSERT(m->m_flags & M_PKTHDR , ("Attempt to get FIB from non header mbuf."));
+ return (m->m_pkthdr.fibnum);
+}
+
+#define M_GETFIB(_m) rt_m_getfib(_m)
#define M_SETFIB(_m, _fib) do { \
- _m->m_flags &= ~M_FIB; \
- _m->m_flags |= (((_fib) << M_FIBSHIFT) & M_FIB); \
+ KASSERT((_m)->m_flags & M_PKTHDR, ("Attempt to set FIB on non header mbuf.")); \
+ ((_m)->m_pkthdr.fibnum) = (_fib); \
} while (0)
-#endif /* _KERNEL */
+/* Flags passed as the first argument to m_ether_tcpip_hash(). */
+#define MBUF_HASHFLAG_L2 (1 << 2)
+#define MBUF_HASHFLAG_L3 (1 << 3)
+#define MBUF_HASHFLAG_L4 (1 << 4)
+
+/* mbuf hashing helper routines */
+uint32_t m_ether_tcpip_hash_init(void);
+uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t);
#ifdef MBUF_PROFILING
void m_profile(struct mbuf *m);
@@ -1107,5 +1218,103 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start)
#define M_PROFILE(m)
#endif
+struct mbufq {
+ STAILQ_HEAD(, mbuf) mq_head;
+ int mq_len;
+ int mq_maxlen;
+};
+
+static inline void
+mbufq_init(struct mbufq *mq, int maxlen)
+{
+
+ STAILQ_INIT(&mq->mq_head);
+ mq->mq_maxlen = maxlen;
+ mq->mq_len = 0;
+}
+
+static inline struct mbuf *
+mbufq_flush(struct mbufq *mq)
+{
+ struct mbuf *m;
+
+ m = STAILQ_FIRST(&mq->mq_head);
+ STAILQ_INIT(&mq->mq_head);
+ mq->mq_len = 0;
+ return (m);
+}
+
+static inline void
+mbufq_drain(struct mbufq *mq)
+{
+ struct mbuf *m, *n;
+
+ n = mbufq_flush(mq);
+ while ((m = n) != NULL) {
+ n = STAILQ_NEXT(m, m_stailqpkt);
+ m_freem(m);
+ }
+}
+
+static inline struct mbuf *
+mbufq_first(const struct mbufq *mq)
+{
+
+ return (STAILQ_FIRST(&mq->mq_head));
+}
+
+static inline struct mbuf *
+mbufq_last(const struct mbufq *mq)
+{
+
+ return (STAILQ_LAST(&mq->mq_head, mbuf, m_stailqpkt));
+}
+
+static inline int
+mbufq_full(const struct mbufq *mq)
+{
+
+ return (mq->mq_len >= mq->mq_maxlen);
+}
+
+static inline int
+mbufq_len(const struct mbufq *mq)
+{
+ return (mq->mq_len);
+}
+
+static inline int
+mbufq_enqueue(struct mbufq *mq, struct mbuf *m)
+{
+
+ if (mbufq_full(mq))
+ return (ENOBUFS);
+ STAILQ_INSERT_TAIL(&mq->mq_head, m, m_stailqpkt);
+ mq->mq_len++;
+ return (0);
+}
+
+static inline struct mbuf *
+mbufq_dequeue(struct mbufq *mq)
+{
+ struct mbuf *m;
+
+ m = STAILQ_FIRST(&mq->mq_head);
+ if (m) {
+ STAILQ_REMOVE_HEAD(&mq->mq_head, m_stailqpkt);
+ m->m_nextpkt = NULL;
+ mq->mq_len--;
+ }
+ return (m);
+}
+
+static inline void
+mbufq_prepend(struct mbufq *mq, struct mbuf *m)
+{
+
+ STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt);
+ mq->mq_len++;
+}
+#endif /* _KERNEL */
#endif /* !_SYS_MBUF_H_ */