diff options
Diffstat (limited to 'freebsd/sys/sys/mbuf.h')
-rw-r--r-- | freebsd/sys/sys/mbuf.h | 1077 |
1 files changed, 643 insertions, 434 deletions
diff --git a/freebsd/sys/sys/mbuf.h b/freebsd/sys/sys/mbuf.h index b6d58a25..95194e0b 100644 --- a/freebsd/sys/sys/mbuf.h +++ b/freebsd/sys/sys/mbuf.h @@ -44,6 +44,32 @@ #endif #endif +#ifdef _KERNEL +#include <sys/sdt.h> + +#define MBUF_PROBE1(probe, arg0) \ + SDT_PROBE1(sdt, , , probe, arg0) +#define MBUF_PROBE2(probe, arg0, arg1) \ + SDT_PROBE2(sdt, , , probe, arg0, arg1) +#define MBUF_PROBE3(probe, arg0, arg1, arg2) \ + SDT_PROBE3(sdt, , , probe, arg0, arg1, arg2) +#define MBUF_PROBE4(probe, arg0, arg1, arg2, arg3) \ + SDT_PROBE4(sdt, , , probe, arg0, arg1, arg2, arg3) +#define MBUF_PROBE5(probe, arg0, arg1, arg2, arg3, arg4) \ + SDT_PROBE5(sdt, , , probe, arg0, arg1, arg2, arg3, arg4) + +SDT_PROBE_DECLARE(sdt, , , m__init); +SDT_PROBE_DECLARE(sdt, , , m__gethdr); +SDT_PROBE_DECLARE(sdt, , , m__get); +SDT_PROBE_DECLARE(sdt, , , m__getcl); +SDT_PROBE_DECLARE(sdt, , , m__clget); +SDT_PROBE_DECLARE(sdt, , , m__cljget); +SDT_PROBE_DECLARE(sdt, , , m__cljset); +SDT_PROBE_DECLARE(sdt, , , m__free); +SDT_PROBE_DECLARE(sdt, , , m__freem); + +#endif /* _KERNEL */ + /* * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead. * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in @@ -52,11 +78,24 @@ * stored. Additionally, it is possible to allocate a separate buffer * externally and attach it to the mbuf in a way similar to that of mbuf * clusters. + * + * NB: These calculation do not take actual compiler-induced alignment and + * padding inside the complete struct mbuf into account. Appropriate + * attention is required when changing members of struct mbuf. + * + * MLEN is data length in a normal mbuf. + * MHLEN is data length in an mbuf with pktheader. + * MINCLSIZE is a smallest amount of data that should be put into cluster. + * + * Compile-time assertions in uipc_mbuf.c test these values to ensure that + * they are sensible. */ -#define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ -#define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ -#define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */ -#define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */ +struct mbuf; +#define MHSIZE offsetof(struct mbuf, m_dat) +#define MPKTHSIZE offsetof(struct mbuf, m_pktdat) +#define MLEN ((int)(MSIZE - MHSIZE)) +#define MHLEN ((int)(MSIZE - MPKTHSIZE)) +#define MINCLSIZE (MHLEN + 1) #ifdef _KERNEL /*- @@ -64,8 +103,10 @@ * type: * * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. + * mtodo(m, o) -- Same as above but with offset 'o' into data. */ #define mtod(m, t) ((t)((m)->m_data)) +#define mtodo(m, o) ((void *)(((m)->m_data) + (o))) /* * Argument structure passed to UMA routines during mbuf and packet @@ -77,25 +118,6 @@ struct mb_args { }; #endif /* _KERNEL */ -#if defined(__LP64__) -#define M_HDR_PAD 6 -#else -#define M_HDR_PAD 2 -#endif - -/* - * Header present at the beginning of every mbuf. - */ -struct m_hdr { - struct mbuf *mh_next; /* next buffer in chain */ - struct mbuf *mh_nextpkt; /* next chain in queue/record */ - caddr_t mh_data; /* location of data */ - int mh_len; /* amount of data in this mbuf */ - int mh_flags; /* flags; see below */ - short mh_type; /* type of data in this mbuf */ - uint8_t pad[M_HDR_PAD];/* word align */ -}; - /* * Packet tag structure (see below for details). */ @@ -109,40 +131,73 @@ struct m_tag { /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. + * Size ILP32: 48 + * LP64: 56 + * Compile-time assertions in uipc_mbuf.c test these values to ensure that + * they are correct. */ struct pkthdr { struct ifnet *rcvif; /* rcv interface */ - /* variables for ip and tcp reassembly */ - void *header; /* pointer to packet header */ - int len; /* total packet length */ - uint32_t flowid; /* packet's 4-tuple system - * flow identifier - */ - /* variables for hardware checksum */ - int csum_flags; /* flags regarding checksum */ - int csum_data; /* data field used by csum routines */ - u_int16_t tso_segsz; /* TSO segment size */ - union { - u_int16_t vt_vtag; /* Ethernet 802.1p+q vlan tag */ - u_int16_t vt_nrecs; /* # of IGMPv3 records in this chain */ - } PH_vt; SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ + int32_t len; /* total packet length */ + + /* Layer crossing persistent information. */ + uint32_t flowid; /* packet's 4-tuple system */ + uint64_t csum_flags; /* checksum and offload features */ + uint16_t fibnum; /* this packet should use this fib */ + uint8_t cosqos; /* class/quality of service */ + uint8_t rsstype; /* hash type */ + uint8_t l2hlen; /* layer 2 header length */ + uint8_t l3hlen; /* layer 3 header length */ + uint8_t l4hlen; /* layer 4 header length */ + uint8_t l5hlen; /* layer 5 header length */ + union { + uint8_t eight[8]; + uint16_t sixteen[4]; + uint32_t thirtytwo[2]; + uint64_t sixtyfour[1]; + uintptr_t unintptr[1]; + void *ptr; + } PH_per; + + /* Layer specific non-persistent local storage for reassembly, etc. */ + union { + uint8_t eight[8]; + uint16_t sixteen[4]; + uint32_t thirtytwo[2]; + uint64_t sixtyfour[1]; + uintptr_t unintptr[1]; + void *ptr; + } PH_loc; }; -#define ether_vtag PH_vt.vt_vtag +#define ether_vtag PH_per.sixteen[0] +#define PH_vt PH_per +#define vt_nrecs sixteen[0] +#define tso_segsz PH_per.sixteen[1] +#define csum_phsum PH_per.sixteen[2] +#define csum_data PH_per.thirtytwo[1] /* * Description of external storage mapped into mbuf; valid only if M_EXT is * set. + * Size ILP32: 28 + * LP64: 48 + * Compile-time assertions in uipc_mbuf.c test these values to ensure that + * they are correct. */ struct m_ext { + union { + volatile u_int ext_count; /* value of ref count info */ + volatile u_int *ext_cnt; /* pointer to ref count info */ + }; caddr_t ext_buf; /* start of buffer */ + uint32_t ext_size; /* size of buffer, for ext_free */ + uint32_t ext_type:8, /* type of external storage */ + ext_flags:24; /* external storage mbuf flags */ void (*ext_free) /* free routine if not the usual */ - (void *, void *); + (struct mbuf *, void *, void *); void *ext_arg1; /* optional argument pointer */ void *ext_arg2; /* optional argument pointer */ - u_int ext_size; /* size of buffer, for ext_free */ - volatile u_int *ref_cnt; /* pointer to ref count info */ - int ext_type; /* type of external storage */ }; /* @@ -150,71 +205,107 @@ struct m_ext { * purposes. */ struct mbuf { - struct m_hdr m_hdr; + /* + * Header present at the beginning of every mbuf. + * Size ILP32: 24 + * LP64: 32 + * Compile-time assertions in uipc_mbuf.c test these values to ensure + * that they are correct. + */ + union { /* next buffer in chain */ + struct mbuf *m_next; + SLIST_ENTRY(mbuf) m_slist; + STAILQ_ENTRY(mbuf) m_stailq; + }; + union { /* next chain in queue/record */ + struct mbuf *m_nextpkt; + SLIST_ENTRY(mbuf) m_slistpkt; + STAILQ_ENTRY(mbuf) m_stailqpkt; + }; + caddr_t m_data; /* location of data */ + int32_t m_len; /* amount of data in this mbuf */ + uint32_t m_type:8, /* type of data in this mbuf */ + m_flags:24; /* flags; see below */ +#if !defined(__LP64__) + uint32_t m_pad; /* pad for 64bit alignment */ +#endif + + /* + * A set of optional headers (packet header, external storage header) + * and internal data storage. Historically, these arrays were sized + * to MHLEN (space left after a packet header) and MLEN (space left + * after only a regular mbuf header); they are now variable size in + * order to support future work on variable-size mbufs. + */ union { struct { - struct pkthdr MH_pkthdr; /* M_PKTHDR set */ + struct pkthdr m_pkthdr; /* M_PKTHDR set */ union { - struct m_ext MH_ext; /* M_EXT set */ - char MH_databuf[MHLEN]; - } MH_dat; - } MH; - char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ - } M_dat; + struct m_ext m_ext; /* M_EXT set */ + char m_pktdat[0]; + }; + }; + char m_dat[0]; /* !M_PKTHDR, !M_EXT */ + }; }; -#define m_next m_hdr.mh_next -#define m_len m_hdr.mh_len -#define m_data m_hdr.mh_data -#define m_type m_hdr.mh_type -#define m_flags m_hdr.mh_flags -#define m_nextpkt m_hdr.mh_nextpkt -#define m_act m_nextpkt -#define m_pkthdr M_dat.MH.MH_pkthdr -#define m_ext M_dat.MH.MH_dat.MH_ext -#define m_pktdat M_dat.MH.MH_dat.MH_databuf -#define m_dat M_dat.M_databuf /* - * mbuf flags. + * mbuf flags of global significance and layer crossing. + * Those of only protocol/layer specific significance are to be mapped + * to M_PROTO[1-12] and cleared at layer handoff boundaries. + * NB: Limited to the lower 24 bits. */ #define M_EXT 0x00000001 /* has associated external storage */ #define M_PKTHDR 0x00000002 /* start of record */ #define M_EOR 0x00000004 /* end of record */ #define M_RDONLY 0x00000008 /* associated data is marked read-only */ -#define M_PROTO1 0x00000010 /* protocol-specific */ -#define M_PROTO2 0x00000020 /* protocol-specific */ -#define M_PROTO3 0x00000040 /* protocol-specific */ -#define M_PROTO4 0x00000080 /* protocol-specific */ -#define M_PROTO5 0x00000100 /* protocol-specific */ -#define M_BCAST 0x00000200 /* send/received as link-level broadcast */ -#define M_MCAST 0x00000400 /* send/received as link-level multicast */ -#define M_FRAG 0x00000800 /* packet is a fragment of a larger packet */ -#define M_FIRSTFRAG 0x00001000 /* packet is first fragment */ -#define M_LASTFRAG 0x00002000 /* packet is last fragment */ -#define M_SKIP_FIREWALL 0x00004000 /* skip firewall processing */ -#define M_FREELIST 0x00008000 /* mbuf is on the free list */ -#define M_VLANTAG 0x00010000 /* ether_vtag is valid */ -#define M_PROMISC 0x00020000 /* packet was not for us */ -#define M_NOFREE 0x00040000 /* do not free mbuf, embedded in cluster */ -#define M_PROTO6 0x00080000 /* protocol-specific */ -#define M_PROTO7 0x00100000 /* protocol-specific */ -#define M_PROTO8 0x00200000 /* protocol-specific */ -#define M_FLOWID 0x00400000 /* deprecated: flowid is valid */ -#define M_HASHTYPEBITS 0x0F000000 /* mask of bits holding flowid hash type */ +#define M_BCAST 0x00000010 /* send/received as link-level broadcast */ +#define M_MCAST 0x00000020 /* send/received as link-level multicast */ +#define M_PROMISC 0x00000040 /* packet was not for us */ +#define M_VLANTAG 0x00000080 /* ether_vtag is valid */ +#define M_UNUSED_8 0x00000100 /* --available-- */ +#define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */ + +#define M_PROTO1 0x00001000 /* protocol-specific */ +#define M_PROTO2 0x00002000 /* protocol-specific */ +#define M_PROTO3 0x00004000 /* protocol-specific */ +#define M_PROTO4 0x00008000 /* protocol-specific */ +#define M_PROTO5 0x00010000 /* protocol-specific */ +#define M_PROTO6 0x00020000 /* protocol-specific */ +#define M_PROTO7 0x00040000 /* protocol-specific */ +#define M_PROTO8 0x00080000 /* protocol-specific */ +#define M_PROTO9 0x00100000 /* protocol-specific */ +#define M_PROTO10 0x00200000 /* protocol-specific */ +#define M_PROTO11 0x00400000 /* protocol-specific */ +#define M_PROTO12 0x00800000 /* protocol-specific */ + +#define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */ /* - * For RELENG_{6,7} steal these flags for limited multiple routing table - * support. In RELENG_8 and beyond, use just one flag and a tag. + * Flags to purge when crossing layers. */ -#define M_FIB 0xF0000000 /* steal some bits to store fib number. */ +#define M_PROTOFLAGS \ + (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\ + M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12) -#define M_NOTIFICATION M_PROTO5 /* SCTP notification */ +/* + * Flags preserved when copying m_pkthdr. + */ +#define M_COPYFLAGS \ + (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG| \ + M_PROTOFLAGS) /* - * Flags to purge when crossing layers. + * Mbuf flag description for use with printf(9) %b identifier. */ -#define M_PROTOFLAGS \ - (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8) +#define M_FLAG_BITS \ + "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \ + "\7M_PROMISC\10M_VLANTAG" +#define M_FLAG_PROTOBITS \ + "\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \ + "\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \ + "\27M_PROTO11\30M_PROTO12" +#define M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS) /* * Network interface cards are able to hash protocol fields (such as IPv4 @@ -227,88 +318,214 @@ struct mbuf { * * Most NICs support RSS, which provides ordering and explicit affinity, and * use the hash m_flag bits to indicate what header fields were covered by - * the hash. M_HASHTYPE_OPAQUE can be set by non-RSS cards or configurations - * that provide an opaque flow identifier, allowing for ordering and - * distribution without explicit affinity. + * the hash. M_HASHTYPE_OPAQUE and M_HASHTYPE_OPAQUE_HASH can be set by non- + * RSS cards or configurations that provide an opaque flow identifier, allowing + * for ordering and distribution without explicit affinity. Additionally, + * M_HASHTYPE_OPAQUE_HASH indicates that the flow identifier has hash + * properties. */ -#define M_HASHTYPE_SHIFT 24 -#define M_HASHTYPE_NONE 0x0 -#define M_HASHTYPE_RSS_IPV4 0x1 /* IPv4 2-tuple */ -#define M_HASHTYPE_RSS_TCP_IPV4 0x2 /* TCPv4 4-tuple */ -#define M_HASHTYPE_RSS_IPV6 0x3 /* IPv6 2-tuple */ -#define M_HASHTYPE_RSS_TCP_IPV6 0x4 /* TCPv6 4-tuple */ -#define M_HASHTYPE_RSS_IPV6_EX 0x5 /* IPv6 2-tuple + ext hdrs */ -#define M_HASHTYPE_RSS_TCP_IPV6_EX 0x6 /* TCPv6 4-tiple + ext hdrs */ -#define M_HASHTYPE_OPAQUE 0xf /* ordering, not affinity */ - -#define M_HASHTYPE_CLEAR(m) (m)->m_flags &= ~(M_HASHTYPEBITS) -#define M_HASHTYPE_GET(m) (((m)->m_flags & M_HASHTYPEBITS) >> \ - M_HASHTYPE_SHIFT) -#define M_HASHTYPE_SET(m, v) do { \ - (m)->m_flags &= ~M_HASHTYPEBITS; \ - (m)->m_flags |= ((v) << M_HASHTYPE_SHIFT); \ -} while (0) +#define M_HASHTYPE_HASHPROP 0x80 /* has hash properties */ +#define M_HASHTYPE_HASH(t) (M_HASHTYPE_HASHPROP | (t)) +/* Microsoft RSS standard hash types */ +#define M_HASHTYPE_NONE 0 +#define M_HASHTYPE_RSS_IPV4 M_HASHTYPE_HASH(1) /* IPv4 2-tuple */ +#define M_HASHTYPE_RSS_TCP_IPV4 M_HASHTYPE_HASH(2) /* TCPv4 4-tuple */ +#define M_HASHTYPE_RSS_IPV6 M_HASHTYPE_HASH(3) /* IPv6 2-tuple */ +#define M_HASHTYPE_RSS_TCP_IPV6 M_HASHTYPE_HASH(4) /* TCPv6 4-tuple */ +#define M_HASHTYPE_RSS_IPV6_EX M_HASHTYPE_HASH(5) /* IPv6 2-tuple + + * ext hdrs */ +#define M_HASHTYPE_RSS_TCP_IPV6_EX M_HASHTYPE_HASH(6) /* TCPv6 4-tiple + + * ext hdrs */ +/* Non-standard RSS hash types */ +#define M_HASHTYPE_RSS_UDP_IPV4 M_HASHTYPE_HASH(7) /* IPv4 UDP 4-tuple*/ +#define M_HASHTYPE_RSS_UDP_IPV4_EX M_HASHTYPE_HASH(8) /* IPv4 UDP 4-tuple + + * ext hdrs */ +#define M_HASHTYPE_RSS_UDP_IPV6 M_HASHTYPE_HASH(9) /* IPv6 UDP 4-tuple*/ +#define M_HASHTYPE_RSS_UDP_IPV6_EX M_HASHTYPE_HASH(10)/* IPv6 UDP 4-tuple + + * ext hdrs */ + +#define M_HASHTYPE_OPAQUE 63 /* ordering, not affinity */ +#define M_HASHTYPE_OPAQUE_HASH M_HASHTYPE_HASH(M_HASHTYPE_OPAQUE) + /* ordering+hash, not affinity*/ + +#define M_HASHTYPE_CLEAR(m) ((m)->m_pkthdr.rsstype = 0) +#define M_HASHTYPE_GET(m) ((m)->m_pkthdr.rsstype) +#define M_HASHTYPE_SET(m, v) ((m)->m_pkthdr.rsstype = (v)) #define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v)) +#define M_HASHTYPE_ISHASH(m) (M_HASHTYPE_GET(m) & M_HASHTYPE_HASHPROP) /* - * Flags preserved when copying m_pkthdr. + * COS/QOS class and quality of service tags. + * It uses DSCP code points as base. */ -#define M_COPYFLAGS \ - (M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\ - M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB|M_HASHTYPEBITS) +#define QOS_DSCP_CS0 0x00 +#define QOS_DSCP_DEF QOS_DSCP_CS0 +#define QOS_DSCP_CS1 0x20 +#define QOS_DSCP_AF11 0x28 +#define QOS_DSCP_AF12 0x30 +#define QOS_DSCP_AF13 0x38 +#define QOS_DSCP_CS2 0x40 +#define QOS_DSCP_AF21 0x48 +#define QOS_DSCP_AF22 0x50 +#define QOS_DSCP_AF23 0x58 +#define QOS_DSCP_CS3 0x60 +#define QOS_DSCP_AF31 0x68 +#define QOS_DSCP_AF32 0x70 +#define QOS_DSCP_AF33 0x78 +#define QOS_DSCP_CS4 0x80 +#define QOS_DSCP_AF41 0x88 +#define QOS_DSCP_AF42 0x90 +#define QOS_DSCP_AF43 0x98 +#define QOS_DSCP_CS5 0xa0 +#define QOS_DSCP_EF 0xb8 +#define QOS_DSCP_CS6 0xc0 +#define QOS_DSCP_CS7 0xe0 /* - * External buffer types: identify ext_buf type. + * External mbuf storage buffer types. */ #define EXT_CLUSTER 1 /* mbuf cluster */ -#define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ -#define EXT_JUMBOP 3 /* jumbo cluster 4096 bytes */ +#ifndef __rtems__ +#define EXT_SFBUF 2 /* sendfile(2)'s sf_buf */ +#endif /* __rtems__ */ +#define EXT_JUMBOP 3 /* jumbo cluster page sized */ #define EXT_JUMBO9 4 /* jumbo cluster 9216 bytes */ #define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference (M_IOVEC) */ -#define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ -#define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ -#define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ -#define EXT_EXTREF 400 /* has externally maintained ref_cnt ptr */ +#ifndef __rtems__ +#define EXT_SFBUF_NOCACHE 8 /* sendfile(2)'s sf_buf not to be cached */ +#endif /* __rtems__ */ + +#define EXT_VENDOR1 224 /* for vendor-internal use */ +#define EXT_VENDOR2 225 /* for vendor-internal use */ +#define EXT_VENDOR3 226 /* for vendor-internal use */ +#define EXT_VENDOR4 227 /* for vendor-internal use */ + +#define EXT_EXP1 244 /* for experimental use */ +#define EXT_EXP2 245 /* for experimental use */ +#define EXT_EXP3 246 /* for experimental use */ +#define EXT_EXP4 247 /* for experimental use */ + +#define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */ +#define EXT_MOD_TYPE 253 /* custom module's ext_buf type */ +#define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */ +#define EXT_EXTREF 255 /* has externally maintained ext_cnt ptr */ /* - * Flags indicating hw checksum support and sw checksum requirements. This - * field can be directly tested against if_data.ifi_hwassist. + * Flags for external mbuf buffer types. + * NB: limited to the lower 24 bits. */ -#define CSUM_IP 0x0001 /* will csum IP */ -#define CSUM_TCP 0x0002 /* will csum TCP */ -#define CSUM_UDP 0x0004 /* will csum UDP */ -#define CSUM_IP_FRAGS 0x0008 /* removed, left for compat */ -#define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */ -#define CSUM_TSO 0x0020 /* will do TSO */ -#define CSUM_SCTP 0x0040 /* will csum SCTP */ -#define CSUM_SCTP_IPV6 0x0080 /* will csum IPv6/SCTP */ - -#define CSUM_IP_CHECKED 0x0100 /* did csum IP */ -#define CSUM_IP_VALID 0x0200 /* ... the csum is valid */ -#define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */ -#define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */ -#define CSUM_SCTP_VALID 0x1000 /* SCTP checksum is valid */ -#define CSUM_UDP_IPV6 0x2000 /* will csum IPv6/UDP */ -#define CSUM_TCP_IPV6 0x4000 /* will csum IPv6/TCP */ -/* CSUM_TSO_IPV6 0x8000 will do IPv6/TSO */ - -/* CSUM_FRAGMENT_IPV6 0x10000 will do IPv6 fragementation */ - -#define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6) -#define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID +#define EXT_FLAG_EMBREF 0x000001 /* embedded ext_count */ +#define EXT_FLAG_EXTREF 0x000002 /* external ext_cnt, notyet */ + +#define EXT_FLAG_NOFREE 0x000010 /* don't free mbuf to pool, notyet */ -#define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP) -#define CSUM_DELAY_IP (CSUM_IP) /* Only v4, no v6 IP hdr csum */ +#define EXT_FLAG_VENDOR1 0x010000 /* for vendor-internal use */ +#define EXT_FLAG_VENDOR2 0x020000 /* for vendor-internal use */ +#define EXT_FLAG_VENDOR3 0x040000 /* for vendor-internal use */ +#define EXT_FLAG_VENDOR4 0x080000 /* for vendor-internal use */ + +#define EXT_FLAG_EXP1 0x100000 /* for experimental use */ +#define EXT_FLAG_EXP2 0x200000 /* for experimental use */ +#define EXT_FLAG_EXP3 0x400000 /* for experimental use */ +#define EXT_FLAG_EXP4 0x800000 /* for experimental use */ + +/* + * EXT flag description for use with printf(9) %b identifier. + */ +#define EXT_FLAG_BITS \ + "\20\1EXT_FLAG_EMBREF\2EXT_FLAG_EXTREF\5EXT_FLAG_NOFREE" \ + "\21EXT_FLAG_VENDOR1\22EXT_FLAG_VENDOR2\23EXT_FLAG_VENDOR3" \ + "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \ + "\30EXT_FLAG_EXP4" + +/* + * External reference/free functions. + */ +void sf_ext_free(void *, void *); +void sf_ext_free_nocache(void *, void *); /* - * mbuf types. + * Flags indicating checksum, segmentation and other offload work to be + * done, or already done, by hardware or lower layers. It is split into + * separate inbound and outbound flags. + * + * Outbound flags that are set by upper protocol layers requesting lower + * layers, or ideally the hardware, to perform these offloading tasks. + * For outbound packets this field and its flags can be directly tested + * against ifnet if_hwassist. + */ +#define CSUM_IP 0x00000001 /* IP header checksum offload */ +#define CSUM_IP_UDP 0x00000002 /* UDP checksum offload */ +#define CSUM_IP_TCP 0x00000004 /* TCP checksum offload */ +#define CSUM_IP_SCTP 0x00000008 /* SCTP checksum offload */ +#define CSUM_IP_TSO 0x00000010 /* TCP segmentation offload */ +#define CSUM_IP_ISCSI 0x00000020 /* iSCSI checksum offload */ + +#define CSUM_IP6_UDP 0x00000200 /* UDP checksum offload */ +#define CSUM_IP6_TCP 0x00000400 /* TCP checksum offload */ +#define CSUM_IP6_SCTP 0x00000800 /* SCTP checksum offload */ +#define CSUM_IP6_TSO 0x00001000 /* TCP segmentation offload */ +#define CSUM_IP6_ISCSI 0x00002000 /* iSCSI checksum offload */ + +/* Inbound checksum support where the checksum was verified by hardware. */ +#define CSUM_L3_CALC 0x01000000 /* calculated layer 3 csum */ +#define CSUM_L3_VALID 0x02000000 /* checksum is correct */ +#define CSUM_L4_CALC 0x04000000 /* calculated layer 4 csum */ +#define CSUM_L4_VALID 0x08000000 /* checksum is correct */ +#define CSUM_L5_CALC 0x10000000 /* calculated layer 5 csum */ +#define CSUM_L5_VALID 0x20000000 /* checksum is correct */ +#define CSUM_COALESED 0x40000000 /* contains merged segments */ + +/* + * CSUM flag description for use with printf(9) %b identifier. + */ +#define CSUM_BITS \ + "\20\1CSUM_IP\2CSUM_IP_UDP\3CSUM_IP_TCP\4CSUM_IP_SCTP\5CSUM_IP_TSO" \ + "\6CSUM_IP_ISCSI" \ + "\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \ + "\16CSUM_IP6_ISCSI" \ + "\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \ + "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESED" + +/* CSUM flags compatibility mappings. */ +#define CSUM_IP_CHECKED CSUM_L3_CALC +#define CSUM_IP_VALID CSUM_L3_VALID +#define CSUM_DATA_VALID CSUM_L4_VALID +#define CSUM_PSEUDO_HDR CSUM_L4_CALC +#define CSUM_SCTP_VALID CSUM_L4_VALID +#define CSUM_DELAY_DATA (CSUM_TCP|CSUM_UDP) +#define CSUM_DELAY_IP CSUM_IP /* Only v4, no v6 IP hdr csum */ +#define CSUM_DELAY_DATA_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6) +#define CSUM_DATA_VALID_IPV6 CSUM_DATA_VALID +#define CSUM_TCP CSUM_IP_TCP +#define CSUM_UDP CSUM_IP_UDP +#define CSUM_SCTP CSUM_IP_SCTP +#define CSUM_TSO (CSUM_IP_TSO|CSUM_IP6_TSO) +#define CSUM_UDP_IPV6 CSUM_IP6_UDP +#define CSUM_TCP_IPV6 CSUM_IP6_TCP +#define CSUM_SCTP_IPV6 CSUM_IP6_SCTP + +/* + * mbuf types describing the content of the mbuf (including external storage). */ #define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */ + +#define MT_VENDOR1 4 /* for vendor-internal use */ +#define MT_VENDOR2 5 /* for vendor-internal use */ +#define MT_VENDOR3 6 /* for vendor-internal use */ +#define MT_VENDOR4 7 /* for vendor-internal use */ + #define MT_SONAME 8 /* socket name */ + +#define MT_EXP1 9 /* for experimental use */ +#define MT_EXP2 10 /* for experimental use */ +#define MT_EXP3 11 /* for experimental use */ +#define MT_EXP4 12 /* for experimental use */ + #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ @@ -316,55 +533,6 @@ struct mbuf { #define MT_NOINIT 255 /* Not a type but a flag to allocate a non-initialized mbuf */ -#define MB_NOTAGS 0x1UL /* no tags attached to mbuf */ - -/* - * General mbuf allocator statistics structure. - * - * Many of these statistics are no longer used; we instead track many - * allocator statistics through UMA's built in statistics mechanism. - */ -struct mbstat { - u_long m_mbufs; /* XXX */ - u_long m_mclusts; /* XXX */ - - u_long m_drain; /* times drained protocols for space */ - u_long m_mcfail; /* XXX: times m_copym failed */ - u_long m_mpfail; /* XXX: times m_pullup failed */ - u_long m_msize; /* length of an mbuf */ - u_long m_mclbytes; /* length of an mbuf cluster */ - u_long m_minclsize; /* min length of data to allocate a cluster */ - u_long m_mlen; /* length of data in an mbuf */ - u_long m_mhlen; /* length of data in a header mbuf */ - - /* Number of mbtypes (gives # elems in mbtypes[] array) */ - short m_numtypes; - - /* XXX: Sendfile stats should eventually move to their own struct */ - u_long sf_iocnt; /* times sendfile had to do disk I/O */ - u_long sf_allocfail; /* times sfbuf allocation failed */ - u_long sf_allocwait; /* times sfbuf allocation had to wait */ -}; - -/* - * Flags specifying how an allocation should be made. - * - * The flag to use is as follows: - * - M_NOWAIT (M_DONTWAIT) from an interrupt handler to not block allocation. - * - M_WAITOK (M_WAIT) from wherever it is safe to block. - * - * M_DONTWAIT/M_NOWAIT means that we will not block the thread explicitly and - * if we cannot allocate immediately we may return NULL, whereas - * M_WAIT/M_WAITOK means that if we cannot allocate resources we - * will block until they are available, and thus never return NULL. - * - * XXX Eventually just phase this out to use M_WAITOK/M_NOWAIT. - */ -#define MBTOM(how) (how) -#define M_DONTWAIT M_NOWAIT -#define M_TRYWAIT M_WAITOK -#define M_WAIT M_WAITOK - /* * String names of mbuf-related UMA(9) and malloc(9) types. Exposed to * !_KERNEL so that monitoring tools can look up the zones with @@ -402,23 +570,53 @@ extern uma_zone_t zone_pack; extern uma_zone_t zone_jumbop; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; -extern uma_zone_t zone_ext_refcnt; - -static __inline struct mbuf *m_getcl(int how, short type, int flags); -static __inline struct mbuf *m_get(int how, short type); -static __inline struct mbuf *m_gethdr(int how, short type); -static __inline struct mbuf *m_getjcl(int how, short type, int flags, - int size); -static __inline struct mbuf *m_getclr(int how, short type); /* XXX */ -static __inline int m_init(struct mbuf *m, uma_zone_t zone, - int size, int how, short type, int flags); -static __inline struct mbuf *m_free(struct mbuf *m); -static __inline void m_clget(struct mbuf *m, int how); -static __inline void *m_cljget(struct mbuf *m, int how, int size); -static __inline void m_chtype(struct mbuf *m, short new_type); -void mb_free_ext(struct mbuf *); -static __inline struct mbuf *m_last(struct mbuf *m); -int m_pkthdr_init(struct mbuf *m, int how); + +void mb_dupcl(struct mbuf *, struct mbuf *); +void mb_free_ext(struct mbuf *); +void m_adj(struct mbuf *, int); +int m_apply(struct mbuf *, int, int, + int (*)(void *, void *, u_int), void *); +int m_append(struct mbuf *, int, c_caddr_t); +void m_cat(struct mbuf *, struct mbuf *); +void m_catpkt(struct mbuf *, struct mbuf *); +int m_clget(struct mbuf *m, int how); +void *m_cljget(struct mbuf *m, int how, int size); +struct mbuf *m_collapse(struct mbuf *, int, int); +void m_copyback(struct mbuf *, int, int, c_caddr_t); +void m_copydata(const struct mbuf *, int, int, caddr_t); +struct mbuf *m_copym(struct mbuf *, int, int, int); +struct mbuf *m_copypacket(struct mbuf *, int); +void m_copy_pkthdr(struct mbuf *, struct mbuf *); +struct mbuf *m_copyup(struct mbuf *, int, int); +struct mbuf *m_defrag(struct mbuf *, int); +void m_demote_pkthdr(struct mbuf *); +void m_demote(struct mbuf *, int, int); +struct mbuf *m_devget(char *, int, int, struct ifnet *, + void (*)(char *, caddr_t, u_int)); +struct mbuf *m_dup(const struct mbuf *, int); +int m_dup_pkthdr(struct mbuf *, const struct mbuf *, int); +void m_extadd(struct mbuf *, caddr_t, u_int, + void (*)(struct mbuf *, void *, void *), void *, void *, + int, int); +u_int m_fixhdr(struct mbuf *); +struct mbuf *m_fragment(struct mbuf *, int, int); +void m_freem(struct mbuf *); +struct mbuf *m_get2(int, int, short, int); +struct mbuf *m_getjcl(int, short, int, int); +struct mbuf *m_getm2(struct mbuf *, int, int, short, int); +struct mbuf *m_getptr(struct mbuf *, int, int *); +u_int m_length(struct mbuf *, struct mbuf **); +int m_mbuftouio(struct uio *, struct mbuf *, int); +void m_move_pkthdr(struct mbuf *, struct mbuf *); +int m_pkthdr_init(struct mbuf *, int); +struct mbuf *m_prepend(struct mbuf *, int, int); +void m_print(const struct mbuf *, int); +struct mbuf *m_pulldown(struct mbuf *, int, int, int *); +struct mbuf *m_pullup(struct mbuf *, int); +int m_sanity(struct mbuf *, int); +struct mbuf *m_split(struct mbuf *, int, int); +struct mbuf *m_uiotombuf(struct uio *, int, int, int, int); +struct mbuf *m_unshare(struct mbuf *, int); static __inline int m_gettype(int size) @@ -444,7 +642,7 @@ m_gettype(int size) type = EXT_JUMBO16; break; default: - panic("%s: m_getjcl: invalid cluster size", __func__); + panic("%s: invalid cluster size %d", __func__, size); } return (type); @@ -455,7 +653,7 @@ m_gettype(int size) */ static __inline void m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt, - void (*freef)(void *, void *), void *arg1, void *arg2) + void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2) { KASSERT(ref_cnt != NULL, ("%s: ref_cnt not provided", __func__)); @@ -463,13 +661,14 @@ m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt, atomic_add_int((int*)ref_cnt, 1); m->m_flags |= M_EXT; m->m_ext.ext_buf = buf; - m->m_ext.ref_cnt = ref_cnt; + m->m_ext.ext_cnt = ref_cnt; m->m_data = m->m_ext.ext_buf; m->m_ext.ext_size = size; m->m_ext.ext_free = freef; m->m_ext.ext_arg1 = arg1; m->m_ext.ext_arg2 = arg2; m->m_ext.ext_type = EXT_EXTREF; + m->m_ext.ext_flags = 0; } static __inline uma_zone_t @@ -478,9 +677,6 @@ m_getzone(int size) uma_zone_t zone; switch (size) { - case MSIZE: - zone = zone_mbuf; - break; case MCLBYTES: zone = zone_clust; break; @@ -496,7 +692,7 @@ m_getzone(int size) zone = zone_jumbo16; break; default: - panic("%s: m_getjcl: invalid cluster type", __func__); + panic("%s: invalid cluster size %d", __func__, size); } return (zone); @@ -510,8 +706,7 @@ m_getzone(int size) * should go away with constant propagation for !MGETHDR. */ static __inline int -m_init(struct mbuf *m, uma_zone_t zone, int size, int how, short type, - int flags) +m_init(struct mbuf *m, int how, short type, int flags) { int error; @@ -521,182 +716,81 @@ m_init(struct mbuf *m, uma_zone_t zone, int size, int how, short type, m->m_len = 0; m->m_flags = flags; m->m_type = type; - if (flags & M_PKTHDR) { - if ((error = m_pkthdr_init(m, how)) != 0) - return (error); - } + if (flags & M_PKTHDR) + error = m_pkthdr_init(m, how); + else + error = 0; - return (0); + MBUF_PROBE5(m__init, m, how, type, flags, error); + return (error); } static __inline struct mbuf * m_get(int how, short type) { - struct mb_args args; - - args.flags = 0; - args.type = type; - return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how))); -} - -/* - * XXX This should be deprecated, very little use. - */ -static __inline struct mbuf * -m_getclr(int how, short type) -{ struct mbuf *m; struct mb_args args; args.flags = 0; args.type = type; - m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, how); - if (m != NULL) - bzero(m->m_data, MLEN); + m = uma_zalloc_arg(zone_mbuf, &args, how); + MBUF_PROBE3(m__get, how, type, m); return (m); } static __inline struct mbuf * m_gethdr(int how, short type) { + struct mbuf *m; struct mb_args args; args.flags = M_PKTHDR; args.type = type; - return ((struct mbuf *)(uma_zalloc_arg(zone_mbuf, &args, how))); + m = uma_zalloc_arg(zone_mbuf, &args, how); + MBUF_PROBE3(m__gethdr, how, type, m); + return (m); } static __inline struct mbuf * m_getcl(int how, short type, int flags) { + struct mbuf *m; struct mb_args args; args.flags = flags; args.type = type; - return ((struct mbuf *)(uma_zalloc_arg(zone_pack, &args, how))); -} - -/* - * m_getjcl() returns an mbuf with a cluster of the specified size attached. - * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. - * - * XXX: This is rather large, should be real function maybe. - */ -static __inline struct mbuf * -m_getjcl(int how, short type, int flags, int size) -{ - struct mb_args args; - struct mbuf *m, *n; - uma_zone_t zone; - - if (size == MCLBYTES) - return m_getcl(how, type, flags); - - args.flags = flags; - args.type = type; - - m = (struct mbuf *)uma_zalloc_arg(zone_mbuf, &args, how); - if (m == NULL) - return (NULL); - - zone = m_getzone(size); - n = (struct mbuf *)uma_zalloc_arg(zone, m, how); - if (n == NULL) { - uma_zfree(zone_mbuf, m); - return (NULL); - } + m = uma_zalloc_arg(zone_pack, &args, how); + MBUF_PROBE4(m__getcl, how, type, flags, m); return (m); } -static __inline void -m_free_fast(struct mbuf *m) -{ -#ifdef INVARIANTS - if (m->m_flags & M_PKTHDR) - KASSERT(SLIST_EMPTY(&m->m_pkthdr.tags), ("doing fast free of mbuf with tags")); -#endif - - uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS); -} - -static __inline struct mbuf * -m_free(struct mbuf *m) -{ - struct mbuf *n = m->m_next; - - if (m->m_flags & M_EXT) - mb_free_ext(m); - else if ((m->m_flags & M_NOFREE) == 0) - uma_zfree(zone_mbuf, m); - return (n); -} - -static __inline void -m_clget(struct mbuf *m, int how) -{ - - if (m->m_flags & M_EXT) - printf("%s: %p mbuf already has cluster\n", __func__, m); - m->m_ext.ext_buf = (char *)NULL; - uma_zalloc_arg(zone_clust, m, how); - /* - * On a cluster allocation failure, drain the packet zone and retry, - * we might be able to loosen a few clusters up on the drain. - */ - if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { - zone_drain(zone_pack); - uma_zalloc_arg(zone_clust, m, how); - } -} - /* - * m_cljget() is different from m_clget() as it can allocate clusters without - * attaching them to an mbuf. In that case the return value is the pointer - * to the cluster of the requested size. If an mbuf was specified, it gets - * the cluster attached to it and the return value can be safely ignored. - * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. + * XXX: m_cljset() is a dangerous API. One must attach only a new, + * unreferenced cluster to an mbuf(9). It is not possible to assert + * that, so care can be taken only by users of the API. */ -static __inline void * -m_cljget(struct mbuf *m, int how, int size) -{ - uma_zone_t zone; - - if (m && m->m_flags & M_EXT) - printf("%s: %p mbuf already has cluster\n", __func__, m); - if (m != NULL) - m->m_ext.ext_buf = NULL; - - zone = m_getzone(size); - return (uma_zalloc_arg(zone, m, how)); -} - static __inline void m_cljset(struct mbuf *m, void *cl, int type) { - uma_zone_t zone; int size; switch (type) { case EXT_CLUSTER: size = MCLBYTES; - zone = zone_clust; break; #if MJUMPAGESIZE != MCLBYTES case EXT_JUMBOP: size = MJUMPAGESIZE; - zone = zone_jumbop; break; #endif case EXT_JUMBO9: size = MJUM9BYTES; - zone = zone_jumbo9; break; case EXT_JUMBO16: size = MJUM16BYTES; - zone = zone_jumbo16; break; default: - panic("unknown cluster type"); + panic("%s: unknown cluster type %d", __func__, type); break; } @@ -705,9 +799,10 @@ m_cljset(struct mbuf *m, void *cl, int type) m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = type; - m->m_ext.ref_cnt = (volatile u_int *) uma_find_refcnt(zone, cl); + m->m_ext.ext_flags = EXT_FLAG_EMBREF; + m->m_ext.ext_count = 1; m->m_flags |= M_EXT; - + MBUF_PROBE3(m__cljset, m, cl, type); } static __inline void @@ -717,6 +812,16 @@ m_chtype(struct mbuf *m, short new_type) m->m_type = new_type; } +static __inline void +m_clrprotoflags(struct mbuf *m) +{ + + while (m) { + m->m_flags &= ~M_PROTOFLAGS; + m = m->m_next; + } +} + static __inline struct mbuf * m_last(struct mbuf *m) { @@ -726,14 +831,14 @@ m_last(struct mbuf *m) return (m); } -extern void (*m_addr_chg_pf_p)(struct mbuf *m); - -static __inline void -m_addr_changed(struct mbuf *m) +static inline u_int +m_extrefcnt(struct mbuf *m) { - if (m_addr_chg_pf_p) - m_addr_chg_pf_p(m); + KASSERT(m->m_flags & M_EXT, ("%s: M_EXT missing", __func__)); + + return ((m->m_ext.ext_flags & EXT_FLAG_EMBREF) ? m->m_ext.ext_count : + *m->m_ext.ext_cnt); } /* @@ -745,7 +850,8 @@ m_addr_changed(struct mbuf *m) #define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) #define MCLGET(m, how) m_clget((m), (how)) #define MEXTADD(m, buf, size, free, arg1, arg2, flags, type) \ - m_extadd((m), (caddr_t)(buf), (size), (free),(arg1),(arg2),(flags), (type)) + m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2), \ + (flags), (type)) #define m_getm(m, len, how, type) \ m_getm2((m), (len), (how), (type), M_PKTHDR) @@ -756,7 +862,7 @@ m_addr_changed(struct mbuf *m) */ #define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ (!(((m)->m_flags & M_EXT)) || \ - (*((m)->m_ext.ref_cnt) == 1)) ) \ + (m_extrefcnt(m) == 1))) /* Check if the supplied mbuf has a packet header, or else panic. */ #define M_ASSERTPKTHDR(m) \ @@ -773,28 +879,50 @@ m_addr_changed(struct mbuf *m) ("%s: attempted use of a free mbuf!", __func__)) /* - * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place an - * object of the specified size at the end of the mbuf, longword aligned. + * Return the address of the start of the buffer associated with an mbuf, + * handling external storage, packet-header mbufs, and regular data mbufs. */ -#define M_ALIGN(m, len) do { \ - KASSERT(!((m)->m_flags & (M_PKTHDR|M_EXT)), \ - ("%s: M_ALIGN not normal mbuf", __func__)); \ - KASSERT((m)->m_data == (m)->m_dat, \ - ("%s: M_ALIGN not a virgin mbuf", __func__)); \ - (m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \ -} while (0) +#define M_START(m) \ + (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \ + ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \ + &(m)->m_dat[0]) /* - * As above, for mbufs allocated with m_gethdr/MGETHDR or initialized by - * M_DUP/MOVE_PKTHDR. + * Return the size of the buffer associated with an mbuf, handling external + * storage, packet-header mbufs, and regular data mbufs. */ -#define MH_ALIGN(m, len) do { \ - KASSERT((m)->m_flags & M_PKTHDR && !((m)->m_flags & M_EXT), \ - ("%s: MH_ALIGN not PKTHDR mbuf", __func__)); \ - KASSERT((m)->m_data == (m)->m_pktdat, \ - ("%s: MH_ALIGN not a virgin mbuf", __func__)); \ - (m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \ -} while (0) +#define M_SIZE(m) \ + (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size : \ + ((m)->m_flags & M_PKTHDR) ? MHLEN : \ + MLEN) + +/* + * Set the m_data pointer of a newly allocated mbuf to place an object of the + * specified size at the end of the mbuf, longword aligned. + * + * NB: Historically, we had M_ALIGN(), MH_ALIGN(), and MEXT_ALIGN() as + * separate macros, each asserting that it was called at the proper moment. + * This required callers to themselves test the storage type and call the + * right one. Rather than require callers to be aware of those layout + * decisions, we centralize here. + */ +static __inline void +m_align(struct mbuf *m, int len) +{ +#ifdef INVARIANTS + const char *msg = "%s: not a virgin mbuf"; +#endif + int adjust; + + KASSERT(m->m_data == M_START(m), (msg, __func__)); + + adjust = M_SIZE(m) - len; + m->m_data += adjust &~ (sizeof(long)-1); +} + +#define M_ALIGN(m, len) m_align(m, len) +#define MH_ALIGN(m, len) m_align(m, len) +#define MEXT_ALIGN(m, len) m_align(m, len) /* * Compute the amount of space available before the current start of data in @@ -802,24 +930,27 @@ m_addr_changed(struct mbuf *m) * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. + * + * NB: In previous versions, M_LEADINGSPACE() would only check M_WRITABLE() + * for mbufs with external storage. We now allow mbuf-embedded data to be + * read-only as well. */ #define M_LEADINGSPACE(m) \ - ((m)->m_flags & M_EXT ? \ - (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0): \ - (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \ - (m)->m_data - (m)->m_dat) + (M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0) /* * Compute the amount of space available after the end of data in an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. + * + * NB: In previous versions, M_TRAILINGSPACE() would only check M_WRITABLE() + * for mbufs with external storage. We now allow mbuf-embedded data to be + * read-only as well. */ #define M_TRAILINGSPACE(m) \ - ((m)->m_flags & M_EXT ? \ - (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size \ - - ((m)->m_data + (m)->m_len) : 0) : \ - &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len)) + (M_WRITABLE(m) ? \ + ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0) /* * Arrange to prepend space of size plen to mbuf m. If a new mbuf must be @@ -853,57 +984,14 @@ m_addr_changed(struct mbuf *m) #define M_COPYALL 1000000000 /* Compatibility with 4.3. */ -#define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT) +#define m_copy(m, o, l) m_copym((m), (o), (l), M_NOWAIT) extern int max_datalen; /* MHLEN - max_hdr */ extern int max_hdr; /* Largest link + protocol header */ extern int max_linkhdr; /* Largest link-level header */ extern int max_protohdr; /* Largest protocol header */ -extern struct mbstat mbstat; /* General mbuf stats/infos */ extern int nmbclusters; /* Maximum number of clusters */ -struct uio; - -void m_adj(struct mbuf *, int); -void m_align(struct mbuf *, int); -int m_apply(struct mbuf *, int, int, - int (*)(void *, void *, u_int), void *); -int m_append(struct mbuf *, int, c_caddr_t); -void m_cat(struct mbuf *, struct mbuf *); -void m_extadd(struct mbuf *, caddr_t, u_int, - void (*)(void *, void *), void *, void *, int, int); -struct mbuf *m_collapse(struct mbuf *, int, int); -void m_copyback(struct mbuf *, int, int, c_caddr_t); -void m_copydata(const struct mbuf *, int, int, caddr_t); -struct mbuf *m_copym(struct mbuf *, int, int, int); -struct mbuf *m_copymdata(struct mbuf *, struct mbuf *, - int, int, int, int); -struct mbuf *m_copypacket(struct mbuf *, int); -void m_copy_pkthdr(struct mbuf *, struct mbuf *); -struct mbuf *m_copyup(struct mbuf *n, int len, int dstoff); -struct mbuf *m_defrag(struct mbuf *, int); -void m_demote(struct mbuf *, int); -struct mbuf *m_devget(char *, int, int, struct ifnet *, - void (*)(char *, caddr_t, u_int)); -struct mbuf *m_dup(struct mbuf *, int); -int m_dup_pkthdr(struct mbuf *, struct mbuf *, int); -u_int m_fixhdr(struct mbuf *); -struct mbuf *m_fragment(struct mbuf *, int, int); -void m_freem(struct mbuf *); -struct mbuf *m_getm2(struct mbuf *, int, int, short, int); -struct mbuf *m_getptr(struct mbuf *, int, int *); -u_int m_length(struct mbuf *, struct mbuf **); -int m_mbuftouio(struct uio *, struct mbuf *, int); -void m_move_pkthdr(struct mbuf *, struct mbuf *); -struct mbuf *m_prepend(struct mbuf *, int, int); -void m_print(const struct mbuf *, int); -struct mbuf *m_pulldown(struct mbuf *, int, int, int *); -struct mbuf *m_pullup(struct mbuf *, int); -int m_sanity(struct mbuf *, int); -struct mbuf *m_split(struct mbuf *, int, int); -struct mbuf *m_uiotombuf(struct uio *, int, int, int, int); -struct mbuf *m_unshare(struct mbuf *, int how); - /*- * Network packets may have annotations attached by affixing a list of * "packet tags" to the pkthdr structure. Packet tags are dynamically @@ -975,7 +1063,7 @@ struct mbuf *m_unshare(struct mbuf *, int how); #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ #define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ -#define PACKET_TAG_PF 21 /* PF + ALTQ information */ +#define PACKET_TAG_PF (21 | MTAG_PERSISTENT) /* PF/ALTQ information */ #define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ #define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ #define PACKET_TAG_CARP 28 /* CARP info */ @@ -991,7 +1079,7 @@ void m_tag_delete_chain(struct mbuf *, struct m_tag *); void m_tag_free_default(struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); struct m_tag *m_tag_copy(struct m_tag *, int); -int m_tag_copy_chain(struct mbuf *, struct mbuf *, int); +int m_tag_copy_chain(struct mbuf *, const struct mbuf *, int); void m_tag_delete_nonpersistent(struct mbuf *); /* @@ -1043,7 +1131,7 @@ m_tag_first(struct mbuf *m) * Return the next tag in the list of tags associated with an mbuf. */ static __inline struct m_tag * -m_tag_next(struct mbuf *m, struct m_tag *t) +m_tag_next(struct mbuf *m __unused, struct m_tag *t) { return (SLIST_NEXT(t, m_tag_link)); @@ -1085,20 +1173,43 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start) m_tag_locate(m, MTAG_ABI_COMPAT, type, start)); } -/* XXX temporary FIB methods probably eventually use tags.*/ -#define M_FIBSHIFT 28 -#define M_FIBMASK 0x0F +static __inline struct mbuf * +m_free(struct mbuf *m) +{ + struct mbuf *n = m->m_next; -/* get the fib from an mbuf and if it is not set, return the default */ -#define M_GETFIB(_m) \ - ((((_m)->m_flags & M_FIB) >> M_FIBSHIFT) & M_FIBMASK) + MBUF_PROBE1(m__free, m); + if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE)) + m_tag_delete_chain(m, NULL); + if (m->m_flags & M_EXT) + mb_free_ext(m); + else if ((m->m_flags & M_NOFREE) == 0) + uma_zfree(zone_mbuf, m); + return (n); +} + +static __inline int +rt_m_getfib(struct mbuf *m) +{ + KASSERT(m->m_flags & M_PKTHDR , ("Attempt to get FIB from non header mbuf.")); + return (m->m_pkthdr.fibnum); +} + +#define M_GETFIB(_m) rt_m_getfib(_m) #define M_SETFIB(_m, _fib) do { \ - _m->m_flags &= ~M_FIB; \ - _m->m_flags |= (((_fib) << M_FIBSHIFT) & M_FIB); \ + KASSERT((_m)->m_flags & M_PKTHDR, ("Attempt to set FIB on non header mbuf.")); \ + ((_m)->m_pkthdr.fibnum) = (_fib); \ } while (0) -#endif /* _KERNEL */ +/* flags passed as first argument for "m_ether_tcpip_hash()" */ +#define MBUF_HASHFLAG_L2 (1 << 2) +#define MBUF_HASHFLAG_L3 (1 << 3) +#define MBUF_HASHFLAG_L4 (1 << 4) + +/* mbuf hashing helper routines */ +uint32_t m_ether_tcpip_hash_init(void); +uint32_t m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t); #ifdef MBUF_PROFILING void m_profile(struct mbuf *m); @@ -1107,5 +1218,103 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start) #define M_PROFILE(m) #endif +struct mbufq { + STAILQ_HEAD(, mbuf) mq_head; + int mq_len; + int mq_maxlen; +}; + +static inline void +mbufq_init(struct mbufq *mq, int maxlen) +{ + + STAILQ_INIT(&mq->mq_head); + mq->mq_maxlen = maxlen; + mq->mq_len = 0; +} + +static inline struct mbuf * +mbufq_flush(struct mbufq *mq) +{ + struct mbuf *m; + + m = STAILQ_FIRST(&mq->mq_head); + STAILQ_INIT(&mq->mq_head); + mq->mq_len = 0; + return (m); +} + +static inline void +mbufq_drain(struct mbufq *mq) +{ + struct mbuf *m, *n; + + n = mbufq_flush(mq); + while ((m = n) != NULL) { + n = STAILQ_NEXT(m, m_stailqpkt); + m_freem(m); + } +} + +static inline struct mbuf * +mbufq_first(const struct mbufq *mq) +{ + + return (STAILQ_FIRST(&mq->mq_head)); +} + +static inline struct mbuf * +mbufq_last(const struct mbufq *mq) +{ + + return (STAILQ_LAST(&mq->mq_head, mbuf, m_stailqpkt)); +} + +static inline int +mbufq_full(const struct mbufq *mq) +{ + + return (mq->mq_len >= mq->mq_maxlen); +} + +static inline int +mbufq_len(const struct mbufq *mq) +{ + return (mq->mq_len); +} + +static inline int +mbufq_enqueue(struct mbufq *mq, struct mbuf *m) +{ + + if (mbufq_full(mq)) + return (ENOBUFS); + STAILQ_INSERT_TAIL(&mq->mq_head, m, m_stailqpkt); + mq->mq_len++; + return (0); +} + +static inline struct mbuf * +mbufq_dequeue(struct mbufq *mq) +{ + struct mbuf *m; + + m = STAILQ_FIRST(&mq->mq_head); + if (m) { + STAILQ_REMOVE_HEAD(&mq->mq_head, m_stailqpkt); + m->m_nextpkt = NULL; + mq->mq_len--; + } + return (m); +} + +static inline void +mbufq_prepend(struct mbufq *mq, struct mbuf *m) +{ + + STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt); + mq->mq_len++; +} +#endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */ |