summaryrefslogtreecommitdiffstats
path: root/freebsd/sys/netinet/in_pcb.h
diff options
context:
space:
mode:
Diffstat (limited to 'freebsd/sys/netinet/in_pcb.h')
-rw-r--r--freebsd/sys/netinet/in_pcb.h89
1 files changed, 88 insertions, 1 deletions
diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h
index 574ab407..d00dd456 100644
--- a/freebsd/sys/netinet/in_pcb.h
+++ b/freebsd/sys/netinet/in_pcb.h
@@ -41,6 +41,7 @@
#define _NETINET_IN_PCB_H_
#include <sys/queue.h>
+#include <sys/epoch.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_rwlock.h>
@@ -156,6 +157,7 @@ struct in_conninfo {
* from the global list.
*
* Key:
+ * (b) - Protected by the hpts lock.
* (c) - Constant after initialization
* (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
@@ -164,6 +166,51 @@ struct in_conninfo {
* (h) - Protected by the pcbhash lock for the inpcb
* (s) - Protected by another subsystem's locks
* (x) - Undefined locking
+ *
+ * Notes on the tcp_hpts:
+ *
+ * First Hpts lock order is
+ * 1) INP_WLOCK()
+ * 2) HPTS_LOCK() i.e. hpts->pmtx
+ *
+ * To insert a TCB on the hpts you *must* be holding the INP_WLOCK().
+ * You may check the inp->inp_in_hpts flag without the hpts lock.
+ * The hpts is the only one that will clear this flag holding
+ * only the hpts lock. This means that in your tcp_output()
+ * routine when you test for the inp_in_hpts flag to be 1
+ * it may be transitioning to 0 (by the hpts).
+ * That's ok since that will just mean an extra call to tcp_output
+ * that most likely will find the call you executed
+ * (when the mis-match occured) will have put the TCB back
+ * on the hpts and it will return. If your
+ * call did not add the inp back to the hpts then you will either
+ * over-send or the cwnd will block you from sending more.
+ *
+ * Note you should also be holding the INP_WLOCK() when you
+ * call the remove from the hpts as well. Though usually
+ * you are either doing this from a timer, where you need and have
+ * the INP_WLOCK() or from destroying your TCB where again
+ * you should already have the INP_WLOCK().
+ *
+ * The inp_hpts_cpu, inp_hpts_cpu_set, inp_input_cpu and
+ * inp_input_cpu_set fields are controlled completely by
+ * the hpts. Do not ever set these. The inp_hpts_cpu_set
+ * and inp_input_cpu_set fields indicate if the hpts has
+ * setup the respective cpu field. It is advised if this
+ * field is 0, to enqueue the packet with the appropriate
+ * hpts_immediate() call. If the _set field is 1, then
+ * you may compare the inp_*_cpu field to the curcpu and
+ * may want to again insert onto the hpts if these fields
+ * are not equal (i.e. you are not on the expected CPU).
+ *
+ * A note on inp_hpts_calls and inp_input_calls, these
+ * flags are set when the hpts calls either the output
+ * or do_segment routines respectively. If the routine
+ * being called wants to use this, then it needs to
+ * clear the flag before returning. The hpts will not
+ * clear the flag. The flags can be used to tell if
+ * the hpts is the function calling the respective
+ * routine.
*
* A few other notes:
*
@@ -190,14 +237,45 @@ struct inpcb {
LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
struct rwlock inp_lock;
/* Cache line #2 (amd64) */
-#define inp_start_zero inp_refcount
+#define inp_start_zero inp_hpts
#define inp_zero_size (sizeof(struct inpcb) - \
offsetof(struct inpcb, inp_start_zero))
+ TAILQ_ENTRY(inpcb) inp_hpts; /* pacing out queue next lock(b) */
+
+ uint32_t inp_hpts_request; /* Current hpts request, zero if
+ * fits in the pacing window (i&b). */
+ /*
+ * Note the next fields are protected by a
+ * different lock (hpts-lock). This means that
+ * they must correspond in size to the smallest
+ * protectable bit field (uint8_t on x86, and
+ * other platfomrs potentially uint32_t?). Also
+ * since CPU switches can occur at different times the two
+ * fields can *not* be collapsed into a signal bit field.
+ */
+#if defined(__amd64__) || defined(__i386__)
+ volatile uint8_t inp_in_hpts; /* on output hpts (lock b) */
+ volatile uint8_t inp_in_input; /* on input hpts (lock b) */
+#else
+ volatile uint32_t inp_in_hpts; /* on output hpts (lock b) */
+ volatile uint32_t inp_in_input; /* on input hpts (lock b) */
+#endif
+ volatile uint16_t inp_hpts_cpu; /* Lock (i) */
u_int inp_refcount; /* (i) refcount */
int inp_flags; /* (i) generic IP/datagram flags */
int inp_flags2; /* (i) generic IP/datagram flags #2*/
+ volatile uint16_t inp_input_cpu; /* Lock (i) */
+ volatile uint8_t inp_hpts_cpu_set :1, /* on output hpts (i) */
+ inp_input_cpu_set : 1, /* on input hpts (i) */
+ inp_hpts_calls :1, /* (i) from output hpts */
+ inp_input_calls :1, /* (i) from input hpts */
+ inp_spare_bits2 : 4;
+ uint8_t inp_spare_byte; /* Compiler hole */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct socket *inp_socket; /* (i) back pointer to socket */
+ uint32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */
+ uint32_t inp_hpts_drop_reas; /* reason we are dropping the PCB (lock i&b) */
+ TAILQ_ENTRY(inpcb) inp_input; /* pacing in queue next lock(b) */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
@@ -330,6 +408,13 @@ struct inpcbport {
u_short phd_port;
};
+struct in_pcblist {
+ int il_count;
+ struct epoch_context il_epoch_ctx;
+ struct inpcbinfo *il_pcbinfo;
+ struct inpcb *il_inp_list[0];
+};
+
/*-
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
@@ -638,6 +723,7 @@ short inp_so_options(const struct inpcb *inp);
#define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */
#define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */
#define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */
+#define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */
/*
* Flags passed to in_pcblookup*() functions.
@@ -751,6 +837,7 @@ void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *);
int in_pcbrele(struct inpcb *);
int in_pcbrele_rlocked(struct inpcb *);
int in_pcbrele_wlocked(struct inpcb *);
+void in_pcblist_rele_rlocked(epoch_context_t ctx);
void in_losing(struct inpcb *);
void in_pcbsetsolabel(struct socket *so);
int in_getpeeraddr(struct socket *so, struct sockaddr **nam);