diff options
Diffstat (limited to 'freebsd/sys/netinet/in_pcb.h')
-rw-r--r-- | freebsd/sys/netinet/in_pcb.h | 89 |
1 files changed, 88 insertions, 1 deletions
diff --git a/freebsd/sys/netinet/in_pcb.h b/freebsd/sys/netinet/in_pcb.h index 574ab407..d00dd456 100644 --- a/freebsd/sys/netinet/in_pcb.h +++ b/freebsd/sys/netinet/in_pcb.h @@ -41,6 +41,7 @@ #define _NETINET_IN_PCB_H_ #include <sys/queue.h> +#include <sys/epoch.h> #include <sys/_lock.h> #include <sys/_mutex.h> #include <sys/_rwlock.h> @@ -156,6 +157,7 @@ struct in_conninfo { * from the global list. * * Key: + * (b) - Protected by the hpts lock. * (c) - Constant after initialization * (g) - Protected by the pcbgroup lock * (i) - Protected by the inpcb lock @@ -164,6 +166,51 @@ struct in_conninfo { * (h) - Protected by the pcbhash lock for the inpcb * (s) - Protected by another subsystem's locks * (x) - Undefined locking + * + * Notes on the tcp_hpts: + * + * First Hpts lock order is + * 1) INP_WLOCK() + * 2) HPTS_LOCK() i.e. hpts->pmtx + * + * To insert a TCB on the hpts you *must* be holding the INP_WLOCK(). + * You may check the inp->inp_in_hpts flag without the hpts lock. + * The hpts is the only one that will clear this flag holding + * only the hpts lock. This means that in your tcp_output() + * routine when you test for the inp_in_hpts flag to be 1 + * it may be transitioning to 0 (by the hpts). + * That's ok since that will just mean an extra call to tcp_output + * that most likely will find the call you executed + * (when the mis-match occured) will have put the TCB back + * on the hpts and it will return. If your + * call did not add the inp back to the hpts then you will either + * over-send or the cwnd will block you from sending more. + * + * Note you should also be holding the INP_WLOCK() when you + * call the remove from the hpts as well. Though usually + * you are either doing this from a timer, where you need and have + * the INP_WLOCK() or from destroying your TCB where again + * you should already have the INP_WLOCK(). + * + * The inp_hpts_cpu, inp_hpts_cpu_set, inp_input_cpu and + * inp_input_cpu_set fields are controlled completely by + * the hpts. Do not ever set these. The inp_hpts_cpu_set + * and inp_input_cpu_set fields indicate if the hpts has + * setup the respective cpu field. It is advised if this + * field is 0, to enqueue the packet with the appropriate + * hpts_immediate() call. If the _set field is 1, then + * you may compare the inp_*_cpu field to the curcpu and + * may want to again insert onto the hpts if these fields + * are not equal (i.e. you are not on the expected CPU). + * + * A note on inp_hpts_calls and inp_input_calls, these + * flags are set when the hpts calls either the output + * or do_segment routines respectively. If the routine + * being called wants to use this, then it needs to + * clear the flag before returning. The hpts will not + * clear the flag. The flags can be used to tell if + * the hpts is the function calling the respective + * routine. * * A few other notes: * @@ -190,14 +237,45 @@ struct inpcb { LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ struct rwlock inp_lock; /* Cache line #2 (amd64) */ -#define inp_start_zero inp_refcount +#define inp_start_zero inp_hpts #define inp_zero_size (sizeof(struct inpcb) - \ offsetof(struct inpcb, inp_start_zero)) + TAILQ_ENTRY(inpcb) inp_hpts; /* pacing out queue next lock(b) */ + + uint32_t inp_hpts_request; /* Current hpts request, zero if + * fits in the pacing window (i&b). */ + /* + * Note the next fields are protected by a + * different lock (hpts-lock). This means that + * they must correspond in size to the smallest + * protectable bit field (uint8_t on x86, and + * other platfomrs potentially uint32_t?). Also + * since CPU switches can occur at different times the two + * fields can *not* be collapsed into a signal bit field. + */ +#if defined(__amd64__) || defined(__i386__) + volatile uint8_t inp_in_hpts; /* on output hpts (lock b) */ + volatile uint8_t inp_in_input; /* on input hpts (lock b) */ +#else + volatile uint32_t inp_in_hpts; /* on output hpts (lock b) */ + volatile uint32_t inp_in_input; /* on input hpts (lock b) */ +#endif + volatile uint16_t inp_hpts_cpu; /* Lock (i) */ u_int inp_refcount; /* (i) refcount */ int inp_flags; /* (i) generic IP/datagram flags */ int inp_flags2; /* (i) generic IP/datagram flags #2*/ + volatile uint16_t inp_input_cpu; /* Lock (i) */ + volatile uint8_t inp_hpts_cpu_set :1, /* on output hpts (i) */ + inp_input_cpu_set : 1, /* on input hpts (i) */ + inp_hpts_calls :1, /* (i) from output hpts */ + inp_input_calls :1, /* (i) from input hpts */ + inp_spare_bits2 : 4; + uint8_t inp_spare_byte; /* Compiler hole */ void *inp_ppcb; /* (i) pointer to per-protocol pcb */ struct socket *inp_socket; /* (i) back pointer to socket */ + uint32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */ + uint32_t inp_hpts_drop_reas; /* reason we are dropping the PCB (lock i&b) */ + TAILQ_ENTRY(inpcb) inp_input; /* pacing in queue next lock(b) */ struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */ LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */ @@ -330,6 +408,13 @@ struct inpcbport { u_short phd_port; }; +struct in_pcblist { + int il_count; + struct epoch_context il_epoch_ctx; + struct inpcbinfo *il_pcbinfo; + struct inpcb *il_inp_list[0]; +}; + /*- * Global data structure for each high-level protocol (UDP, TCP, ...) in both * IPv4 and IPv6. Holds inpcb lists and information for managing them. @@ -638,6 +723,7 @@ short inp_so_options(const struct inpcb *inp); #define INP_RECVRSSBUCKETID 0x00000200 /* populate recv datagram with bucket id */ #define INP_RATE_LIMIT_CHANGED 0x00000400 /* rate limit needs attention */ #define INP_ORIGDSTADDR 0x00000800 /* receive IP dst address/port */ +#define INP_CANNOT_DO_ECN 0x00001000 /* The stack does not do ECN */ /* * Flags passed to in_pcblookup*() functions. @@ -751,6 +837,7 @@ void in_pcbrehash_mbuf(struct inpcb *, struct mbuf *); int in_pcbrele(struct inpcb *); int in_pcbrele_rlocked(struct inpcb *); int in_pcbrele_wlocked(struct inpcb *); +void in_pcblist_rele_rlocked(epoch_context_t ctx); void in_losing(struct inpcb *); void in_pcbsetsolabel(struct socket *so); int in_getpeeraddr(struct socket *so, struct sockaddr **nam); |