diff options
Diffstat (limited to 'freebsd/sys/kern/uipc_socket.c')
-rw-r--r-- | freebsd/sys/kern/uipc_socket.c | 154 |
1 files changed, 109 insertions, 45 deletions
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c index 473c3df0..5b3b9eef 100644 --- a/freebsd/sys/kern/uipc_socket.c +++ b/freebsd/sys/kern/uipc_socket.c @@ -94,6 +94,12 @@ * from a listen queue to a file descriptor, in order to prevent garbage * collection of the socket at an untimely moment. For a number of reasons, * these interfaces are not preferred, and should be avoided. + * + * NOTE: With regard to VNETs the general rule is that callers do not set + * curvnet. Exceptions to this rule include soabort(), sodisconnect(), + * sofree() (and with that sorele(), sotryfree()), as well as sonewconn() + * and sorflush(), which are usually called from a pre-set VNET context. + * sopoll() currently does not need a VNET context to be set. */ #include <sys/cdefs.h> @@ -168,6 +174,10 @@ int maxsockets; MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); +#define VNET_SO_ASSERT(so) \ + VNET_ASSERT(curvnet != NULL, \ + ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); + static int somaxconn = SOMAXCONN; static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS); /* XXX: we dont have SYSCTL_USHORT */ @@ -294,6 +304,8 @@ soalloc(struct vnet *vnet) so->so_gencnt = ++so_gencnt; ++numopensockets; #ifdef VIMAGE + VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p", + __func__, __LINE__, so)); vnet->vnet_sockcnt++; so->so_vnet = vnet; #endif @@ -317,6 +329,8 @@ sodealloc(struct socket *so) so->so_gencnt = ++so_gencnt; --numopensockets; /* Could be below, but faster here. */ #ifdef VIMAGE + VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p", + __func__, __LINE__, so)); so->so_vnet->vnet_sockcnt--; #endif mtx_unlock(&so_global_mtx); @@ -377,6 +391,7 @@ socreate(int dom, struct socket **aso, int type, int proto, so->so_type = type; so->so_cred = crhold(cred); if ((prp->pr_domain->dom_family == PF_INET) || + (prp->pr_domain->dom_family == PF_INET6) || (prp->pr_domain->dom_family == PF_ROUTE)) #ifndef __rtems__ so->so_fibnum = td->td_proc->p_fibnum; @@ -440,7 +455,8 @@ sonewconn(struct socket *head, int connstatus) if (over) #endif return (NULL); - VNET_ASSERT(head->so_vnet); + VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p", + __func__, __LINE__, head)); so = soalloc(head->so_vnet); if (so == NULL) return (NULL); @@ -459,6 +475,7 @@ sonewconn(struct socket *head, int connstatus) #endif knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); + VNET_SO_ASSERT(head); if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); @@ -533,8 +550,12 @@ sobind(struct socket *so, struct sockaddr *nam, struct thread *td) int solisten(struct socket *so, int backlog, struct thread *td) { + int error; - return ((*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td)); + CURVNET_SET(so->so_vnet); + error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td); + CURVNET_RESTORE(); + return error; } int @@ -617,11 +638,12 @@ sofree(struct socket *so) so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP)); if (so->so_options & SO_ACCEPTCONN) { KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated")); - KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_comp populated")); + KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_incomp populated")); } SOCK_UNLOCK(so); ACCEPT_UNLOCK(); + VNET_SO_ASSERT(so); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb); if (pr->pr_usrreqs->pru_detach != NULL) @@ -643,6 +665,8 @@ sofree(struct socket *so) */ sbdestroy(&so->so_snd, so); sbdestroy(&so->so_rcv, so); + seldrain(&so->so_snd.sb_sel); + seldrain(&so->so_rcv.sb_sel); knlist_destroy(&so->so_rcv.sb_sel.si_note); knlist_destroy(&so->so_snd.sb_sel.si_note); sodealloc(so); @@ -751,6 +775,7 @@ soabort(struct socket *so) KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF")); KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP")); KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP")); + VNET_SO_ASSERT(so); if (so->so_proto->pr_usrreqs->pru_abort != NULL) (*so->so_proto->pr_usrreqs->pru_abort)(so); @@ -768,7 +793,10 @@ soaccept(struct socket *so, struct sockaddr **nam) KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF")); so->so_state &= ~SS_NOFDREF; SOCK_UNLOCK(so); + + CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); + CURVNET_RESTORE(); return (error); } @@ -806,8 +834,12 @@ soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) int soconnect2(struct socket *so1, struct socket *so2) { + int error; - return ((*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2)); + CURVNET_SET(so1->so_vnet); + error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); + CURVNET_RESTORE(); + return (error); } int @@ -819,6 +851,7 @@ sodisconnect(struct socket *so) return (ENOTCONN); if (so->so_state & SS_ISDISCONNECTING) return (EALREADY); + VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); return (error); } @@ -1084,6 +1117,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, * there are probably other places that this also happens. We must * rethink this. */ + VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* @@ -1273,6 +1307,7 @@ restart: * places that this also happens. We must rethink * this. */ + VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* @@ -1339,6 +1374,7 @@ soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) int error; KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0")); + VNET_SO_ASSERT(so); m = m_get(M_WAIT, MT_DATA); error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); @@ -1447,8 +1483,10 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, if (mp != NULL) *mp = NULL; if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING) - && uio->uio_resid) + && uio->uio_resid) { + VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, 0); + } error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) @@ -1461,17 +1499,11 @@ restart: * If we have less data than requested, block awaiting more (subject * to any timeout) if: * 1. the current count is less than the low water mark, or - * 2. MSG_WAITALL is set, and it is possible to do the entire - * receive operation at once if we block (resid <= hiwat). - * 3. MSG_DONTWAIT is not set - * If MSG_WAITALL is set but resid is larger than the receive buffer, - * we have to do the receive in sections, and thus risk returning a - * short count if a timeout or signal occurs after we start. + * 2. MSG_DONTWAIT is not set */ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio->uio_resid) && - (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || - ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && + so->so_rcv.sb_cc < so->so_rcv.sb_lowat && m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { KASSERT(m != NULL || !so->so_rcv.sb_cc, ("receive: m == %p so->so_rcv.sb_cc == %u", @@ -1597,6 +1629,7 @@ dontblock: cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { SOCKBUF_UNLOCK(&so->so_rcv); + VNET_SO_ASSERT(so); error = (*pr->pr_domain->dom_externalize) (cm, controlp); SOCKBUF_LOCK(&so->so_rcv); @@ -1661,8 +1694,8 @@ dontblock: * examined ('type'), end the receive operation. */ SOCKBUF_LOCK_ASSERT(&so->so_rcv); - if (m->m_type == MT_OOBDATA) { - if (type != MT_OOBDATA) + if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) { + if (type != m->m_type) break; } else if (type == MT_OOBDATA) break; @@ -1812,15 +1845,22 @@ dontblock: */ if (pr->pr_flags & PR_WANTRCVD) { SOCKBUF_UNLOCK(&so->so_rcv); + VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(&so->so_rcv); } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); - error = sbwait(&so->so_rcv); - if (error) { - SOCKBUF_UNLOCK(&so->so_rcv); - goto release; + /* + * We could receive some data while was notifying + * the protocol. Skip blocking in this case. + */ + if (so->so_rcv.sb_mb == NULL) { + error = sbwait(&so->so_rcv); + if (error) { + SOCKBUF_UNLOCK(&so->so_rcv); + goto release; + } } m = so->so_rcv.sb_mb; if (m != NULL) @@ -1858,6 +1898,7 @@ dontblock: if (!(flags & MSG_SOCALLBCK) && (pr->pr_flags & PR_WANTRCVD)) { SOCKBUF_UNLOCK(&so->so_rcv); + VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(&so->so_rcv); } @@ -2055,6 +2096,7 @@ deliver: (((flags & MSG_WAITALL) && uio->uio_resid > 0) || !(flags & MSG_SOCALLBCK))) { SOCKBUF_UNLOCK(sb); + VNET_SO_ASSERT(so); (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(sb); } @@ -2266,9 +2308,13 @@ int soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { + int error; - return (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0, + CURVNET_SET(so->so_vnet); + error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0, controlp, flagsp)); + CURVNET_RESTORE(); + return (error); } int @@ -2279,17 +2325,19 @@ soshutdown(struct socket *so, int how) if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) return (EINVAL); + + CURVNET_SET(so->so_vnet); if (pr->pr_usrreqs->pru_flush != NULL) { (*pr->pr_usrreqs->pru_flush)(so, how); } if (how != SHUT_WR) sorflush(so); if (how != SHUT_RD) { - CURVNET_SET(so->so_vnet); error = (*pr->pr_usrreqs->pru_shutdown)(so); CURVNET_RESTORE(); return (error); } + CURVNET_RESTORE(); return (0); } @@ -2300,6 +2348,8 @@ sorflush(struct socket *so) struct protosw *pr = so->so_proto; struct sockbuf asb; + VNET_SO_ASSERT(so); + /* * In order to avoid calling dom_dispose with the socket buffer mutex * held, and in order to generally avoid holding the lock for a long @@ -2313,7 +2363,6 @@ sorflush(struct socket *so) * socket buffer. Don't let our acquire be interrupted by a signal * despite any existing socket disposition on interruptable waiting. */ - CURVNET_SET(so->so_vnet); socantrcvmore(so); (void) sblock(sb, SBL_WAIT | SBL_NOINTR); @@ -2337,7 +2386,6 @@ sorflush(struct socket *so) if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(asb.sb_mb); sbrelease_internal(&asb, so); - CURVNET_RESTORE(); } /* @@ -2401,11 +2449,14 @@ sosetopt(struct socket *so, struct sockopt *sopt) struct mac extmac; #endif + CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { - if (so->so_proto && so->so_proto->pr_ctloutput) - return ((*so->so_proto->pr_ctloutput) - (so, sopt)); + if (so->so_proto->pr_ctloutput != NULL) { + error = (*so->so_proto->pr_ctloutput)(so, sopt); + CURVNET_RESTORE(); + return (error); + } error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { @@ -2458,15 +2509,16 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_SETFIB: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); - if (optval < 1 || optval > rt_numfibs) { + if (optval < 0 || optval >= rt_numfibs) { error = EINVAL; goto bad; } - if ((so->so_proto->pr_domain->dom_family == PF_INET) || - (so->so_proto->pr_domain->dom_family == PF_ROUTE)) { + if (((so->so_proto->pr_domain->dom_family == PF_INET) || + (so->so_proto->pr_domain->dom_family == PF_INET6) || + (so->so_proto->pr_domain->dom_family == PF_ROUTE))) { so->so_fibnum = optval; /* Note: ignore error */ - if (so->so_proto && so->so_proto->pr_ctloutput) + if (so->so_proto->pr_ctloutput) (*so->so_proto->pr_ctloutput)(so, sopt); } else { so->so_fibnum = 0; @@ -2584,13 +2636,11 @@ sosetopt(struct socket *so, struct sockopt *sopt) error = ENOPROTOOPT; break; } - if (error == 0 && so->so_proto != NULL && - so->so_proto->pr_ctloutput != NULL) { - (void) ((*so->so_proto->pr_ctloutput) - (so, sopt)); - } + if (error == 0 && so->so_proto->pr_ctloutput != NULL) + (void)(*so->so_proto->pr_ctloutput)(so, sopt); } bad: + CURVNET_RESTORE(); return (error); } @@ -2634,13 +2684,15 @@ sogetopt(struct socket *so, struct sockopt *sopt) struct mac extmac; #endif + CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { - if (so->so_proto && so->so_proto->pr_ctloutput) { - return ((*so->so_proto->pr_ctloutput) - (so, sopt)); - } else - return (ENOPROTOOPT); + if (so->so_proto->pr_ctloutput != NULL) + error = (*so->so_proto->pr_ctloutput)(so, sopt); + else + error = ENOPROTOOPT; + CURVNET_RESTORE(); + return (error); } else { switch (sopt->sopt_name) { #ifdef INET @@ -2677,6 +2729,10 @@ integer: optval = so->so_type; goto integer; + case SO_PROTOCOL: + optval = so->so_proto->pr_protocol; + goto integer; + case SO_ERROR: SOCK_LOCK(so); optval = so->so_error; @@ -2724,11 +2780,11 @@ integer: error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) - return (error); + goto bad; error = mac_getsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); if (error) - return (error); + goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; @@ -2740,11 +2796,11 @@ integer: error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) - return (error); + goto bad; error = mac_getsockopt_peerlabel( sopt->sopt_td->td_ucred, so, &extmac); if (error) - return (error); + goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; @@ -2767,8 +2823,12 @@ integer: error = ENOPROTOOPT; break; } - return (error); } +#ifdef MAC +bad: +#endif + CURVNET_RESTORE(); + return (error); } /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ @@ -2902,6 +2962,10 @@ sopoll(struct socket *so, int events, struct ucred *active_cred, struct thread *td) { + /* + * We do not need to set or assert curvnet as long as everyone uses + * sopoll_generic(). + */ return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred, td)); } |