summary refs log tree commit diff stats
path: root/freebsd/sys/kern/uipc_socket.c
diff options
context:
space:
mode:
Diffstat (limited to 'freebsd/sys/kern/uipc_socket.c')
-rw-r--r-- freebsd/sys/kern/uipc_socket.c 154
1 files changed, 109 insertions, 45 deletions
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c
index 473c3df0..5b3b9eef 100644
--- a/freebsd/sys/kern/uipc_socket.c
+++ b/freebsd/sys/kern/uipc_socket.c
@@ -94,6 +94,12 @@
* from a listen queue to a file descriptor, in order to prevent garbage
* collection of the socket at an untimely moment. For a number of reasons,
* these interfaces are not preferred, and should be avoided.
+ *
+ * NOTE: With regard to VNETs the general rule is that callers do not set
+ * curvnet. Exceptions to this rule include soabort(), sodisconnect(),
+ * sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
+ * and sorflush(), which are usually called from a pre-set VNET context.
+ * sopoll() currently does not need a VNET context to be set.
*/
#include <sys/cdefs.h>
@@ -168,6 +174,10 @@ int maxsockets;
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
+#define VNET_SO_ASSERT(so) \
+ VNET_ASSERT(curvnet != NULL, \
+ ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));
+
static int somaxconn = SOMAXCONN;
static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS);
/* XXX: we dont have SYSCTL_USHORT */
@@ -294,6 +304,8 @@ soalloc(struct vnet *vnet)
so->so_gencnt = ++so_gencnt;
++numopensockets;
#ifdef VIMAGE
+ VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p",
+ __func__, __LINE__, so));
vnet->vnet_sockcnt++;
so->so_vnet = vnet;
#endif
@@ -317,6 +329,8 @@ sodealloc(struct socket *so)
so->so_gencnt = ++so_gencnt;
--numopensockets; /* Could be below, but faster here. */
#ifdef VIMAGE
+ VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p",
+ __func__, __LINE__, so));
so->so_vnet->vnet_sockcnt--;
#endif
mtx_unlock(&so_global_mtx);
@@ -377,6 +391,7 @@ socreate(int dom, struct socket **aso, int type, int proto,
so->so_type = type;
so->so_cred = crhold(cred);
if ((prp->pr_domain->dom_family == PF_INET) ||
+ (prp->pr_domain->dom_family == PF_INET6) ||
(prp->pr_domain->dom_family == PF_ROUTE))
#ifndef __rtems__
so->so_fibnum = td->td_proc->p_fibnum;
@@ -440,7 +455,8 @@ sonewconn(struct socket *head, int connstatus)
if (over)
#endif
return (NULL);
- VNET_ASSERT(head->so_vnet);
+ VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
+ __func__, __LINE__, head));
so = soalloc(head->so_vnet);
if (so == NULL)
return (NULL);
@@ -459,6 +475,7 @@ sonewconn(struct socket *head, int connstatus)
#endif
knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
+ VNET_SO_ASSERT(head);
if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
(*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
sodealloc(so);
@@ -533,8 +550,12 @@ sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
int
solisten(struct socket *so, int backlog, struct thread *td)
{
+ int error;
- return ((*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td));
+ CURVNET_SET(so->so_vnet);
+ error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td);
+ CURVNET_RESTORE();
+ return error;
}
int
@@ -617,11 +638,12 @@ sofree(struct socket *so)
so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
if (so->so_options & SO_ACCEPTCONN) {
KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated"));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_comp populated"));
+ KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_incomp populated"));
}
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
+ VNET_SO_ASSERT(so);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
(*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
if (pr->pr_usrreqs->pru_detach != NULL)
@@ -643,6 +665,8 @@ sofree(struct socket *so)
*/
sbdestroy(&so->so_snd, so);
sbdestroy(&so->so_rcv, so);
+ seldrain(&so->so_snd.sb_sel);
+ seldrain(&so->so_rcv.sb_sel);
knlist_destroy(&so->so_rcv.sb_sel.si_note);
knlist_destroy(&so->so_snd.sb_sel.si_note);
sodealloc(so);
@@ -751,6 +775,7 @@ soabort(struct socket *so)
KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP"));
KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP"));
+ VNET_SO_ASSERT(so);
if (so->so_proto->pr_usrreqs->pru_abort != NULL)
(*so->so_proto->pr_usrreqs->pru_abort)(so);
@@ -768,7 +793,10 @@ soaccept(struct socket *so, struct sockaddr **nam)
KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF"));
so->so_state &= ~SS_NOFDREF;
SOCK_UNLOCK(so);
+
+ CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
+ CURVNET_RESTORE();
return (error);
}
@@ -806,8 +834,12 @@ soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
int
soconnect2(struct socket *so1, struct socket *so2)
{
+ int error;
- return ((*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2));
+ CURVNET_SET(so1->so_vnet);
+ error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
+ CURVNET_RESTORE();
+ return (error);
}
int
@@ -819,6 +851,7 @@ sodisconnect(struct socket *so)
return (ENOTCONN);
if (so->so_state & SS_ISDISCONNECTING)
return (EALREADY);
+ VNET_SO_ASSERT(so);
error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
return (error);
}
@@ -1084,6 +1117,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
* there are probably other places that this also happens. We must
* rethink this.
*/
+ VNET_SO_ASSERT(so);
error = (*so->so_proto->pr_usrreqs->pru_send)(so,
(flags & MSG_OOB) ? PRUS_OOB :
/*
@@ -1273,6 +1307,7 @@ restart:
* places that this also happens. We must rethink
* this.
*/
+ VNET_SO_ASSERT(so);
error = (*so->so_proto->pr_usrreqs->pru_send)(so,
(flags & MSG_OOB) ? PRUS_OOB :
/*
@@ -1339,6 +1374,7 @@ soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
int error;
KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
+ VNET_SO_ASSERT(so);
m = m_get(M_WAIT, MT_DATA);
error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
@@ -1447,8 +1483,10 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
if (mp != NULL)
*mp = NULL;
if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING)
- && uio->uio_resid)
+ && uio->uio_resid) {
+ VNET_SO_ASSERT(so);
(*pr->pr_usrreqs->pru_rcvd)(so, 0);
+ }
error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
if (error)
@@ -1461,17 +1499,11 @@ restart:
* If we have less data than requested, block awaiting more (subject
* to any timeout) if:
* 1. the current count is less than the low water mark, or
- * 2. MSG_WAITALL is set, and it is possible to do the entire
- * receive operation at once if we block (resid <= hiwat).
- * 3. MSG_DONTWAIT is not set
- * If MSG_WAITALL is set but resid is larger than the receive buffer,
- * we have to do the receive in sections, and thus risk returning a
- * short count if a timeout or signal occurs after we start.
+ * 2. MSG_DONTWAIT is not set
*/
if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
so->so_rcv.sb_cc < uio->uio_resid) &&
- (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
- ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
+ so->so_rcv.sb_cc < so->so_rcv.sb_lowat &&
m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
KASSERT(m != NULL || !so->so_rcv.sb_cc,
("receive: m == %p so->so_rcv.sb_cc == %u",
@@ -1597,6 +1629,7 @@ dontblock:
cm->m_next = NULL;
if (pr->pr_domain->dom_externalize != NULL) {
SOCKBUF_UNLOCK(&so->so_rcv);
+ VNET_SO_ASSERT(so);
error = (*pr->pr_domain->dom_externalize)
(cm, controlp);
SOCKBUF_LOCK(&so->so_rcv);
@@ -1661,8 +1694,8 @@ dontblock:
* examined ('type'), end the receive operation.
*/
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
- if (m->m_type == MT_OOBDATA) {
- if (type != MT_OOBDATA)
+ if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
+ if (type != m->m_type)
break;
} else if (type == MT_OOBDATA)
break;
@@ -1812,15 +1845,22 @@ dontblock:
*/
if (pr->pr_flags & PR_WANTRCVD) {
SOCKBUF_UNLOCK(&so->so_rcv);
+ VNET_SO_ASSERT(so);
(*pr->pr_usrreqs->pru_rcvd)(so, flags);
SOCKBUF_LOCK(&so->so_rcv);
}
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
- error = sbwait(&so->so_rcv);
- if (error) {
- SOCKBUF_UNLOCK(&so->so_rcv);
- goto release;
+ /*
+ * We could have received some data while we were notifying
+ * the protocol. Skip blocking in this case.
+ */
+ if (so->so_rcv.sb_mb == NULL) {
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ goto release;
+ }
}
m = so->so_rcv.sb_mb;
if (m != NULL)
@@ -1858,6 +1898,7 @@ dontblock:
if (!(flags & MSG_SOCALLBCK) &&
(pr->pr_flags & PR_WANTRCVD)) {
SOCKBUF_UNLOCK(&so->so_rcv);
+ VNET_SO_ASSERT(so);
(*pr->pr_usrreqs->pru_rcvd)(so, flags);
SOCKBUF_LOCK(&so->so_rcv);
}
@@ -2055,6 +2096,7 @@ deliver:
(((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
!(flags & MSG_SOCALLBCK))) {
SOCKBUF_UNLOCK(sb);
+ VNET_SO_ASSERT(so);
(*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
SOCKBUF_LOCK(sb);
}
@@ -2266,9 +2308,13 @@ int
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
+ int error;
- return (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
+ CURVNET_SET(so->so_vnet);
+ error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
controlp, flagsp));
+ CURVNET_RESTORE();
+ return (error);
}
int
@@ -2279,17 +2325,19 @@ soshutdown(struct socket *so, int how)
if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
return (EINVAL);
+
+ CURVNET_SET(so->so_vnet);
if (pr->pr_usrreqs->pru_flush != NULL) {
(*pr->pr_usrreqs->pru_flush)(so, how);
}
if (how != SHUT_WR)
sorflush(so);
if (how != SHUT_RD) {
- CURVNET_SET(so->so_vnet);
error = (*pr->pr_usrreqs->pru_shutdown)(so);
CURVNET_RESTORE();
return (error);
}
+ CURVNET_RESTORE();
return (0);
}
@@ -2300,6 +2348,8 @@ sorflush(struct socket *so)
struct protosw *pr = so->so_proto;
struct sockbuf asb;
+ VNET_SO_ASSERT(so);
+
/*
* In order to avoid calling dom_dispose with the socket buffer mutex
* held, and in order to generally avoid holding the lock for a long
@@ -2313,7 +2363,6 @@ sorflush(struct socket *so)
* socket buffer. Don't let our acquire be interrupted by a signal
* despite any existing socket disposition on interruptable waiting.
*/
- CURVNET_SET(so->so_vnet);
socantrcvmore(so);
(void) sblock(sb, SBL_WAIT | SBL_NOINTR);
@@ -2337,7 +2386,6 @@ sorflush(struct socket *so)
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
(*pr->pr_domain->dom_dispose)(asb.sb_mb);
sbrelease_internal(&asb, so);
- CURVNET_RESTORE();
}
/*
@@ -2401,11 +2449,14 @@ sosetopt(struct socket *so, struct sockopt *sopt)
struct mac extmac;
#endif
+ CURVNET_SET(so->so_vnet);
error = 0;
if (sopt->sopt_level != SOL_SOCKET) {
- if (so->so_proto && so->so_proto->pr_ctloutput)
- return ((*so->so_proto->pr_ctloutput)
- (so, sopt));
+ if (so->so_proto->pr_ctloutput != NULL) {
+ error = (*so->so_proto->pr_ctloutput)(so, sopt);
+ CURVNET_RESTORE();
+ return (error);
+ }
error = ENOPROTOOPT;
} else {
switch (sopt->sopt_name) {
@@ -2458,15 +2509,16 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_SETFIB:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
- if (optval < 1 || optval > rt_numfibs) {
+ if (optval < 0 || optval >= rt_numfibs) {
error = EINVAL;
goto bad;
}
- if ((so->so_proto->pr_domain->dom_family == PF_INET) ||
- (so->so_proto->pr_domain->dom_family == PF_ROUTE)) {
+ if (((so->so_proto->pr_domain->dom_family == PF_INET) ||
+ (so->so_proto->pr_domain->dom_family == PF_INET6) ||
+ (so->so_proto->pr_domain->dom_family == PF_ROUTE))) {
so->so_fibnum = optval;
/* Note: ignore error */
- if (so->so_proto && so->so_proto->pr_ctloutput)
+ if (so->so_proto->pr_ctloutput)
(*so->so_proto->pr_ctloutput)(so, sopt);
} else {
so->so_fibnum = 0;
@@ -2584,13 +2636,11 @@ sosetopt(struct socket *so, struct sockopt *sopt)
error = ENOPROTOOPT;
break;
}
- if (error == 0 && so->so_proto != NULL &&
- so->so_proto->pr_ctloutput != NULL) {
- (void) ((*so->so_proto->pr_ctloutput)
- (so, sopt));
- }
+ if (error == 0 && so->so_proto->pr_ctloutput != NULL)
+ (void)(*so->so_proto->pr_ctloutput)(so, sopt);
}
bad:
+ CURVNET_RESTORE();
return (error);
}
@@ -2634,13 +2684,15 @@ sogetopt(struct socket *so, struct sockopt *sopt)
struct mac extmac;
#endif
+ CURVNET_SET(so->so_vnet);
error = 0;
if (sopt->sopt_level != SOL_SOCKET) {
- if (so->so_proto && so->so_proto->pr_ctloutput) {
- return ((*so->so_proto->pr_ctloutput)
- (so, sopt));
- } else
- return (ENOPROTOOPT);
+ if (so->so_proto->pr_ctloutput != NULL)
+ error = (*so->so_proto->pr_ctloutput)(so, sopt);
+ else
+ error = ENOPROTOOPT;
+ CURVNET_RESTORE();
+ return (error);
} else {
switch (sopt->sopt_name) {
#ifdef INET
@@ -2677,6 +2729,10 @@ integer:
optval = so->so_type;
goto integer;
+ case SO_PROTOCOL:
+ optval = so->so_proto->pr_protocol;
+ goto integer;
+
case SO_ERROR:
SOCK_LOCK(so);
optval = so->so_error;
@@ -2724,11 +2780,11 @@ integer:
error = sooptcopyin(sopt, &extmac, sizeof(extmac),
sizeof(extmac));
if (error)
- return (error);
+ goto bad;
error = mac_getsockopt_label(sopt->sopt_td->td_ucred,
so, &extmac);
if (error)
- return (error);
+ goto bad;
error = sooptcopyout(sopt, &extmac, sizeof extmac);
#else
error = EOPNOTSUPP;
@@ -2740,11 +2796,11 @@ integer:
error = sooptcopyin(sopt, &extmac, sizeof(extmac),
sizeof(extmac));
if (error)
- return (error);
+ goto bad;
error = mac_getsockopt_peerlabel(
sopt->sopt_td->td_ucred, so, &extmac);
if (error)
- return (error);
+ goto bad;
error = sooptcopyout(sopt, &extmac, sizeof extmac);
#else
error = EOPNOTSUPP;
@@ -2767,8 +2823,12 @@ integer:
error = ENOPROTOOPT;
break;
}
- return (error);
}
+#ifdef MAC
+bad:
+#endif
+ CURVNET_RESTORE();
+ return (error);
}
/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
@@ -2902,6 +2962,10 @@ sopoll(struct socket *so, int events, struct ucred *active_cred,
struct thread *td)
{
+ /*
+ * We do not need to set or assert curvnet as long as everyone uses
+ * sopoll_generic().
+ */
return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred,
td));
}