summaryrefslogtreecommitdiffstats
path: root/freebsd/sys/kern/uipc_usrreq.c
diff options
context:
space:
mode:
Diffstat (limited to 'freebsd/sys/kern/uipc_usrreq.c')
-rw-r--r--freebsd/sys/kern/uipc_usrreq.c419
1 files changed, 253 insertions, 166 deletions
diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c
index da4b624a..7b2d1bc4 100644
--- a/freebsd/sys/kern/uipc_usrreq.c
+++ b/freebsd/sys/kern/uipc_usrreq.c
@@ -53,7 +53,6 @@
*
* TODO:
* RDM
- * distinguish datagram size limits from flow control limits in SEQPACKET
* rethink name space problems
* need a proper out-of-band
*/
@@ -64,6 +63,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ddb.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/capsicum.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
@@ -106,6 +106,8 @@ __FBSDID("$FreeBSD$");
#include <rtems/imfs.h>
#endif /* __rtems__ */
+MALLOC_DECLARE(M_FILECAPS);
+
/*
* Locking key:
* (l) Locked using list lock
@@ -284,24 +286,27 @@ static int uipc_connect2(struct socket *, struct socket *);
static int uipc_ctloutput(struct socket *, struct sockopt *);
static int unp_connect(struct socket *, struct sockaddr *,
struct thread *);
+static int unp_connectat(int, struct socket *, struct sockaddr *,
+ struct thread *);
static int unp_connect2(struct socket *so, struct socket *so2, int);
static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
#ifndef __rtems__
static void unp_dispose(struct mbuf *);
+static void unp_dispose_so(struct socket *so);
#endif /* __rtems__ */
static void unp_shutdown(struct unpcb *);
-static void unp_drop(struct unpcb *, int);
+static void unp_drop(struct unpcb *);
#ifndef __rtems__
static void unp_gc(__unused void *, int);
-static void unp_scan(struct mbuf *, void (*)(struct file *));
+static void unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
static void unp_discard(struct file *);
-static void unp_freerights(struct file **, int);
+static void unp_freerights(struct filedescent **, int);
#endif /* __rtems__ */
static void unp_init(void);
#ifndef __rtems__
static int unp_internalize(struct mbuf **, struct thread *);
static void unp_internalize_fp(struct file *);
-static int unp_externalize(struct mbuf *, struct mbuf **);
+static int unp_externalize(struct mbuf *, struct mbuf **, int);
static int unp_externalize_fp(struct file *);
static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
static void unp_process_defers(void * __unused, int);
@@ -350,10 +355,10 @@ static struct domain localdomain = {
.dom_init = unp_init,
#ifndef __rtems__
.dom_externalize = unp_externalize,
- .dom_dispose = unp_dispose,
+ .dom_dispose = unp_dispose_so,
#endif /* __rtems__ */
.dom_protosw = localsw,
- .dom_protoswNPROTOSW = &localsw[sizeof(localsw)/sizeof(localsw[0])]
+ .dom_protoswNPROTOSW = &localsw[nitems(localsw)]
};
DOMAIN_SET(local);
@@ -370,7 +375,7 @@ uipc_abort(struct socket *so)
unp2 = unp->unp_conn;
if (unp2 != NULL) {
UNP_PCB_LOCK(unp2);
- unp_drop(unp2, ECONNABORTED);
+ unp_drop(unp2);
UNP_PCB_UNLOCK(unp2);
}
UNP_PCB_UNLOCK(unp);
@@ -446,6 +451,8 @@ uipc_attach(struct socket *so, int proto, struct thread *td)
unp->unp_socket = so;
so->so_pcb = unp;
unp->unp_refcount = 1;
+ if (so->so_head != NULL)
+ unp->unp_flags |= UNP_NASCENT;
UNP_LIST_LOCK();
unp->unp_gencnt = ++unp_gencnt;
@@ -511,16 +518,17 @@ static const IMFS_node_control rtems_uipc_imfs_zombi_control =
IMFS_node_destroy_default);
#endif /* __rtems__ */
static int
-uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
#ifndef __rtems__
struct vattr vattr;
- int error, namelen, vfslocked;
+ int error, namelen;
struct nameidata nd;
struct unpcb *unp;
struct vnode *vp;
struct mount *mp;
+ cap_rights_t rights;
char *buf;
#else /* __rtems__ */
int rv;
@@ -528,6 +536,9 @@ uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
struct unpcb *unp;
#endif /* __rtems__ */
+ if (nam->sa_family != AF_UNIX)
+ return (EAFNOSUPPORT);
+
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
@@ -564,15 +575,13 @@ uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
buf[namelen] = 0;
restart:
- vfslocked = 0;
- NDINIT(&nd, CREATE, MPSAFE | NOFOLLOW | LOCKPARENT | SAVENAME,
- UIO_SYSSPACE, buf, td);
+ NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
+ UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_BINDAT), td);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
error = namei(&nd);
if (error)
goto error;
vp = nd.ni_vp;
- vfslocked = NDHASGIANT(&nd);
if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_dvp == vp)
@@ -587,7 +596,6 @@ restart:
error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
if (error)
goto error;
- VFS_UNLOCK_GIANT(vfslocked);
goto restart;
}
VATTR_NULL(&vattr);
@@ -632,15 +640,11 @@ restart:
#ifndef __rtems__
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
- VFS_UNLOCK_GIANT(vfslocked);
free(buf, M_TEMP);
#endif /* __rtems__ */
return (0);
error:
-#ifndef __rtems__
- VFS_UNLOCK_GIANT(vfslocked);
-#endif /* __rtems__ */
UNP_PCB_LOCK(unp);
unp->unp_flags &= ~UNP_BINDING;
UNP_PCB_UNLOCK(unp);
@@ -651,6 +655,13 @@ error:
}
static int
+uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+
+ return (uipc_bindat(AT_FDCWD, so, nam, td));
+}
+
+static int
uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
int error;
@@ -662,6 +673,19 @@ uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
return (error);
}
+static int
+uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td)
+{
+ int error;
+
+ KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
+ UNP_LINK_WLOCK();
+ error = unp_connectat(fd, so, nam, td);
+ UNP_LINK_WUNLOCK();
+ return (error);
+}
+
static void
uipc_close(struct socket *so)
{
@@ -718,14 +742,24 @@ uipc_detach(struct socket *so)
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
- UNP_LINK_WLOCK();
+ vp = NULL;
+#ifndef __rtems__
+ local_unp_rights = 0;
+#endif /* __rtems__ */
+
UNP_LIST_LOCK();
- UNP_PCB_LOCK(unp);
LIST_REMOVE(unp, unp_link);
unp->unp_gencnt = ++unp_gencnt;
--unp_count;
UNP_LIST_UNLOCK();
+ if ((unp->unp_flags & UNP_NASCENT) != 0) {
+ UNP_PCB_LOCK(unp);
+ goto teardown;
+ }
+ UNP_LINK_WLOCK();
+ UNP_PCB_LOCK(unp);
+
/*
* XXXRW: Should assert vp->v_socket == so.
*/
@@ -753,13 +787,14 @@ uipc_detach(struct socket *so)
struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
UNP_PCB_LOCK(ref);
- unp_drop(ref, ECONNRESET);
+ unp_drop(ref);
UNP_PCB_UNLOCK(ref);
}
#ifndef __rtems__
local_unp_rights = unp_rights;
#endif /* __rtems__ */
UNP_LINK_WUNLOCK();
+teardown:
unp->unp_socket->so_pcb = NULL;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
@@ -773,13 +808,8 @@ uipc_detach(struct socket *so)
} else
UNP_PCB_UNLOCK(unp);
#ifndef __rtems__
- if (vp) {
- int vfslocked;
-
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ if (vp)
vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
- }
if (local_unp_rights)
taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
#endif /* __rtems__ */
@@ -817,8 +847,10 @@ uipc_listen(struct socket *so, int backlog, struct thread *td)
UNP_PCB_LOCK(unp);
if (unp->unp_vnode == NULL) {
+ /* Already connected or not bound to an address. */
+ error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
UNP_PCB_UNLOCK(unp);
- return (EINVAL);
+ return (error);
}
SOCK_LOCK(so);
@@ -872,13 +904,11 @@ uipc_rcvd(struct socket *so, int flags)
struct unpcb *unp, *unp2;
struct socket *so2;
u_int mbcnt, sbcc;
- u_long newhiwat;
unp = sotounpcb(so);
- KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL"));
-
- if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
- panic("uipc_rcvd socktype %d", so->so_type);
+ KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
+ KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
+ ("%s: socktype %d", __func__, so->so_type));
/*
* Adjust backpressure on sender and wakeup any waiting to write.
@@ -892,8 +922,17 @@ uipc_rcvd(struct socket *so, int flags)
*/
SOCKBUF_LOCK(&so->so_rcv);
mbcnt = so->so_rcv.sb_mbcnt;
- sbcc = so->so_rcv.sb_cc;
+ sbcc = sbavail(&so->so_rcv);
SOCKBUF_UNLOCK(&so->so_rcv);
+ /*
+ * There is a benign race condition at this point. If we're planning to
+ * clear SB_STOP, but uipc_send is called on the connected socket at
+ * this instant, it might add data to the sockbuf and set SB_STOP. Then
+ * we would erroneously clear SB_STOP below, even though the sockbuf is
+ * full. The race is benign because the only ill effect is to allow the
+ * sockbuf to exceed its size limit, and the size limits are not
+ * strictly guaranteed anyway.
+ */
UNP_PCB_LOCK(unp);
unp2 = unp->unp_conn;
if (unp2 == NULL) {
@@ -902,13 +941,9 @@ uipc_rcvd(struct socket *so, int flags)
}
so2 = unp2->unp_socket;
SOCKBUF_LOCK(&so2->so_snd);
- so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt;
- newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc;
- (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
- newhiwat, RLIM_INFINITY);
+ if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
+ so2->so_snd.sb_flags &= ~SB_STOP;
sowwakeup_locked(so2);
- unp->unp_mbcnt = mbcnt;
- unp->unp_cc = sbcc;
UNP_PCB_UNLOCK(unp);
return (0);
}
@@ -919,12 +954,14 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
{
struct unpcb *unp, *unp2;
struct socket *so2;
- u_int mbcnt_delta, sbcc;
- u_int newhiwat;
+ u_int mbcnt, sbcc;
int error = 0;
unp = sotounpcb(so);
- KASSERT(unp != NULL, ("uipc_send: unp == NULL"));
+ KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
+ KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
+ so->so_type == SOCK_SEQPACKET,
+ ("%s: socktype %d", __func__, so->so_type));
if (flags & PRUS_OOB) {
error = EOPNOTSUPP;
@@ -1067,7 +1104,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
control))
control = NULL;
} else
- sbappend_locked(&so2->so_rcv, m);
+ sbappend_locked(&so2->so_rcv, m, flags);
break;
case SOCK_SEQPACKET: {
@@ -1087,33 +1124,27 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
}
}
+ mbcnt = so2->so_rcv.sb_mbcnt;
+ sbcc = sbavail(&so2->so_rcv);
+ if (sbcc)
+ sorwakeup_locked(so2);
+ else
+ SOCKBUF_UNLOCK(&so2->so_rcv);
+
/*
- * XXXRW: While fine for SOCK_STREAM, this conflates maximum
- * datagram size and back-pressure for SOCK_SEQPACKET, which
- * can lead to undesired return of EMSGSIZE on send instead
- * of more desirable blocking.
+ * The PCB lock on unp2 protects the SB_STOP flag. Without it,
+ * it would be possible for uipc_rcvd to be called at this
+ * point, drain the receiving sockbuf, clear SB_STOP, and then
+ * we would set SB_STOP below. That could lead to an empty
+ * sockbuf having SB_STOP set
*/
- mbcnt_delta = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt;
- unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt;
- sbcc = so2->so_rcv.sb_cc;
- sorwakeup_locked(so2);
-
SOCKBUF_LOCK(&so->so_snd);
- if ((int)so->so_snd.sb_hiwat >= (int)(sbcc - unp2->unp_cc))
- newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc);
- else
- newhiwat = 0;
- (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
- newhiwat, RLIM_INFINITY);
- so->so_snd.sb_mbmax -= mbcnt_delta;
+ if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
+ so->so_snd.sb_flags |= SB_STOP;
SOCKBUF_UNLOCK(&so->so_snd);
- unp2->unp_cc = sbcc;
UNP_PCB_UNLOCK(unp2);
m = NULL;
break;
-
- default:
- panic("uipc_send unknown socktype");
}
/*
@@ -1147,29 +1178,46 @@ release:
}
static int
-uipc_sense(struct socket *so, struct stat *sb)
+uipc_ready(struct socket *so, struct mbuf *m, int count)
{
struct unpcb *unp, *unp2;
struct socket *so2;
+ int error;
+
+ unp = sotounpcb(so);
+
+ UNP_LINK_RLOCK();
+ unp2 = unp->unp_conn;
+ UNP_PCB_LOCK(unp2);
+ so2 = unp2->unp_socket;
+
+ SOCKBUF_LOCK(&so2->so_rcv);
+ if ((error = sbready(&so2->so_rcv, m, count)) == 0)
+ sorwakeup_locked(so2);
+ else
+ SOCKBUF_UNLOCK(&so2->so_rcv);
+
+ UNP_PCB_UNLOCK(unp2);
+ UNP_LINK_RUNLOCK();
+
+ return (error);
+}
+
+static int
+uipc_sense(struct socket *so, struct stat *sb)
+{
+ struct unpcb *unp;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
sb->st_blksize = so->so_snd.sb_hiwat;
- UNP_LINK_RLOCK();
UNP_PCB_LOCK(unp);
- unp2 = unp->unp_conn;
- if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
- unp2 != NULL) {
- so2 = unp2->unp_socket;
- sb->st_blksize += so2->so_rcv.sb_cc;
- }
sb->st_dev = NODEV;
if (unp->unp_ino == 0)
unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
sb->st_ino = unp->unp_ino;
UNP_PCB_UNLOCK(unp);
- UNP_LINK_RUNLOCK();
return (0);
}
@@ -1215,7 +1263,9 @@ static struct pr_usrreqs uipc_usrreqs_dgram = {
.pru_accept = uipc_accept,
.pru_attach = uipc_attach,
.pru_bind = uipc_bind,
+ .pru_bindat = uipc_bindat,
.pru_connect = uipc_connect,
+ .pru_connectat = uipc_connectat,
.pru_connect2 = uipc_connect2,
.pru_detach = uipc_detach,
.pru_disconnect = uipc_disconnect,
@@ -1235,7 +1285,9 @@ static struct pr_usrreqs uipc_usrreqs_seqpacket = {
.pru_accept = uipc_accept,
.pru_attach = uipc_attach,
.pru_bind = uipc_bind,
+ .pru_bindat = uipc_bindat,
.pru_connect = uipc_connect,
+ .pru_connectat = uipc_connectat,
.pru_connect2 = uipc_connect2,
.pru_detach = uipc_detach,
.pru_disconnect = uipc_disconnect,
@@ -1255,7 +1307,9 @@ static struct pr_usrreqs uipc_usrreqs_stream = {
.pru_accept = uipc_accept,
.pru_attach = uipc_attach,
.pru_bind = uipc_bind,
+ .pru_bindat = uipc_bindat,
.pru_connect = uipc_connect,
+ .pru_connectat = uipc_connectat,
.pru_connect2 = uipc_connect2,
.pru_detach = uipc_detach,
.pru_disconnect = uipc_disconnect,
@@ -1263,6 +1317,7 @@ static struct pr_usrreqs uipc_usrreqs_stream = {
.pru_peeraddr = uipc_peeraddr,
.pru_rcvd = uipc_rcvd,
.pru_send = uipc_send,
+ .pru_ready = uipc_ready,
.pru_sense = uipc_sense,
.pru_shutdown = uipc_shutdown,
.pru_sockaddr = uipc_sockaddr,
@@ -1367,6 +1422,14 @@ uipc_ctloutput(struct socket *so, struct sockopt *sopt)
static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
+
+ return (unp_connectat(AT_FDCWD, so, nam, td));
+}
+
+static int
+unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
+ struct thread *td)
+{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
#ifndef __rtems__
struct vnode *vp;
@@ -1376,16 +1439,19 @@ unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
struct socket *so2, *so3;
struct unpcb *unp, *unp2, *unp3;
#ifndef __rtems__
- int error, len, vfslocked;
struct nameidata nd;
char buf[SOCK_MAXADDRLEN];
#else /* __rtems__ */
- int error, len;
rtems_filesystem_eval_path_context_t ctx;
int eval_flags;
const rtems_filesystem_location_info_t *currentloc;
#endif /* __rtems__ */
struct sockaddr *sa;
+ cap_rights_t rights;
+ int error, len;
+
+ if (nam->sa_family != AF_UNIX)
+ return (EAFNOSUPPORT);
UNP_LINK_WLOCK_ASSERT();
@@ -1413,15 +1479,14 @@ unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
#ifndef __rtems__
- NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKSHARED | LOCKLEAF,
- UIO_SYSSPACE, buf, td);
+ NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
+ UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_CONNECTAT), td);
error = namei(&nd);
if (error)
vp = NULL;
else
vp = nd.ni_vp;
ASSERT_VOP_LOCKED(vp, "unp_connect");
- vfslocked = NDHASGIANT(&nd);
NDFREE(&nd, NDF_ONLY_PNBUF);
if (error)
goto bad;
@@ -1438,7 +1503,6 @@ unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
if (error)
goto bad;
- VFS_UNLOCK_GIANT(vfslocked);
#else /* __rtems__ */
eval_flags = RTEMS_FS_FOLLOW_LINK;
currentloc = rtems_filesystem_eval_path_start_with_root_and_current(
@@ -1544,19 +1608,10 @@ unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
UNP_PCB_UNLOCK(unp);
bad2:
UNP_LINK_WUNLOCK();
-#ifndef __rtems__
- if (vfslocked)
- /*
- * Giant has been previously acquired. This means filesystem
- * isn't MPSAFE. Do it once again.
- */
- mtx_lock(&Giant);
-#endif /* __rtems__ */
bad:
#ifndef __rtems__
if (vp != NULL)
vput(vp);
- VFS_UNLOCK_GIANT(vfslocked);
#endif /* __rtems__ */
free(sa, M_SONAME);
UNP_LINK_WLOCK();
@@ -1583,6 +1638,7 @@ unp_connect2(struct socket *so, struct socket *so2, int req)
if (so2->so_type != so->so_type)
return (EPROTOTYPE);
+ unp2->unp_flags &= ~UNP_NASCENT;
unp->unp_conn = unp2;
switch (so->so_type) {
@@ -1808,11 +1864,7 @@ unp_shutdown(struct unpcb *unp)
}
static void
-#ifndef __rtems__
-unp_drop(struct unpcb *unp, int errno)
-#else /* __rtems__ */
-unp_drop(struct unpcb *unp, int error)
-#endif /* __rtems__ */
+unp_drop(struct unpcb *unp)
{
struct socket *so = unp->unp_socket;
struct unpcb *unp2;
@@ -1820,11 +1872,12 @@ unp_drop(struct unpcb *unp, int error)
UNP_LINK_WLOCK_ASSERT();
UNP_PCB_LOCK_ASSERT(unp);
-#ifndef __rtems__
- so->so_error = errno;
-#else /* __rtems__ */
- so->so_error = error;
-#endif /* __rtems__ */
+ /*
+ * Regardless of whether the socket's peer dropped the connection
+ * with this socket by aborting or disconnecting, POSIX requires
+ * that ECONNRESET is returned.
+ */
+ so->so_error = ECONNRESET;
unp2 = unp->unp_conn;
if (unp2 == NULL)
return;
@@ -1835,27 +1888,30 @@ unp_drop(struct unpcb *unp, int error)
#ifndef __rtems__
static void
-unp_freerights(struct file **rp, int fdcount)
+unp_freerights(struct filedescent **fdep, int fdcount)
{
- int i;
struct file *fp;
+ int i;
+
+ KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
for (i = 0; i < fdcount; i++) {
- fp = *rp;
- *rp++ = NULL;
+ fp = fdep[i]->fde_file;
+ filecaps_free(&fdep[i]->fde_caps);
unp_discard(fp);
}
+ free(fdep[0], M_FILECAPS);
}
static int
-unp_externalize(struct mbuf *control, struct mbuf **controlp)
+unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
{
struct thread *td = curthread; /* XXX */
struct cmsghdr *cm = mtod(control, struct cmsghdr *);
int i;
int *fdp;
- struct file **rp;
- struct file *fp;
+ struct filedesc *fdesc = td->td_proc->p_fd;
+ struct filedescent **fdep;
void *data;
socklen_t clen = control->m_len, datalen;
int error, newfds;
@@ -1875,15 +1931,18 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp)
datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
if (cm->cmsg_level == SOL_SOCKET
&& cm->cmsg_type == SCM_RIGHTS) {
- newfds = datalen / sizeof(struct file *);
- rp = data;
+ newfds = datalen / sizeof(*fdep);
+ if (newfds == 0)
+ goto next;
+ fdep = data;
/* If we're not outputting the descriptors free them. */
if (error || controlp == NULL) {
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
goto next;
}
- FILEDESC_XLOCK(td->td_proc->p_fd);
+ FILEDESC_XLOCK(fdesc);
+
/*
* Now change each pointer to an fd in the global
* table to an integer that is the index to the local
@@ -1894,28 +1953,30 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp)
*controlp = sbcreatecontrol(NULL, newlen,
SCM_RIGHTS, SOL_SOCKET);
if (*controlp == NULL) {
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ FILEDESC_XUNLOCK(fdesc);
error = E2BIG;
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
goto next;
}
fdp = (int *)
CMSG_DATA(mtod(*controlp, struct cmsghdr *));
if (fdallocn(td, 0, fdp, newfds) != 0) {
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ FILEDESC_XUNLOCK(fdesc);
error = EMSGSIZE;
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
m_freem(*controlp);
*controlp = NULL;
goto next;
}
- for (i = 0; i < newfds; i++) {
- fp = *rp++;
- td->td_proc->p_fd->fd_ofiles[fdp[i]] = fp;
- unp_externalize_fp(fp);
+ for (i = 0; i < newfds; i++, fdp++) {
+ _finstall(fdesc, fdep[i]->fde_file, *fdp,
+ (flags & MSG_CMSG_CLOEXEC) != 0 ? UF_EXCLOSE : 0,
+ &fdep[i]->fde_caps);
+ unp_externalize_fp(fdep[i]->fde_file);
}
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ FILEDESC_XUNLOCK(fdesc);
+ free(fdep[0], M_FILECAPS);
} else {
/* We can just copy anything else across. */
if (error || controlp == NULL)
@@ -1968,6 +2029,7 @@ unp_init(void)
if (unp_zone == NULL)
panic("unp_init");
uma_zone_set_max(unp_zone, maxsockets);
+ uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
NULL, EVENTHANDLER_PRI_ANY);
LIST_INIT(&unp_dhead);
@@ -1989,14 +2051,14 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
{
struct mbuf *control = *controlp;
struct proc *p = td->td_proc;
- struct filedesc *fdescp = p->p_fd;
+ struct filedesc *fdesc = p->p_fd;
struct bintime *bt;
struct cmsghdr *cm = mtod(control, struct cmsghdr *);
struct cmsgcred *cmcred;
- struct file **rp;
+ struct filedescent *fde, **fdep, *fdev;
struct file *fp;
struct timeval *tv;
- int i, fd, *fdp;
+ int i, *fdp;
void *data;
socklen_t clen = control->m_len, datalen;
int error, oldfds;
@@ -2008,7 +2070,7 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
*controlp = NULL;
while (cm != NULL) {
if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
- || cm->cmsg_len > clen) {
+ || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
error = EINVAL;
goto out;
}
@@ -2041,23 +2103,23 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
case SCM_RIGHTS:
oldfds = datalen / sizeof (int);
+ if (oldfds == 0)
+ break;
/*
* Check that all the FDs passed in refer to legal
* files. If not, reject the entire operation.
*/
fdp = data;
- FILEDESC_SLOCK(fdescp);
- for (i = 0; i < oldfds; i++) {
- fd = *fdp++;
- if ((unsigned)fd >= fdescp->fd_nfiles ||
- fdescp->fd_ofiles[fd] == NULL) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SLOCK(fdesc);
+ for (i = 0; i < oldfds; i++, fdp++) {
+ fp = fget_locked(fdesc, *fdp);
+ if (fp == NULL) {
+ FILEDESC_SUNLOCK(fdesc);
error = EBADF;
goto out;
}
- fp = fdescp->fd_ofiles[fd];
if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
error = EOPNOTSUPP;
goto out;
}
@@ -2066,25 +2128,30 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
/*
* Now replace the integer FDs with pointers to the
- * associated global file table entry..
+ * file structure and capability rights.
*/
- newlen = oldfds * sizeof(struct file *);
+ newlen = oldfds * sizeof(fdep[0]);
*controlp = sbcreatecontrol(NULL, newlen,
SCM_RIGHTS, SOL_SOCKET);
if (*controlp == NULL) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
error = E2BIG;
goto out;
}
fdp = data;
- rp = (struct file **)
+ fdep = (struct filedescent **)
CMSG_DATA(mtod(*controlp, struct cmsghdr *));
- for (i = 0; i < oldfds; i++) {
- fp = fdescp->fd_ofiles[*fdp++];
- *rp++ = fp;
- unp_internalize_fp(fp);
+ fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
+ M_WAITOK);
+ for (i = 0; i < oldfds; i++, fdev++, fdp++) {
+ fde = &fdesc->fd_ofiles[*fdp];
+ fdep[i] = fdev;
+ fdep[i]->fde_file = fde->fde_file;
+ filecaps_copy(&fde->fde_caps,
+ &fdep[i]->fde_caps, true);
+ unp_internalize_fp(fdep[i]->fde_file);
}
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
break;
case SCM_TIMESTAMP:
@@ -2280,17 +2347,22 @@ static int unp_marked;
static int unp_unreachable;
static void
-unp_accessable(struct file *fp)
+unp_accessable(struct filedescent **fdep, int fdcount)
{
struct unpcb *unp;
+ struct file *fp;
+ int i;
- if ((unp = fptounp(fp)) == NULL)
- return;
- if (unp->unp_gcflag & UNPGC_REF)
- return;
- unp->unp_gcflag &= ~UNPGC_DEAD;
- unp->unp_gcflag |= UNPGC_REF;
- unp_marked++;
+ for (i = 0; i < fdcount; i++) {
+ fp = fdep[i]->fde_file;
+ if ((unp = fptounp(fp)) == NULL)
+ continue;
+ if (unp->unp_gcflag & UNPGC_REF)
+ continue;
+ unp->unp_gcflag &= ~UNPGC_DEAD;
+ unp->unp_gcflag |= UNPGC_REF;
+ unp_marked++;
+ }
}
static void
@@ -2321,15 +2393,19 @@ unp_gc_process(struct unpcb *unp)
* Mark all sockets we reference with RIGHTS.
*/
so = unp->unp_socket;
- SOCKBUF_LOCK(&so->so_rcv);
- unp_scan(so->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&so->so_rcv);
+ if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ unp_scan(so->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
/*
* Mark all sockets in our accept queue.
*/
ACCEPT_LOCK();
TAILQ_FOREACH(soa, &so->so_comp, so_list) {
+ if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
+ continue;
SOCKBUF_LOCK(&soa->so_rcv);
unp_scan(soa->so_rcv.sb_mb, unp_accessable);
SOCKBUF_UNLOCK(&soa->so_rcv);
@@ -2359,11 +2435,13 @@ unp_gc(__unused void *arg, int pending)
unp_taskcount++;
UNP_LIST_LOCK();
/*
- * First clear all gc flags from previous runs.
+ * First clear all gc flags from previous runs, apart from
+ * UNPGC_IGNORE_RIGHTS.
*/
for (head = heads; *head != NULL; head++)
LIST_FOREACH(unp, *head, unp_link)
- unp->unp_gcflag = 0;
+ unp->unp_gcflag =
+ (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS);
/*
* Scan marking all reachable sockets with UNPGC_REF. Once a socket
@@ -2437,19 +2515,31 @@ unp_dispose(struct mbuf *m)
{
if (m)
- unp_scan(m, unp_discard);
+ unp_scan(m, unp_freerights);
}
+/*
+ * Synchronize against unp_gc, which can trip over data as we are freeing it.
+ */
static void
-unp_scan(struct mbuf *m0, void (*op)(struct file *))
+unp_dispose_so(struct socket *so)
+{
+ struct unpcb *unp;
+
+ unp = sotounpcb(so);
+ UNP_LIST_LOCK();
+ unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
+ UNP_LIST_UNLOCK();
+ unp_dispose(so->so_rcv.sb_mb);
+}
+
+static void
+unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
{
struct mbuf *m;
- struct file **rp;
struct cmsghdr *cm;
void *data;
- int i;
socklen_t clen, datalen;
- int qfds;
while (m0 != NULL) {
for (m = m0; m; m = m->m_next) {
@@ -2469,10 +2559,8 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *))
if (cm->cmsg_level == SOL_SOCKET &&
cm->cmsg_type == SCM_RIGHTS) {
- qfds = datalen / sizeof (struct file *);
- rp = data;
- for (i = 0; i < qfds; i++)
- (*op)(*rp++);
+ (*op)(data, datalen /
+ sizeof(struct filedescent *));
}
if (CMSG_SPACE(datalen) < clen) {
@@ -2485,7 +2573,7 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *))
}
}
}
- m0 = m0->m_act;
+ m0 = m0->m_nextpkt;
}
}
@@ -2619,7 +2707,7 @@ DB_SHOW_COMMAND(unpcb, db_show_unpcb)
db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket,
unp->unp_vnode);
- db_printf("unp_ino: %d unp_conn: %p\n", unp->unp_ino,
+ db_printf("unp_ino: %ju unp_conn: %p\n", (uintmax_t)unp->unp_ino,
unp->unp_conn);
db_printf("unp_refs:\n");
@@ -2628,8 +2716,7 @@ DB_SHOW_COMMAND(unpcb, db_show_unpcb)
/* XXXRW: Would be nice to print the full address, if any. */
db_printf("unp_addr: %p\n", unp->unp_addr);
- db_printf("unp_cc: %d unp_mbcnt: %d unp_gencnt: %llu\n",
- unp->unp_cc, unp->unp_mbcnt,
+ db_printf("unp_gencnt: %llu\n",
(unsigned long long)unp->unp_gencnt);
db_printf("unp_flags: %x (", unp->unp_flags);