path: root/freebsd/sys/kern/uipc_socket.c
author	Sebastian Huber <sebastian.huber@embedded-brains.de>	2013-11-06 16:20:21 +0100
committer	Sebastian Huber <sebastian.huber@embedded-brains.de>	2013-11-11 10:08:08 +0100
commit	66659ff1ad6831b0ea7425fa6ecd8a8687523658 (patch)
tree	48e22b475fa8854128e0861a33fed6f78c8094b5 /freebsd/sys/kern/uipc_socket.c
parent	Define __GLOBL1() and __GLOBL() (diff)
download	rtems-libbsd-66659ff1ad6831b0ea7425fa6ecd8a8687523658.tar.bz2
Update to FreeBSD 9.2
Diffstat (limited to 'freebsd/sys/kern/uipc_socket.c')
-rw-r--r--	freebsd/sys/kern/uipc_socket.c	309
1 file changed, 189 insertions(+), 120 deletions(-)
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c
index 227e6dac..9ca2c14c 100644
--- a/freebsd/sys/kern/uipc_socket.c
+++ b/freebsd/sys/kern/uipc_socket.c
@@ -137,6 +137,8 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/jail.h>
+#include <sys/syslog.h>
+#include <netinet/in.h>
#include <net/vnet.h>
@@ -159,18 +161,24 @@ static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
static int filt_solisten(struct knote *kn, long hint);
-static struct filterops solisten_filtops =
- { 1, NULL, filt_sordetach, filt_solisten };
-static struct filterops soread_filtops =
- { 1, NULL, filt_sordetach, filt_soread };
-static struct filterops sowrite_filtops =
- { 1, NULL, filt_sowdetach, filt_sowrite };
+static struct filterops solisten_filtops = {
+ .f_isfd = 1,
+ .f_detach = filt_sordetach,
+ .f_event = filt_solisten,
+};
+static struct filterops soread_filtops = {
+ .f_isfd = 1,
+ .f_detach = filt_sordetach,
+ .f_event = filt_soread,
+};
+static struct filterops sowrite_filtops = {
+ .f_isfd = 1,
+ .f_detach = filt_sowdetach,
+ .f_event = filt_sowrite,
+};
-uma_zone_t socket_zone;
so_gen_t so_gencnt; /* generation count for sockets */
-int maxsockets;
-
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
@@ -178,15 +186,37 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
VNET_ASSERT(curvnet != NULL, \
("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));
+/*
+ * Limit on the number of connections in the listen queue waiting
+ * for accept(2).
+ */
static int somaxconn = SOMAXCONN;
-static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS);
-/* XXX: we dont have SYSCTL_USHORT */
+
+static int
+sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int val;
+
+ val = somaxconn;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error || !req->newptr )
+ return (error);
+
+ if (val < 1 || val > USHRT_MAX)
+ return (EINVAL);
+
+ somaxconn = val;
+ return (0);
+}
SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW,
- 0, sizeof(int), sysctl_somaxconn, "I", "Maximum pending socket connection "
- "queue size");
+ 0, sizeof(int), sysctl_somaxconn, "I",
+ "Maximum listen socket pending connection accept queue size");
+
static int numopensockets;
SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
&numopensockets, 0, "Number of open sockets");
+
#ifdef ZERO_COPY_SOCKETS
/* These aren't static because they're used in other files. */
int so_zero_copy_send = 1;
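
The relocated sysctl_somaxconn() handler above rejects values outside [1, USHRT_MAX] before committing the new listen-queue limit. A minimal userland sketch (illustrative only, not part of the patch) that reads and sets kern.ipc.somaxconn through sysctlbyname(3):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int cur, want = 1024;
	size_t len = sizeof(cur);

	/* Read the current limit. */
	if (sysctlbyname("kern.ipc.somaxconn", &cur, &len, NULL, 0) == -1) {
		perror("sysctlbyname(get)");
		return (1);
	}
	printf("kern.ipc.somaxconn = %d\n", cur);

	/* Raise it; needs privilege, and out-of-range values yield EINVAL. */
	if (sysctlbyname("kern.ipc.somaxconn", NULL, NULL, &want,
	    sizeof(want)) == -1)
		perror("sysctlbyname(set)");
	return (0);
}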
@@ -220,6 +250,49 @@ MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF);
SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
/*
+ * Initialize the socket subsystem and set up the socket
+ * memory allocator.
+ */
+uma_zone_t socket_zone;
+int maxsockets;
+
+static void
+socket_zone_change(void *tag)
+{
+
+ uma_zone_set_max(socket_zone, maxsockets);
+}
+
+static void
+socket_init(void *tag)
+{
+
+ socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(socket_zone, maxsockets);
+ EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
+ EVENTHANDLER_PRI_FIRST);
+}
+SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL);
+
+/*
+ * Initialise maxsockets. This SYSINIT must be run after
+ * tunable_mbinit().
+ */
+static void
+init_maxsockets(void *ignored)
+{
+
+ TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
+#ifndef __rtems__
+ maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
+#else /* __rtems__ */
+ maxsockets = imax(maxsockets, nmbclusters);
+#endif /* __rtems__ */
+}
+SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
+
+/*
* Sysctl to get and set the maximum global sockets limit. Notify protocols
* of the change so that they can update their dependent limits as required.
*/
@@ -245,29 +318,11 @@ sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
}
return (error);
}
-
SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW,
&maxsockets, 0, sysctl_maxsockets, "IU",
"Maximum number of sockets avaliable");
/*
- * Initialise maxsockets. This SYSINIT must be run after
- * tunable_mbinit().
- */
-static void
-init_maxsockets(void *ignored)
-{
-
- TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
-#ifndef __rtems__
- maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
-#else /* __rtems__ */
- maxsockets = imax(maxsockets, nmbclusters);
-#endif /* __rtems__ */
-}
-SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
-
-/*
* Socket operation routines. These routines are called by the routines in
* sys_socket.c or from a system process, and implement the semantics of
* socket operations by switching out to the protocol specific routines.
@@ -450,16 +505,24 @@ sonewconn(struct socket *head, int connstatus)
over = (head->so_qlen > 3 * head->so_qlimit / 2);
ACCEPT_UNLOCK();
#ifdef REGRESSION
- if (regression_sonewconn_earlytest && over)
+ if (regression_sonewconn_earlytest && over) {
#else
- if (over)
+ if (over) {
#endif
+ log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: "
+ "%i already in queue awaiting acceptance\n",
+ __func__, head->so_pcb, head->so_qlen);
return (NULL);
+ }
VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
__func__, __LINE__, head));
so = soalloc(head->so_vnet);
- if (so == NULL)
+ if (so == NULL) {
+ log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
+ "limit reached or out of memory\n",
+ __func__, head->so_pcb);
return (NULL);
+ }
if ((head->so_options & SO_ACCEPTFILTER) != 0)
connstatus = 0;
so->so_head = head;
@@ -476,9 +539,16 @@ sonewconn(struct socket *head, int connstatus)
knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
VNET_SO_ASSERT(head);
- if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
- (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
+ if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
+ __func__, head->so_pcb);
return (NULL);
}
so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
@@ -489,6 +559,20 @@ sonewconn(struct socket *head, int connstatus)
so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
so->so_state |= connstatus;
ACCEPT_LOCK();
+ /*
+ * The accept socket may be tearing down but we just
+ * won a race on the ACCEPT_LOCK.
+ * However, if sctp_peeloff() is called on a 1-to-many
+ * style socket, the SO_ACCEPTCONN doesn't need to be set.
+ */
+ if (!(head->so_options & SO_ACCEPTCONN) &&
+ ((head->so_proto->pr_protocol != IPPROTO_SCTP) ||
+ (head->so_type != SOCK_SEQPACKET))) {
+ SOCK_LOCK(so);
+ so->so_head = NULL;
+ sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */
+ return (NULL);
+ }
if (connstatus) {
TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
so->so_qstate |= SQ_COMP;
@@ -583,9 +667,12 @@ solisten_proto(struct socket *so, int backlog)
}
/*
- * Attempt to free a socket. This should really be sotryfree().
+ * Evaluate the reference count and named references on a socket; if no
+ * references remain, free it. This should be called whenever a reference is
+ * released, such as in sorele(), but also when named reference flags are
+ * cleared in socket or protocol code.
*
- * sofree() will succeed if:
+ * sofree() will free the socket if:
*
* - There are no outstanding file descriptor references or related consumers
* (so_count == 0).
@@ -598,9 +685,6 @@ solisten_proto(struct socket *so, int backlog)
* - The socket is not in a completed connection queue, so a process has been
* notified that it is present. If it is removed, the user process may
* block in accept() despite select() saying the socket was ready.
- *
- * Otherwise, it will quietly abort so that a future call to sofree(), when
- * conditions are right, can succeed.
*/
void
sofree(struct socket *so)
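
sorele() is the usual path by which a released reference reaches sofree(). For context only, paraphrased from sys/socketvar.h of the same era (possibly not byte-exact, and not part of this diff):

#define	sorele(so) do {							\
	ACCEPT_LOCK_ASSERT();						\
	SOCK_LOCK_ASSERT(so);						\
	if ((so)->so_count <= 0)					\
		panic("sorele");					\
	--(so)->so_count;						\
	if ((so)->so_count == 0)					\
		sofree(so);		/* drops both locks */		\
	else {								\
		SOCK_UNLOCK(so);					\
		ACCEPT_UNLOCK();					\
	}								\
} while (0)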
@@ -714,9 +798,14 @@ soclose(struct socket *so)
drop:
if (so->so_proto->pr_usrreqs->pru_close != NULL)
(*so->so_proto->pr_usrreqs->pru_close)(so);
+ ACCEPT_LOCK();
if (so->so_options & SO_ACCEPTCONN) {
struct socket *sp;
- ACCEPT_LOCK();
+ /*
+ * Prevent new additions to the accept queues due
+ * to ACCEPT_LOCK races while we are draining them.
+ */
+ so->so_options &= ~SO_ACCEPTCONN;
while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
TAILQ_REMOVE(&so->so_incomp, sp, so_list);
so->so_incqlen--;
@@ -735,13 +824,15 @@ drop:
soabort(sp);
ACCEPT_LOCK();
}
- ACCEPT_UNLOCK();
+ KASSERT((TAILQ_EMPTY(&so->so_comp)),
+ ("%s: so_comp populated", __func__));
+ KASSERT((TAILQ_EMPTY(&so->so_incomp)),
+ ("%s: so_incomp populated", __func__));
}
- ACCEPT_LOCK();
SOCK_LOCK(so);
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;
- sorele(so);
+ sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */
CURVNET_RESTORE();
return (error);
}
@@ -863,12 +954,6 @@ struct so_zerocopy_stats{
int found_ifp;
};
struct so_zerocopy_stats so_zerocp_stats = {0,0,0};
-#include <netinet/in.h>
-#include <net/route.h>
-#include <netinet/in_pcb.h>
-#include <vm/vm.h>
-#include <vm/vm_page.h>
-#include <vm/vm_object.h>
/*
* sosend_copyin() is only used if zero copy sockets are enabled. Otherwise
@@ -890,11 +975,10 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
int flags)
{
struct mbuf *m, **mp, *top;
- long len, resid;
+ long len;
+ ssize_t resid;
int error;
-#ifdef ZERO_COPY_SOCKETS
int cow_send;
-#endif
*retmp = top = NULL;
mp = &top;
@@ -902,11 +986,8 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
resid = uio->uio_resid;
error = 0;
do {
-#ifdef ZERO_COPY_SOCKETS
cow_send = 0;
-#endif /* ZERO_COPY_SOCKETS */
if (resid >= MINCLSIZE) {
-#ifdef ZERO_COPY_SOCKETS
if (top == NULL) {
m = m_gethdr(M_WAITOK, MT_DATA);
m->m_pkthdr.len = 0;
@@ -914,9 +995,9 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
} else
m = m_get(M_WAITOK, MT_DATA);
if (so_zero_copy_send &&
- resid>=PAGE_SIZE &&
- *space>=PAGE_SIZE &&
- uio->uio_iov->iov_len>=PAGE_SIZE) {
+ resid >= PAGE_SIZE &&
+ *space >= PAGE_SIZE &&
+ uio->uio_iov->iov_len >= PAGE_SIZE) {
so_zerocp_stats.size_ok++;
so_zerocp_stats.align_ok++;
cow_send = socow_setup(m, uio);
@@ -926,15 +1007,6 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
m_clget(m, M_WAITOK);
len = min(min(MCLBYTES, resid), *space);
}
-#else /* ZERO_COPY_SOCKETS */
- if (top == NULL) {
- m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR);
- m->m_pkthdr.len = 0;
- m->m_pkthdr.rcvif = NULL;
- } else
- m = m_getcl(M_WAIT, MT_DATA, 0);
- len = min(min(MCLBYTES, resid), *space);
-#endif /* ZERO_COPY_SOCKETS */
} else {
if (top == NULL) {
m = m_gethdr(M_WAIT, MT_DATA);
@@ -959,12 +1031,10 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space,
}
*space -= len;
-#ifdef ZERO_COPY_SOCKETS
if (cow_send)
error = 0;
else
-#endif /* ZERO_COPY_SOCKETS */
- error = uiomove(mtod(m, void *), (int)len, uio);
+ error = uiomove(mtod(m, void *), (int)len, uio);
resid = uio->uio_resid;
m->m_len = len;
*mp = m;
@@ -982,7 +1052,7 @@ out:
*retmp = top;
return (error);
}
-#endif /*ZERO_COPY_SOCKETS*/
+#endif /* ZERO_COPY_SOCKETS */
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
@@ -990,7 +1060,8 @@ int
sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
- long space, resid;
+ long space;
+ ssize_t resid;
int clen = 0, error, dontroute;
#ifdef ZERO_COPY_SOCKETS
int atomic = sosendallatonce(so) || top;
@@ -1164,7 +1235,8 @@ int
sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
- long space, resid;
+ long space;
+ ssize_t resid;
int clen = 0, error, dontroute;
int atomic = sosendallatonce(so) || top;
@@ -1463,11 +1535,12 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
struct mbuf *m, **mp;
- int flags, len, error, offset;
+ int flags, error, offset;
+ ssize_t len;
struct protosw *pr = so->so_proto;
struct mbuf *nextrecord;
int moff, type = 0;
- int orig_resid = uio->uio_resid;
+ ssize_t orig_resid = uio->uio_resid;
mp = mp0;
if (psa != NULL)
@@ -1920,6 +1993,7 @@ release:
/*
* Optimized version of soreceive() for stream (TCP) sockets.
+ * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled.
*/
int
soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
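
soreceive_stream() is only used when a protocol installs it in its pr_usrreqs; in FreeBSD 9.x TCP does so when the net.inet.tcp.soreceive_stream loader tunable is set. A rough sketch of that wiring, with illustrative variable and function names (assumptions, not taken from this patch):

extern struct pr_usrreqs tcp_usrreqs;		/* from netinet/tcp_usrreq.c */
static int example_soreceive_stream;		/* illustrative tunable copy */

static void
example_tcp_recv_init(void)
{

	TUNABLE_INT_FETCH("net.inet.tcp.soreceive_stream",
	    &example_soreceive_stream);
	if (example_soreceive_stream)
		tcp_usrreqs.pru_soreceive = soreceive_stream;
}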
@@ -1960,20 +2034,9 @@ soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
}
oresid = uio->uio_resid;
- /* We will never ever get anything unless we are connected. */
+ /* We will never ever get anything unless we are or were connected. */
if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
- /* When disconnecting there may be still some data left. */
- if (sb->sb_cc > 0)
- goto deliver;
- if (!(so->so_state & SS_ISDISCONNECTED))
- error = ENOTCONN;
- goto out;
- }
-
- /* Socket buffer is empty and we shall not block. */
- if (sb->sb_cc == 0 &&
- ((sb->sb_flags & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
- error = EAGAIN;
+ error = ENOTCONN;
goto out;
}
@@ -2000,6 +2063,13 @@ restart:
goto out;
}
+ /* Socket buffer is empty and we shall not block. */
+ if (sb->sb_cc == 0 &&
+ ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
+ error = EAGAIN;
+ goto out;
+ }
+
/* Socket buffer got some data that we shall deliver now. */
if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
((sb->sb_flags & SS_NBIO) ||
@@ -2012,7 +2082,7 @@ restart:
/* On MSG_WAITALL we must wait until all data or error arrives. */
if ((flags & MSG_WAITALL) &&
- (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat))
+ (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_hiwat))
goto deliver;
/*
@@ -2040,7 +2110,11 @@ deliver:
if (mp0 != NULL) {
/* Dequeue as many mbufs as possible. */
if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
- for (*mp0 = m = sb->sb_mb;
+ if (*mp0 == NULL)
+ *mp0 = sb->sb_mb;
+ else
+ m_cat(*mp0, sb->sb_mb);
+ for (m = sb->sb_mb;
m != NULL && m->m_len <= len;
m = m->m_next) {
len -= m->m_len;
@@ -2048,10 +2122,11 @@ deliver:
sbfree(sb, m);
n = m;
}
+ n->m_next = NULL;
sb->sb_mb = m;
+ sb->sb_lastrecord = sb->sb_mb;
if (sb->sb_mb == NULL)
SB_EMPTY_FIXUP(sb);
- n->m_next = NULL;
}
/* Copy the remainder. */
if (len > 0) {
@@ -2062,9 +2137,9 @@ deliver:
if (m == NULL)
len = 0; /* Don't flush data from sockbuf. */
else
- uio->uio_resid -= m->m_len;
+ uio->uio_resid -= len;
if (*mp0 != NULL)
- n->m_next = m;
+ m_cat(*mp0, m);
else
*mp0 = m;
if (*mp0 == NULL) {
@@ -2128,7 +2203,8 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
struct mbuf *m, *m2;
- int flags, len, error;
+ int flags, error;
+ ssize_t len;
struct protosw *pr = so->so_proto;
struct mbuf *nextrecord;
@@ -2334,9 +2410,11 @@ soshutdown(struct socket *so, int how)
sorflush(so);
if (how != SHUT_RD) {
error = (*pr->pr_usrreqs->pru_shutdown)(so);
+ wakeup(&so->so_timeo);
CURVNET_RESTORE();
return (error);
}
+ wakeup(&so->so_timeo);
CURVNET_RESTORE();
return (0);
}
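
The wakeup(&so->so_timeo) calls added above release threads sleeping on the socket's timeout wait channel, such as a thread blocked in connect(2). The waiting side, paraphrased from kern_connect() in uipc_syscalls.c (shown for context, not part of this diff):

	SOCK_LOCK(so);
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
		    "connec", 0);
		if (error != 0)
			break;
	}
	SOCK_UNLOCK(so);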
@@ -2445,6 +2523,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
struct linger l;
struct timeval tv;
u_long val;
+ uint32_t val32;
#ifdef MAC
struct mac extmac;
#endif
@@ -2509,21 +2588,29 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_SETFIB:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
+ if (error)
+ goto bad;
+
if (optval < 0 || optval >= rt_numfibs) {
error = EINVAL;
goto bad;
}
if (((so->so_proto->pr_domain->dom_family == PF_INET) ||
(so->so_proto->pr_domain->dom_family == PF_INET6) ||
- (so->so_proto->pr_domain->dom_family == PF_ROUTE))) {
+ (so->so_proto->pr_domain->dom_family == PF_ROUTE)))
so->so_fibnum = optval;
- /* Note: ignore error */
- if (so->so_proto->pr_ctloutput)
- (*so->so_proto->pr_ctloutput)(so, sopt);
- } else {
+ else
so->so_fibnum = 0;
- }
break;
+
+ case SO_USER_COOKIE:
+ error = sooptcopyin(sopt, &val32, sizeof val32,
+ sizeof val32);
+ if (error)
+ goto bad;
+ so->so_user_cookie = val32;
+ break;
+
case SO_SNDBUF:
case SO_RCVBUF:
case SO_SNDLOWAT:
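
The new SO_USER_COOKIE case above stores a caller-supplied 32-bit tag in so_user_cookie, which packet filters can match on. An illustrative userland sketch (not part of the patch):

#include <sys/types.h>
#include <sys/socket.h>
#include <stdint.h>

static int
tag_socket(int s, uint32_t cookie)
{

	/* The handler above copies exactly sizeof(uint32_t) bytes. */
	return (setsockopt(s, SOL_SOCKET, SO_USER_COOKIE, &cookie,
	    sizeof(cookie)));
}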
@@ -3289,24 +3376,6 @@ socheckuid(struct socket *so, uid_t uid)
}
#endif /* __rtems__ */
-static int
-sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
-{
- int error;
- int val;
-
- val = somaxconn;
- error = sysctl_handle_int(oidp, &val, 0, req);
- if (error || !req->newptr )
- return (error);
-
- if (val < 1 || val > USHRT_MAX)
- return (EINVAL);
-
- somaxconn = val;
- return (0);
-}
-
/*
* These functions are used by protocols to notify the socket layer (and its
* consumers) of state changes in the sockets driven by protocol-side events.