diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2013-11-06 16:20:21 +0100 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2013-11-11 10:08:08 +0100 |
commit | 66659ff1ad6831b0ea7425fa6ecd8a8687523658 (patch) | |
tree | 48e22b475fa8854128e0861a33fed6f78c8094b5 /freebsd/sys/kern/uipc_socket.c | |
parent | Define __GLOBL1() and __GLOBL() (diff) | |
download | rtems-libbsd-66659ff1ad6831b0ea7425fa6ecd8a8687523658.tar.bz2 |
Update to FreeBSD 9.2
Diffstat (limited to 'freebsd/sys/kern/uipc_socket.c')
-rw-r--r-- | freebsd/sys/kern/uipc_socket.c | 309 |
1 files changed, 189 insertions, 120 deletions
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c index 227e6dac..9ca2c14c 100644 --- a/freebsd/sys/kern/uipc_socket.c +++ b/freebsd/sys/kern/uipc_socket.c @@ -137,6 +137,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/uio.h> #include <sys/jail.h> +#include <sys/syslog.h> +#include <netinet/in.h> #include <net/vnet.h> @@ -159,18 +161,24 @@ static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); static int filt_solisten(struct knote *kn, long hint); -static struct filterops solisten_filtops = - { 1, NULL, filt_sordetach, filt_solisten }; -static struct filterops soread_filtops = - { 1, NULL, filt_sordetach, filt_soread }; -static struct filterops sowrite_filtops = - { 1, NULL, filt_sowdetach, filt_sowrite }; +static struct filterops solisten_filtops = { + .f_isfd = 1, + .f_detach = filt_sordetach, + .f_event = filt_solisten, +}; +static struct filterops soread_filtops = { + .f_isfd = 1, + .f_detach = filt_sordetach, + .f_event = filt_soread, +}; +static struct filterops sowrite_filtops = { + .f_isfd = 1, + .f_detach = filt_sowdetach, + .f_event = filt_sowrite, +}; -uma_zone_t socket_zone; so_gen_t so_gencnt; /* generation count for sockets */ -int maxsockets; - MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); @@ -178,15 +186,37 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); VNET_ASSERT(curvnet != NULL, \ ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); +/* + * Limit on the number of connections in the listen queue waiting + * for accept(2). + */ static int somaxconn = SOMAXCONN; -static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS); -/* XXX: we dont have SYSCTL_USHORT */ + +static int +sysctl_somaxconn(SYSCTL_HANDLER_ARGS) +{ + int error; + int val; + + val = somaxconn; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr ) + return (error); + + if (val < 1 || val > USHRT_MAX) + return (EINVAL); + + somaxconn = val; + return (0); +} SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW, - 0, sizeof(int), sysctl_somaxconn, "I", "Maximum pending socket connection " - "queue size"); + 0, sizeof(int), sysctl_somaxconn, "I", + "Maximum listen socket pending connection accept queue size"); + static int numopensockets; SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, &numopensockets, 0, "Number of open sockets"); + #ifdef ZERO_COPY_SOCKETS /* These aren't static because they're used in other files. */ int so_zero_copy_send = 1; @@ -220,6 +250,49 @@ MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF); SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); /* + * Initialize the socket subsystem and set up the socket + * memory allocator. + */ +uma_zone_t socket_zone; +int maxsockets; + +static void +socket_zone_change(void *tag) +{ + + uma_zone_set_max(socket_zone, maxsockets); +} + +static void +socket_init(void *tag) +{ + + socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(socket_zone, maxsockets); + EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, + EVENTHANDLER_PRI_FIRST); +} +SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL); + +/* + * Initialise maxsockets. This SYSINIT must be run after + * tunable_mbinit(). + */ +static void +init_maxsockets(void *ignored) +{ + + TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); +#ifndef __rtems__ + maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); +#else /* __rtems__ */ + maxsockets = imax(maxsockets, nmbclusters); +#endif /* __rtems__ */ +} +SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); + +/* * Sysctl to get and set the maximum global sockets limit. Notify protocols * of the change so that they can update their dependent limits as required. */ @@ -245,29 +318,11 @@ sysctl_maxsockets(SYSCTL_HANDLER_ARGS) } return (error); } - SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW, &maxsockets, 0, sysctl_maxsockets, "IU", "Maximum number of sockets avaliable"); /* - * Initialise maxsockets. This SYSINIT must be run after - * tunable_mbinit(). - */ -static void -init_maxsockets(void *ignored) -{ - - TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); -#ifndef __rtems__ - maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); -#else /* __rtems__ */ - maxsockets = imax(maxsockets, nmbclusters); -#endif /* __rtems__ */ -} -SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); - -/* * Socket operation routines. These routines are called by the routines in * sys_socket.c or from a system process, and implement the semantics of * socket operations by switching out to the protocol specific routines. @@ -450,16 +505,24 @@ sonewconn(struct socket *head, int connstatus) over = (head->so_qlen > 3 * head->so_qlimit / 2); ACCEPT_UNLOCK(); #ifdef REGRESSION - if (regression_sonewconn_earlytest && over) + if (regression_sonewconn_earlytest && over) { #else - if (over) + if (over) { #endif + log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: " + "%i already in queue awaiting acceptance\n", + __func__, head->so_pcb, head->so_qlen); return (NULL); + } VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p", __func__, __LINE__, head)); so = soalloc(head->so_vnet); - if (so == NULL) + if (so == NULL) { + log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: " + "limit reached or out of memory\n", + __func__, head->so_pcb); return (NULL); + } if ((head->so_options & SO_ACCEPTFILTER) != 0) connstatus = 0; so->so_head = head; @@ -476,9 +539,16 @@ sonewconn(struct socket *head, int connstatus) knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); VNET_SO_ASSERT(head); - if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || - (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { + if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { sodealloc(so); + log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", + __func__, head->so_pcb); + return (NULL); + } + if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { + sodealloc(so); + log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n", + __func__, head->so_pcb); return (NULL); } so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; @@ -489,6 +559,20 @@ sonewconn(struct socket *head, int connstatus) so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; so->so_state |= connstatus; ACCEPT_LOCK(); + /* + * The accept socket may be tearing down but we just + * won a race on the ACCEPT_LOCK. + * However, if sctp_peeloff() is called on a 1-to-many + * style socket, the SO_ACCEPTCONN doesn't need to be set. + */ + if (!(head->so_options & SO_ACCEPTCONN) && + ((head->so_proto->pr_protocol != IPPROTO_SCTP) || + (head->so_type != SOCK_SEQPACKET))) { + SOCK_LOCK(so); + so->so_head = NULL; + sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */ + return (NULL); + } if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_qstate |= SQ_COMP; @@ -583,9 +667,12 @@ solisten_proto(struct socket *so, int backlog) } /* - * Attempt to free a socket. This should really be sotryfree(). + * Evaluate the reference count and named references on a socket; if no + * references remain, free it. This should be called whenever a reference is + * released, such as in sorele(), but also when named reference flags are + * cleared in socket or protocol code. * - * sofree() will succeed if: + * sofree() will free the socket if: * * - There are no outstanding file descriptor references or related consumers * (so_count == 0). @@ -598,9 +685,6 @@ solisten_proto(struct socket *so, int backlog) * - The socket is not in a completed connection queue, so a process has been * notified that it is present. If it is removed, the user process may * block in accept() despite select() saying the socket was ready. - * - * Otherwise, it will quietly abort so that a future call to sofree(), when - * conditions are right, can succeed. */ void sofree(struct socket *so) @@ -714,9 +798,14 @@ soclose(struct socket *so) drop: if (so->so_proto->pr_usrreqs->pru_close != NULL) (*so->so_proto->pr_usrreqs->pru_close)(so); + ACCEPT_LOCK(); if (so->so_options & SO_ACCEPTCONN) { struct socket *sp; - ACCEPT_LOCK(); + /* + * Prevent new additions to the accept queues due + * to ACCEPT_LOCK races while we are draining them. + */ + so->so_options &= ~SO_ACCEPTCONN; while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { TAILQ_REMOVE(&so->so_incomp, sp, so_list); so->so_incqlen--; @@ -735,13 +824,15 @@ drop: soabort(sp); ACCEPT_LOCK(); } - ACCEPT_UNLOCK(); + KASSERT((TAILQ_EMPTY(&so->so_comp)), + ("%s: so_comp populated", __func__)); + KASSERT((TAILQ_EMPTY(&so->so_incomp)), + ("%s: so_incomp populated", __func__)); } - ACCEPT_LOCK(); SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); so->so_state |= SS_NOFDREF; - sorele(so); + sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */ CURVNET_RESTORE(); return (error); } @@ -863,12 +954,6 @@ struct so_zerocopy_stats{ int found_ifp; }; struct so_zerocopy_stats so_zerocp_stats = {0,0,0}; -#include <netinet/in.h> -#include <net/route.h> -#include <netinet/in_pcb.h> -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vm_object.h> /* * sosend_copyin() is only used if zero copy sockets are enabled. Otherwise @@ -890,11 +975,10 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, int flags) { struct mbuf *m, **mp, *top; - long len, resid; + long len; + ssize_t resid; int error; -#ifdef ZERO_COPY_SOCKETS int cow_send; -#endif *retmp = top = NULL; mp = ⊤ @@ -902,11 +986,8 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, resid = uio->uio_resid; error = 0; do { -#ifdef ZERO_COPY_SOCKETS cow_send = 0; -#endif /* ZERO_COPY_SOCKETS */ if (resid >= MINCLSIZE) { -#ifdef ZERO_COPY_SOCKETS if (top == NULL) { m = m_gethdr(M_WAITOK, MT_DATA); m->m_pkthdr.len = 0; @@ -914,9 +995,9 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, } else m = m_get(M_WAITOK, MT_DATA); if (so_zero_copy_send && - resid>=PAGE_SIZE && - *space>=PAGE_SIZE && - uio->uio_iov->iov_len>=PAGE_SIZE) { + resid >= PAGE_SIZE && + *space >= PAGE_SIZE && + uio->uio_iov->iov_len >= PAGE_SIZE) { so_zerocp_stats.size_ok++; so_zerocp_stats.align_ok++; cow_send = socow_setup(m, uio); @@ -926,15 +1007,6 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, m_clget(m, M_WAITOK); len = min(min(MCLBYTES, resid), *space); } -#else /* ZERO_COPY_SOCKETS */ - if (top == NULL) { - m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR); - m->m_pkthdr.len = 0; - m->m_pkthdr.rcvif = NULL; - } else - m = m_getcl(M_WAIT, MT_DATA, 0); - len = min(min(MCLBYTES, resid), *space); -#endif /* ZERO_COPY_SOCKETS */ } else { if (top == NULL) { m = m_gethdr(M_WAIT, MT_DATA); @@ -959,12 +1031,10 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, } *space -= len; -#ifdef ZERO_COPY_SOCKETS if (cow_send) error = 0; else -#endif /* ZERO_COPY_SOCKETS */ - error = uiomove(mtod(m, void *), (int)len, uio); + error = uiomove(mtod(m, void *), (int)len, uio); resid = uio->uio_resid; m->m_len = len; *mp = m; @@ -982,7 +1052,7 @@ out: *retmp = top; return (error); } -#endif /*ZERO_COPY_SOCKETS*/ +#endif /* ZERO_COPY_SOCKETS */ #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) @@ -990,7 +1060,8 @@ int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { - long space, resid; + long space; + ssize_t resid; int clen = 0, error, dontroute; #ifdef ZERO_COPY_SOCKETS int atomic = sosendallatonce(so) || top; @@ -1164,7 +1235,8 @@ int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { - long space, resid; + long space; + ssize_t resid; int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; @@ -1463,11 +1535,12 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, **mp; - int flags, len, error, offset; + int flags, error, offset; + ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; - int orig_resid = uio->uio_resid; + ssize_t orig_resid = uio->uio_resid; mp = mp0; if (psa != NULL) @@ -1920,6 +1993,7 @@ release: /* * Optimized version of soreceive() for stream (TCP) sockets. + * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled. */ int soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, @@ -1960,20 +2034,9 @@ soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, } oresid = uio->uio_resid; - /* We will never ever get anything unless we are connected. */ + /* We will never ever get anything unless we are or were connected. */ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { - /* When disconnecting there may be still some data left. */ - if (sb->sb_cc > 0) - goto deliver; - if (!(so->so_state & SS_ISDISCONNECTED)) - error = ENOTCONN; - goto out; - } - - /* Socket buffer is empty and we shall not block. */ - if (sb->sb_cc == 0 && - ((sb->sb_flags & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { - error = EAGAIN; + error = ENOTCONN; goto out; } @@ -2000,6 +2063,13 @@ restart: goto out; } + /* Socket buffer is empty and we shall not block. */ + if (sb->sb_cc == 0 && + ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { + error = EAGAIN; + goto out; + } + /* Socket buffer got some data that we shall deliver now. */ if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) && ((sb->sb_flags & SS_NBIO) || @@ -2012,7 +2082,7 @@ restart: /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && - (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat)) + (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_hiwat)) goto deliver; /* @@ -2040,7 +2110,11 @@ deliver: if (mp0 != NULL) { /* Dequeue as many mbufs as possible. */ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { - for (*mp0 = m = sb->sb_mb; + if (*mp0 == NULL) + *mp0 = sb->sb_mb; + else + m_cat(*mp0, sb->sb_mb); + for (m = sb->sb_mb; m != NULL && m->m_len <= len; m = m->m_next) { len -= m->m_len; @@ -2048,10 +2122,11 @@ deliver: sbfree(sb, m); n = m; } + n->m_next = NULL; sb->sb_mb = m; + sb->sb_lastrecord = sb->sb_mb; if (sb->sb_mb == NULL) SB_EMPTY_FIXUP(sb); - n->m_next = NULL; } /* Copy the remainder. */ if (len > 0) { @@ -2062,9 +2137,9 @@ deliver: if (m == NULL) len = 0; /* Don't flush data from sockbuf. */ else - uio->uio_resid -= m->m_len; + uio->uio_resid -= len; if (*mp0 != NULL) - n->m_next = m; + m_cat(*mp0, m); else *mp0 = m; if (*mp0 == NULL) { @@ -2128,7 +2203,8 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, *m2; - int flags, len, error; + int flags, error; + ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; @@ -2334,9 +2410,11 @@ soshutdown(struct socket *so, int how) sorflush(so); if (how != SHUT_RD) { error = (*pr->pr_usrreqs->pru_shutdown)(so); + wakeup(&so->so_timeo); CURVNET_RESTORE(); return (error); } + wakeup(&so->so_timeo); CURVNET_RESTORE(); return (0); } @@ -2445,6 +2523,7 @@ sosetopt(struct socket *so, struct sockopt *sopt) struct linger l; struct timeval tv; u_long val; + uint32_t val32; #ifdef MAC struct mac extmac; #endif @@ -2509,21 +2588,29 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_SETFIB: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); + if (error) + goto bad; + if (optval < 0 || optval >= rt_numfibs) { error = EINVAL; goto bad; } if (((so->so_proto->pr_domain->dom_family == PF_INET) || (so->so_proto->pr_domain->dom_family == PF_INET6) || - (so->so_proto->pr_domain->dom_family == PF_ROUTE))) { + (so->so_proto->pr_domain->dom_family == PF_ROUTE))) so->so_fibnum = optval; - /* Note: ignore error */ - if (so->so_proto->pr_ctloutput) - (*so->so_proto->pr_ctloutput)(so, sopt); - } else { + else so->so_fibnum = 0; - } break; + + case SO_USER_COOKIE: + error = sooptcopyin(sopt, &val32, sizeof val32, + sizeof val32); + if (error) + goto bad; + so->so_user_cookie = val32; + break; + case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: @@ -3289,24 +3376,6 @@ socheckuid(struct socket *so, uid_t uid) } #endif /* __rtems__ */ -static int -sysctl_somaxconn(SYSCTL_HANDLER_ARGS) -{ - int error; - int val; - - val = somaxconn; - error = sysctl_handle_int(oidp, &val, 0, req); - if (error || !req->newptr ) - return (error); - - if (val < 1 || val > USHRT_MAX) - return (EINVAL); - - somaxconn = val; - return (0); -} - /* * These functions are used by protocols to notify the socket layer (and its * consumers) of state changes in the sockets driven by protocol-side events. |