Diffstat (limited to 'cpukit/libnetworking/kern')
-rw-r--r--   cpukit/libnetworking/kern/kern_mib.c     |  385
-rw-r--r--   cpukit/libnetworking/kern/kern_subr.c    |  155
-rw-r--r--   cpukit/libnetworking/kern/kern_sysctl.c  | 1541
-rw-r--r--   cpukit/libnetworking/kern/uipc_domain.c  |  219
-rw-r--r--   cpukit/libnetworking/kern/uipc_mbuf.c    |  741
-rw-r--r--   cpukit/libnetworking/kern/uipc_socket.c  | 1104
-rw-r--r--   cpukit/libnetworking/kern/uipc_socket2.c |  940
7 files changed, 5085 insertions, 0 deletions
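
Most of this change imports the FreeBSD sysctl machinery (kern_mib.c, kern_sysctl.c) along with the mbuf, domain and socket layers. As a quick orientation, the sketch below shows how the imported interface is typically used: a read-only integer leaf is declared with the SYSCTL_INT() macro and read back through kernel_sysctlbyname(), both of which are added by this diff. This is a hypothetical illustration only; the leaf name "kern.demo", the variable demo_value, and passing a NULL thread pointer are assumptions made for the example, not part of the imported sources.

/*
 * Hypothetical example against the interfaces added by this diff.
 * Assumes the SYSCTL linker set has been registered, i.e. that
 * sysctl_register_all() (exposed by the RTEMS port in kern_sysctl.c)
 * has already run.
 */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static int demo_value = 42;     /* illustrative variable, not in the diff */

/* Declare kern.demo as a read-only integer leaf; OID_AUTO picks the number. */
SYSCTL_INT(_kern, OID_AUTO, demo, CTLFLAG_RD,
    &demo_value, 0, "Demo value (example only)");

int
read_demo_value(void)
{
    char   name[] = "kern.demo";
    int    out = 0;
    size_t outlen = sizeof(out);

    /*
     * Look the leaf up by name and copy its current value out.
     * Passing NULL for the thread pointer is an assumption; the
     * privilege checks that dereference it are compiled out under
     * __rtems__ in sysctl_root().
     */
    if (kernel_sysctlbyname(NULL, name, &out, &outlen, NULL, 0, NULL) != 0)
        return (-1);
    return (out);
}
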
diff --git a/cpukit/libnetworking/kern/kern_mib.c b/cpukit/libnetworking/kern/kern_mib.c new file mode 100644 index 0000000000..6471208786 --- /dev/null +++ b/cpukit/libnetworking/kern/kern_mib.c @@ -0,0 +1,385 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Karels at Berkeley Software Design, Inc. + * + * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD + * project, to make these variables more userfriendly. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 + * $FreeBSD: src/sys/kern/kern_mib.c,v 1.74 2005/02/28 21:42:56 wes Exp $ + */ + +/* + * $Id$ + */ + +#ifndef __rtems__ +#include "opt_posix.h" +#endif + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/sysctl.h> +#include <sys/proc.h> +#ifndef __rtems__ +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/jail.h> +#include <sys/smp.h> +#endif +#include <sys/unistd.h> + +#ifdef __rtems__ +char machine[] = "SET ME"; +char osrelease[] = RTEMS_VERSION; +char ostype[] = "RTEMS"; +#endif + +SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0, + "Sysctl internal magic"); +SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW, 0, + "High kernel, proc, limits &c"); +#ifndef __rtems__ +SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW, 0, + "Virtual memory"); +SYSCTL_NODE(, CTL_VFS, vfs, CTLFLAG_RW, 0, + "File system"); +#endif +SYSCTL_NODE(, CTL_NET, net, CTLFLAG_RW, 0, + "Network, (see socket.h)"); +#ifndef __rtems__ +SYSCTL_NODE(, CTL_DEBUG, debug, CTLFLAG_RW, 0, + "Debugging"); +SYSCTL_NODE(_debug, OID_AUTO, sizeof, CTLFLAG_RW, 0, + "Sizeof various things"); +SYSCTL_NODE(, CTL_HW, hw, CTLFLAG_RW, 0, + "hardware"); +SYSCTL_NODE(, CTL_MACHDEP, machdep, CTLFLAG_RW, 0, + "machine dependent"); +SYSCTL_NODE(, CTL_USER, user, CTLFLAG_RW, 0, + "user-level"); +SYSCTL_NODE(, CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0, + "p1003_1b, (see p1003_1b.h)"); + +SYSCTL_NODE(, OID_AUTO, compat, CTLFLAG_RW, 0, + "Compatibility code"); +SYSCTL_NODE(, OID_AUTO, security, CTLFLAG_RW, 0, + "Security"); +#ifdef REGRESSION +SYSCTL_NODE(, OID_AUTO, regression, CTLFLAG_RW, 0, + "Regression test MIB"); +#endif + +SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD, + kern_ident, 0, "Kernel identifier"); +#endif /* __rtems__ */ + +SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD, + osrelease, 0, "Operating system release"); + +SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD, + 0, BSD, "Operating system revision"); + +#ifndef __rtems__ +SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD, + version, 0, "Kernel version"); +#endif + +SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD, + ostype, 0, "Operating system type"); + +#ifndef __rtems__ +extern int osreldate; +SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD, + &osreldate, 0, "Operating system release date"); + +SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RD, + &maxproc, 0, "Maximum number of processes"); + +SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW, + &maxprocperuid, 0, "Maximum processes allowed per userid"); + +SYSCTL_INT(_kern, OID_AUTO, maxusers, CTLFLAG_RD, + &maxusers, 0, "Hint for kernel tuning"); + +SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD, + 0, ARG_MAX, "Maximum bytes of argument to execve(2)"); + +SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD, + 0, _POSIX_VERSION, "Version of POSIX attempting to comply to"); + +SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RD, + 0, NGROUPS_MAX, "Maximum number of groups a user can belong to"); + +SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD, + 0, 1, "Whether job control is available"); + +#ifdef _POSIX_SAVED_IDS +SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD, + 0, 1, "Whether saved set-group/user ID is available"); +#else +SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD, + 0, 0, "Whether saved set-group/user ID is available"); +#endif + +char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */ + +SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW, + kernelname, 
sizeof kernelname, "Name of kernel file booted"); + +#ifdef SMP +SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD, + &mp_ncpus, 0, "Number of active CPUs"); +#else +SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD, + 0, 1, "Number of active CPUs"); +#endif + +SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD, + 0, BYTE_ORDER, "System byte order"); + +SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD, + 0, PAGE_SIZE, "System memory page size"); + +static int +sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) +{ + u_long val; + + val = ctob(physmem); + return (sysctl_handle_long(oidp, &val, 0, req)); +} + +SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_ULONG | CTLFLAG_RD, + 0, 0, sysctl_hw_physmem, "LU", ""); + +static int +sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) +{ + u_long val; + + val = ctob(physmem - cnt.v_wire_count); + return (sysctl_handle_long(oidp, &val, 0, req)); +} + +SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_ULONG | CTLFLAG_RD, + 0, 0, sysctl_hw_usermem, "LU", ""); + +SYSCTL_ULONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, ""); + +static char machine_arch[] = MACHINE_ARCH; +SYSCTL_STRING(_hw, HW_MACHINE_ARCH, machine_arch, CTLFLAG_RD, + machine_arch, 0, "System architecture"); + +char hostname[MAXHOSTNAMELEN]; + +static int +sysctl_hostname(SYSCTL_HANDLER_ARGS) +{ + struct prison *pr; + char tmphostname[MAXHOSTNAMELEN]; + int error; + + pr = req->td->td_ucred->cr_prison; + if (pr != NULL) { + if (!jail_set_hostname_allowed && req->newptr) + return (EPERM); + /* + * Process is in jail, so make a local copy of jail + * hostname to get/set so we don't have to hold the jail + * mutex during the sysctl copyin/copyout activities. + */ + mtx_lock(&pr->pr_mtx); + bcopy(pr->pr_host, tmphostname, MAXHOSTNAMELEN); + mtx_unlock(&pr->pr_mtx); + + error = sysctl_handle_string(oidp, tmphostname, + sizeof pr->pr_host, req); + + if (req->newptr != NULL && error == 0) { + /* + * Copy the locally set hostname to the jail, if + * appropriate. + */ + mtx_lock(&pr->pr_mtx); + bcopy(tmphostname, pr->pr_host, MAXHOSTNAMELEN); + mtx_unlock(&pr->pr_mtx); + } + } else + error = sysctl_handle_string(oidp, + hostname, sizeof hostname, req); + return (error); +} + +SYSCTL_PROC(_kern, KERN_HOSTNAME, hostname, + CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON, + 0, 0, sysctl_hostname, "A", "Hostname"); + +static int regression_securelevel_nonmonotonic = 0; + +#ifdef REGRESSION +SYSCTL_INT(_regression, OID_AUTO, securelevel_nonmonotonic, CTLFLAG_RW, + ®ression_securelevel_nonmonotonic, 0, "securelevel may be lowered"); +#endif + +int securelevel = -1; +struct mtx securelevel_mtx; + +MTX_SYSINIT(securelevel_lock, &securelevel_mtx, "securelevel mutex lock", + MTX_DEF); + +static int +sysctl_kern_securelvl(SYSCTL_HANDLER_ARGS) +{ + struct prison *pr; + int error, level; + + pr = req->td->td_ucred->cr_prison; + + /* + * If the process is in jail, return the maximum of the global and + * local levels; otherwise, return the global level. + */ + if (pr != NULL) { + mtx_lock(&pr->pr_mtx); + level = imax(securelevel, pr->pr_securelevel); + mtx_unlock(&pr->pr_mtx); + } else + level = securelevel; + error = sysctl_handle_int(oidp, &level, 0, req); + if (error || !req->newptr) + return (error); + /* + * Permit update only if the new securelevel exceeds the + * global level, and local level if any. 
+ */ + if (pr != NULL) { + mtx_lock(&pr->pr_mtx); + if (!regression_securelevel_nonmonotonic && + (level < imax(securelevel, pr->pr_securelevel))) { + mtx_unlock(&pr->pr_mtx); + return (EPERM); + } + pr->pr_securelevel = level; + mtx_unlock(&pr->pr_mtx); + } else { + mtx_lock(&securelevel_mtx); + if (!regression_securelevel_nonmonotonic && + (level < securelevel)) { + mtx_unlock(&securelevel_mtx); + return (EPERM); + } + securelevel = level; + mtx_unlock(&securelevel_mtx); + } + return (error); +} + +SYSCTL_PROC(_kern, KERN_SECURELVL, securelevel, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, sysctl_kern_securelvl, + "I", "Current secure level"); + +char domainname[MAXHOSTNAMELEN]; +SYSCTL_STRING(_kern, KERN_NISDOMAINNAME, domainname, CTLFLAG_RW, + &domainname, sizeof(domainname), "Name of the current YP/NIS domain"); + +u_long hostid; +SYSCTL_ULONG(_kern, KERN_HOSTID, hostid, CTLFLAG_RW, &hostid, 0, "Host ID"); + +/* + * This is really cheating. These actually live in the libc, something + * which I'm not quite sure is a good idea anyway, but in order for + * getnext and friends to actually work, we define dummies here. + */ +SYSCTL_STRING(_user, USER_CS_PATH, cs_path, CTLFLAG_RD, + "", 0, "PATH that finds all the standard utilities"); +SYSCTL_INT(_user, USER_BC_BASE_MAX, bc_base_max, CTLFLAG_RD, + 0, 0, "Max ibase/obase values in bc(1)"); +SYSCTL_INT(_user, USER_BC_DIM_MAX, bc_dim_max, CTLFLAG_RD, + 0, 0, "Max array size in bc(1)"); +SYSCTL_INT(_user, USER_BC_SCALE_MAX, bc_scale_max, CTLFLAG_RD, + 0, 0, "Max scale value in bc(1)"); +SYSCTL_INT(_user, USER_BC_STRING_MAX, bc_string_max, CTLFLAG_RD, + 0, 0, "Max string length in bc(1)"); +SYSCTL_INT(_user, USER_COLL_WEIGHTS_MAX, coll_weights_max, CTLFLAG_RD, + 0, 0, "Maximum number of weights assigned to an LC_COLLATE locale entry"); +SYSCTL_INT(_user, USER_EXPR_NEST_MAX, expr_nest_max, CTLFLAG_RD, 0, 0, ""); +SYSCTL_INT(_user, USER_LINE_MAX, line_max, CTLFLAG_RD, + 0, 0, "Max length (bytes) of a text-processing utility's input line"); +SYSCTL_INT(_user, USER_RE_DUP_MAX, re_dup_max, CTLFLAG_RD, + 0, 0, "Maximum number of repeats of a regexp permitted"); +SYSCTL_INT(_user, USER_POSIX2_VERSION, posix2_version, CTLFLAG_RD, + 0, 0, + "The version of POSIX 1003.2 with which the system attempts to comply"); +SYSCTL_INT(_user, USER_POSIX2_C_BIND, posix2_c_bind, CTLFLAG_RD, + 0, 0, "Whether C development supports the C bindings option"); +SYSCTL_INT(_user, USER_POSIX2_C_DEV, posix2_c_dev, CTLFLAG_RD, + 0, 0, "Whether system supports the C development utilities option"); +SYSCTL_INT(_user, USER_POSIX2_CHAR_TERM, posix2_char_term, CTLFLAG_RD, + 0, 0, ""); +SYSCTL_INT(_user, USER_POSIX2_FORT_DEV, posix2_fort_dev, CTLFLAG_RD, + 0, 0, "Whether system supports FORTRAN development utilities"); +SYSCTL_INT(_user, USER_POSIX2_FORT_RUN, posix2_fort_run, CTLFLAG_RD, + 0, 0, "Whether system supports FORTRAN runtime utilities"); +SYSCTL_INT(_user, USER_POSIX2_LOCALEDEF, posix2_localedef, CTLFLAG_RD, + 0, 0, "Whether system supports creation of locales"); +SYSCTL_INT(_user, USER_POSIX2_SW_DEV, posix2_sw_dev, CTLFLAG_RD, + 0, 0, "Whether system supports software development utilities"); +SYSCTL_INT(_user, USER_POSIX2_UPE, posix2_upe, CTLFLAG_RD, + 0, 0, "Whether system supports the user portability utilities"); +SYSCTL_INT(_user, USER_STREAM_MAX, stream_max, CTLFLAG_RD, + 0, 0, "Min Maximum number of streams a process may have open at one time"); +SYSCTL_INT(_user, USER_TZNAME_MAX, tzname_max, CTLFLAG_RD, + 0, 0, "Min Maximum number of types supported for 
timezone names"); + +#include <sys/vnode.h> +SYSCTL_INT(_debug_sizeof, OID_AUTO, vnode, CTLFLAG_RD, + 0, sizeof(struct vnode), "sizeof(struct vnode)"); + +SYSCTL_INT(_debug_sizeof, OID_AUTO, proc, CTLFLAG_RD, + 0, sizeof(struct proc), "sizeof(struct proc)"); + +#include <sys/conf.h> +SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev, CTLFLAG_RD, + 0, sizeof(struct cdev), "sizeof(struct cdev)"); + +#include <sys/bio.h> +#include <sys/buf.h> +SYSCTL_INT(_debug_sizeof, OID_AUTO, bio, CTLFLAG_RD, + 0, sizeof(struct bio), "sizeof(struct bio)"); +SYSCTL_INT(_debug_sizeof, OID_AUTO, buf, CTLFLAG_RD, + 0, sizeof(struct buf), "sizeof(struct buf)"); + +#include <sys/user.h> +SYSCTL_INT(_debug_sizeof, OID_AUTO, kinfo_proc, CTLFLAG_RD, + 0, sizeof(struct kinfo_proc), "sizeof(struct kinfo_proc)"); + +#endif /* __rtems__ */ diff --git a/cpukit/libnetworking/kern/kern_subr.c b/cpukit/libnetworking/kern/kern_subr.c new file mode 100644 index 0000000000..3823f607a1 --- /dev/null +++ b/cpukit/libnetworking/kern/kern_subr.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 + * $Id$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/queue.h> + +int +uiomove(void *cp, int n, struct uio *uio) +{ + register struct iovec *iov; + u_int cnt; + int error; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE) + panic("uiomove: mode"); +#endif + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + if (uio->uio_rw == UIO_READ) + error = copyout(cp, iov->iov_base, cnt); + else + error = copyin(iov->iov_base, cp, cnt); + if (error) + return (error); + break; + + case UIO_SYSSPACE: + if (uio->uio_rw == UIO_READ) + bcopy((caddr_t)cp, iov->iov_base, cnt); + else + bcopy(iov->iov_base, (caddr_t)cp, cnt); + break; + case UIO_NOCOPY: + break; + } + iov->iov_base += cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + cp += cnt; + n -= cnt; + } + return (0); +} + +/* + * General routine to allocate a hash table. + */ +void * +hashinit(elements, type, hashmask) + int elements, type; + u_long *hashmask; +{ + long hashsize; + LIST_HEAD(generic, generic) *hashtbl; + int i; + + if (elements <= 0) + panic("hashinit: bad elements"); + for (hashsize = 1; hashsize <= elements; hashsize <<= 1) + continue; + hashsize >>= 1; + hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); + for (i = 0; i < hashsize; i++) + LIST_INIT(&hashtbl[i]); + *hashmask = hashsize - 1; + return (hashtbl); +} + +#define NPRIMES 27 +static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039, + 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, + 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; + +/* + * General routine to allocate a prime number sized hash table. + */ +void * +phashinit(elements, type, nentries) + int elements, type; + u_long *nentries; +{ + long hashsize; + LIST_HEAD(generic, generic) *hashtbl; + int i; + + if (elements <= 0) + panic("phashinit: bad elements"); + for (i = 1, hashsize = primes[1]; hashsize <= elements;) { + i++; + if (i == NPRIMES) + break; + hashsize = primes[i]; + } + hashsize = primes[i - 1]; + hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); + for (i = 0; i < hashsize; i++) + LIST_INIT(&hashtbl[i]); + *nentries = hashsize; + return (hashtbl); +} diff --git a/cpukit/libnetworking/kern/kern_sysctl.c b/cpukit/libnetworking/kern/kern_sysctl.c new file mode 100644 index 0000000000..993c20f5d2 --- /dev/null +++ b/cpukit/libnetworking/kern/kern_sysctl.c @@ -0,0 +1,1541 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Karels at Berkeley Software Design, Inc. + * + * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD + * project, to make these variables more userfriendly. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 + * $FreeBSD: src/sys/kern/kern_sysctl.c,v 1.135 2002/10/27 07:12:34 rwatson Exp $ + */ + +#include "opt_compat.h" +#include "opt_mac.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/malloc.h> +#include <sys/proc.h> +#ifndef __rtems__ +#include <sys/mac.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> +#include <sys/sysproto.h> +#else +#include <sys/queue.h> + +#include <stdio.h> /* for snprintf() */ +size_t strlcpy(char *, const char *, size_t); +#endif +#include <vm/vm.h> +#include <vm/vm_extern.h> + +#ifndef __rtems__ +static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); +static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids"); +static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); +#else +/* + * Currently these mean nothing on RTEMS. + */ +#define M_SYSCTLOID 1 +#define M_SYSCTLTMP 2 +#define M_ZERO 0 + +#define mtx_lock(l) +#define mtx_unlock(l) + +#endif + +#ifndef __rtems__ +/* + * Locking - this locks the sysctl tree in memory. + */ +static struct sx sysctllock; +#endif + +#ifdef __rtems__ +#define SYSCTL_LOCK() +#define SYSCTL_UNLOCK() +#define SYSCTL_INIT() +#else +#define SYSCTL_LOCK() sx_xlock(&sysctllock) +#define SYSCTL_UNLOCK() sx_xunlock(&sysctllock) +#define SYSCTL_INIT() sx_init(&sysctllock, "sysctl lock") +#endif + +static int sysctl_root(SYSCTL_HANDLER_ARGS); + +struct sysctl_oid_list sysctl__children; /* root list */ + +static struct sysctl_oid * +sysctl_find_oidname(const char *name, struct sysctl_oid_list *list) +{ + struct sysctl_oid *oidp; + + SLIST_FOREACH(oidp, list, oid_link) { + if (strcmp(oidp->oid_name, name) == 0) { + return (oidp); + } + } + return (NULL); +} + +/* + * Initialization of the MIB tree. + * + * Order by number in each list. + */ + +void +sysctl_register_oid(struct sysctl_oid *oidp) +{ + struct sysctl_oid_list *parent = oidp->oid_parent; + struct sysctl_oid *p; + struct sysctl_oid *q; + + /* + * First check if another oid with the same name already + * exists in the parent's list. 
+ */ + p = sysctl_find_oidname(oidp->oid_name, parent); + if (p != NULL) { + if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) { + p->oid_refcnt++; + return; + } else { + printf("can't re-use a leaf (%s)!\n", p->oid_name); + return; + } + } + /* + * If this oid has a number OID_AUTO, give it a number which + * is greater than any current oid. + * NOTE: DO NOT change the starting value here, change it in + * <sys/sysctl.h>, and make sure it is at least 256 to + * accomodate e.g. net.inet.raw as a static sysctl node. + */ + if (oidp->oid_number == OID_AUTO) { + static int32_t newoid = CTL_AUTO_START; + + oidp->oid_number = newoid++; + if (newoid == 0x7fffffff) + panic("out of oids"); + } +#if 0 + else if (oidp->oid_number >= CTL_AUTO_START) { + /* do not panic; this happens when unregistering sysctl sets */ + printf("static sysctl oid too high: %d", oidp->oid_number); + } +#endif + + /* + * Insert the oid into the parent's list in order. + */ + q = NULL; + SLIST_FOREACH(p, parent, oid_link) { + if (oidp->oid_number < p->oid_number) + break; + q = p; + } + if (q) + SLIST_INSERT_AFTER(q, oidp, oid_link); + else + SLIST_INSERT_HEAD(parent, oidp, oid_link); +} + +void +sysctl_unregister_oid(struct sysctl_oid *oidp) +{ + SLIST_REMOVE(oidp->oid_parent, oidp, sysctl_oid, oid_link); +} + +/* Initialize a new context to keep track of dynamically added sysctls. */ +int +sysctl_ctx_init(struct sysctl_ctx_list *c) +{ + + if (c == NULL) { + return (EINVAL); + } + TAILQ_INIT(c); + return (0); +} + +/* Free the context, and destroy all dynamic oids registered in this context */ +int +sysctl_ctx_free(struct sysctl_ctx_list *clist) +{ + struct sysctl_ctx_entry *e, *e1; + int error; + + error = 0; + /* + * First perform a "dry run" to check if it's ok to remove oids. + * XXX FIXME + * XXX This algorithm is a hack. But I don't know any + * XXX better solution for now... + */ + TAILQ_FOREACH(e, clist, link) { + error = sysctl_remove_oid(e->entry, 0, 0); + if (error) + break; + } + /* + * Restore deregistered entries, either from the end, + * or from the place where error occured. + * e contains the entry that was not unregistered + */ + if (error) + e1 = TAILQ_PREV(e, sysctl_ctx_list, link); + else + e1 = TAILQ_LAST(clist, sysctl_ctx_list); + while (e1 != NULL) { + sysctl_register_oid(e1->entry); + e1 = TAILQ_PREV(e1, sysctl_ctx_list, link); + } + if (error) + return(EBUSY); + /* Now really delete the entries */ + e = TAILQ_FIRST(clist); + while (e != NULL) { + e1 = TAILQ_NEXT(e, link); + error = sysctl_remove_oid(e->entry, 1, 0); + if (error) + panic("sysctl_remove_oid: corrupt tree, entry: %s", + e->entry->oid_name); + free(e, M_SYSCTLOID); + e = e1; + } + return (error); +} + +/* Add an entry to the context */ +struct sysctl_ctx_entry * +sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) +{ + struct sysctl_ctx_entry *e; + + if (clist == NULL || oidp == NULL) + return(NULL); + e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK); + e->entry = oidp; + TAILQ_INSERT_HEAD(clist, e, link); + return (e); +} + +/* Find an entry in the context */ +struct sysctl_ctx_entry * +sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) +{ + struct sysctl_ctx_entry *e; + + if (clist == NULL || oidp == NULL) + return(NULL); + TAILQ_FOREACH(e, clist, link) { + if(e->entry == oidp) + return(e); + } + return (e); +} + +/* + * Delete an entry from the context. + * NOTE: this function doesn't free oidp! You have to remove it + * with sysctl_remove_oid(). 
+ */ +int +sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) +{ + struct sysctl_ctx_entry *e; + + if (clist == NULL || oidp == NULL) + return (EINVAL); + e = sysctl_ctx_entry_find(clist, oidp); + if (e != NULL) { + TAILQ_REMOVE(clist, e, link); + free(e, M_SYSCTLOID); + return (0); + } else + return (ENOENT); +} + +/* + * Remove dynamically created sysctl trees. + * oidp - top of the tree to be removed + * del - if 0 - just deregister, otherwise free up entries as well + * recurse - if != 0 traverse the subtree to be deleted + */ +int +sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse) +{ + struct sysctl_oid *p; + int error; + + if (oidp == NULL) + return(EINVAL); + if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { + printf("can't remove non-dynamic nodes!\n"); + return (EINVAL); + } + /* + * WARNING: normal method to do this should be through + * sysctl_ctx_free(). Use recursing as the last resort + * method to purge your sysctl tree of leftovers... + * However, if some other code still references these nodes, + * it will panic. + */ + if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { + if (oidp->oid_refcnt == 1) { + SLIST_FOREACH(p, SYSCTL_CHILDREN(oidp), oid_link) { + if (!recurse) + return (ENOTEMPTY); + error = sysctl_remove_oid(p, del, recurse); + if (error) + return (error); + } + if (del) + free(SYSCTL_CHILDREN(oidp), M_SYSCTLOID); + } + } + if (oidp->oid_refcnt > 1 ) { + oidp->oid_refcnt--; + } else { + if (oidp->oid_refcnt == 0) { + printf("Warning: bad oid_refcnt=%u (%s)!\n", + oidp->oid_refcnt, oidp->oid_name); + return (EINVAL); + } + sysctl_unregister_oid(oidp); + if (del) { + if (oidp->descr) + free((void *)(uintptr_t)(const void *)oidp->descr, M_SYSCTLOID); + free((void *)(uintptr_t)(const void *)oidp->oid_name, + M_SYSCTLOID); + free(oidp, M_SYSCTLOID); + } + } + return (0); +} + +/* + * Create new sysctls at run time. + * clist may point to a valid context initialized with sysctl_ctx_init(). + */ +struct sysctl_oid * +sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, + int number, const char *name, int kind, void *arg1, int arg2, + int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) +{ + struct sysctl_oid *oidp; + ssize_t len; + char *newname; + + /* You have to hook up somewhere.. 
*/ + if (parent == NULL) + return(NULL); + /* Check if the node already exists, otherwise create it */ + oidp = sysctl_find_oidname(name, parent); + if (oidp != NULL) { + if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { + oidp->oid_refcnt++; + /* Update the context */ + if (clist != NULL) + sysctl_ctx_entry_add(clist, oidp); + return (oidp); + } else { + printf("can't re-use a leaf (%s)!\n", name); + return (NULL); + } + } + oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO); + oidp->oid_parent = parent; + SLIST_NEXT(oidp, oid_link) = NULL; + oidp->oid_number = number; + oidp->oid_refcnt = 1; + len = strlen(name); + newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK); + bcopy(name, newname, len + 1); + newname[len] = '\0'; + oidp->oid_name = newname; + oidp->oid_handler = handler; + oidp->oid_kind = CTLFLAG_DYN | kind; + if ((kind & CTLTYPE) == CTLTYPE_NODE) { + /* Allocate space for children */ + SYSCTL_CHILDREN_SET(oidp, malloc(sizeof(struct sysctl_oid_list), + M_SYSCTLOID, M_WAITOK)); + SLIST_INIT(SYSCTL_CHILDREN(oidp)); + } else { + oidp->oid_arg1 = arg1; + oidp->oid_arg2 = arg2; + } + oidp->oid_fmt = fmt; + if (descr) { + int len = strlen(descr) + 1; + oidp->descr = malloc(len, M_SYSCTLOID, M_WAITOK); + if (oidp->descr) + strcpy((char *)(uintptr_t)(const void *)oidp->descr, descr); + } + /* Update the context, if used */ + if (clist != NULL) + sysctl_ctx_entry_add(clist, oidp); + /* Register this oid */ + sysctl_register_oid(oidp); + return (oidp); +} + +/* + * Register the kernel's oids on startup. + */ +SET_DECLARE(sysctl_set, struct sysctl_oid); + +#if defined(__rtems__) +void +#else +static void +#endif +sysctl_register_all(void *arg) +{ + struct sysctl_oid **oidp; + + SYSCTL_INIT(); + SET_FOREACH(oidp, sysctl_set) + sysctl_register_oid(*oidp); +} +SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_ANY, sysctl_register_all, 0); + +/* + * "Staff-functions" + * + * These functions implement a presently undocumented interface + * used by the sysctl program to walk the tree, and get the type + * so it can print the value. + * This interface is under work and consideration, and should probably + * be killed with a big axe by the first person who can find the time. + * (be aware though, that the proper interface isn't as obvious as it + * may seem, there are various conflicting requirements. + * + * {0,0} printf the entire MIB-tree. + * {0,1,...} return the name of the "..." OID. + * {0,2,...} return the next OID. + * {0,3} return the OID of the name in "new" + * {0,4,...} return the kind & format info for the "..." OID. + * {0,5,...} return the description the "..." OID. + */ + +static void +sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) +{ + int k; + struct sysctl_oid *oidp; + + SLIST_FOREACH(oidp, l, oid_link) { + + for (k=0; k<i; k++) + printf(" "); + + printf("%d %s ", oidp->oid_number, oidp->oid_name); + + printf("%c%c", + oidp->oid_kind & CTLFLAG_RD ? 'R':' ', + oidp->oid_kind & CTLFLAG_WR ? 
'W':' '); + + if (oidp->oid_handler) + printf(" *Handler"); + + switch (oidp->oid_kind & CTLTYPE) { + case CTLTYPE_NODE: + printf(" Node\n"); + if (!oidp->oid_handler) { + sysctl_sysctl_debug_dump_node( + oidp->oid_arg1, i+2); + } + break; + case CTLTYPE_INT: printf(" Int\n"); break; + case CTLTYPE_STRING: printf(" String\n"); break; + case CTLTYPE_QUAD: printf(" Quad\n"); break; + case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; + default: printf("\n"); + } + + } +} + +static int +sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) +{ +#ifndef __rtems__ + int error; + + error = suser(req->td); + if (error) + return error; +#endif + sysctl_sysctl_debug_dump_node(&sysctl__children, 0); + return ENOENT; +} + +SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD, + 0, 0, sysctl_sysctl_debug, "-", ""); + +static int +sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) +{ + int *name = (int *) arg1; + u_int namelen = arg2; + int error = 0; + struct sysctl_oid *oid; + struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; + char buf[10]; + + while (namelen) { + if (!lsp) { + snprintf(buf,sizeof(buf),"%d",*name); + if (req->oldidx) + error = SYSCTL_OUT(req, ".", 1); + if (!error) + error = SYSCTL_OUT(req, buf, strlen(buf)); + if (error) + return (error); + namelen--; + name++; + continue; + } + lsp2 = 0; + SLIST_FOREACH(oid, lsp, oid_link) { + if (oid->oid_number != *name) + continue; + + if (req->oldidx) + error = SYSCTL_OUT(req, ".", 1); + if (!error) + error = SYSCTL_OUT(req, oid->oid_name, + strlen(oid->oid_name)); + if (error) + return (error); + + namelen--; + name++; + + if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) + break; + + if (oid->oid_handler) + break; + + lsp2 = (struct sysctl_oid_list *)oid->oid_arg1; + break; + } + lsp = lsp2; + } + return (SYSCTL_OUT(req, "", 1)); +} + +SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD, sysctl_sysctl_name, ""); + +static int +sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, + int *next, int *len, int level, struct sysctl_oid **oidpp) +{ + struct sysctl_oid *oidp; + + *len = level; + SLIST_FOREACH(oidp, lsp, oid_link) { + *next = oidp->oid_number; + *oidpp = oidp; + + if (oidp->oid_kind & CTLFLAG_SKIP) + continue; + + if (!namelen) { + if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) + return 0; + if (oidp->oid_handler) + /* We really should call the handler here...*/ + return 0; + lsp = (struct sysctl_oid_list *)oidp->oid_arg1; + if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1, + len, level+1, oidpp)) + return 0; + goto emptynode; + } + + if (oidp->oid_number < *name) + continue; + + if (oidp->oid_number > *name) { + if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) + return 0; + if (oidp->oid_handler) + return 0; + lsp = (struct sysctl_oid_list *)oidp->oid_arg1; + if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, + next+1, len, level+1, oidpp)) + return (0); + goto next; + } + if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) + continue; + + if (oidp->oid_handler) + continue; + + lsp = (struct sysctl_oid_list *)oidp->oid_arg1; + if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, + len, level+1, oidpp)) + return (0); + next: + namelen = 1; + emptynode: + *len = level; + } + return 1; +} + +static int +sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) +{ + int *name = (int *) arg1; + u_int namelen = arg2; + int i, j, error; + struct sysctl_oid *oid; + struct sysctl_oid_list *lsp = &sysctl__children; + int newoid[CTL_MAXNAME]; + + i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid); + if (i) + return ENOENT; + error = 
SYSCTL_OUT(req, newoid, j * sizeof (int)); + return (error); +} + +SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD, sysctl_sysctl_next, ""); + +static int +name2oid (char *name, int *oid, int *len, struct sysctl_oid **oidpp) +{ + int i; + struct sysctl_oid *oidp; + struct sysctl_oid_list *lsp = &sysctl__children; + char *p; + + if (!*name) + return ENOENT; + + p = name + strlen(name) - 1 ; + if (*p == '.') + *p = '\0'; + + *len = 0; + + for (p = name; *p && *p != '.'; p++) + ; + i = *p; + if (i == '.') + *p = '\0'; + + oidp = SLIST_FIRST(lsp); + + while (oidp && *len < CTL_MAXNAME) { + if (strcmp(name, oidp->oid_name)) { + oidp = SLIST_NEXT(oidp, oid_link); + continue; + } + *oid++ = oidp->oid_number; + (*len)++; + + if (!i) { + if (oidpp) + *oidpp = oidp; + return (0); + } + + if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) + break; + + if (oidp->oid_handler) + break; + + lsp = (struct sysctl_oid_list *)oidp->oid_arg1; + oidp = SLIST_FIRST(lsp); + name = p+1; + for (p = name; *p && *p != '.'; p++) + ; + i = *p; + if (i == '.') + *p = '\0'; + } + return ENOENT; +} + +static int +sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) +{ + char *p; + int error, oid[CTL_MAXNAME], len=0; + struct sysctl_oid *op = 0; + + if (!req->newlen) + return ENOENT; + if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ + return (ENAMETOOLONG); + + p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); + + error = SYSCTL_IN(req, p, req->newlen); + if (error) { + free(p, M_SYSCTL); + return (error); + } + + p [req->newlen] = '\0'; + + error = name2oid(p, oid, &len, &op); + + free(p, M_SYSCTL); + + if (error) + return (error); + + error = SYSCTL_OUT(req, oid, len * sizeof *oid); + return (error); +} + +SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW|CTLFLAG_ANYBODY, 0, 0, + sysctl_sysctl_name2oid, "I", ""); + +static int +sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) +{ + struct sysctl_oid *oid; + int error; + + error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); + if (error) + return (error); + + if (!oid->oid_fmt) + return (ENOENT); + error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); + if (error) + return (error); + error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); + return (error); +} + + +SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD, sysctl_sysctl_oidfmt, ""); + +static int +sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) +{ + struct sysctl_oid *oid; + int error; + + error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); + if (error) + return (error); + + if (!oid->descr) + return (ENOENT); + error = SYSCTL_OUT(req, oid->descr, strlen(oid->descr) + 1); + return (error); +} + +SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD, sysctl_sysctl_oiddescr, ""); + +/* + * Default "handler" functions. + */ + +/* + * Handle an int, signed or unsigned. + * Two cases: + * a variable: point arg1 at it. + * a constant: pass it in arg2. + */ + +int +sysctl_handle_int(SYSCTL_HANDLER_ARGS) +{ + int tmpout, error = 0; + + /* + * Attempt to get a coherent snapshot by making a copy of the data. + */ + if (arg1) + tmpout = *(int *)arg1; + else + tmpout = arg2; + error = SYSCTL_OUT(req, &tmpout, sizeof(int)); + + if (error || !req->newptr) + return (error); + + if (!arg1) + error = EPERM; + else + error = SYSCTL_IN(req, arg1, sizeof(int)); + return (error); +} + +/* + * Handle a long, signed or unsigned. arg1 points to it. + */ + +int +sysctl_handle_long(SYSCTL_HANDLER_ARGS) +{ + int error = 0; + long tmpout; + + /* + * Attempt to get a coherent snapshot by making a copy of the data. 
+ */ + if (!arg1) + return (EINVAL); + tmpout = *(long *)arg1; + error = SYSCTL_OUT(req, &tmpout, sizeof(long)); + + if (error || !req->newptr) + return (error); + + error = SYSCTL_IN(req, arg1, sizeof(long)); + return (error); +} + +/* + * Handle our generic '\0' terminated 'C' string. + * Two cases: + * a variable string: point arg1 at it, arg2 is max length. + * a constant string: point arg1 at it, arg2 is zero. + */ + +int +sysctl_handle_string(SYSCTL_HANDLER_ARGS) +{ + int error=0; + char *tmparg; + size_t outlen; + + /* + * Attempt to get a coherent snapshot by copying to a + * temporary kernel buffer. + */ +retry: + outlen = strlen((char *)arg1)+1; + tmparg = malloc(outlen, M_SYSCTLTMP, M_WAITOK); + + if (strlcpy(tmparg, (char *)arg1, outlen) >= outlen) { + free(tmparg, M_SYSCTLTMP); + goto retry; + } + + error = SYSCTL_OUT(req, tmparg, outlen); + free(tmparg, M_SYSCTLTMP); + + if (error || !req->newptr) + return (error); + + if ((req->newlen - req->newidx) >= arg2) { + error = EINVAL; + } else { + arg2 = (req->newlen - req->newidx); + error = SYSCTL_IN(req, arg1, arg2); + ((char *)arg1)[arg2] = '\0'; + } + + return (error); +} + +/* + * Handle any kind of opaque data. + * arg1 points to it, arg2 is the size. + */ + +int +sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) +{ + int error; + void *tmparg; + + /* + * Attempt to get a coherent snapshot, either by wiring the + * user space buffer or copying to a temporary kernel buffer + * depending on the size of the data. + */ + if (arg2 > PAGE_SIZE) { + sysctl_wire_old_buffer(req, arg2); + error = SYSCTL_OUT(req, arg1, arg2); + } else { + tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); + bcopy(arg1, tmparg, arg2); + error = SYSCTL_OUT(req, tmparg, arg2); + free(tmparg, M_SYSCTLTMP); + } + + if (error || !req->newptr) + return (error); + + error = SYSCTL_IN(req, arg1, arg2); + + return (error); +} + +/* + * Transfer functions to/from kernel space. 
+ * XXX: rather untested at this point + */ +static int +sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) +{ + size_t i = 0; + + if (req->oldptr) { + i = l; + if (req->oldlen <= req->oldidx) + i = 0; + else + if (i > req->oldlen - req->oldidx) + i = req->oldlen - req->oldidx; + if (i > 0) + bcopy(p, (char *)req->oldptr + req->oldidx, i); + } + req->oldidx += l; + if (req->oldptr && i != l) + return (ENOMEM); + return (0); +} + +static int +sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) +{ + if (!req->newptr) + return 0; + if (req->newlen - req->newidx < l) + return (EINVAL); + bcopy((char *)req->newptr + req->newidx, p, l); + req->newidx += l; + return (0); +} + +int +kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, + size_t *oldlenp, void *new, size_t newlen, size_t *retval) +{ + int error = 0; + struct sysctl_req req; + + bzero(&req, sizeof req); + + req.td = td; + + if (oldlenp) { + req.oldlen = *oldlenp; + } + + if (old) { + req.oldptr= old; + } + + if (new != NULL) { + req.newlen = newlen; + req.newptr = new; + } + + req.oldfunc = sysctl_old_kernel; + req.newfunc = sysctl_new_kernel; + req.lock = REQ_LOCKED; + + SYSCTL_LOCK(); + + error = sysctl_root(0, name, namelen, &req); + + if (req.lock == REQ_WIRED) +#ifdef __rtems__ + printf ("kern_sysctl: vsunlock needs to be called!\n"); +#else + vsunlock(req.oldptr, req.oldlen); +#endif + + SYSCTL_UNLOCK(); + + if (error && error != ENOMEM) + return (error); + + if (retval) { + if (req.oldptr && req.oldidx > req.oldlen) + *retval = req.oldlen; + else + *retval = req.oldidx; + } + return (error); +} + +int +kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, + void *new, size_t newlen, size_t *retval) +{ + int oid[CTL_MAXNAME]; + size_t oidlen, plen; + int error; + + plen = 0; /* RTEMS - to avoid warnings */ + + oid[0] = 0; /* sysctl internal magic */ + oid[1] = 3; /* name2oid */ + oidlen = sizeof(oid); + + error = kernel_sysctl(td, oid, 2, oid, &oidlen, + (void *)name, strlen(name), &plen); + if (error) + return (error); + + error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp, + new, newlen, retval); + return (error); +} + +/* + * Transfer function to/from user space. + */ +static int +sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) +{ + int error = 0; + size_t i = 0; + +#ifndef __rtems__ + if (req->lock == 1 && req->oldptr) + WITNESS_SLEEP(1, NULL); +#endif + if (req->oldptr) { + i = l; + if (req->oldlen <= req->oldidx) + i = 0; + else + if (i > req->oldlen - req->oldidx) + i = req->oldlen - req->oldidx; + if (i > 0) + error = copyout(p, (char *)req->oldptr + req->oldidx, + i); + } + req->oldidx += l; + if (error) + return (error); + if (req->oldptr && i < l) + return (ENOMEM); + return (0); +} + +static int +sysctl_new_user(struct sysctl_req *req, void *p, size_t l) +{ + int error; + + if (!req->newptr) + return 0; + if (req->newlen - req->newidx < l) + return (EINVAL); + error = copyin((char *)req->newptr + req->newidx, p, l); + req->newidx += l; + return (error); +} + +/* + * Wire the user space destination buffer. If set to a value greater than + * zero, the len parameter limits the maximum amount of wired memory. + * + * XXX - The len parameter is currently ignored due to the lack of + * a place to save it in the sysctl_req structure so that the matching + * amount of memory can be unwired in the sysctl exit code. 
+ */ +int +sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) +{ + if (req->lock == REQ_LOCKED && req->oldptr && + req->oldfunc == sysctl_old_user) { +#ifndef __rtems__ + vslock(req->oldptr, req->oldlen); +#endif + req->lock = REQ_WIRED; + } + return (0); +} + +int +sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, + int *nindx, struct sysctl_req *req) +{ + struct sysctl_oid *oid; + int indx; + + oid = SLIST_FIRST(&sysctl__children); + indx = 0; + while (oid && indx < CTL_MAXNAME) { + if (oid->oid_number == name[indx]) { + indx++; + if (oid->oid_kind & CTLFLAG_NOLOCK) + req->lock = REQ_UNLOCKED; + if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { + if (oid->oid_handler != NULL || + indx == namelen) { + *noid = oid; + if (nindx != NULL) + *nindx = indx; + return (0); + } + oid = SLIST_FIRST( + (struct sysctl_oid_list *)oid->oid_arg1); + } else if (indx == namelen) { + *noid = oid; + if (nindx != NULL) + *nindx = indx; + return (0); + } else { + return (ENOTDIR); + } + } else { + oid = SLIST_NEXT(oid, oid_link); + } + } + return (ENOENT); +} + +/* + * Traverse our tree, and find the right node, execute whatever it points + * to, and return the resulting error code. + */ + +static int +sysctl_root(SYSCTL_HANDLER_ARGS) +{ + struct sysctl_oid *oid; + int error, indx; + + error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); + if (error) + return (error); + + if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { + /* + * You can't call a sysctl when it's a node, but has + * no handler. Inform the user that it's a node. + * The indx may or may not be the same as namelen. + */ + if (oid->oid_handler == NULL) + return (EISDIR); + } + + /* Is this sysctl writable? */ + if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) + return (EPERM); + +#ifndef __rtems__ + KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); + + /* Is this sysctl sensitive to securelevels? */ + if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { + error = securelevel_gt(req->td->td_ucred, 0); + if (error) + return (error); + } + + /* Is this sysctl writable by only privileged users? */ + if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) { + int flags; + + if (oid->oid_kind & CTLFLAG_PRISON) + flags = PRISON_ROOT; + else + flags = 0; + error = suser_cred(req->td->td_ucred, flags); + if (error) + return (error); + } +#endif + + if (!oid->oid_handler) + return EINVAL; + + if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) + error = oid->oid_handler(oid, (int *)arg1 + indx, arg2 - indx, + req); + else + error = oid->oid_handler(oid, oid->oid_arg1, oid->oid_arg2, + req); + return (error); +} + +#ifndef _SYS_SYSPROTO_H_ +struct sysctl_args { + int *name; + u_int namelen; + void *old; + size_t *oldlenp; + void *new; + size_t newlen; +}; +#endif + +/* + * MPSAFE + */ +int +__sysctl(struct thread *td, struct sysctl_args *uap) +{ + int error, name[CTL_MAXNAME]; + size_t j; + + if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) + return (EINVAL); + + error = copyin(uap->name, &name, uap->namelen * sizeof(int)); + if (error) + return (error); + + mtx_lock(&Giant); + + error = userland_sysctl(td, name, uap->namelen, + uap->old, uap->oldlenp, 0, + uap->new, uap->newlen, &j); + if (error && error != ENOMEM) + goto done2; + if (uap->oldlenp) { + int i = copyout(&j, uap->oldlenp, sizeof(j)); + if (i) + error = i; + } +done2: + mtx_unlock(&Giant); + return (error); +} + +/* + * This is used from various compatibility syscalls too. That's why name + * must be in kernel space. 
+ */ +int +userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, + size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval) +{ + int error = 0; + struct sysctl_req req, req2; + + bzero(&req, sizeof req); + + req.td = td; + + if (oldlenp) { + if (inkernel) { + req.oldlen = *oldlenp; + } else { + error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); + if (error) + return (error); + } + } + + if (old) { +#ifndef __rtems__ + if (!useracc(old, req.oldlen, VM_PROT_WRITE)) + return (EFAULT); +#endif + req.oldptr= old; + } + + if (new != NULL) { +#ifndef __rtems__ + if (!useracc(new, req.newlen, VM_PROT_READ)) + return (EFAULT); +#endif + req.newlen = newlen; + req.newptr = new; + } + + req.oldfunc = sysctl_old_user; + req.newfunc = sysctl_new_user; + req.lock = REQ_LOCKED; + + SYSCTL_LOCK(); + +#ifdef MAC + error = mac_check_system_sysctl(td->td_ucred, name, namelen, old, + oldlenp, inkernel, new, newlen); + if (error) { + SYSCTL_UNLOCK(); + return (error); + } +#endif + + do { + req2 = req; + error = sysctl_root(0, name, namelen, &req2); + } while (error == EAGAIN); + + req = req2; +#ifndef __rtems__ + if (req.lock == REQ_WIRED) + vsunlock(req.oldptr, req.oldlen); +#endif + + SYSCTL_UNLOCK(); + + if (error && error != ENOMEM) + return (error); + + if (retval) { + if (req.oldptr && req.oldidx > req.oldlen) + *retval = req.oldlen; + else + *retval = req.oldidx; + } + return (error); +} + +#ifdef COMPAT_43 +#include <sys/socket.h> +#include <vm/vm_param.h> + +#define KINFO_PROC (0<<8) +#define KINFO_RT (1<<8) +#define KINFO_VNODE (2<<8) +#define KINFO_FILE (3<<8) +#define KINFO_METER (4<<8) +#define KINFO_LOADAVG (5<<8) +#define KINFO_CLOCKRATE (6<<8) + +/* Non-standard BSDI extension - only present on their 4.3 net-2 releases */ +#define KINFO_BSDI_SYSINFO (101<<8) + +/* + * XXX this is bloat, but I hope it's better here than on the potentially + * limited kernel stack... -Peter + */ + +static struct { + int bsdi_machine; /* "i386" on BSD/386 */ +/* ^^^ this is an offset to the string, relative to the struct start */ + char *pad0; + long pad1; + long pad2; + long pad3; + u_long pad4; + u_long pad5; + u_long pad6; + + int bsdi_ostype; /* "BSD/386" on BSD/386 */ + int bsdi_osrelease; /* "1.1" on BSD/386 */ + long pad7; + long pad8; + char *pad9; + + long pad10; + long pad11; + int pad12; + long pad13; + quad_t pad14; + long pad15; + + struct timeval pad16; + /* we dont set this, because BSDI's uname used gethostname() instead */ + int bsdi_hostname; /* hostname on BSD/386 */ + + /* the actual string data is appended here */ + +} bsdi_si; +/* + * this data is appended to the end of the bsdi_si structure during copyout. + * The "char *" offsets are relative to the base of the bsdi_si struct. + * This contains "FreeBSD\02.0-BUILT-nnnnnn\0i386\0", and these strings + * should not exceed the length of the buffer here... (or else!! :-) + */ +static char bsdi_strings[80]; /* It had better be less than this! 
*/ + +#ifndef _SYS_SYSPROTO_H_ +struct getkerninfo_args { + int op; + char *where; + size_t *size; + int arg; +}; +#endif + +/* + * MPSAFE + */ +int +ogetkerninfo(struct thread *td, struct getkerninfo_args *uap) +{ + int error, name[6]; + size_t size; + u_int needed = 0; + + mtx_lock(&Giant); + + switch (uap->op & 0xff00) { + + case KINFO_RT: + name[0] = CTL_NET; + name[1] = PF_ROUTE; + name[2] = 0; + name[3] = (uap->op & 0xff0000) >> 16; + name[4] = uap->op & 0xff; + name[5] = uap->arg; + error = userland_sysctl(td, name, 6, uap->where, uap->size, + 0, 0, 0, &size); + break; + + case KINFO_VNODE: + name[0] = CTL_KERN; + name[1] = KERN_VNODE; + error = userland_sysctl(td, name, 2, uap->where, uap->size, + 0, 0, 0, &size); + break; + + case KINFO_PROC: + name[0] = CTL_KERN; + name[1] = KERN_PROC; + name[2] = uap->op & 0xff; + name[3] = uap->arg; + error = userland_sysctl(td, name, 4, uap->where, uap->size, + 0, 0, 0, &size); + break; + + case KINFO_FILE: + name[0] = CTL_KERN; + name[1] = KERN_FILE; + error = userland_sysctl(td, name, 2, uap->where, uap->size, + 0, 0, 0, &size); + break; + + case KINFO_METER: + name[0] = CTL_VM; + name[1] = VM_TOTAL; + error = userland_sysctl(td, name, 2, uap->where, uap->size, + 0, 0, 0, &size); + break; + + case KINFO_LOADAVG: + name[0] = CTL_VM; + name[1] = VM_LOADAVG; + error = userland_sysctl(td, name, 2, uap->where, uap->size, + 0, 0, 0, &size); + break; + + case KINFO_CLOCKRATE: + name[0] = CTL_KERN; + name[1] = KERN_CLOCKRATE; + error = userland_sysctl(td, name, 2, uap->where, uap->size, + 0, 0, 0, &size); + break; + + case KINFO_BSDI_SYSINFO: { + /* + * this is pretty crude, but it's just enough for uname() + * from BSDI's 1.x libc to work. + * + * *size gives the size of the buffer before the call, and + * the amount of data copied after a successful call. + * If successful, the return value is the amount of data + * available, which can be larger than *size. + * + * BSDI's 2.x product apparently fails with ENOMEM if *size + * is too small. + */ + + u_int left; + char *s; + + bzero((char *)&bsdi_si, sizeof(bsdi_si)); + bzero(bsdi_strings, sizeof(bsdi_strings)); + + s = bsdi_strings; + + bsdi_si.bsdi_ostype = (s - bsdi_strings) + sizeof(bsdi_si); + strcpy(s, ostype); + s += strlen(s) + 1; + + bsdi_si.bsdi_osrelease = (s - bsdi_strings) + sizeof(bsdi_si); + strcpy(s, osrelease); + s += strlen(s) + 1; + + bsdi_si.bsdi_machine = (s - bsdi_strings) + sizeof(bsdi_si); + strcpy(s, machine); + s += strlen(s) + 1; + + needed = sizeof(bsdi_si) + (s - bsdi_strings); + + if ((uap->where == NULL) || (uap->size == NULL)) { + /* process is asking how much buffer to supply.. */ + size = needed; + error = 0; + break; + } + + if ((error = copyin(uap->size, &size, sizeof(size))) != 0) + break; + + /* if too much buffer supplied, trim it down */ + if (size > needed) + size = needed; + + /* how much of the buffer is remaining */ + left = size; + + if ((error = copyout((char *)&bsdi_si, uap->where, left)) != 0) + break; + + /* is there any point in continuing? */ + if (left > sizeof(bsdi_si)) { + left -= sizeof(bsdi_si); + error = copyout(&bsdi_strings, + uap->where + sizeof(bsdi_si), left); + } + break; + } + + default: + error = EOPNOTSUPP; + break; + } + if (error == 0) { + td->td_retval[0] = needed ? 
needed : size; + if (uap->size) { + error = copyout(&size, uap->size, sizeof(size)); + } + } + mtx_unlock(&Giant); + return (error); +} +#endif /* COMPAT_43 */ diff --git a/cpukit/libnetworking/kern/uipc_domain.c b/cpukit/libnetworking/kern/uipc_domain.c new file mode 100644 index 0000000000..241fe4cc86 --- /dev/null +++ b/cpukit/libnetworking/kern/uipc_domain.c @@ -0,0 +1,219 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_domain.c 8.2 (Berkeley) 10/18/93 + * $Id$ + */ + +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/protosw.h> +#include <sys/domain.h> +#include <sys/mbuf.h> +#include <sys/kernel.h> +#include <sys/systm.h> + +/* + * System initialization + * + * Note: domain initialization wants to take place on a per domain basis + * as a result of traversing a linker set. Most likely, each domain + * want to call a registration function rather than being handled here + * in domaininit(). Probably this will look like: + * + * SYSINIT(unique, SI_SUB_PROTO_DOMAI, SI_ORDER_ANY, domain_add, xxx) + * + * Where 'xxx' is replaced by the address of a parameter struct to be + * passed to the doamin_add() function. 
+ */ + +#if !defined(__rtems__) +static int x_save_spl; /* used by kludge*/ +static void kludge_splimp(void *); +static void kludge_splx(void *); + void domaininit(void *); +SYSINIT(splimp, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, kludge_splimp, &x_save_spl) +SYSINIT(domain, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, domaininit, NULL) +SYSINIT(splx, SI_SUB_PROTO_END, SI_ORDER_FIRST, kludge_splx, &x_save_spl) +#endif + +static void pffasttimo(void *); +static void pfslowtimo(void *); + +struct domain *domains; + +#define ADDDOMAIN(x) { \ + __CONCAT(x,domain.dom_next) = domains; \ + domains = &__CONCAT(x,domain); \ +} + +/* ARGSUSED*/ +void +domaininit(void *dummy) +{ + register struct domain *dp; + register struct protosw *pr; + +/* - not in our sources +#ifdef ISDN + ADDDOMAIN(isdn); +#endif +*/ + + for (dp = domains; dp; dp = dp->dom_next) { + if (dp->dom_init) + (*dp->dom_init)(); + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++){ +#ifdef PRU_OLDSTYLE + /* See comments in uipc_socket2.c. */ + if (pr->pr_usrreqs == 0 && pr->pr_ousrreq) + pr->pr_usrreqs = &pru_oldstyle; +#endif + if (pr->pr_init) + (*pr->pr_init)(); + } + } + + if (max_linkhdr < 16) /* XXX */ + max_linkhdr = 16; + max_hdr = max_linkhdr + max_protohdr; + max_datalen = MHLEN - max_hdr; + timeout(pffasttimo, (void *)0, 1); + timeout(pfslowtimo, (void *)0, 1); +} + + +#if !defined(__rtems__) +/* + * The following two operations are kludge code. Most likely, they should + * be done as a "domainpreinit()" for the first function and then rolled + * in as the last act of "domaininit()" for the second. + * + * In point of fact, it is questionable why other initialization prior + * to this does not also take place at splimp by default. + */ +static void +kludge_splimp(void *udata) +{ + int *savesplp = udata; + + *savesplp = splimp(); +} + +static void +kludge_splx(void *udata) +{ + int *savesplp = udata; + + splx( *savesplp); +} +#endif /* !defined(__rtems__) */ + +struct protosw * +pffindtype(int family, int type) +{ + register struct domain *dp; + register struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + if (dp->dom_family == family) + goto found; + return (0); +found: + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_type && pr->pr_type == type) + return (pr); + return (0); +} + +struct protosw * +pffindproto(int family, int protocol, int type) +{ + register struct domain *dp; + register struct protosw *pr; + struct protosw *maybe = 0; + + if (family == 0) + return (0); + for (dp = domains; dp; dp = dp->dom_next) + if (dp->dom_family == family) + goto found; + return (0); +found: + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) { + if ((pr->pr_protocol == protocol) && (pr->pr_type == type)) + return (pr); + + if (type == SOCK_RAW && pr->pr_type == SOCK_RAW && + pr->pr_protocol == 0 && maybe == (struct protosw *)0) + maybe = pr; + } + return (maybe); +} + +void +pfctlinput(int cmd, struct sockaddr *sa) +{ + register struct domain *dp; + register struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_ctlinput) + (*pr->pr_ctlinput)(cmd, sa, (void *)0); +} + +static void +pfslowtimo(void *arg) +{ + register struct domain *dp; + register struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_slowtimo) + (*pr->pr_slowtimo)(); + timeout(pfslowtimo, (void *)0, hz/2); +} + +static void 
+pffasttimo(void *arg) +{ + register struct domain *dp; + register struct protosw *pr; + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_fasttimo) + (*pr->pr_fasttimo)(); + timeout(pffasttimo, (void *)0, hz/5); +} diff --git a/cpukit/libnetworking/kern/uipc_mbuf.c b/cpukit/libnetworking/kern/uipc_mbuf.c new file mode 100644 index 0000000000..390f9b6899 --- /dev/null +++ b/cpukit/libnetworking/kern/uipc_mbuf.c @@ -0,0 +1,741 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 + * $Id$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#define MBTYPES +#include <sys/mbuf.h> +#include <sys/kernel.h> +#include <sys/syslog.h> +#include <sys/domain.h> +#include <sys/protosw.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_extern.h> + +#if !defined(__rtems__) +static void mbinit (void *) __attribute__ ((unused)); +SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) +#endif + +struct mbuf *mbutl; +char *mclrefcnt; +struct mbstat mbstat; +struct mbuf *mmbfree; +union mcluster *mclfree; +int max_linkhdr; +int max_protohdr; +int max_hdr; +int max_datalen; + +/* "number of clusters of pages" */ +#define NCL_INIT 1 + +#define NMB_INIT 16 + +/* + * When MGET failes, ask protocols to free space when short of memory, + * then re-attempt to allocate an mbuf. + */ +struct mbuf * +m_retry(int i, int t) +{ + register struct mbuf *m; + + m_reclaim(); +#define m_retry(i, t) (struct mbuf *)0 + MGET(m, i, t); +#undef m_retry + if (m != NULL) + mbstat.m_wait++; + else + mbstat.m_drops++; + return (m); +} + +/* + * As above; retry an MGETHDR. 
+ */ +struct mbuf * +m_retryhdr(int i, int t) +{ + register struct mbuf *m; + + m_reclaim(); +#define m_retryhdr(i, t) (struct mbuf *)0 + MGETHDR(m, i, t); +#undef m_retryhdr + if (m != NULL) + mbstat.m_wait++; + else + mbstat.m_drops++; + return (m); +} + +void +m_reclaim(void) +{ + register struct domain *dp; + register struct protosw *pr; + int s = splimp(); + + for (dp = domains; dp; dp = dp->dom_next) + for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) + if (pr->pr_drain) + (*pr->pr_drain)(); + splx(s); + mbstat.m_drain++; +} + +/* + * Space allocation routines. + * These are also available as macros + * for critical paths. + */ +struct mbuf * +m_get(int nowait, int type) +{ + register struct mbuf *m; + + MGET(m, nowait, type); + return (m); +} + +struct mbuf * +m_gethdr(int nowait, int type) +{ + register struct mbuf *m; + + MGETHDR(m, nowait, type); + return (m); +} + +struct mbuf * +m_getclr(int nowait, int type) +{ + register struct mbuf *m; + + MGET(m, nowait, type); + if (m == 0) + return (0); + bzero(mtod(m, caddr_t), MLEN); + return (m); +} + +struct mbuf * +m_free(struct mbuf *m) +{ + struct mbuf *n; + + MFREE(m, n); + return (n); +} + +void +m_freem(struct mbuf *mb) +{ + struct mbuf *n; + + if (mb == NULL) + return; + do { + MFREE(mb, n); + mb = n; + } while (mb); +} + +/* + * Mbuffer utility routines. + */ + +/* + * Lesser-used path for M_PREPEND: + * allocate new mbuf to prepend to chain, + * copy junk along. + */ +struct mbuf * +m_prepend(struct mbuf *m, int len, int how) +{ + struct mbuf *mn; + + MGET(mn, how, m->m_type); + if (mn == (struct mbuf *)NULL) { + m_freem(m); + return ((struct mbuf *)NULL); + } + if (m->m_flags & M_PKTHDR) { + M_COPY_PKTHDR(mn, m); + m->m_flags &= ~M_PKTHDR; + } + mn->m_next = m; + m = mn; + if (len < MHLEN) + MH_ALIGN(m, len); + m->m_len = len; + return (m); +} + +/* + * Make a copy of an mbuf chain starting "off0" bytes from the beginning, + * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. + * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. + */ +static int MCFail; + +struct mbuf * +m_copym(struct mbuf *m, int off0, uint32_t len, int wait) +{ + struct mbuf *n, **np; + int off = off0; + struct mbuf *top; + int copyhdr = 0; + + if (off < 0 || len < 0) + panic("m_copym"); + if (off == 0 && m->m_flags & M_PKTHDR) + copyhdr = 1; + while (off > 0) { + if (m == NULL) + panic("m_copym"); + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + np = ⊤ + top = 0; + while (len > 0) { + if (m == NULL) { + if (len != M_COPYALL) + panic("m_copym"); + break; + } + MGET(n, wait, m->m_type); + *np = n; + if (n == NULL) + goto nospace; + if (copyhdr) { + M_COPY_PKTHDR(n, m); + if (len == M_COPYALL) + n->m_pkthdr.len -= off0; + else + n->m_pkthdr.len = len; + copyhdr = 0; + } + n->m_len = min(len, m->m_len - off); + if (m->m_flags & M_EXT) { + n->m_data = m->m_data + off; + if(!m->m_ext.ext_ref) + mclrefcnt[mtocl(m->m_ext.ext_buf)]++; + else + (*(m->m_ext.ext_ref))(m->m_ext.ext_buf, + m->m_ext.ext_size); + n->m_ext = m->m_ext; + n->m_flags |= M_EXT; + } else + bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), + (unsigned)n->m_len); + if (len != M_COPYALL) + len -= n->m_len; + off = 0; + m = m->m_next; + np = &n->m_next; + } + if (top == NULL) + MCFail++; + return (top); +nospace: + m_freem(top); + MCFail++; + return (NULL); +} + +/* + * Copy an entire packet, including header (which must be present). + * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. 
+ */ +struct mbuf * +m_copypacket(struct mbuf *m, int how) +{ + struct mbuf *top, *n, *o; + + MGET(n, how, m->m_type); + top = n; + if (!n) + goto nospace; + + M_COPY_PKTHDR(n, m); + n->m_len = m->m_len; + if (m->m_flags & M_EXT) { + n->m_data = m->m_data; + mclrefcnt[mtocl(m->m_ext.ext_buf)]++; + n->m_ext = m->m_ext; + n->m_flags |= M_EXT; + } else { + bcopy(mtod(m, char *), mtod(n, char *), n->m_len); + } + + m = m->m_next; + while (m) { + MGET(o, how, m->m_type); + if (!o) + goto nospace; + + n->m_next = o; + n = n->m_next; + + n->m_len = m->m_len; + if (m->m_flags & M_EXT) { + n->m_data = m->m_data; + mclrefcnt[mtocl(m->m_ext.ext_buf)]++; + n->m_ext = m->m_ext; + n->m_flags |= M_EXT; + } else { + bcopy(mtod(m, char *), mtod(n, char *), n->m_len); + } + + m = m->m_next; + } + return top; +nospace: + m_freem(top); + MCFail++; + return 0; +} + +/* + * Copy data from an mbuf chain starting "off" bytes from the beginning, + * continuing for "len" bytes, into the indicated buffer. Return -1 if requested + * size is bigger than available + */ +int +m_copydata(const struct mbuf *m, int off, int len, caddr_t cp) +{ + u_int count; + + if (off < 0 || len < 0) + panic("m_copydata"); + while (off > 0) { + if (m == 0) { + /*printf("m_copydata: offset > mbuf length ("); + while(m0) { + printf("[%d] ",m0->m_len); + m0 = m0->m_next; + } + printf(")\n");*/ + return -1; + } + if (off < m->m_len) + break; + off -= m->m_len; + m = m->m_next; + } + while (len > 0) { + if (m == 0) { + /*printf("m_copydata: length > mbuf length ("); + while(m0) { + printf("[%d] ",m0->m_len); + m0 = m0->m_next; + } + printf(")\n");*/ + + return -1; + } + count = min(m->m_len - off, len); + bcopy(mtod(m, caddr_t) + off, cp, count); + len -= count; + cp += count; + off = 0; + m = m->m_next; + } + return 0; +} + +/* + * Concatenate mbuf chain n to m. + * Both chains must be of the same type (e.g. MT_DATA). + * Any m_pkthdr is not updated. + */ +void +m_cat(struct mbuf *m, struct mbuf *n) +{ + while (m->m_next) + m = m->m_next; + while (n) { + if (m->m_flags & M_EXT || + m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { + /* just join the two chains */ + m->m_next = n; + return; + } + /* splat the data from one into the other */ + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (u_int)n->m_len); + m->m_len += n->m_len; + n = m_free(n); + } +} + +void +m_adj(struct mbuf *mp, int req_len) +{ + int len = req_len; + struct mbuf *m; + int count; + + if ((m = mp) == NULL) + return; + if (len >= 0) { + /* + * Trim from head. + */ + while (m != NULL && len > 0) { + if (m->m_len <= len) { + len -= m->m_len; + m->m_len = 0; + m = m->m_next; + } else { + m->m_len -= len; + m->m_data += len; + len = 0; + } + } + m = mp; + if (mp->m_flags & M_PKTHDR) + m->m_pkthdr.len -= (req_len - len); + } else { + /* + * Trim from tail. Scan the mbuf chain, + * calculating its length and finding the last mbuf. + * If the adjustment only affects this mbuf, then just + * adjust and return. Otherwise, rescan and truncate + * after the remaining size. + */ + len = -len; + count = 0; + for (;;) { + count += m->m_len; + if (m->m_next == (struct mbuf *)0) + break; + m = m->m_next; + } + if (m->m_len >= len) { + m->m_len -= len; + if (mp->m_flags & M_PKTHDR) + mp->m_pkthdr.len -= len; + return; + } + count -= len; + if (count < 0) + count = 0; + /* + * Correct length for chain is "count". + * Find the mbuf with last data, adjust its length, + * and toss data from remaining mbufs on chain. 
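+	 * For instance (hypothetical numbers): after m_adj(m, -4) on a
+	 * chain holding 60 bytes, "count" is 56 here, the mbuf containing
+	 * byte 56 is shortened below, and any mbufs after it get length 0.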
+ */ + m = mp; + if (m->m_flags & M_PKTHDR) + m->m_pkthdr.len = count; + for (; m; m = m->m_next) { + if (m->m_len >= count) { + m->m_len = count; + break; + } + count -= m->m_len; + } + while (m->m_next) + (m = m->m_next) ->m_len = 0; + } +} + +/* + * Rearange an mbuf chain so that len bytes are contiguous + * and in the data area of an mbuf (so that mtod and dtom + * will work for a structure of size len). Returns the resulting + * mbuf chain on success, frees it and returns null on failure. + * If there is room, it will add up to max_protohdr-len extra bytes to the + * contiguous region in an attempt to avoid being called next time. + */ +static int MPFail; + +struct mbuf * +m_pullup(struct mbuf *n, int len) +{ + struct mbuf *m; + int count; + int space; + + /* + * If first mbuf has no cluster, and has room for len bytes + * without shifting current data, pullup into it, + * otherwise allocate a new mbuf to prepend to the chain. + */ + if ((n->m_flags & M_EXT) == 0 && + n->m_data + len < &n->m_dat[MLEN] && n->m_next) { + if (n->m_len >= len) + return (n); + m = n; + n = n->m_next; + len -= m->m_len; + } else { + if (len > MHLEN) + goto bad; + MGET(m, M_DONTWAIT, n->m_type); + if (m == NULL) + goto bad; + m->m_len = 0; + if (n->m_flags & M_PKTHDR) { + M_COPY_PKTHDR(m, n); + n->m_flags &= ~M_PKTHDR; + } + } + space = &m->m_dat[MLEN] - (m->m_data + m->m_len); + do { + count = min(min(max(len, max_protohdr), space), n->m_len); + bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, + (unsigned)count); + len -= count; + m->m_len += count; + n->m_len -= count; + space -= count; + if (n->m_len) + n->m_data += count; + else + n = m_free(n); + } while (len > 0 && n); + if (len > 0) { + (void) m_free(m); + goto bad; + } + m->m_next = n; + return (m); + bad: + m_freem(n); + MPFail++; + return (NULL); +} + +/* + * Partition an mbuf chain in two pieces, returning the tail -- + * all but the first len0 bytes. In case of failure, it returns NULL and + * attempts to restore the chain to its original state. + */ +struct mbuf * +m_split(struct mbuf *m0, int len0, int wait) +{ + struct mbuf *m, *n; + u_int len = len0, remain; + + for (m = m0; m && len > m->m_len; m = m->m_next) + len -= m->m_len; + if (m == NULL) + return (NULL); + remain = m->m_len - len; + if (m0->m_flags & M_PKTHDR) { + MGETHDR(n, wait, m0->m_type); + if (n == NULL) + return (NULL); + n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; + n->m_pkthdr.len = m0->m_pkthdr.len - len0; + m0->m_pkthdr.len = len0; + if (m->m_flags & M_EXT) + goto extpacket; + if (remain > MHLEN) { + /* m can't be the lead packet */ + MH_ALIGN(n, 0); + n->m_next = m_split(m, len, wait); + if (n->m_next == 0) { + (void) m_free(n); + return (0); + } else + return (n); + } else + MH_ALIGN(n, remain); + } else if (remain == 0) { + n = m->m_next; + m->m_next = NULL; + return (n); + } else { + MGET(n, wait, m->m_type); + if (n == 0) + return (0); + M_ALIGN(n, remain); + } +extpacket: + if (m->m_flags & M_EXT) { + n->m_flags |= M_EXT; + n->m_ext = m->m_ext; + if(!m->m_ext.ext_ref) + mclrefcnt[mtocl(m->m_ext.ext_buf)]++; + else + (*(m->m_ext.ext_ref))(m->m_ext.ext_buf, + m->m_ext.ext_size); + m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */ + n->m_data = m->m_data + len; + } else { + bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); + } + n->m_len = remain; + m->m_len = len; + n->m_next = m->m_next; + m->m_next = NULL; + return (n); +} +/* + * Routine to copy from device local memory into mbufs. 
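+ *
+ * A typical (hypothetical) use in a driver receive path might be
+ *
+ *	m = m_devget(rxbuf, pktlen, 0, ifp, NULL);
+ *
+ * where rxbuf, pktlen and ifp stand in for the driver's buffer,
+ * packet length and interface; a NULL copy function makes m_devget
+ * fall back to bcopy().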
+ */ +struct mbuf * +m_devget(char *buf, int totlen, int off0, struct ifnet *ifp, + void (*copy)(char *from, caddr_t to, u_int len)) +{ + struct mbuf *m; + struct mbuf *top = NULL, **mp = ⊤ + int len; + int off = off0; + char *cp; + char *epkt; + + cp = buf; + epkt = cp + totlen; + if (off) { + cp += off + 2 * sizeof(u_short); + totlen -= 2 * sizeof(u_short); + } + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m == 0) + return (0); + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = totlen; + m->m_len = MHLEN; + + while (totlen > 0) { + if (top) { + MGET(m, M_DONTWAIT, MT_DATA); + if (m == 0) { + m_freem(top); + return (0); + } + m->m_len = MLEN; + } + len = min(totlen, epkt - cp); + if (len >= MINCLSIZE) { + MCLGET(m, M_DONTWAIT); + if (m->m_flags & M_EXT) + m->m_len = len = min(len, MCLBYTES); + else + len = m->m_len; + } else { + /* + * Place initial small packet/header at end of mbuf. + */ + if (len < m->m_len) { + if (top == 0 && len + max_linkhdr <= m->m_len) + m->m_data += max_linkhdr; + m->m_len = len; + } else + len = m->m_len; + } + if (copy) + copy(cp, mtod(m, caddr_t), (u_int)len); + else + bcopy(cp, mtod(m, caddr_t), (u_int)len); + cp += len; + *mp = m; + mp = &m->m_next; + totlen -= len; + if (cp == epkt) + cp = buf; + } + return (top); +} + +/* + * Copy data from a buffer back into the indicated mbuf chain, + * starting "off" bytes from the beginning, extending the mbuf + * chain if necessary. + */ +int +m_copyback(struct mbuf *m0, int off, int len, caddr_t cp) +{ + int mlen; + struct mbuf *m = m0, *n; + int totlen = 0; + + if (m0 == NULL) + return 0; + while (off > (mlen = m->m_len)) { + off -= mlen; + totlen += mlen; + if (m->m_next == NULL) { + n = m_getclr(M_DONTWAIT, m->m_type); + if (n == NULL) { + /*panic("m_copyback() : malformed chain\n");*/ + return -1; + } + n->m_len = min(MLEN, len + off); + m->m_next = n; + } + m = m->m_next; + } + while (len > 0) { + mlen = min (m->m_len - off, len); + bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen); + cp += mlen; + len -= mlen; + mlen += off; + off = 0; + totlen += mlen; + if (len == 0) { + /* m->m_len = mlen; */ + break; + } + if (m->m_next == NULL) { + n = m_get(M_DONTWAIT, m->m_type); + if (n == 0) { + /*panic("m_copyback() : malformed chain 2\n");*/ + return -1; + }; + n->m_len = min(MLEN, len); + m->m_next = n; + } + /* m->m_len = mlen; */ + m = m->m_next; + } +/*out:*/ + if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) + m->m_pkthdr.len = totlen; + return 0; +} diff --git a/cpukit/libnetworking/kern/uipc_socket.c b/cpukit/libnetworking/kern/uipc_socket.c new file mode 100644 index 0000000000..47fe7194fd --- /dev/null +++ b/cpukit/libnetworking/kern/uipc_socket.c @@ -0,0 +1,1104 @@ +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 + * $Id$ + */ + +#include <sys/param.h> +#include <sys/queue.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/domain.h> +#include <sys/kernel.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/resourcevar.h> +#include <sys/signalvar.h> +#include <sys/sysctl.h> +#include <limits.h> + +static int somaxconn = SOMAXCONN; +SYSCTL_INT(_kern, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, ""); + +/* + * Socket operation routines. + * These routines are called by the routines in + * sys_socket.c or from a system process, and + * implement the semantics of socket operations by + * switching out to the protocol specific routines. + */ +/*ARGSUSED*/ +int +socreate(int dom, struct socket **aso, int type, int proto, + struct proc *p) +{ + register struct protosw *prp; + register struct socket *so; + register int error; + + if (proto) + prp = pffindproto(dom, proto, type); + else + prp = pffindtype(dom, type); + if (prp == 0 || prp->pr_usrreqs == 0) + return (EPROTONOSUPPORT); + if (prp->pr_type != type) + return (EPROTOTYPE); + MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); + bzero((caddr_t)so, sizeof(*so)); + TAILQ_INIT(&so->so_incomp); + TAILQ_INIT(&so->so_comp); + so->so_type = type; + so->so_state = SS_PRIV; + so->so_uid = 0; + so->so_proto = prp; + error = (*prp->pr_usrreqs->pru_attach)(so, proto); + if (error) { + so->so_state |= SS_NOFDREF; + sofree(so); + return (error); + } + *aso = so; + return (0); +} + +int +sobind(struct socket *so, struct mbuf *nam) +{ + int s = splnet(); + int error; + + error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam); + splx(s); + return (error); +} + +int +solisten(struct socket *so, int backlog) +{ + int s = splnet(), error; + + error = (*so->so_proto->pr_usrreqs->pru_listen)(so); + if (error) { + splx(s); + return (error); + } + if (so->so_comp.tqh_first == NULL) + so->so_options |= SO_ACCEPTCONN; + if (backlog < 0 || backlog > somaxconn) + backlog = somaxconn; + so->so_qlimit = backlog; + splx(s); + return (0); +} + +void +sofree(struct socket *so) +{ + struct socket *head = so->so_head; + + if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) + return; + if (head != NULL) { + if (so->so_state & SS_INCOMP) { + TAILQ_REMOVE(&head->so_incomp, so, so_list); + head->so_incqlen--; + } else if (so->so_state & SS_COMP) { + TAILQ_REMOVE(&head->so_comp, so, so_list); + } else { + panic("sofree: not queued"); + } + head->so_qlen--; + so->so_state &= ~(SS_INCOMP|SS_COMP); + so->so_head = NULL; + } + sbrelease(&so->so_snd); + sorflush(so); + FREE(so, M_SOCKET); +} + +/* + * Close a socket on last file 
table reference removal. + * Initiate disconnect if connected. + * Free socket when disconnect complete. + */ +int +soclose(struct socket *so) +{ + int s = splnet(); /* conservative */ + int error = 0; + + if (so->so_options & SO_ACCEPTCONN) { + struct socket *sp, *sonext; + + for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { + sonext = sp->so_list.tqe_next; + (void) soabort(sp); + } + for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { + sonext = sp->so_list.tqe_next; + (void) soabort(sp); + } + } + if (so->so_pcb == 0) + goto discard; + if (so->so_state & SS_ISCONNECTED) { + if ((so->so_state & SS_ISDISCONNECTING) == 0) { + error = sodisconnect(so); + if (error) + goto drop; + } + if (so->so_options & SO_LINGER) { + if ((so->so_state & SS_ISDISCONNECTING) && + (so->so_state & SS_NBIO)) + goto drop; + while (so->so_state & SS_ISCONNECTED) { + soconnsleep (so); + } + } + } +drop: + if (so->so_pcb) { + int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); + if (error == 0) + error = error2; + } +discard: + if (so->so_state & SS_NOFDREF) + panic("soclose: NOFDREF"); + so->so_state |= SS_NOFDREF; + sofree(so); + splx(s); + return (error); +} + +/* + * Must be called at splnet... + */ +int +soabort(struct socket *so) +{ + + return (*so->so_proto->pr_usrreqs->pru_abort)(so); +} + +int +soaccept(struct socket *so, struct mbuf *nam) +{ + int s = splnet(); + int error; + + if ((so->so_state & SS_NOFDREF) == 0) + panic("soaccept: !NOFDREF"); + so->so_state &= ~SS_NOFDREF; + error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); + splx(s); + return (error); +} + +int +soconnect(struct socket *so, struct mbuf *nam) +{ + int s; + int error; + + if (so->so_options & SO_ACCEPTCONN) + return (EOPNOTSUPP); + s = splnet(); + /* + * If protocol is connection-based, can only connect once. + * Otherwise, if connected, try to disconnect first. + * This allows user to disconnect by connecting to, e.g., + * a null address. + */ + if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && + ((so->so_proto->pr_flags & PR_CONNREQUIRED) || + (error = sodisconnect(so)))) + error = EISCONN; + else + error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam); + splx(s); + return (error); +} + +int +soconnect2(struct socket *so1,struct socket *so2) +{ + int s = splnet(); + int error; + + error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); + splx(s); + return (error); +} + +int +sodisconnect(struct socket *so) +{ + int s = splnet(); + int error; + + if ((so->so_state & SS_ISCONNECTED) == 0) { + error = ENOTCONN; + goto bad; + } + if (so->so_state & SS_ISDISCONNECTING) { + error = EALREADY; + goto bad; + } + error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); +bad: + splx(s); + return (error); +} + +#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) +/* + * Send on a socket. + * If send must go all at once and message is larger than + * send buffering, then hard error. + * Lock against other senders. + * If must go all at once and not enough room now, then + * inform user that this would block and do nothing. + * Otherwise, if nonblocking, send as much as possible. + * The data to be sent is described by "uio" if nonzero, + * otherwise by the mbuf chain "top" (which must be null + * if uio is not). Data provided in mbuf chain must be small + * enough to send all at once. + * + * Returns nonzero on error, timeout or signal; callers + * must check for short counts if EINTR/ERESTART are returned. + * Data and control buffers are freed on return. 
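+ *
+ * A typical (hypothetical) in-kernel datagram send would be
+ *
+ *	error = sosend(so, nam, NULL, top, NULL, 0);
+ *
+ * with the destination address in the MT_SONAME mbuf "nam" and the
+ * payload prepackaged in the chain "top"; a stream write instead
+ * passes a uio describing the user data and a null "top".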
+ */ +int +sosend(struct socket *so, struct mbuf *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags) +{ + struct mbuf **mp; + register struct mbuf *m; + register long space, len, resid; + int clen = 0, error, s, dontroute, mlen; + int atomic = sosendallatonce(so) || top; + + if (uio) + resid = uio->uio_resid; + else + resid = top->m_pkthdr.len; + /* + * In theory resid should be unsigned. + * However, space must be signed, as it might be less than 0 + * if we over-committed, and we must use a signed comparison + * of space and resid. On the other hand, a negative resid + * causes us to loop sending 0-length segments to the protocol. + * + * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM + * type sockets since that's an error. + */ + if ((resid < 0) || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { + error = EINVAL; + goto out; + } + + dontroute = + (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && + (so->so_proto->pr_flags & PR_ATOMIC); + if (control) + clen = control->m_len; +#define snderr(errno) { error = errno; splx(s); goto release; } + +restart: + error = sblock(&so->so_snd, SBLOCKWAIT(flags)); + if (error) + goto out; + do { + s = splnet(); + if (so->so_state & SS_CANTSENDMORE) + snderr(EPIPE); + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + splx(s); + goto release; + } + if ((so->so_state & SS_ISCONNECTED) == 0) { + /* + * `sendto' and `sendmsg' is allowed on a connection- + * based socket if it supports implied connect. + * Return ENOTCONN if not connected and no address is + * supplied. + */ + if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && + (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { + if ((so->so_state & SS_ISCONFIRMING) == 0 && + !(resid == 0 && clen != 0)) + snderr(ENOTCONN); + } else if (addr == 0) + snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? + ENOTCONN : EDESTADDRREQ); + } + space = sbspace(&so->so_snd); + if (flags & MSG_OOB) + space += 1024; + if ((atomic && resid > so->so_snd.sb_hiwat) || + clen > so->so_snd.sb_hiwat) + snderr(EMSGSIZE); + if (space < resid + clen && uio && + (atomic || space < so->so_snd.sb_lowat || space < clen)) { + if (so->so_state & SS_NBIO) + snderr(EWOULDBLOCK); + sbunlock(&so->so_snd); + error = sbwait(&so->so_snd); + splx(s); + if (error) + goto out; + goto restart; + } + splx(s); + mp = ⊤ + space -= clen; + do { + if (uio == NULL) { + /* + * Data is prepackaged in "top". + */ + resid = 0; + if (flags & MSG_EOR) + top->m_flags |= M_EOR; + } else do { + if (top == 0) { + MGETHDR(m, M_WAIT, MT_DATA); + mlen = MHLEN; + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; + } else { + MGET(m, M_WAIT, MT_DATA); + mlen = MLEN; + } + if (resid >= MINCLSIZE) { + MCLGET(m, M_WAIT); + if ((m->m_flags & M_EXT) == 0) + goto nopages; + mlen = MCLBYTES; + len = min(min(mlen, resid), space); + } else { +nopages: + len = min(min(mlen, resid), space); + /* + * For datagram protocols, leave room + * for protocol headers in first mbuf. + */ + if (atomic && top == 0 && len < mlen) + MH_ALIGN(m, len); + } + space -= len; + error = uiomove(mtod(m, caddr_t), (int)len, uio); + resid = uio->uio_resid; + m->m_len = len; + *mp = m; + top->m_pkthdr.len += len; + if (error) + goto release; + mp = &m->m_next; + if (resid <= 0) { + if (flags & MSG_EOR) + top->m_flags |= M_EOR; + break; + } + } while (space > 0 && atomic); + if (dontroute) + so->so_options |= SO_DONTROUTE; + s = splnet(); /* XXX */ + error = (*so->so_proto->pr_usrreqs->pru_send)(so, + (flags & MSG_OOB) ? 
PRUS_OOB : + /* + * If the user set MSG_EOF, the protocol + * understands this flag and nothing left to + * send then use PRU_SEND_EOF instead of PRU_SEND. + */ + ((flags & MSG_EOF) && + (so->so_proto->pr_flags & PR_IMPLOPCL) && + (resid <= 0)) ? + PRUS_EOF : 0, + top, addr, control); + splx(s); + if (dontroute) + so->so_options &= ~SO_DONTROUTE; + clen = 0; + control = 0; + top = 0; + mp = ⊤ + if (error) + goto release; + } while (resid && space > 0); + } while (resid); + +release: + sbunlock(&so->so_snd); +out: + if (top) + m_freem(top); + if (control) + m_freem(control); + return (error); +} + +/* + * Implement receive operations on a socket. + * We depend on the way that records are added to the sockbuf + * by sbappend*. In particular, each record (mbufs linked through m_next) + * must begin with an address if the protocol so specifies, + * followed by an optional mbuf or mbufs containing ancillary data, + * and then zero or more mbufs of data. + * In order to avoid blocking network interrupts for the entire time here, + * we splx() while doing the actual copy to user space. + * Although the sockbuf is locked, new data may still be appended, + * and thus we must maintain consistency of the sockbuf during that time. + * + * The caller may receive the data as a single mbuf chain by supplying + * an mbuf **mp0 for use in returning the chain. The uio is then used + * only for the count in uio_resid. + */ +int +soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, + struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +{ + register struct mbuf *m, **mp; + register int flags, len, error, s, offset; + struct protosw *pr = so->so_proto; + struct mbuf *nextrecord; + int moff, type = 0; + int orig_resid = uio->uio_resid; + + mp = mp0; + if (paddr) + *paddr = 0; + if (controlp) + *controlp = 0; + if (flagsp) + flags = *flagsp &~ MSG_EOR; + else + flags = 0; + if (flags & MSG_OOB) { + m = m_get(M_WAIT, MT_DATA); + error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); + if (error) + goto bad; + do { + error = uiomove(mtod(m, caddr_t), + (int) min(uio->uio_resid, m->m_len), uio); + m = m_free(m); + } while (uio->uio_resid && error == 0 && m); +bad: + if (m) + m_freem(m); + return (error); + } + if (mp) + *mp = (struct mbuf *)0; + if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) + (*pr->pr_usrreqs->pru_rcvd)(so, 0); + +restart: + error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); + if (error) + return (error); + s = splnet(); + + m = so->so_rcv.sb_mb; + /* + * If we have less data than requested, block awaiting more + * (subject to any timeout) if: + * 1. the current count is less than the low water mark, or + * 2. MSG_WAITALL is set, and it is possible to do the entire + * receive operation at once if we block (resid <= hiwat). + * 3. MSG_DONTWAIT is not set + * If MSG_WAITALL is set but resid is larger than the receive buffer, + * we have to do the receive in sections, and thus risk returning + * a short count if a timeout or signal occurs after we start. 
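+	 * For instance (hypothetical case): with MSG_WAITALL set, a 4 KB
+	 * request against a 16 KB receive buffer blocks until all 4 KB
+	 * have arrived, whereas a 64 KB request must be satisfied in
+	 * sections and may legitimately return a short count.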
+ */ + if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && + so->so_rcv.sb_cc < uio->uio_resid) && + (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || + ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && + m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { +#ifdef DIAGNOSTIC + if (m == 0 && so->so_rcv.sb_cc) + panic("receive 1"); +#endif + if (so->so_error) { + if (m) + goto dontblock; + error = so->so_error; + if ((flags & MSG_PEEK) == 0) + so->so_error = 0; + goto release; + } + if (so->so_state & SS_CANTRCVMORE) { + if (m) + goto dontblock; + else + goto release; + } + for (; m; m = m->m_next) + if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { + m = so->so_rcv.sb_mb; + goto dontblock; + } + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && + (so->so_proto->pr_flags & PR_CONNREQUIRED)) { + error = ENOTCONN; + goto release; + } + if (uio->uio_resid == 0) + goto release; + if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { + error = EWOULDBLOCK; + goto release; + } + sbunlock(&so->so_rcv); + error = sbwait(&so->so_rcv); + splx(s); + if (error) + return (error); + goto restart; + } +dontblock: + nextrecord = m->m_nextpkt; + if (pr->pr_flags & PR_ADDR) { +#ifdef DIAGNOSTIC + if (m->m_type != MT_SONAME) + panic("receive 1a"); +#endif + orig_resid = 0; + if (flags & MSG_PEEK) { + if (paddr) + *paddr = m_copy(m, 0, m->m_len); + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + if (paddr) { + *paddr = m; + so->so_rcv.sb_mb = m->m_next; + m->m_next = 0; + m = so->so_rcv.sb_mb; + } else { + MFREE(m, so->so_rcv.sb_mb); + m = so->so_rcv.sb_mb; + } + } + } + while (m && m->m_type == MT_CONTROL && error == 0) { + if (flags & MSG_PEEK) { + if (controlp) + *controlp = m_copy(m, 0, m->m_len); + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + if (controlp) { + if (pr->pr_domain->dom_externalize && + mtod(m, struct cmsghdr *)->cmsg_type == + SCM_RIGHTS) + error = (*pr->pr_domain->dom_externalize)(m); + *controlp = m; + so->so_rcv.sb_mb = m->m_next; + m->m_next = 0; + m = so->so_rcv.sb_mb; + } else { + MFREE(m, so->so_rcv.sb_mb); + m = so->so_rcv.sb_mb; + } + } + if (controlp) { + orig_resid = 0; + controlp = &(*controlp)->m_next; + } + } + if (m) { + if ((flags & MSG_PEEK) == 0) + m->m_nextpkt = nextrecord; + type = m->m_type; + if (type == MT_OOBDATA) + flags |= MSG_OOB; + } + moff = 0; + offset = 0; + while (m && uio->uio_resid > 0 && error == 0) { + if (m->m_type == MT_OOBDATA) { + if (type != MT_OOBDATA) + break; + } else if (type == MT_OOBDATA) + break; +#ifdef DIAGNOSTIC + else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) + panic("receive 3"); +#endif + so->so_state &= ~SS_RCVATMARK; + len = uio->uio_resid; + if (so->so_oobmark && len > so->so_oobmark - offset) + len = so->so_oobmark - offset; + if (len > m->m_len - moff) + len = m->m_len - moff; + /* + * If mp is set, just pass back the mbufs. + * Otherwise copy them out via the uio, then free. + * Sockbuf must be consistent here (points to current mbuf, + * it points to next record) when we drop priority; + * we must note any additions to the sockbuf when we + * block interrupts again. 
+ */ + if (mp == 0) { + splx(s); + error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); + s = splnet(); + if (error) + goto release; + } else + uio->uio_resid -= len; + if (len == m->m_len - moff) { + if (m->m_flags & M_EOR) + flags |= MSG_EOR; + if (flags & MSG_PEEK) { + m = m->m_next; + moff = 0; + } else { + nextrecord = m->m_nextpkt; + sbfree(&so->so_rcv, m); + if (mp) { + *mp = m; + mp = &m->m_next; + so->so_rcv.sb_mb = m = m->m_next; + *mp = (struct mbuf *)0; + } else { + MFREE(m, so->so_rcv.sb_mb); + m = so->so_rcv.sb_mb; + } + if (m) + m->m_nextpkt = nextrecord; + } + } else { + if (flags & MSG_PEEK) + moff += len; + else { + if (mp) + *mp = m_copym(m, 0, len, M_WAIT); + m->m_data += len; + m->m_len -= len; + so->so_rcv.sb_cc -= len; + } + } + if (so->so_oobmark) { + if ((flags & MSG_PEEK) == 0) { + so->so_oobmark -= len; + if (so->so_oobmark == 0) { + so->so_state |= SS_RCVATMARK; + break; + } + } else { + offset += len; + if (offset == so->so_oobmark) + break; + } + } + if (flags & MSG_EOR) + break; + /* + * If the MSG_WAITALL flag is set (for non-atomic socket), + * we must not quit until "uio->uio_resid == 0" or an error + * termination. If a signal/timeout occurs, return + * with a short count but without error. + * Keep sockbuf locked against other readers. + */ + while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && + !sosendallatonce(so) && !nextrecord) { + if (so->so_error || so->so_state & SS_CANTRCVMORE) + break; + error = sbwait(&so->so_rcv); + if (error) { + sbunlock(&so->so_rcv); + splx(s); + return (0); + } + m = so->so_rcv.sb_mb; + if (m) + nextrecord = m->m_nextpkt; + } + } + + if (m && pr->pr_flags & PR_ATOMIC) { + flags |= MSG_TRUNC; + if ((flags & MSG_PEEK) == 0) + (void) sbdroprecord(&so->so_rcv); + } + if ((flags & MSG_PEEK) == 0) { + if (m == 0) + so->so_rcv.sb_mb = nextrecord; + if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) + (*pr->pr_usrreqs->pru_rcvd)(so, flags); + } + if (orig_resid == uio->uio_resid && orig_resid && + (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { + sbunlock(&so->so_rcv); + splx(s); + goto restart; + } + + if (flagsp) + *flagsp |= flags; +release: + sbunlock(&so->so_rcv); + splx(s); + return (error); +} + +int +soshutdown(struct socket *so, int how ) +{ + register struct protosw *pr = so->so_proto; + + how++; + if (how & FREAD) + sorflush(so); + if (how & FWRITE) + return ((*pr->pr_usrreqs->pru_shutdown)(so)); + return (0); +} + +void +sorflush(struct socket *so) +{ + register struct sockbuf *sb = &so->so_rcv; + register struct protosw *pr = so->so_proto; + register int s; + struct sockbuf asb; + + sb->sb_flags |= SB_NOINTR; + (void) sblock(sb, M_WAITOK); + s = splimp(); + socantrcvmore(so); + sbunlock(sb); + asb = *sb; + bzero((caddr_t)sb, sizeof (*sb)); + splx(s); + if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) + (*pr->pr_domain->dom_dispose)(asb.sb_mb); + sbrelease(&asb); +} + +int +sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) +{ + int error = 0; + register struct mbuf *m = m0; + + if (level != SOL_SOCKET) { + if (so->so_proto && so->so_proto->pr_ctloutput) + return ((*so->so_proto->pr_ctloutput) + (PRCO_SETOPT, so, level, optname, &m0)); + error = ENOPROTOOPT; + } else { + switch (optname) { + + case SO_LINGER: + if (m == NULL || m->m_len != sizeof (struct linger)) { + error = EINVAL; + goto bad; + } + so->so_linger = mtod(m, struct linger *)->l_linger; + /* fall thru... 
*/ + + case SO_DEBUG: + case SO_KEEPALIVE: + case SO_DONTROUTE: + case SO_USELOOPBACK: + case SO_BROADCAST: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_OOBINLINE: + case SO_TIMESTAMP: + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + goto bad; + } + if (*mtod(m, int *)) + so->so_options |= optname; + else + so->so_options &= ~optname; + break; + + case SO_SNDBUF: + case SO_RCVBUF: + case SO_SNDLOWAT: + case SO_RCVLOWAT: + { + int optval; + + if (m == NULL || m->m_len < sizeof (int)) { + error = EINVAL; + goto bad; + } + + /* + * Values < 1 make no sense for any of these + * options, so disallow them. + */ + optval = *mtod(m, int *); + if (optval < 1) { + error = EINVAL; + goto bad; + } + + switch (optname) { + + case SO_SNDBUF: + case SO_RCVBUF: + if (sbreserve(optname == SO_SNDBUF ? + &so->so_snd : &so->so_rcv, + (u_long) optval) == 0) { + error = ENOBUFS; + goto bad; + } + break; + + /* + * Make sure the low-water is never greater than + * the high-water. + */ + case SO_SNDLOWAT: + so->so_snd.sb_lowat = + (optval > so->so_snd.sb_hiwat) ? + so->so_snd.sb_hiwat : optval; + break; + case SO_RCVLOWAT: + so->so_rcv.sb_lowat = + (optval > so->so_rcv.sb_hiwat) ? + so->so_rcv.sb_hiwat : optval; + break; + } + break; + } + + case SO_SNDTIMEO: + case SO_RCVTIMEO: + { + struct timeval *tv; + unsigned long val; + + if (m == NULL || m->m_len < sizeof (*tv)) { + error = EINVAL; + goto bad; + } + tv = mtod(m, struct timeval *); + if (tv->tv_sec >= (ULONG_MAX - hz) / hz) { + error = EDOM; + goto bad; + } + + val = tv->tv_sec * hz + tv->tv_usec / tick; + if ((val == 0) && (tv->tv_sec || tv->tv_usec)) + val = 1; + + switch (optname) { + + case SO_SNDTIMEO: + so->so_snd.sb_timeo = val; + break; + case SO_RCVTIMEO: + so->so_rcv.sb_timeo = val; + break; + } + break; + } + + case SO_PRIVSTATE: + /* we don't care what the parameter is... */ + so->so_state &= ~SS_PRIV; + break; + + case SO_SNDWAKEUP: + case SO_RCVWAKEUP: + { + /* RTEMS addition. */ + struct sockwakeup *sw; + struct sockbuf *sb; + + if (m == NULL + || m->m_len != sizeof (struct sockwakeup)) { + error = EINVAL; + goto bad; + } + sw = mtod(m, struct sockwakeup *); + sb = (optname == SO_SNDWAKEUP + ? 
&so->so_snd + : &so->so_rcv); + sb->sb_wakeup = sw->sw_pfn; + sb->sb_wakeuparg = sw->sw_arg; + if (sw->sw_pfn) + sb->sb_flags |= SB_ASYNC; + else + sb->sb_flags &=~ SB_ASYNC; + break; + } + + default: + error = ENOPROTOOPT; + break; + } + if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { + (void) ((*so->so_proto->pr_ctloutput) + (PRCO_SETOPT, so, level, optname, &m0)); + m = NULL; /* freed by protocol */ + } + } +bad: + if (m) + (void) m_free(m); + return (error); +} + +int +sogetopt(struct socket *so, int level, int optname, struct mbuf **mp) +{ + register struct mbuf *m; + + if (level != SOL_SOCKET) { + if (so->so_proto && so->so_proto->pr_ctloutput) { + return ((*so->so_proto->pr_ctloutput) + (PRCO_GETOPT, so, level, optname, mp)); + } else + return (ENOPROTOOPT); + } else { + m = m_get(M_WAIT, MT_SOOPTS); + m->m_len = sizeof (int); + + switch (optname) { + + case SO_LINGER: + m->m_len = sizeof (struct linger); + mtod(m, struct linger *)->l_onoff = + so->so_options & SO_LINGER; + mtod(m, struct linger *)->l_linger = so->so_linger; + break; + + case SO_USELOOPBACK: + case SO_DONTROUTE: + case SO_DEBUG: + case SO_KEEPALIVE: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_BROADCAST: + case SO_OOBINLINE: + case SO_TIMESTAMP: + *mtod(m, int *) = so->so_options & optname; + break; + + case SO_PRIVSTATE: + *mtod(m, int *) = so->so_state & SS_PRIV; + break; + + case SO_TYPE: + *mtod(m, int *) = so->so_type; + break; + + case SO_ERROR: + *mtod(m, int *) = so->so_error; + so->so_error = 0; + break; + + case SO_SNDBUF: + *mtod(m, int *) = so->so_snd.sb_hiwat; + break; + + case SO_RCVBUF: + *mtod(m, int *) = so->so_rcv.sb_hiwat; + break; + + case SO_SNDLOWAT: + *mtod(m, int *) = so->so_snd.sb_lowat; + break; + + case SO_RCVLOWAT: + *mtod(m, int *) = so->so_rcv.sb_lowat; + break; + + case SO_SNDTIMEO: + case SO_RCVTIMEO: + { + unsigned long val = (optname == SO_SNDTIMEO ? + so->so_snd.sb_timeo : so->so_rcv.sb_timeo); + + m->m_len = sizeof(struct timeval); + mtod(m, struct timeval *)->tv_sec = val / hz; + mtod(m, struct timeval *)->tv_usec = + (val % hz) * tick; + break; + } + + case SO_SNDWAKEUP: + case SO_RCVWAKEUP: + { + struct sockbuf *sb; + struct sockwakeup *sw; + + /* RTEMS additions. */ + sb = (optname == SO_SNDWAKEUP + ? &so->so_snd + : &so->so_rcv); + m->m_len = sizeof (struct sockwakeup); + sw = mtod(m, struct sockwakeup *); + sw->sw_pfn = sb->sb_wakeup; + sw->sw_arg = sb->sb_wakeuparg; + break; + } + + default: + (void)m_free(m); + return (ENOPROTOOPT); + } + *mp = m; + return (0); + } +} + +void +sohasoutofband(struct socket *so) +{ +#if 0 /* FIXME: For now we just ignore out of band data */ + struct proc *p; + + if (so->so_pgid < 0) + gsignal(-so->so_pgid, SIGURG); + else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) + psignal(p, SIGURG); + selwakeup(&so->so_rcv.sb_sel); +#endif +} diff --git a/cpukit/libnetworking/kern/uipc_socket2.c b/cpukit/libnetworking/kern/uipc_socket2.c new file mode 100644 index 0000000000..fa5487fd84 --- /dev/null +++ b/cpukit/libnetworking/kern/uipc_socket2.c @@ -0,0 +1,940 @@ +/* + * This file has undergone several changes to reflect the + * differences between the RTEMS and FreeBSD kernels. + */ + +/* + * Copyright (c) 1982, 1986, 1988, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 + * $Id$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/file.h> +#include <sys/queue.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/protosw.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/signalvar.h> +#include <sys/sysctl.h> + +/* + * Primitive routines for operating on sockets and socket buffers + */ + +u_long sb_max = SB_MAX; /* XXX should be static */ +SYSCTL_INT(_kern, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, ""); + +static u_long sb_efficiency = 8; /* parameter for sbreserve() */ +SYSCTL_INT(_kern, OID_AUTO, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, + 0, ""); + +#if defined(__rtems__) + void rtems_set_sb_efficiency( + u_long efficiency + ) + { + sb_efficiency = (efficiency == 0) ? 2 : efficiency; + } +#endif + +/* + * Procedures to manipulate state flags of socket + * and do appropriate wakeups. Normal sequence from the + * active (originating) side is that soisconnecting() is + * called during processing of connect() call, + * resulting in an eventual call to soisconnected() if/when the + * connection is established. When the connection is torn down + * soisdisconnecting() is called during processing of disconnect() call, + * and soisdisconnected() is called when the connection to the peer + * is totally severed. The semantics of these routines are such that + * connectionless protocols can call soisconnected() and soisdisconnected() + * only, bypassing the in-progress calls when setting up a ``connection'' + * takes no time. + * + * From the passive side, a socket is created with + * two queues of sockets: so_q0 for connections in progress + * and so_q for connections already made and awaiting user acceptance. + * As a protocol is preparing incoming connections, it creates a socket + * structure queued on so_q0 by calling sonewconn(). 
When the connection + * is established, soisconnected() is called, and transfers the + * socket structure to so_q, making it available to accept(). + * + * If a socket is closed with sockets on either + * so_q0 or so_q, these sockets are dropped. + * + * If higher level protocols are implemented in + * the kernel, the wakeups done here will sometimes + * cause software-interrupt process scheduling. + */ + +void +soisconnecting(struct socket *so) +{ + + so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); + so->so_state |= SS_ISCONNECTING; +} + +void +soisconnected(struct socket *so) +{ + register struct socket *head = so->so_head; + + so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); + so->so_state |= SS_ISCONNECTED; + if (head && (so->so_state & SS_INCOMP)) { + TAILQ_REMOVE(&head->so_incomp, so, so_list); + head->so_incqlen--; + so->so_state &= ~SS_INCOMP; + TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); + so->so_state |= SS_COMP; + sorwakeup(head); + soconnwakeup(head); + } else { + soconnwakeup(so); + sorwakeup(so); + sowwakeup(so); + } +} + +void +soisdisconnecting(struct socket *so) +{ + + so->so_state &= ~SS_ISCONNECTING; + so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); + soconnwakeup(so); + sowwakeup(so); + sorwakeup(so); +} + +void +soisdisconnected(struct socket *so) +{ + + so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); + so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); + soconnwakeup(so); + sowwakeup(so); + sorwakeup(so); +} + +/* + * Return a random connection that hasn't been serviced yet and + * is eligible for discard. There is a one in qlen chance that + * we will return a null, saying that there are no dropable + * requests. In this case, the protocol specific code should drop + * the new request. This insures fairness. + * + * This may be used in conjunction with protocol specific queue + * congestion routines. + */ +struct socket * +sodropablereq(struct socket *head) +{ + register struct socket *so; + uint32_t i, j, qlen, m; + + static int rnd; + static long old_mono_secs; + static unsigned int cur_cnt, old_cnt; + + if ((i = (m = rtems_bsdnet_seconds_since_boot()) - old_mono_secs) != 0) { + old_mono_secs = m; + old_cnt = cur_cnt / i; + cur_cnt = 0; + } + + so = TAILQ_FIRST(&head->so_incomp); + if (!so) + return (so); + + qlen = head->so_incqlen; + if (++cur_cnt > qlen || old_cnt > qlen) { + rnd = (314159 * rnd + 66329) & 0xffff; + j = ((qlen + 1) * rnd) >> 16; + + while (j-- && so) + so = TAILQ_NEXT(so, so_list); + } + + return (so); +} + +/* + * When an attempt at a new connection is noted on a socket + * which accepts connections, sonewconn is called. If the + * connection is possible (subject to space constraints, etc.) + * then we allocate a new structure, propoerly linked into the + * data structure of the original socket, and return this. + * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED. + * + * Currently, sonewconn() is defined as sonewconn1() in socketvar.h + * to catch calls that are missing the (new) second parameter. 
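+ *
+ * A protocol's listen/input path might (hypothetically) call
+ *
+ *	so = sonewconn(head, 0);
+ *
+ * to queue an embryonic socket on head's so_incomp, and later call
+ * soisconnected(so) to move it to so_comp, where accept() can find it.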
+ */ +struct socket * +sonewconn1(struct socket *head, int connstatus) +{ + register struct socket *so; + + if (head->so_qlen > 3 * head->so_qlimit / 2) + return ((struct socket *)0); + MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT); + if (so == NULL) + return ((struct socket *)0); + bzero((caddr_t)so, sizeof(*so)); + so->so_head = head; + so->so_type = head->so_type; + so->so_options = head->so_options &~ SO_ACCEPTCONN; + so->so_linger = head->so_linger; + so->so_state = head->so_state | SS_NOFDREF; + so->so_proto = head->so_proto; + so->so_timeo = head->so_timeo; + so->so_pgid = head->so_pgid; + so->so_uid = head->so_uid; + (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); + if (connstatus) { + TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); + so->so_state |= SS_COMP; + } else { + TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); + so->so_state |= SS_INCOMP; + head->so_incqlen++; + } + head->so_qlen++; + if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0)) { + if (so->so_state & SS_COMP) { + TAILQ_REMOVE(&head->so_comp, so, so_list); + } else { + TAILQ_REMOVE(&head->so_incomp, so, so_list); + head->so_incqlen--; + } + head->so_qlen--; + (void) free((caddr_t)so, M_SOCKET); + return ((struct socket *)0); + } + if (connstatus) { + sorwakeup(head); + soconnwakeup(head); + so->so_state |= connstatus; + } + return (so); +} + +/* + * Socantsendmore indicates that no more data will be sent on the + * socket; it would normally be applied to a socket when the user + * informs the system that no more data is to be sent, by the protocol + * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data + * will be received, and will normally be applied to the socket by a + * protocol when it detects that the peer will send no more data. + * Data queued for reading in the socket may yet be read. + */ + +void +socantsendmore(struct socket *so) +{ + + so->so_state |= SS_CANTSENDMORE; + sowwakeup(so); +} + +void +socantrcvmore(struct socket *so) +{ + + so->so_state |= SS_CANTRCVMORE; + sorwakeup(so); +} + +/* + * Socket buffer (struct sockbuf) utility routines. + * + * Each socket contains two socket buffers: one for sending data and + * one for receiving data. Each buffer contains a queue of mbufs, + * information about the number of mbufs and amount of data in the + * queue, and other fields allowing select() statements and notification + * on data availability to be implemented. + * + * Data stored in a socket buffer is maintained as a list of records. + * Each record is a list of mbufs chained together with the m_next + * field. Records are chained together with the m_nextpkt field. The upper + * level routine soreceive() expects the following conventions to be + * observed when placing information in the receive buffer: + * + * 1. If the protocol requires each message be preceded by the sender's + * name, then a record containing that name must be present before + * any associated data (mbuf's must be of type MT_SONAME). + * 2. If the protocol supports the exchange of ``access rights'' (really + * just additional data associated with the message), and there are + * ``rights'' to be received, then a record containing this data + * should be present (mbuf's must be of type MT_RIGHTS). + * 3. If a name or rights record exists, then it must be followed by + * a data record, perhaps of zero length. + * + * Before using a new socket structure it is first necessary to reserve + * buffer space to the socket, by calling sbreserve(). 
This should commit + * some of the available buffer space in the system buffer pool for the + * socket (currently, it does nothing but enforce limits). The space + * should be released by calling sbrelease() when the socket is destroyed. + */ + +int +soreserve(struct socket *so, u_long sndcc, u_long rcvcc) +{ + + if (sbreserve(&so->so_snd, sndcc) == 0) + goto bad; + if (sbreserve(&so->so_rcv, rcvcc) == 0) + goto bad2; + if (so->so_rcv.sb_lowat == 0) + so->so_rcv.sb_lowat = 1; + if (so->so_snd.sb_lowat == 0) + so->so_snd.sb_lowat = MCLBYTES; + if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) + so->so_snd.sb_lowat = so->so_snd.sb_hiwat; + return (0); +bad2: + sbrelease(&so->so_snd); +bad: + return (ENOBUFS); +} + +/* + * Allot mbufs to a sockbuf. + * Attempt to scale mbmax so that mbcnt doesn't become limiting + * if buffering efficiency is near the normal case. + */ +int +sbreserve(struct sockbuf *sb, u_long cc) +{ + + if (cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES)) + return (0); + sb->sb_hiwat = cc; + sb->sb_mbmax = min(cc * sb_efficiency, sb_max); + if (sb->sb_lowat > sb->sb_hiwat) + sb->sb_lowat = sb->sb_hiwat; + return (1); +} + +/* + * Free mbufs held by a socket, and reserved mbuf space. + */ +void +sbrelease(struct sockbuf *sb) +{ + + sbflush(sb); + sb->sb_hiwat = sb->sb_mbmax = 0; +} + +/* + * Routines to add and remove + * data from an mbuf queue. + * + * The routines sbappend() or sbappendrecord() are normally called to + * append new mbufs to a socket buffer, after checking that adequate + * space is available, comparing the function sbspace() with the amount + * of data to be added. sbappendrecord() differs from sbappend() in + * that data supplied is treated as the beginning of a new record. + * To place a sender's address, optional access rights, and data in a + * socket receive buffer, sbappendaddr() should be used. To place + * access rights and data in a socket receive buffer, sbappendrights() + * should be used. In either case, the new data begins a new record. + * Note that unlike sbappend() and sbappendrecord(), these routines check + * for the caller that there will be enough space to store the data. + * Each fails if there is not enough space, or if it cannot find mbufs + * to store additional information in. + * + * Reliable protocols may use the socket send buffer to hold data + * awaiting acknowledgement. Data is normally copied from a socket + * send buffer in a protocol with m_copy for output to a peer, + * and then removing the data from the socket buffer with sbdrop() + * or sbdroprecord() when the data is acknowledged by the peer. + */ + +/* + * Append mbuf chain m to the last record in the + * socket buffer sb. The additional space associated + * the mbuf chain is recorded in sb. Empty mbufs are + * discarded and mbufs are compacted where possible. + */ +void +sbappend(struct sockbuf *sb, struct mbuf *m) +{ + register struct mbuf *n; + + if (m == 0) + return; + n = sb->sb_mb; + if (n) { + while (n->m_nextpkt) + n = n->m_nextpkt; + do { + if (n->m_flags & M_EOR) { + sbappendrecord(sb, m); /* XXXXXX!!!! 
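+				 * The last record is already terminated
+				 * (M_EOR), so the new data must begin a
+				 * record of its own.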
*/ + return; + } + } while (n->m_next && (n = n->m_next)); + } + sbcompress(sb, m, n); +} + +#ifdef SOCKBUF_DEBUG +void +sbcheck(struct sockbuf *sb) +{ + register struct mbuf *m; + register int len = 0, mbcnt = 0; + + for (m = sb->sb_mb; m; m = m->m_next) { + len += m->m_len; + mbcnt += MSIZE; + if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ + mbcnt += m->m_ext.ext_size; + if (m->m_nextpkt) + panic("sbcheck nextpkt"); + } + if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { + printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc, + mbcnt, sb->sb_mbcnt); + panic("sbcheck"); + } +} +#endif + +/* + * As above, except the mbuf chain + * begins a new record. + */ +void +sbappendrecord(struct sockbuf *sb, struct mbuf *m0) +{ + register struct mbuf *m; + + if (m0 == 0) + return; + m = sb->sb_mb; + if (m) + while (m->m_nextpkt) + m = m->m_nextpkt; + /* + * Put the first mbuf on the queue. + * Note this permits zero length records. + */ + sballoc(sb, m0); + if (m) + m->m_nextpkt = m0; + else + sb->sb_mb = m0; + m = m0->m_next; + m0->m_next = 0; + if (m && (m0->m_flags & M_EOR)) { + m0->m_flags &= ~M_EOR; + m->m_flags |= M_EOR; + } + sbcompress(sb, m, m0); +} + +/* + * As above except that OOB data + * is inserted at the beginning of the sockbuf, + * but after any other OOB data. + */ +void +sbinsertoob(struct sockbuf *sb, struct mbuf *m0) +{ + register struct mbuf *m; + register struct mbuf **mp; + + if (m0 == 0) + return; + for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) { + m = *mp; + again: + switch (m->m_type) { + + case MT_OOBDATA: + continue; /* WANT next train */ + + case MT_CONTROL: + m = m->m_next; + if (m) + goto again; /* inspect THIS train further */ + } + break; + } + /* + * Put the first mbuf on the queue. + * Note this permits zero length records. + */ + sballoc(sb, m0); + m0->m_nextpkt = *mp; + *mp = m0; + m = m0->m_next; + m0->m_next = 0; + if (m && (m0->m_flags & M_EOR)) { + m0->m_flags &= ~M_EOR; + m->m_flags |= M_EOR; + } + sbcompress(sb, m, m0); +} + +/* + * Append address and data, and optionally, control (ancillary) data + * to the receive queue of a socket. If present, + * m0 must include a packet header with total length. + * Returns 0 if no space in sockbuf or insufficient mbufs. 
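+ *
+ * A sketch of the usual calling pattern from a datagram input routine
+ * (illustrative names, not quoted from any particular protocol); note
+ * that on failure the caller must free the mbufs itself:
+ *
+ *	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&from,
+ *	    m, opts) == 0)
+ *		m_freem(m);
+ *	else
+ *		sorwakeup(so);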
+ */ +int +sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control) +{ + register struct mbuf *m, *n; + int space = asa->sa_len; + +if (m0 && (m0->m_flags & M_PKTHDR) == 0) +panic("sbappendaddr"); + if (m0) + space += m0->m_pkthdr.len; + for (n = control; n; n = n->m_next) { + space += n->m_len; + if (n->m_next == 0) /* keep pointer to last control buf */ + break; + } + if (space > sbspace(sb)) + return (0); + if (asa->sa_len > MLEN) + return (0); + MGET(m, M_DONTWAIT, MT_SONAME); + if (m == 0) + return (0); + m->m_len = asa->sa_len; + bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len); + if (n) + n->m_next = m0; /* concatenate data to control */ + else + control = m0; + m->m_next = control; + for (n = m; n; n = n->m_next) + sballoc(sb, n); + n = sb->sb_mb; + if (n) { + while (n->m_nextpkt) + n = n->m_nextpkt; + n->m_nextpkt = m; + } else + sb->sb_mb = m; + return (1); +} + +int +sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, + struct mbuf *control) +{ + register struct mbuf *m, *n; + int space = 0; + + if (control == 0) + panic("sbappendcontrol"); + for (m = control; ; m = m->m_next) { + space += m->m_len; + if (m->m_next == 0) + break; + } + n = m; /* save pointer to last control buffer */ + for (m = m0; m; m = m->m_next) + space += m->m_len; + if (space > sbspace(sb)) + return (0); + n->m_next = m0; /* concatenate data to control */ + for (m = control; m; m = m->m_next) + sballoc(sb, m); + n = sb->sb_mb; + if (n) { + while (n->m_nextpkt) + n = n->m_nextpkt; + n->m_nextpkt = control; + } else + sb->sb_mb = control; + return (1); +} + +/* + * Compress mbuf chain m into the socket + * buffer sb following mbuf n. If n + * is null, the buffer is presumed empty. + */ +void +sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) +{ + register int eor = 0; + register struct mbuf *o; + + while (m) { + eor |= m->m_flags & M_EOR; + if (m->m_len == 0 && + (eor == 0 || + (((o = m->m_next) || (o = n)) && + o->m_type == m->m_type))) { + m = m_free(m); + continue; + } + if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 && + (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] && + n->m_type == m->m_type) { + bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, + (unsigned)m->m_len); + n->m_len += m->m_len; + sb->sb_cc += m->m_len; + m = m_free(m); + continue; + } + if (n) + n->m_next = m; + else + sb->sb_mb = m; + sballoc(sb, m); + n = m; + m->m_flags &= ~M_EOR; + m = m->m_next; + n->m_next = 0; + } + if (eor) { + if (n) + n->m_flags |= eor; + else + printf("semi-panic: sbcompress\n"); + } +} + +/* + * Free all mbufs in a sockbuf. + * Check that all resources are reclaimed. + */ +void +sbflush(struct sockbuf *sb) +{ + + if (sb->sb_flags & SB_LOCK) + panic("sbflush"); + while (sb->sb_mbcnt) + sbdrop(sb, (int)sb->sb_cc); + if (sb->sb_cc || sb->sb_mb) + panic("sbflush 2"); +} + +/* + * Drop data from (the front of) a sockbuf. + */ +void +sbdrop(struct sockbuf *sb, int len) +{ + register struct mbuf *m, *mn; + struct mbuf *next; + + next = (m = sb->sb_mb) ? 
m->m_nextpkt : 0; + while (len > 0) { + if (m == 0) { + if (next == 0) + panic("sbdrop"); + m = next; + next = m->m_nextpkt; + continue; + } + if (m->m_len > len) { + m->m_len -= len; + m->m_data += len; + sb->sb_cc -= len; + break; + } + len -= m->m_len; + sbfree(sb, m); + MFREE(m, mn); + m = mn; + } + while (m && m->m_len == 0) { + sbfree(sb, m); + MFREE(m, mn); + m = mn; + } + if (m) { + sb->sb_mb = m; + m->m_nextpkt = next; + } else + sb->sb_mb = next; +} + +/* + * Drop a record off the front of a sockbuf + * and move the next record to the front. + */ +void +sbdroprecord(struct sockbuf *sb) +{ + register struct mbuf *m, *mn; + + m = sb->sb_mb; + if (m) { + sb->sb_mb = m->m_nextpkt; + do { + sbfree(sb, m); + MFREE(m, mn); + m = mn; + } while (m); + } +} + +/* + * Create a "control" mbuf containing the specified data + * with the specified type for presentation on a socket buffer. + */ +struct mbuf * +sbcreatecontrol(caddr_t p, int size, int type, int level) +{ + register struct cmsghdr *cp; + struct mbuf *m; + + if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) + return ((struct mbuf *) NULL); + cp = mtod(m, struct cmsghdr *); + /* XXX check size? */ + (void)memcpy(CMSG_DATA(cp), p, size); + size += sizeof(*cp); + m->m_len = size; + cp->cmsg_len = size; + cp->cmsg_level = level; + cp->cmsg_type = type; + return (m); +} + +#ifdef PRU_OLDSTYLE +/* + * The following routines mediate between the old-style `pr_usrreq' + * protocol implementations and the new-style `struct pr_usrreqs' + * calling convention. + */ + +/* syntactic sugar */ +#define nomb (struct mbuf *)0 + +static int +old_abort(struct socket *so) +{ + return so->so_proto->pr_ousrreq(so, PRU_ABORT, nomb, nomb, nomb); +} + +static int +old_accept(struct socket *so, struct mbuf *nam) +{ + return so->so_proto->pr_ousrreq(so, PRU_ACCEPT, nomb, nam, nomb); +} + +static int +old_attach(struct socket *so, intptr_t proto) +{ + return so->so_proto->pr_ousrreq(so, PRU_ATTACH, nomb, + (struct mbuf *)proto, /* XXX */ + nomb); +} + +static int +old_bind(struct socket *so, struct mbuf *nam) +{ + return so->so_proto->pr_ousrreq(so, PRU_BIND, nomb, nam, nomb); +} + +static int +old_connect(struct socket *so, struct mbuf *nam) +{ + return so->so_proto->pr_ousrreq(so, PRU_CONNECT, nomb, nam, nomb); +} + +static int +old_connect2(struct socket *so1, struct socket *so2) +{ + return so1->so_proto->pr_ousrreq(so1, PRU_CONNECT2, nomb, + (struct mbuf *)so2, nomb); +} + +static int +old_control(struct socket *so, intptr_t cmd, caddr_t data, struct ifnet *ifp) +{ + return so->so_proto->pr_ousrreq(so, PRU_CONTROL, (struct mbuf *)cmd, + (struct mbuf *)data, + (struct mbuf *)ifp); +} + +static int +old_detach(struct socket *so) +{ + return so->so_proto->pr_ousrreq(so, PRU_DETACH, nomb, nomb, nomb); +} + +static int +old_disconnect(struct socket *so) +{ + return so->so_proto->pr_ousrreq(so, PRU_DISCONNECT, nomb, nomb, nomb); +} + +static int +old_listen(struct socket *so) +{ + return so->so_proto->pr_ousrreq(so, PRU_LISTEN, nomb, nomb, nomb); +} + +static int +old_peeraddr(struct socket *so, struct mbuf *nam) +{ + return so->so_proto->pr_ousrreq(so, PRU_PEERADDR, nomb, nam, nomb); +} + +static int +old_rcvd(struct socket *so, intptr_t flags) +{ + return so->so_proto->pr_ousrreq(so, PRU_RCVD, nomb, + (struct mbuf *)flags, /* XXX */ + nomb); +} + +static int +old_rcvoob(struct socket *so, struct mbuf *m, intptr_t flags) +{ + return so->so_proto->pr_ousrreq(so, PRU_RCVOOB, m, + (struct mbuf *)flags, /* XXX */ + nomb); +} + +static int +old_send(struct socket 
*so, int flags, struct mbuf *m, struct mbuf *addr, + struct mbuf *control) +{ + int req; + + if (flags & PRUS_OOB) { + req = PRU_SENDOOB; + } else if(flags & PRUS_EOF) { + req = PRU_SEND_EOF; + } else { + req = PRU_SEND; + } + return so->so_proto->pr_ousrreq(so, req, m, addr, control); +} + +static int +old_sense(struct socket *so, struct stat *sb) +{ + return so->so_proto->pr_ousrreq(so, PRU_SENSE, (struct mbuf *)sb, + nomb, nomb); +} + +static int +old_shutdown(struct socket *so) +{ + return so->so_proto->pr_ousrreq(so, PRU_SHUTDOWN, nomb, nomb, nomb); +} + +static int +old_sockaddr(struct socket *so, struct mbuf *nam) +{ + return so->so_proto->pr_ousrreq(so, PRU_SOCKADDR, nomb, nam, nomb); +} + +struct pr_usrreqs pru_oldstyle = { + old_abort, old_accept, old_attach, old_bind, old_connect, + old_connect2, old_control, old_detach, old_disconnect, + old_listen, old_peeraddr, old_rcvd, old_rcvoob, old_send, + old_sense, old_shutdown, old_sockaddr +}; + +#endif /* PRU_OLDSTYLE */ + +/* + * Some routines that return EOPNOTSUPP for entry points that are not + * supported by a protocol. Fill in as needed. + */ +int +pru_accept_notsupp(struct socket *so, struct mbuf *nam) +{ + return EOPNOTSUPP; +} + +int +pru_connect2_notsupp(struct socket *so1, struct socket *so2) +{ + return EOPNOTSUPP; +} + +int +pru_control_notsupp(struct socket *so, int cmd, caddr_t data, + struct ifnet *ifp) +{ + return EOPNOTSUPP; +} + +int +pru_listen_notsupp(struct socket *so) +{ + return EOPNOTSUPP; +} + +int +pru_rcvd_notsupp(struct socket *so, int flags) +{ + return EOPNOTSUPP; +} + +int +pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) +{ + return EOPNOTSUPP; +} + +/* + * This isn't really a ``null'' operation, but it's the default one + * and doesn't do anything destructive. + */ +int +pru_sense_null(struct socket *so, struct stat *sb) +{ + sb->st_blksize = so->so_snd.sb_hiwat; + return 0; +} |
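+
+#if 0
+/*
+ * Illustrative sketch only (not part of the original sources): a
+ * protocol that does not implement some of these entry points can
+ * plug the stubs above into its usrreq table.  The foo_* handlers
+ * are hypothetical.
+ */
+struct pr_usrreqs foo_usrreqs = {
+	foo_abort, pru_accept_notsupp, foo_attach, foo_bind,
+	foo_connect, pru_connect2_notsupp, pru_control_notsupp,
+	foo_detach, foo_disconnect, pru_listen_notsupp,
+	foo_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp,
+	foo_send, pru_sense_null, foo_shutdown, foo_sockaddr
+};
+#endif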