author     Jennifer Averett <jennifer.averett@oarcorp.com>   2012-11-26 09:47:09 -0600
committer  Jennifer Averett <jennifer.averett@oarcorp.com>   2012-11-26 09:47:09 -0600
commit     0bde19eee050bbdc4511070cf14f48719e400c26 (patch)
tree       d7bf926c193da25600c5612fe25070c5133d9979
parent     Add custom limits.h and timespec.h (diff)
download   rtems-libbsd-0bde19eee050bbdc4511070cf14f48719e400c26.tar.bz2
Switch to a version of select() that is closer to FreeBSD's implementation.
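
The heart of the change is the select()-to-poll() translation imported in
uthread_select.c: the caller's fd_sets are counted, converted into a pollfd
array, handed to __sys_poll(), and the returned revents are folded back into
the fd_sets. Below is a minimal sketch of that mapping, not the committed
code; select_via_poll() and the fixed FD_SETSIZE-sized array are assumptions
made for brevity, and exceptfds handling plus the uthread scheduler sleep
(PS_SELECT_WAIT) are omitted.

#include <poll.h>
#include <sys/select.h>

/*
 * Sketch only: translate an fd_set based request into a single poll() call.
 * The committed __select() also handles exceptfds, sizes the pollfd array
 * dynamically, and sleeps in the uthread scheduler instead of blocking in
 * poll() directly.
 */
static int
select_via_poll(int nfds, fd_set *readfds, fd_set *writefds, int timeout_ms)
{
	struct pollfd pfds[FD_SETSIZE];
	short events;
	int i, n, ready, hits;

	/* Build one pollfd entry per descriptor that is set in either set. */
	for (i = 0, n = 0; i < nfds; i++) {
		events = 0;
		if (readfds != NULL && FD_ISSET(i, readfds))
			events |= POLLRDNORM;
		if (writefds != NULL && FD_ISSET(i, writefds))
			events |= POLLWRNORM;
		if (events != 0) {
			pfds[n].fd = i;
			pfds[n].events = events;
			pfds[n].revents = 0;
			n++;
		}
	}

	ready = poll(pfds, n, timeout_ms);
	if (ready < 0)
		return (-1);

	/* Map revents back into the fd_sets and count ready descriptors. */
	for (i = 0, hits = 0; i < n; i++) {
		if (readfds != NULL && FD_ISSET(pfds[i].fd, readfds)) {
			if (pfds[i].revents &
			    (POLLIN | POLLRDNORM | POLLERR | POLLHUP))
				hits++;
			else
				FD_CLR(pfds[i].fd, readfds);
		}
		if (writefds != NULL && FD_ISSET(pfds[i].fd, writefds)) {
			if (pfds[i].revents &
			    (POLLOUT | POLLWRNORM | POLLERR | POLLHUP))
				hits++;
			else
				FD_CLR(pfds[i].fd, writefds);
		}
	}
	return (hits);
}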
-rw-r--r--   Makefile                                                 |    5
-rwxr-xr-x   freebsd-to-rtems.py                                      |   19
-rw-r--r--   freebsd-userspace/Makefile                               |   14
-rw-r--r--   freebsd-userspace/commands/sbin/ping/ping.c              |    4
-rw-r--r--   freebsd-userspace/lib/libc_r/uthread/uthread_kern.c      | 1157
-rw-r--r--   freebsd-userspace/lib/libc_r/uthread/uthread_select.c    |  240
-rw-r--r--   freebsd-userspace/rtems/include/pthread_private.h        |  158
-rw-r--r--   freebsd-userspace/rtems/rtems-get_curthread.c            |   72
-rw-r--r--   freebsd-userspace/rtems/rtems-syspoll.c                  |   30
-rw-r--r--   freebsd-userspace/rtems/rtems-uthread_init.c             |    8
-rw-r--r--   freebsd/kern/kern_condvar.c                              |  455
-rw-r--r--   freebsd/kern/kern_descrip.c                              | 6912
-rw-r--r--   freebsd/kern/kern_mtxpool.c                              |  220
-rw-r--r--   freebsd/kern/kern_subr.c                                 |    2
-rw-r--r--   freebsd/kern/kern_time.c                                 |    4
-rw-r--r--   freebsd/kern/sys_generic.c                               | 1665
-rw-r--r--   freebsd/sys/buf.h                                        |  526
-rw-r--r--   freebsd/sys/mqueue.h                                     |   45
-rw-r--r--   freebsd/sys/proc.h                                       |    7
-rw-r--r--   freebsd/sys/tty.h                                        |  217
-rw-r--r--   freebsd/sys/ttydevsw.h                                   |  169
-rw-r--r--   freebsd/sys/ttydisc.h                                    |   86
-rw-r--r--   freebsd/sys/ttyhook.h                                    |  147
-rw-r--r--   freebsd/sys/ttyqueue.h                                   |  178
-rw-r--r--   freebsd/sys/user.h                                       |  414
-rw-r--r--   rtemsbsd/src/rtems-bsd-condvar.c                         |   17
-rw-r--r--   rtemsbsd/src/rtems-bsd-thread.c                          |  110
27 files changed, 12823 insertions(+), 58 deletions(-)
diff --git a/Makefile b/Makefile
index 3ac26da2..8a58fd8e 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,6 @@ CFLAGS += -I freebsd/$(RTEMS_CPU)/include
CFLAGS += -I contrib/altq
CFLAGS += -I contrib/pf
CFLAGS += -I copied/rtemsbsd/$(RTEMS_CPU)/include
-CFLAGS += -g
CFLAGS += -w
CFLAGS += -std=gnu99
CFLAGS += -MT $@ -MD -MP -MF $(basename $@).d
@@ -38,7 +37,6 @@ C_FILES += rtemsbsd/src/rtems-bsd-lock.c
C_FILES += rtemsbsd/src/rtems-bsd-log.c
C_FILES += rtemsbsd/src/rtems-bsd-sx.c
C_FILES += rtemsbsd/src/rtems-bsd-rwlock.c
-C_FILES += rtemsbsd/src/rtems-bsd-generic.c
C_FILES += rtemsbsd/src/rtems-bsd-page.c
C_FILES += rtemsbsd/src/rtems-bsd-panic.c
C_FILES += rtemsbsd/src/rtems-bsd-synch.c
@@ -406,6 +404,9 @@ C_FILES += freebsd/netatalk/ddp_pcb.c
C_FILES += freebsd/netatalk/ddp_usrreq.c
C_FILES += freebsd/netatalk/at_proto.c
C_FILES += freebsd/netatalk/ddp_output.c
+C_FILES += freebsd/kern/sys_generic.c
+C_FILES += freebsd/kern/kern_descrip.c
+C_FILES += freebsd/kern/kern_mtxpool.c
ifeq ($(RTEMS_CPU), i386)
C_FILES += freebsd/i386/pci/pci_bus.c
C_FILES += freebsd/i386/i386/legacy.c
diff --git a/freebsd-to-rtems.py b/freebsd-to-rtems.py
index 9fb8788c..81b1b289 100755
--- a/freebsd-to-rtems.py
+++ b/freebsd-to-rtems.py
@@ -659,7 +659,7 @@ rtems.addRTEMSSourceFiles(
'src/rtems-bsd-log.c',
'src/rtems-bsd-sx.c',
'src/rtems-bsd-rwlock.c',
- 'src/rtems-bsd-generic.c',
+ #'src/rtems-bsd-generic.c',
'src/rtems-bsd-page.c',
'src/rtems-bsd-panic.c',
'src/rtems-bsd-synch.c',
@@ -711,13 +711,13 @@ rtems.addEmptyHeaderFiles(
'sys/cpuset.h',
'sys/exec.h',
'sys/fail.h',
- 'sys/limits.h',
+ #'sys/limits.h',
'sys/sleepqueue.h',
'sys/namei.h',
'sys/_pthreadtypes.h',
#'sys/resourcevar.h',
'sys/sched.h',
- 'sys/select.h',
+ #'sys/select.h',
'sys/syscallsubr.h',
'sys/sysent.h',
'sys/syslimits.h',
@@ -725,7 +725,7 @@ rtems.addEmptyHeaderFiles(
'sys/stat.h',
#'sys/time.h',
'time.h',
- 'sys/timespec.h',
+ #'sys/timespec.h',
'sys/_timeval.h',
#'sys/vmmeter.h',
#'sys/vnode.h',
@@ -1346,6 +1346,14 @@ devNic.addHeaderFiles(
'netatalk/ddp_var.h',
'netatalk/phase2.h',
'sys/mman.h',
+ 'sys/buf.h',
+ 'sys/mqueue.h',
+ 'sys/tty.h',
+ 'sys/ttyqueue.h',
+ 'sys/ttydisc.h',
+ 'sys/ttydevsw.h',
+ 'sys/ttyhook.h',
+ 'sys/user.h',
]
)
@@ -1412,6 +1420,9 @@ devNic.addSourceFiles(
'netatalk/ddp_usrreq.c',
'netatalk/at_proto.c',
'netatalk/ddp_output.c',
+ 'kern/sys_generic.c',
+ 'kern/kern_descrip.c',
+ 'kern/kern_mtxpool.c',
]
)
diff --git a/freebsd-userspace/Makefile b/freebsd-userspace/Makefile
index cef8546e..18dde49a 100644
--- a/freebsd-userspace/Makefile
+++ b/freebsd-userspace/Makefile
@@ -160,6 +160,9 @@ C_FILES += lib/libipsec/ipsec_dump_policy.c
C_FILES += lib/libipsec/policy_token.c
C_FILES += lib/libipsec/policy_parse.c
+C_FILES += lib/libc_r/uthread/uthread_select.c
+C_FILES += lib/libc_r/uthread/uthread_kern.c
+
# RTEMS Specific Files
# C_FILES += rtems/rtems-net-setup.c
C_FILES += rtems/syslog.c
@@ -171,6 +174,9 @@ C_FILES += rtems/rtems-uthread_main_np.c
C_FILES += rtems/rtems-uthread_kevent.c
C_FILES += rtems/rtems-uthread_kqueue.c
C_FILES += rtems/rtems-shell.c
+C_FILES += rtems/rtems-syspoll.c
+C_FILES += rtems/rtems-uthread_init.c
+C_FILES += rtems/rtems-get_curthread.c
# ping command sources
C_FILES += commands/sbin/ping/ping.c
@@ -278,14 +284,14 @@ GEN_FILES += commands/sbin/route/keywords.h
# lib/libc/net
GEN_FILES += lib/libc/net/nslexer.c
GEN_FILES += lib/libc/net/nsparser.c
-EXTRA_CLEAN = lib/libc/net/nsparser.i
-EXTRA_CLEAN += lib/libc/net/y.tab.h
+CLEAN_FILES = lib/libc/net/nsparser.i
+CLEAN_FILES += lib/libc/net/y.tab.h
# lib/libipsec
GEN_FILES += lib/libipsec/policy_token.c
GEN_FILES += lib/libipsec/policy_parse.c
-EXTRA_CLEAN += lib/libipsec/policy_parse.i
-EXTRA_CLEAN += lib/libipsec/y.tab.h
+CLEAN_FILES += lib/libipsec/policy_parse.i
+CLEAN_FILES += lib/libipsec/y.tab.h
all: $(LIB)
diff --git a/freebsd-userspace/commands/sbin/ping/ping.c b/freebsd-userspace/commands/sbin/ping/ping.c
index 45162532..34d86325 100644
--- a/freebsd-userspace/commands/sbin/ping/ping.c
+++ b/freebsd-userspace/commands/sbin/ping/ping.c
@@ -106,6 +106,10 @@ __FBSDID("$FreeBSD$");
#include <sysexits.h>
#include <unistd.h>
+#ifdef __rtems__
+#define select __select
+#endif
+
#define INADDR_LEN ((int)sizeof(in_addr_t))
#define TIMEVAL_LEN ((int)sizeof(struct tv32))
#define MASK_LEN (ICMP_MASKLEN - ICMP_MINLEN)
diff --git a/freebsd-userspace/lib/libc_r/uthread/uthread_kern.c b/freebsd-userspace/lib/libc_r/uthread/uthread_kern.c
new file mode 100644
index 00000000..a8c8720d
--- /dev/null
+++ b/freebsd-userspace/lib/libc_r/uthread/uthread_kern.c
@@ -0,0 +1,1157 @@
+/*
+ * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+#include <errno.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#ifdef __rtems__
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/timespec.h>
+#else
+#include <sys/signalvar.h>
+#endif
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#ifndef __rtems__
+#include <sys/syscall.h>
+#endif
+#include <fcntl.h>
+#include <pthread.h>
+#include "pthread_private.h"
+
+#ifdef __rtems__
+#include <rtems.h>
+#endif
+
+/* #define DEBUG_THREAD_KERN */
+#ifdef DEBUG_THREAD_KERN
+#define DBG_MSG stdout_debug
+#else
+#define DBG_MSG(x...)
+#endif
+
+/* Static function prototype definitions: */
+static void
+thread_kern_poll(int wait_reqd);
+
+static void
+dequeue_signals(void);
+
+static inline void
+thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in);
+
+/* Static variables: */
+static int last_tick = 0;
+static int called_from_handler = 0;
+
+#ifndef __rtems__
+/*
+ * This is called when a signal handler finishes and wants to
+ * return to a previous frame.
+ */
+void
+_thread_kern_sched_frame(struct pthread_signal_frame *psf)
+{
+ struct pthread *curthread = _get_curthread();
+
+ /*
+ * Flag the pthread kernel as executing scheduler code
+ * to avoid a signal from interrupting this execution and
+ * corrupting the (soon-to-be) current frame.
+ */
+ _thread_kern_in_sched = 1;
+
+ /* Restore the signal frame: */
+ _thread_sigframe_restore(curthread, psf);
+
+ /* The signal mask was restored; check for any pending signals: */
+ curthread->check_pending = 1;
+
+ /* Switch to the thread scheduler: */
+ ___longjmp(_thread_kern_sched_jb, 1);
+}
+
+
+void
+_thread_kern_sched(ucontext_t *ucp)
+{
+ struct pthread *curthread = _get_curthread();
+
+ /*
+ * Flag the pthread kernel as executing scheduler code
+ * to avoid a scheduler signal from interrupting this
+ * execution and calling the scheduler again.
+ */
+ _thread_kern_in_sched = 1;
+
+ /* Check if this function was called from the signal handler: */
+ if (ucp != NULL) {
+ called_from_handler = 1;
+ DBG_MSG("Entering scheduler due to signal\n");
+ }
+
+ /* Save the state of the current thread: */
+ if (_setjmp(curthread->ctx.jb) != 0) {
+ DBG_MSG("Returned from ___longjmp, thread %p\n",
+ curthread);
+ /*
+ * This point is reached when a longjmp() is called
+ * to restore the state of a thread.
+ *
+ * This is the normal way out of the scheduler.
+ */
+ _thread_kern_in_sched = 0;
+
+ if (curthread->sig_defer_count == 0) {
+ if (((curthread->cancelflags &
+ PTHREAD_AT_CANCEL_POINT) == 0) &&
+ ((curthread->cancelflags &
+ PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
+ /*
+ * Cancellations override signals.
+ *
+ * Stick a cancellation point at the
+ * start of each async-cancellable
+ * thread's resumption.
+ *
+ * We allow threads woken at cancel
+ * points to do their own checks.
+ */
+ pthread_testcancel();
+ }
+
+ if (_sched_switch_hook != NULL) {
+ /* Run the installed switch hook: */
+ thread_run_switch_hook(_last_user_thread, curthread);
+ }
+ if (ucp == NULL)
+ return;
+ else {
+ /*
+ * Set the process signal mask in the context; it
+ * could have changed by the handler.
+ */
+ ucp->uc_sigmask = _process_sigmask;
+
+ /* Resume the interrupted thread: */
+ __sys_sigreturn(ucp);
+ }
+ }
+ /* Switch to the thread scheduler: */
+ ___longjmp(_thread_kern_sched_jb, 1);
+}
+
+void
+_thread_kern_sched_sig(void)
+{
+ struct pthread *curthread = _get_curthread();
+
+ curthread->check_pending = 1;
+ _thread_kern_sched(NULL);
+}
+
+
+void
+_thread_kern_scheduler(void)
+{
+ struct timespec ts;
+ struct timeval tv;
+ struct pthread *curthread = _get_curthread();
+ pthread_t pthread, pthread_h;
+ unsigned int current_tick;
+ int add_to_prioq;
+
+ /* If the currently running thread is a user thread, save it: */
+ if ((curthread->flags & PTHREAD_FLAGS_PRIVATE) == 0)
+ _last_user_thread = curthread;
+
+ if (called_from_handler != 0) {
+ called_from_handler = 0;
+
+ /*
+ * We were called from a signal handler; restore the process
+ * signal mask.
+ */
+ if (__sys_sigprocmask(SIG_SETMASK,
+ &_process_sigmask, NULL) != 0)
+ PANIC("Unable to restore process mask after signal");
+ }
+
+ /*
+ * Enter a scheduling loop that finds the next thread that is
+ * ready to run. This loop completes when there are no more threads
+ * in the global list or when a thread has its state restored by
+ * either a sigreturn (if the state was saved as a sigcontext) or a
+ * longjmp (if the state was saved by a setjmp).
+ */
+ while (!(TAILQ_EMPTY(&_thread_list))) {
+ /* Get the current time of day: */
+ GET_CURRENT_TOD(tv);
+ TIMEVAL_TO_TIMESPEC(&tv, &ts);
+ current_tick = _sched_ticks;
+
+ /*
+ * Protect the scheduling queues from access by the signal
+ * handler.
+ */
+ _queue_signals = 1;
+ add_to_prioq = 0;
+
+ if (curthread != &_thread_kern_thread) {
+ /*
+ * This thread no longer needs to yield the CPU.
+ */
+ curthread->yield_on_sig_undefer = 0;
+
+ if (curthread->state != PS_RUNNING) {
+ /*
+ * Save the current time as the time that the
+ * thread became inactive:
+ */
+ curthread->last_inactive = (long)current_tick;
+ if (curthread->last_inactive <
+ curthread->last_active) {
+ /* Account for a rollover: */
+					curthread->last_inactive +=
+					    UINT_MAX + 1;
+ }
+ }
+
+ /*
+ * Place the currently running thread into the
+ * appropriate queue(s).
+ */
+ switch (curthread->state) {
+ case PS_DEAD:
+ case PS_STATE_MAX: /* to silence -Wall */
+ case PS_SUSPENDED:
+ /*
+ * Dead and suspended threads are not placed
+ * in any queue:
+ */
+ break;
+
+ case PS_RUNNING:
+ /*
+ * Runnable threads can't be placed in the
+ * priority queue until after waiting threads
+ * are polled (to preserve round-robin
+ * scheduling).
+ */
+ add_to_prioq = 1;
+ break;
+
+ /*
+ * States which do not depend on file descriptor I/O
+ * operations or timeouts:
+ */
+ case PS_DEADLOCK:
+ case PS_FDLR_WAIT:
+ case PS_FDLW_WAIT:
+ case PS_FILE_WAIT:
+ case PS_JOIN:
+ case PS_MUTEX_WAIT:
+ case PS_SIGSUSPEND:
+ case PS_SIGTHREAD:
+ case PS_SIGWAIT:
+ case PS_WAIT_WAIT:
+ /* No timeouts for these states: */
+ curthread->wakeup_time.tv_sec = -1;
+ curthread->wakeup_time.tv_nsec = -1;
+
+ /* Restart the time slice: */
+ curthread->slice_usec = -1;
+
+ /* Insert into the waiting queue: */
+ PTHREAD_WAITQ_INSERT(curthread);
+ break;
+
+ /* States which can timeout: */
+ case PS_COND_WAIT:
+ case PS_SLEEP_WAIT:
+ /* Restart the time slice: */
+ curthread->slice_usec = -1;
+
+ /* Insert into the waiting queue: */
+ PTHREAD_WAITQ_INSERT(curthread);
+ break;
+
+ /* States that require periodic work: */
+ case PS_SPINBLOCK:
+ /* No timeouts for this state: */
+ curthread->wakeup_time.tv_sec = -1;
+ curthread->wakeup_time.tv_nsec = -1;
+
+ /* Increment spinblock count: */
+ _spinblock_count++;
+
+ /* FALLTHROUGH */
+ case PS_FDR_WAIT:
+ case PS_FDW_WAIT:
+ case PS_POLL_WAIT:
+ case PS_SELECT_WAIT:
+ /* Restart the time slice: */
+ curthread->slice_usec = -1;
+
+ /* Insert into the waiting queue: */
+ PTHREAD_WAITQ_INSERT(curthread);
+
+ /* Insert into the work queue: */
+ PTHREAD_WORKQ_INSERT(curthread);
+ break;
+ }
+
+ /*
+ * Are there pending signals for this thread?
+ *
+ * This check has to be performed after the thread
+ * has been placed in the queue(s) appropriate for
+ * its state. The process of adding pending signals
+			 * can change a thread's state, which in turn will
+ * attempt to add or remove the thread from any
+ * scheduling queue to which it belongs.
+ */
+ if (curthread->check_pending != 0) {
+ curthread->check_pending = 0;
+ _thread_sig_check_pending(curthread);
+ }
+ }
+
+ /*
+ * Avoid polling file descriptors if there are none
+ * waiting:
+ */
+ if (TAILQ_EMPTY(&_workq) != 0) {
+ }
+ /*
+ * Poll file descriptors only if a new scheduling signal
+ * has occurred or if we have no more runnable threads.
+ */
+ else if (((current_tick = _sched_ticks) != last_tick) ||
+ ((curthread->state != PS_RUNNING) &&
+ (PTHREAD_PRIOQ_FIRST() == NULL))) {
+ /* Unprotect the scheduling queues: */
+ _queue_signals = 0;
+
+ /*
+ * Poll file descriptors to update the state of threads
+ * waiting on file I/O where data may be available:
+ */
+ thread_kern_poll(0);
+
+ /* Protect the scheduling queues: */
+ _queue_signals = 1;
+ }
+ last_tick = current_tick;
+
+ /*
+		 * Wake up threads that have timed out. This has to be
+ * done after polling in case a thread does a poll or
+ * select with zero time.
+ */
+ PTHREAD_WAITQ_SETACTIVE();
+ while (((pthread = TAILQ_FIRST(&_waitingq)) != NULL) &&
+ (pthread->wakeup_time.tv_sec != -1) &&
+ (((pthread->wakeup_time.tv_sec == 0) &&
+ (pthread->wakeup_time.tv_nsec == 0)) ||
+ (pthread->wakeup_time.tv_sec < ts.tv_sec) ||
+ ((pthread->wakeup_time.tv_sec == ts.tv_sec) &&
+ (pthread->wakeup_time.tv_nsec <= ts.tv_nsec)))) {
+ switch (pthread->state) {
+ case PS_POLL_WAIT:
+ case PS_SELECT_WAIT:
+ /* Return zero file descriptors ready: */
+ pthread->data.poll_data->nfds = 0;
+ /* FALLTHROUGH */
+ default:
+ /*
+ * Remove this thread from the waiting queue
+ * (and work queue if necessary) and place it
+ * in the ready queue.
+ */
+ PTHREAD_WAITQ_CLEARACTIVE();
+ if (pthread->flags & PTHREAD_FLAGS_IN_WORKQ)
+ PTHREAD_WORKQ_REMOVE(pthread);
+ PTHREAD_NEW_STATE(pthread, PS_RUNNING);
+ PTHREAD_WAITQ_SETACTIVE();
+ break;
+ }
+ /*
+ * Flag the timeout in the thread structure:
+ */
+ pthread->timeout = 1;
+ }
+ PTHREAD_WAITQ_CLEARACTIVE();
+
+ /*
+ * Check to see if the current thread needs to be added
+ * to the priority queue:
+ */
+ if (add_to_prioq != 0) {
+ /*
+ * Save the current time as the time that the
+ * thread became inactive:
+ */
+ current_tick = _sched_ticks;
+ curthread->last_inactive = (long)current_tick;
+ if (curthread->last_inactive <
+ curthread->last_active) {
+ /* Account for a rollover: */
+				curthread->last_inactive += UINT_MAX + 1;
+ }
+
+ if ((curthread->slice_usec != -1) &&
+ (curthread->attr.sched_policy != SCHED_FIFO)) {
+ /*
+ * Accumulate the number of microseconds for
+ * which the current thread has run:
+ */
+ curthread->slice_usec +=
+ (curthread->last_inactive -
+ curthread->last_active) *
+ (long)_clock_res_usec;
+ /* Check for time quantum exceeded: */
+ if (curthread->slice_usec > TIMESLICE_USEC)
+ curthread->slice_usec = -1;
+ }
+
+ if (curthread->slice_usec == -1) {
+ /*
+ * The thread exceeded its time
+ * quantum or it yielded the CPU;
+ * place it at the tail of the
+ * queue for its priority.
+ */
+ PTHREAD_PRIOQ_INSERT_TAIL(curthread);
+ } else {
+ /*
+ * The thread hasn't exceeded its
+ * interval. Place it at the head
+ * of the queue for its priority.
+ */
+ PTHREAD_PRIOQ_INSERT_HEAD(curthread);
+ }
+ }
+
+ /*
+ * Get the highest priority thread in the ready queue.
+ */
+ pthread_h = PTHREAD_PRIOQ_FIRST();
+
+ /* Check if there are no threads ready to run: */
+ if (pthread_h == NULL) {
+ /*
+ * Lock the pthread kernel by changing the pointer to
+ * the running thread to point to the global kernel
+ * thread structure:
+ */
+ _set_curthread(&_thread_kern_thread);
+ curthread = &_thread_kern_thread;
+
+ DBG_MSG("No runnable threads, using kernel thread %p\n",
+ curthread);
+
+ /* Unprotect the scheduling queues: */
+ _queue_signals = 0;
+
+ /*
+ * There are no threads ready to run, so wait until
+ * something happens that changes this condition:
+ */
+ thread_kern_poll(1);
+
+ /*
+ * This process' usage will likely be very small
+ * while waiting in a poll. Since the scheduling
+ * clock is based on the profiling timer, it is
+ * unlikely that the profiling timer will fire
+ * and update the time of day. To account for this,
+ * get the time of day after polling with a timeout.
+ */
+ gettimeofday((struct timeval *) &_sched_tod, NULL);
+
+ /* Check once more for a runnable thread: */
+ _queue_signals = 1;
+ pthread_h = PTHREAD_PRIOQ_FIRST();
+ _queue_signals = 0;
+ }
+
+ if (pthread_h != NULL) {
+ /* Remove the thread from the ready queue: */
+ PTHREAD_PRIOQ_REMOVE(pthread_h);
+
+ /* Unprotect the scheduling queues: */
+ _queue_signals = 0;
+
+ /*
+ * Check for signals queued while the scheduling
+ * queues were protected:
+ */
+ while (_sigq_check_reqd != 0) {
+ /* Clear before handling queued signals: */
+ _sigq_check_reqd = 0;
+
+ /* Protect the scheduling queues again: */
+ _queue_signals = 1;
+
+ dequeue_signals();
+
+ /*
+ * Check for a higher priority thread that
+ * became runnable due to signal handling.
+ */
+ if (((pthread = PTHREAD_PRIOQ_FIRST()) != NULL) &&
+ (pthread->active_priority > pthread_h->active_priority)) {
+ /* Remove the thread from the ready queue: */
+ PTHREAD_PRIOQ_REMOVE(pthread);
+
+ /*
+ * Insert the lower priority thread
+ * at the head of its priority list:
+ */
+ PTHREAD_PRIOQ_INSERT_HEAD(pthread_h);
+
+ /* There's a new thread in town: */
+ pthread_h = pthread;
+ }
+
+ /* Unprotect the scheduling queues: */
+ _queue_signals = 0;
+ }
+
+ /* Make the selected thread the current thread: */
+ _set_curthread(pthread_h);
+ curthread = pthread_h;
+
+ /*
+ * Save the current time as the time that the thread
+ * became active:
+ */
+ current_tick = _sched_ticks;
+ curthread->last_active = (long) current_tick;
+
+ /*
+ * Check if this thread is running for the first time
+ * or running again after using its full time slice
+ * allocation:
+ */
+ if (curthread->slice_usec == -1) {
+ /* Reset the accumulated time slice period: */
+ curthread->slice_usec = 0;
+ }
+
+ /*
+ * If we had a context switch, run any
+ * installed switch hooks.
+ */
+ if ((_sched_switch_hook != NULL) &&
+ (_last_user_thread != curthread)) {
+ thread_run_switch_hook(_last_user_thread,
+ curthread);
+ }
+ /*
+ * Continue the thread at its current frame:
+ */
+#if NOT_YET
+ _setcontext(&curthread->ctx.uc);
+#else
+ ___longjmp(curthread->ctx.jb, 1);
+#endif
+ /* This point should not be reached. */
+ PANIC("Thread has returned from sigreturn or longjmp");
+ }
+ }
+
+ /* There are no more threads, so exit this process: */
+ exit(0);
+}
+#endif /* __rtems__ */
+
+void
+_thread_kern_sched_state(enum pthread_state state, char *fname, int lineno)
+{
+#ifdef __rtems__
+ rtems_task_wake_after(RTEMS_YIELD_PROCESSOR);
+#else
+
+ struct pthread *curthread = _get_curthread();
+
+ /*
+ * Flag the pthread kernel as executing scheduler code
+ * to avoid a scheduler signal from interrupting this
+ * execution and calling the scheduler again.
+ */
+ _thread_kern_in_sched = 1;
+
+ /*
+ * Prevent the signal handler from fiddling with this thread
+ * before its state is set and is placed into the proper queue.
+ */
+ _queue_signals = 1;
+
+ /* Change the state of the current thread: */
+ curthread->state = state;
+ curthread->fname = fname;
+ curthread->lineno = lineno;
+
+ /* Schedule the next thread that is ready: */
+ _thread_kern_sched(NULL);
+#endif /* __rtems__ */
+}
+
+#ifndef __rtems__
+void
+_thread_kern_sched_state_unlock(enum pthread_state state,
+ spinlock_t *lock, char *fname, int lineno)
+{
+ struct pthread *curthread = _get_curthread();
+
+ /*
+ * Flag the pthread kernel as executing scheduler code
+ * to avoid a scheduler signal from interrupting this
+ * execution and calling the scheduler again.
+ */
+ _thread_kern_in_sched = 1;
+
+ /*
+ * Prevent the signal handler from fiddling with this thread
+ * before its state is set and it is placed into the proper
+ * queue(s).
+ */
+ _queue_signals = 1;
+
+ /* Change the state of the current thread: */
+ curthread->state = state;
+ curthread->fname = fname;
+ curthread->lineno = lineno;
+
+ _SPINUNLOCK(lock);
+
+ /* Schedule the next thread that is ready: */
+ _thread_kern_sched(NULL);
+}
+
+static void
+thread_kern_poll(int wait_reqd)
+{
+ int count = 0;
+ int i, found;
+ int kern_pipe_added = 0;
+ int nfds = 0;
+ int timeout_ms = 0;
+ struct pthread *pthread;
+ struct timespec ts;
+ struct timeval tv;
+
+ /* Check if the caller wants to wait: */
+ if (wait_reqd == 0) {
+ timeout_ms = 0;
+ }
+ else {
+ /* Get the current time of day: */
+ GET_CURRENT_TOD(tv);
+ TIMEVAL_TO_TIMESPEC(&tv, &ts);
+
+ _queue_signals = 1;
+ pthread = TAILQ_FIRST(&_waitingq);
+ _queue_signals = 0;
+
+ if ((pthread == NULL) || (pthread->wakeup_time.tv_sec == -1)) {
+ /*
+ * Either there are no threads in the waiting queue,
+ * or there are no threads that can timeout.
+ */
+ timeout_ms = INFTIM;
+ }
+ else if (pthread->wakeup_time.tv_sec - ts.tv_sec > 60000)
+ /* Limit maximum timeout to prevent rollover. */
+ timeout_ms = 60000;
+ else {
+ /*
+ * Calculate the time left for the next thread to
+ * timeout:
+ */
+ timeout_ms = ((pthread->wakeup_time.tv_sec - ts.tv_sec) *
+ 1000) + ((pthread->wakeup_time.tv_nsec - ts.tv_nsec) /
+ 1000000);
+ /*
+ * Don't allow negative timeouts:
+ */
+ if (timeout_ms < 0)
+ timeout_ms = 0;
+ }
+ }
+
+ /* Protect the scheduling queues: */
+ _queue_signals = 1;
+
+ /*
+ * Check to see if the signal queue needs to be walked to look
+ * for threads awoken by a signal while in the scheduler.
+ */
+ if (_sigq_check_reqd != 0) {
+ /* Reset flag before handling queued signals: */
+ _sigq_check_reqd = 0;
+
+ dequeue_signals();
+ }
+
+ /*
+ * Check for a thread that became runnable due to a signal:
+ */
+ if (PTHREAD_PRIOQ_FIRST() != NULL) {
+ /*
+ * Since there is at least one runnable thread,
+ * disable the wait.
+ */
+ timeout_ms = 0;
+ }
+
+ /*
+ * Form the poll table:
+ */
+ nfds = 0;
+ if (timeout_ms != 0) {
+ /* Add the kernel pipe to the poll table: */
+ _thread_pfd_table[nfds].fd = _thread_kern_pipe[0];
+ _thread_pfd_table[nfds].events = POLLRDNORM;
+ _thread_pfd_table[nfds].revents = 0;
+ nfds++;
+ kern_pipe_added = 1;
+ }
+
+ PTHREAD_WAITQ_SETACTIVE();
+ TAILQ_FOREACH(pthread, &_workq, qe) {
+ switch (pthread->state) {
+ case PS_SPINBLOCK:
+ /*
+ * If the lock is available, let the thread run.
+ */
+ if (pthread->data.spinlock->access_lock == 0) {
+ PTHREAD_WAITQ_CLEARACTIVE();
+ PTHREAD_WORKQ_REMOVE(pthread);
+ PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+ PTHREAD_WAITQ_SETACTIVE();
+ /* One less thread in a spinblock state: */
+ _spinblock_count--;
+ /*
+ * Since there is at least one runnable
+ * thread, disable the wait.
+ */
+ timeout_ms = 0;
+ }
+ break;
+
+ /* File descriptor read wait: */
+ case PS_FDR_WAIT:
+ /* Limit number of polled files to table size: */
+ if (nfds < _thread_dtablesize) {
+ _thread_pfd_table[nfds].events = POLLRDNORM;
+ _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
+ nfds++;
+ }
+ break;
+
+ /* File descriptor write wait: */
+ case PS_FDW_WAIT:
+ /* Limit number of polled files to table size: */
+ if (nfds < _thread_dtablesize) {
+ _thread_pfd_table[nfds].events = POLLWRNORM;
+ _thread_pfd_table[nfds].fd = pthread->data.fd.fd;
+ nfds++;
+ }
+ break;
+
+ /* File descriptor poll or select wait: */
+ case PS_POLL_WAIT:
+ case PS_SELECT_WAIT:
+ /* Limit number of polled files to table size: */
+ if (pthread->data.poll_data->nfds + nfds <
+ _thread_dtablesize) {
+ for (i = 0; i < pthread->data.poll_data->nfds; i++) {
+ _thread_pfd_table[nfds + i].fd =
+ pthread->data.poll_data->fds[i].fd;
+ _thread_pfd_table[nfds + i].events =
+ pthread->data.poll_data->fds[i].events;
+ }
+ nfds += pthread->data.poll_data->nfds;
+ }
+ break;
+
+ /* Other states do not depend on file I/O. */
+ default:
+ break;
+ }
+ }
+ PTHREAD_WAITQ_CLEARACTIVE();
+
+ /*
+ * Wait for a file descriptor to be ready for read, write, or
+ * an exception, or a timeout to occur:
+ */
+ count = __sys_poll(_thread_pfd_table, nfds, timeout_ms);
+
+ if (kern_pipe_added != 0)
+ /*
+ * Remove the pthread kernel pipe file descriptor
+ * from the pollfd table:
+ */
+ nfds = 1;
+ else
+ nfds = 0;
+
+ /*
+ * Check if it is possible that there are bytes in the kernel
+ * read pipe waiting to be read:
+ */
+ if (count < 0 || ((kern_pipe_added != 0) &&
+ (_thread_pfd_table[0].revents & POLLRDNORM))) {
+ /*
+ * If the kernel read pipe was included in the
+ * count:
+ */
+ if (count > 0) {
+ /* Decrement the count of file descriptors: */
+ count--;
+ }
+
+ if (_sigq_check_reqd != 0) {
+ /* Reset flag before handling signals: */
+ _sigq_check_reqd = 0;
+
+ dequeue_signals();
+ }
+ }
+
+ /*
+ * Check if any file descriptors are ready:
+ */
+ if (count > 0) {
+ /*
+ * Enter a loop to look for threads waiting on file
+ * descriptors that are flagged as available by the
+ * _poll syscall:
+ */
+ PTHREAD_WAITQ_SETACTIVE();
+ TAILQ_FOREACH(pthread, &_workq, qe) {
+ switch (pthread->state) {
+ case PS_SPINBLOCK:
+ /*
+ * If the lock is available, let the thread run.
+ */
+ if (pthread->data.spinlock->access_lock == 0) {
+ PTHREAD_WAITQ_CLEARACTIVE();
+ PTHREAD_WORKQ_REMOVE(pthread);
+ PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+ PTHREAD_WAITQ_SETACTIVE();
+
+ /*
+ * One less thread in a spinblock state:
+ */
+ _spinblock_count--;
+ }
+ break;
+
+ /* File descriptor read wait: */
+ case PS_FDR_WAIT:
+ if ((nfds < _thread_dtablesize) &&
+ (_thread_pfd_table[nfds].revents
+ & (POLLRDNORM|POLLERR|POLLHUP|POLLNVAL))
+ != 0) {
+ PTHREAD_WAITQ_CLEARACTIVE();
+ PTHREAD_WORKQ_REMOVE(pthread);
+ PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+ PTHREAD_WAITQ_SETACTIVE();
+ }
+ nfds++;
+ break;
+
+ /* File descriptor write wait: */
+ case PS_FDW_WAIT:
+ if ((nfds < _thread_dtablesize) &&
+ (_thread_pfd_table[nfds].revents
+ & (POLLWRNORM|POLLERR|POLLHUP|POLLNVAL))
+ != 0) {
+ PTHREAD_WAITQ_CLEARACTIVE();
+ PTHREAD_WORKQ_REMOVE(pthread);
+ PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+ PTHREAD_WAITQ_SETACTIVE();
+ }
+ nfds++;
+ break;
+
+ /* File descriptor poll or select wait: */
+ case PS_POLL_WAIT:
+ case PS_SELECT_WAIT:
+ if (pthread->data.poll_data->nfds + nfds <
+ _thread_dtablesize) {
+ /*
+ * Enter a loop looking for I/O
+ * readiness:
+ */
+ found = 0;
+ for (i = 0; i < pthread->data.poll_data->nfds; i++) {
+ if (_thread_pfd_table[nfds + i].revents != 0) {
+ pthread->data.poll_data->fds[i].revents =
+ _thread_pfd_table[nfds + i].revents;
+ found++;
+ }
+ }
+
+ /* Increment before destroying: */
+ nfds += pthread->data.poll_data->nfds;
+
+ if (found != 0) {
+ pthread->data.poll_data->nfds = found;
+ PTHREAD_WAITQ_CLEARACTIVE();
+ PTHREAD_WORKQ_REMOVE(pthread);
+ PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+ PTHREAD_WAITQ_SETACTIVE();
+ }
+ }
+ else
+ nfds += pthread->data.poll_data->nfds;
+ break;
+
+ /* Other states do not depend on file I/O. */
+ default:
+ break;
+ }
+ }
+ PTHREAD_WAITQ_CLEARACTIVE();
+ }
+ else if (_spinblock_count != 0) {
+ /*
+ * Enter a loop to look for threads waiting on a spinlock
+ * that is now available.
+ */
+ PTHREAD_WAITQ_SETACTIVE();
+ TAILQ_FOREACH(pthread, &_workq, qe) {
+ if (pthread->state == PS_SPINBLOCK) {
+ /*
+ * If the lock is available, let the thread run.
+ */
+ if (pthread->data.spinlock->access_lock == 0) {
+ PTHREAD_WAITQ_CLEARACTIVE();
+ PTHREAD_WORKQ_REMOVE(pthread);
+ PTHREAD_NEW_STATE(pthread,PS_RUNNING);
+ PTHREAD_WAITQ_SETACTIVE();
+
+ /*
+ * One less thread in a spinblock state:
+ */
+ _spinblock_count--;
+ }
+ }
+ }
+ PTHREAD_WAITQ_CLEARACTIVE();
+ }
+
+ /* Unprotect the scheduling queues: */
+ _queue_signals = 0;
+
+ while (_sigq_check_reqd != 0) {
+ /* Handle queued signals: */
+ _sigq_check_reqd = 0;
+
+ /* Protect the scheduling queues: */
+ _queue_signals = 1;
+
+ dequeue_signals();
+
+ /* Unprotect the scheduling queues: */
+ _queue_signals = 0;
+ }
+}
+#endif /* __rtems__ */
+
+void
+_thread_kern_set_timeout(const struct timespec * timeout)
+{
+ struct pthread *curthread = _get_curthread();
+ struct timespec current_time;
+ struct timeval tv;
+
+ /* Reset the timeout flag for the running thread: */
+ curthread->timeout = 0;
+
+ /* Check if the thread is to wait forever: */
+ if (timeout == NULL) {
+ /*
+ * Set the wakeup time to something that can be recognised as
+ * different to an actual time of day:
+ */
+ curthread->wakeup_time.tv_sec = -1;
+ curthread->wakeup_time.tv_nsec = -1;
+ }
+ /* Check if no waiting is required: */
+ else if (timeout->tv_sec == 0 && timeout->tv_nsec == 0) {
+ /* Set the wake up time to 'immediately': */
+ curthread->wakeup_time.tv_sec = 0;
+ curthread->wakeup_time.tv_nsec = 0;
+ } else {
+ /* Get the current time: */
+ GET_CURRENT_TOD(tv);
+ TIMEVAL_TO_TIMESPEC(&tv, &current_time);
+
+ /* Calculate the time for the current thread to wake up: */
+ curthread->wakeup_time.tv_sec = current_time.tv_sec + timeout->tv_sec;
+ curthread->wakeup_time.tv_nsec = current_time.tv_nsec + timeout->tv_nsec;
+
+ /* Check if the nanosecond field needs to wrap: */
+ if (curthread->wakeup_time.tv_nsec >= 1000000000) {
+ /* Wrap the nanosecond field: */
+ curthread->wakeup_time.tv_sec += 1;
+ curthread->wakeup_time.tv_nsec -= 1000000000;
+ }
+ }
+}
+
+#ifndef __rtems__
+void
+_thread_kern_sig_defer(void)
+{
+ struct pthread *curthread = _get_curthread();
+
+ /* Allow signal deferral to be recursive. */
+ curthread->sig_defer_count++;
+}
+
+void
+_thread_kern_sig_undefer(void)
+{
+ struct pthread *curthread = _get_curthread();
+
+ /*
+ * Perform checks to yield only if we are about to undefer
+ * signals.
+ */
+ if (curthread->sig_defer_count > 1) {
+ /* Decrement the signal deferral count. */
+ curthread->sig_defer_count--;
+ }
+ else if (curthread->sig_defer_count == 1) {
+ /* Reenable signals: */
+ curthread->sig_defer_count = 0;
+
+ /*
+ * Check if there are queued signals:
+ */
+ if (_sigq_check_reqd != 0)
+ _thread_kern_sched(NULL);
+
+ /*
+ * Check for asynchronous cancellation before delivering any
+ * pending signals:
+ */
+ if (((curthread->cancelflags & PTHREAD_AT_CANCEL_POINT) == 0) &&
+ ((curthread->cancelflags & PTHREAD_CANCEL_ASYNCHRONOUS) != 0))
+ pthread_testcancel();
+
+ /*
+ * If there are pending signals or this thread has
+ * to yield the CPU, call the kernel scheduler:
+ *
+ * XXX - Come back and revisit the pending signal problem
+ */
+ if ((curthread->yield_on_sig_undefer != 0) ||
+ SIGNOTEMPTY(curthread->sigpend)) {
+ curthread->yield_on_sig_undefer = 0;
+ _thread_kern_sched(NULL);
+ }
+ }
+}
+
+static void
+dequeue_signals(void)
+{
+ char bufr[128];
+ int num;
+
+ /*
+ * Enter a loop to clear the pthread kernel pipe:
+ */
+ while (((num = __sys_read(_thread_kern_pipe[0], bufr,
+ sizeof(bufr))) > 0) || (num == -1 && errno == EINTR)) {
+ }
+ if ((num < 0) && (errno != EAGAIN)) {
+ /*
+ * The only error we should expect is if there is
+ * no data to read.
+ */
+ PANIC("Unable to read from thread kernel pipe");
+ }
+ /* Handle any pending signals: */
+ _thread_sig_handle_pending();
+}
+
+static inline void
+thread_run_switch_hook(pthread_t thread_out, pthread_t thread_in)
+{
+ pthread_t tid_out = thread_out;
+ pthread_t tid_in = thread_in;
+
+ if ((tid_out != NULL) &&
+ (tid_out->flags & PTHREAD_FLAGS_PRIVATE) != 0)
+ tid_out = NULL;
+ if ((tid_in != NULL) &&
+ (tid_in->flags & PTHREAD_FLAGS_PRIVATE) != 0)
+ tid_in = NULL;
+
+ if ((_sched_switch_hook != NULL) && (tid_out != tid_in)) {
+ /* Run the scheduler switch hook: */
+ _sched_switch_hook(tid_out, tid_in);
+ }
+}
+
+struct pthread *
+_get_curthread(void)
+{
+ if (_thread_initial == NULL)
+ _thread_init();
+
+ return (_thread_run);
+}
+
+void
+_set_curthread(struct pthread *newthread)
+{
+ _thread_run = newthread;
+}
+#endif /* __rtems__ */
diff --git a/freebsd-userspace/lib/libc_r/uthread/uthread_select.c b/freebsd-userspace/lib/libc_r/uthread/uthread_select.c
new file mode 100644
index 00000000..632f44d2
--- /dev/null
+++ b/freebsd-userspace/lib/libc_r/uthread/uthread_select.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#include <unistd.h>
+#include <errno.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <pthread.h>
+#include "pthread_private.h"
+
+__weak_reference(__select, select);
+
+#ifdef __rtems__
+#include <freebsd/sys/timespec.h>
+#define realloc _bsd_realloc
+#endif
+
+
+int
+_select(int numfds, fd_set * readfds, fd_set * writefds, fd_set * exceptfds,
+ struct timeval * timeout)
+{
+ struct pthread *curthread = _get_curthread();
+ struct timespec ts;
+ int i, ret = 0, f_wait = 1;
+ int pfd_index, got_events = 0, fd_count = 0;
+ struct pthread_poll_data data;
+
+#ifndef __rtems__ /* XXX - NOT SURE WHAT TO DEFINE _thread_dtablesize TO. */
+ if (numfds > _thread_dtablesize) {
+ numfds = _thread_dtablesize;
+ }
+#endif
+ /* Count the number of file descriptors to be polled: */
+ if (readfds || writefds || exceptfds) {
+ for (i = 0; i < numfds; i++) {
+ if ((readfds && FD_ISSET(i, readfds)) ||
+ (exceptfds && FD_ISSET(i, exceptfds)) ||
+ (writefds && FD_ISSET(i, writefds))) {
+ fd_count++;
+ }
+ }
+ }
+
+ /*
+ * Allocate memory for poll data if it hasn't already been
+ * allocated or if previously allocated memory is insufficient.
+ */
+ if ((curthread->poll_data.fds == NULL) ||
+ (curthread->poll_data.nfds < fd_count)) {
+ data.fds = (struct pollfd *) realloc(curthread->poll_data.fds,
+ sizeof(struct pollfd) * MAX(128, fd_count));
+ if (data.fds == NULL) {
+ errno = ENOMEM;
+ ret = -1;
+ }
+ else {
+ /*
+			 * Note that the thread's poll data always
+ * indicates what is allocated, not what is
+ * currently being polled.
+ */
+ curthread->poll_data.fds = data.fds;
+ curthread->poll_data.nfds = MAX(128, fd_count);
+ }
+ }
+ /* Check if a timeout was specified: */
+ if (timeout) {
+ if (timeout->tv_sec < 0 ||
+ timeout->tv_usec < 0 || timeout->tv_usec >= 1000000) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ /* Convert the timeval to a timespec: */
+ TIMEVAL_TO_TIMESPEC(timeout, &ts);
+
+ /* Set the wake up time: */
+ _thread_kern_set_timeout(&ts);
+ if (ts.tv_sec == 0 && ts.tv_nsec == 0)
+ f_wait = 0;
+ } else {
+ /* Wait for ever: */
+ _thread_kern_set_timeout(NULL);
+ }
+
+ if (ret == 0) {
+ /* Setup the wait data. */
+ data.fds = curthread->poll_data.fds;
+ data.nfds = fd_count;
+
+ /*
+ * Setup the array of pollfds. Optimize this by
+		 * running the loop in reverse and stopping when
+ * the number of selected file descriptors is reached.
+ */
+ for (i = numfds - 1, pfd_index = fd_count - 1;
+ (i >= 0) && (pfd_index >= 0); i--) {
+ data.fds[pfd_index].events = 0;
+ if (readfds && FD_ISSET(i, readfds)) {
+ data.fds[pfd_index].events = POLLRDNORM;
+ }
+ if (exceptfds && FD_ISSET(i, exceptfds)) {
+ data.fds[pfd_index].events |= POLLRDBAND;
+ }
+ if (writefds && FD_ISSET(i, writefds)) {
+ data.fds[pfd_index].events |= POLLWRNORM;
+ }
+ if (data.fds[pfd_index].events != 0) {
+ /*
+ * Set the file descriptor to be polled and
+ * clear revents in case of a timeout which
+ * leaves fds unchanged:
+ */
+ data.fds[pfd_index].fd = i;
+ data.fds[pfd_index].revents = 0;
+ pfd_index--;
+ }
+ }
+ if (((ret = __sys_poll(data.fds, data.nfds, 0)) == 0) &&
+ (f_wait != 0)) {
+ curthread->data.poll_data = &data;
+ curthread->interrupted = 0;
+ _thread_kern_sched_state(PS_SELECT_WAIT, __FILE__, __LINE__);
+ if (curthread->interrupted) {
+ errno = EINTR;
+ data.nfds = 0;
+ ret = -1;
+ } else
+ ret = data.nfds;
+ }
+ }
+
+ if (ret >= 0) {
+ numfds = 0;
+ for (i = 0; i < fd_count; i++) {
+ /*
+ * Check the results of the poll and clear
+ * this file descriptor from the fdset if
+ * the requested event wasn't ready.
+ */
+
+ /*
+ * First check for invalid descriptor.
+ * If found, set errno and return -1.
+ */
+ if (data.fds[i].revents & POLLNVAL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ got_events = 0;
+ if (readfds != NULL) {
+ if (FD_ISSET(data.fds[i].fd, readfds)) {
+ if ((data.fds[i].revents & (POLLIN
+ | POLLRDNORM | POLLERR
+ | POLLHUP | POLLNVAL)) != 0)
+ got_events++;
+ else
+ FD_CLR(data.fds[i].fd, readfds);
+ }
+ }
+ if (writefds != NULL) {
+ if (FD_ISSET(data.fds[i].fd, writefds)) {
+ if ((data.fds[i].revents & (POLLOUT
+ | POLLWRNORM | POLLWRBAND | POLLERR
+ | POLLHUP | POLLNVAL)) != 0)
+ got_events++;
+ else
+ FD_CLR(data.fds[i].fd,
+ writefds);
+ }
+ }
+ if (exceptfds != NULL) {
+ if (FD_ISSET(data.fds[i].fd, exceptfds)) {
+ if (data.fds[i].revents & (POLLRDBAND |
+ POLLPRI))
+ got_events++;
+ else
+ FD_CLR(data.fds[i].fd,
+ exceptfds);
+ }
+ }
+ if (got_events != 0)
+ numfds+=got_events;
+ }
+ ret = numfds;
+ }
+
+ return (ret);
+}
+
+int
+__select(int numfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
+ struct timeval *timeout)
+{
+ int ret;
+
+#ifndef __rtems__
+ _thread_enter_cancellation_point();
+#endif
+ ret = _select(numfds, readfds, writefds, exceptfds, timeout);
+#ifndef __rtems__
+ _thread_leave_cancellation_point();
+#endif
+
+ return ret;
+}
diff --git a/freebsd-userspace/rtems/include/pthread_private.h b/freebsd-userspace/rtems/include/pthread_private.h
new file mode 100644
index 00000000..ce5ba6d8
--- /dev/null
+++ b/freebsd-userspace/rtems/include/pthread_private.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 1995-1998 John Birrell <jb@cimlogic.com.au>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Private thread definitions for the uthread kernel.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PTHREAD_PRIVATE_H
+#define _PTHREAD_PRIVATE_H
+
+/*
+ * Evaluate the storage class specifier.
+ */
+#ifdef GLOBAL_PTHREAD_PRIVATE
+#define SCLASS
+#else
+#define SCLASS extern
+#endif
+
+/*
+ * Include files.
+ */
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/types.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/cdefs.h>
+#include <sched.h>
+#include <spinlock.h>
+#include <pthread_np.h>
+#include <freebsd/sys/malloc.h>
+
+/*
+ * Define a thread-safe macro to get the current time of day
+ * which is updated at regular intervals by the scheduling signal
+ * handler.
+ */
+#define GET_CURRENT_TOD(tv) \
+ do { \
+ tv.tv_sec = _sched_tod.tv_sec; \
+ tv.tv_usec = _sched_tod.tv_usec; \
+ } while (tv.tv_sec != _sched_tod.tv_sec)
+
+
+/*
+ * RTEMS uses the following definitions to allow _thread_kern_sched_state()
+ * to be called.  On RTEMS this function is stubbed out to simply yield the
+ * processor.
+ */
+
+/*
+ * Thread states.
+ */
+enum pthread_state {
+#if 0
+ PS_RUNNING,
+ PS_SIGTHREAD,
+ PS_MUTEX_WAIT,
+ PS_COND_WAIT,
+ PS_FDLR_WAIT,
+ PS_FDLW_WAIT,
+ PS_FDR_WAIT,
+ PS_FDW_WAIT,
+ PS_FILE_WAIT,
+ PS_POLL_WAIT,
+#endif
+ PS_SELECT_WAIT,
+#if 0
+ PS_SLEEP_WAIT,
+ PS_WAIT_WAIT,
+ PS_SIGSUSPEND,
+ PS_SIGWAIT,
+ PS_SPINBLOCK,
+ PS_JOIN,
+ PS_SUSPENDED,
+ PS_DEAD,
+ PS_DEADLOCK,
+#endif
+ PS_STATE_MAX
+};
+
+struct pthread_poll_data {
+ int nfds;
+ struct pollfd *fds;
+};
+
+struct pthread_wait_data {
+ struct pthread_poll_data *poll_data;
+};
+
+/*
+ * Thread structure.
+ */
+struct pthread {
+
+ /*
+ * Time to wake up thread. This is used for sleeping threads and
+ * for any operation which may time out (such as select).
+ */
+ struct timespec wakeup_time;
+
+ /* TRUE if operation has timed out. */
+ int timeout;
+
+ /* Wait data. */
+ struct pthread_wait_data data;
+
+ /*
+ * Allocated for converting select into poll.
+ */
+ struct pthread_poll_data poll_data;
+
+ /*
+ * Set to TRUE if a blocking operation was
+ * interrupted by a signal:
+ */
+ int interrupted;
+};
+
+/* Time of day at last scheduling timer signal: */
+SCLASS struct timeval volatile _sched_tod
+#ifdef GLOBAL_PTHREAD_PRIVATE
+= { 0, 0 };
+#else
+;
+#endif
+struct pthread *_get_curthread(void);
+
+#endif /* !_PTHREAD_PRIVATE_H */
diff --git a/freebsd-userspace/rtems/rtems-get_curthread.c b/freebsd-userspace/rtems/rtems-get_curthread.c
new file mode 100644
index 00000000..263cac60
--- /dev/null
+++ b/freebsd-userspace/rtems/rtems-get_curthread.c
@@ -0,0 +1,72 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+#include <freebsd/sys/malloc.h>
+
+#include <pthread.h>
+#include "pthread_private.h"
+
+
+static struct pthread *rtems_bsd_curpthread = NULL;
+
+
+static void rtems_bsd_pthread_descriptor_dtor(void *td)
+{
+ // XXX are there other pieces to clean up?
+ free(td, M_TEMP);
+}
+
+static struct pthread *
+rtems_bsd_pthread_init( rtems_id id )
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+ unsigned index = 0;
+ struct pthread *td;
+
+ td = _bsd_malloc( sizeof(struct pthread), M_TEMP, M_WAITOK | M_ZERO);
+ if (td == NULL)
+ return NULL;
+
+ td->timeout = 0;
+ td->data.poll_data = NULL;
+ td->poll_data.nfds = 0;
+ td->poll_data.fds = NULL;
+ td->interrupted = 0;
+ rtems_bsd_curpthread = td;
+
+ // Now add the task descriptor as a per-task variable
+ sc = rtems_task_variable_add(
+ id,
+ &rtems_bsd_curpthread,
+ rtems_bsd_pthread_descriptor_dtor
+ );
+ if (sc != RTEMS_SUCCESSFUL) {
+ free(td, M_TEMP);
+ return NULL;
+ }
+
+ return td;
+}
+
+/*
+ */
+
+struct pthread *
+_get_curthread(void)
+{
+ struct pthread *td;
+
+ /*
+ * If we already have a struct thread associated with this thread,
+ * obtain it. Otherwise, allocate and initialize one.
+ */
+ td = rtems_bsd_curpthread;
+ if ( td == NULL ) {
+ td = rtems_bsd_pthread_init( rtems_task_self() );
+ if ( td == NULL ){
+ panic("_get_curthread: Unable to create pthread\n");
+ }
+ }
+
+ return td;
+}
+
diff --git a/freebsd-userspace/rtems/rtems-syspoll.c b/freebsd-userspace/rtems/rtems-syspoll.c
new file mode 100644
index 00000000..d53d14e8
--- /dev/null
+++ b/freebsd-userspace/rtems/rtems-syspoll.c
@@ -0,0 +1,30 @@
+#include <unistd.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <rtems.h>
+#include <rtems/error.h>
+#include <freebsd/sys/poll.h>
+
+struct poll_args {
+ struct pollfd *fds;
+ u_int nfds;
+ int timeout;
+};
+
+int kern_poll( struct thread *td, struct poll_args *uap );
+
+
+int
+__sys_poll(struct pollfd *fds, unsigned nfds, int timeout)
+{
+ struct poll_args uap;
+ struct thread *td = rtems_get_curthread();
+
+ uap.fds = fds;
+ uap.nfds = nfds;
+ uap.timeout = timeout;
+
+ kern_poll(td, &uap);
+
+ return -1;
+}
diff --git a/freebsd-userspace/rtems/rtems-uthread_init.c b/freebsd-userspace/rtems/rtems-uthread_init.c
new file mode 100644
index 00000000..3226ed62
--- /dev/null
+++ b/freebsd-userspace/rtems/rtems-uthread_init.c
@@ -0,0 +1,8 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/* Allocate space for global thread variables here: */
+#define GLOBAL_PTHREAD_PRIVATE
+
+#include <freebsd/sys/types.h>
+#include <pthread.h>
+#include "pthread_private.h"
diff --git a/freebsd/kern/kern_condvar.c b/freebsd/kern/kern_condvar.c
new file mode 100644
index 00000000..34ec29cb
--- /dev/null
+++ b/freebsd/kern/kern_condvar.c
@@ -0,0 +1,455 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_ktrace.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/ktr.h>
+#include <freebsd/sys/condvar.h>
+#include <freebsd/sys/sched.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/sleepqueue.h>
+#include <freebsd/sys/resourcevar.h>
+#ifdef KTRACE
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/ktrace.h>
+#endif
+
+/*
+ * Common sanity checks for cv_wait* functions.
+ */
+#define CV_ASSERT(cvp, lock, td) do { \
+ KASSERT((td) != NULL, ("%s: curthread NULL", __func__)); \
+ KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__)); \
+ KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \
+ KASSERT((lock) != NULL, ("%s: lock NULL", __func__)); \
+} while (0)
+
+/*
+ * Initialize a condition variable. Must be called before use.
+ */
+void
+cv_init(struct cv *cvp, const char *desc)
+{
+
+ cvp->cv_description = desc;
+ cvp->cv_waiters = 0;
+}
+
+/*
+ * Destroy a condition variable. The condition variable must be re-initialized
+ * in order to be re-used.
+ */
+void
+cv_destroy(struct cv *cvp)
+{
+#ifdef INVARIANTS
+ struct sleepqueue *sq;
+
+ sleepq_lock(cvp);
+ sq = sleepq_lookup(cvp);
+ sleepq_release(cvp);
+ KASSERT(sq == NULL, ("%s: associated sleep queue non-empty", __func__));
+#endif
+}
+
+/*
+ * Wait on a condition variable. The current thread is placed on the condition
+ * variable's wait queue and suspended. A cv_signal or cv_broadcast on the same
+ * condition variable will resume the thread. The mutex is released before
+ * sleeping and will be held on return. It is recommended that the mutex be
+ * held when cv_signal or cv_broadcast are called.
+ */
+void
+_cv_wait(struct cv *cvp, struct lock_object *lock)
+{
+ WITNESS_SAVE_DECL(lock_witness);
+ struct lock_class *class;
+ struct thread *td;
+ int lock_state;
+
+ td = curthread;
+ lock_state = 0;
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CSW))
+ ktrcsw(1, 0);
+#endif
+ CV_ASSERT(cvp, lock, td);
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+ "Waiting on \"%s\"", cvp->cv_description);
+ class = LOCK_CLASS(lock);
+
+ if (cold || panicstr) {
+ /*
+ * During autoconfiguration, just give interrupts
+ * a chance, then just return. Don't run any other
+ * thread or panic below, in case this is the idle
+ * process and already asleep.
+ */
+ return;
+ }
+
+ sleepq_lock(cvp);
+
+ cvp->cv_waiters++;
+ if (lock == &Giant.lock_object)
+ mtx_assert(&Giant, MA_OWNED);
+ DROP_GIANT();
+
+ sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
+ if (lock != &Giant.lock_object) {
+ if (class->lc_flags & LC_SLEEPABLE)
+ sleepq_release(cvp);
+ WITNESS_SAVE(lock, lock_witness);
+ lock_state = class->lc_unlock(lock);
+ if (class->lc_flags & LC_SLEEPABLE)
+ sleepq_lock(cvp);
+ }
+ sleepq_wait(cvp, 0);
+
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CSW))
+ ktrcsw(0, 0);
+#endif
+ PICKUP_GIANT();
+ if (lock != &Giant.lock_object) {
+ class->lc_lock(lock, lock_state);
+ WITNESS_RESTORE(lock, lock_witness);
+ }
+}
+
+/*
+ * Wait on a condition variable. This function differs from cv_wait by
+ * not acquiring the mutex after the condition variable was signaled.
+ */
+void
+_cv_wait_unlock(struct cv *cvp, struct lock_object *lock)
+{
+ struct lock_class *class;
+ struct thread *td;
+
+ td = curthread;
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CSW))
+ ktrcsw(1, 0);
+#endif
+ CV_ASSERT(cvp, lock, td);
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+ "Waiting on \"%s\"", cvp->cv_description);
+ KASSERT(lock != &Giant.lock_object,
+ ("cv_wait_unlock cannot be used with Giant"));
+ class = LOCK_CLASS(lock);
+
+ if (cold || panicstr) {
+ /*
+ * During autoconfiguration, just give interrupts
+ * a chance, then just return. Don't run any other
+ * thread or panic below, in case this is the idle
+ * process and already asleep.
+ */
+ class->lc_unlock(lock);
+ return;
+ }
+
+ sleepq_lock(cvp);
+
+ cvp->cv_waiters++;
+ DROP_GIANT();
+
+ sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
+ if (class->lc_flags & LC_SLEEPABLE)
+ sleepq_release(cvp);
+ class->lc_unlock(lock);
+ if (class->lc_flags & LC_SLEEPABLE)
+ sleepq_lock(cvp);
+ sleepq_wait(cvp, 0);
+
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CSW))
+ ktrcsw(0, 0);
+#endif
+ PICKUP_GIANT();
+}
+
+/*
+ * Wait on a condition variable, allowing interruption by signals. Return 0 if
+ * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
+ * a signal was caught. If ERESTART is returned the system call should be
+ * restarted if possible.
+ */
+int
+_cv_wait_sig(struct cv *cvp, struct lock_object *lock)
+{
+ WITNESS_SAVE_DECL(lock_witness);
+ struct lock_class *class;
+ struct thread *td;
+ int lock_state, rval;
+
+ td = curthread;
+ lock_state = 0;
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CSW))
+ ktrcsw(1, 0);
+#endif
+ CV_ASSERT(cvp, lock, td);
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+ "Waiting on \"%s\"", cvp->cv_description);
+ class = LOCK_CLASS(lock);
+
+ if (cold || panicstr) {
+ /*
+ * After a panic, or during autoconfiguration, just give
+ * interrupts a chance, then just return; don't run any other
+ * procs or panic below, in case this is the idle process and
+ * already asleep.
+ */
+ return (0);
+ }
+
+ sleepq_lock(cvp);
+
+ cvp->cv_waiters++;
+ if (lock == &Giant.lock_object)
+ mtx_assert(&Giant, MA_OWNED);
+ DROP_GIANT();
+
+ sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
+ SLEEPQ_INTERRUPTIBLE, 0);
+ if (lock != &Giant.lock_object) {
+ if (class->lc_flags & LC_SLEEPABLE)
+ sleepq_release(cvp);
+ WITNESS_SAVE(lock, lock_witness);
+ lock_state = class->lc_unlock(lock);
+ if (class->lc_flags & LC_SLEEPABLE)
+ sleepq_lock(cvp);
+ }
+ rval = sleepq_wait_sig(cvp, 0);
+
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CSW))
+ ktrcsw(0, 0);
+#endif
+ PICKUP_GIANT();
+ if (lock != &Giant.lock_object) {
+ class->lc_lock(lock, lock_state);
+ WITNESS_RESTORE(lock, lock_witness);
+ }
+
+ return (rval);
+}
+
+/*
+ * Wait on a condition variable for at most timo/hz seconds. Returns 0 if the
+ * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
+ * expires.
+ */
+int
+_cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo)
+{
+ WITNESS_SAVE_DECL(lock_witness);
+ struct lock_class *class;
+ struct thread *td;
+ int lock_state, rval;
+
+ td = curthread;
+ lock_state = 0;
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CSW))
+ ktrcsw(1, 0);
+#endif
+ CV_ASSERT(cvp, lock, td);
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+ "Waiting on \"%s\"", cvp->cv_description);
+ class = LOCK_CLASS(lock);
+
+ if (cold || panicstr) {
+ /*
+ * After a panic, or during autoconfiguration, just give
+ * interrupts a chance, then just return; don't run any other
+ * thread or panic below, in case this is the idle process and
+ * already asleep.
+ */
+ return 0;
+ }
+
+ sleepq_lock(cvp);
+
+ cvp->cv_waiters++;
+ if (lock == &Giant.lock_object)
+ mtx_assert(&Giant, MA_OWNED);
+ DROP_GIANT();
+
+ sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
+ sleepq_set_timeout(cvp, timo);
+ if (lock != &Giant.lock_object) {
+ if (class->lc_flags & LC_SLEEPABLE)
+ sleepq_release(cvp);
+ WITNESS_SAVE(lock, lock_witness);
+ lock_state = class->lc_unlock(lock);
+ if (class->lc_flags & LC_SLEEPABLE)
+ sleepq_lock(cvp);
+ }
+ rval = sleepq_timedwait(cvp, 0);
+
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CSW))
+ ktrcsw(0, 0);
+#endif
+ PICKUP_GIANT();
+ if (lock != &Giant.lock_object) {
+ class->lc_lock(lock, lock_state);
+ WITNESS_RESTORE(lock, lock_witness);
+ }
+
+ return (rval);
+}
+
+/*
+ * Wait on a condition variable for at most timo/hz seconds, allowing
+ * interruption by signals. Returns 0 if the thread was resumed by cv_signal
+ * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
+ * a signal was caught.
+ */
+int
+_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo)
+{
+ WITNESS_SAVE_DECL(lock_witness);
+ struct lock_class *class;
+ struct thread *td;
+ int lock_state, rval;
+
+ td = curthread;
+ lock_state = 0;
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CSW))
+ ktrcsw(1, 0);
+#endif
+ CV_ASSERT(cvp, lock, td);
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+ "Waiting on \"%s\"", cvp->cv_description);
+ class = LOCK_CLASS(lock);
+
+ if (cold || panicstr) {
+ /*
+ * After a panic, or during autoconfiguration, just give
+ * interrupts a chance, then just return; don't run any other
+ * thread or panic below, in case this is the idle process and
+ * already asleep.
+ */
+ return 0;
+ }
+
+ sleepq_lock(cvp);
+
+ cvp->cv_waiters++;
+ if (lock == &Giant.lock_object)
+ mtx_assert(&Giant, MA_OWNED);
+ DROP_GIANT();
+
+ sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
+ SLEEPQ_INTERRUPTIBLE, 0);
+ sleepq_set_timeout(cvp, timo);
+ if (lock != &Giant.lock_object) {
+ if (class->lc_flags & LC_SLEEPABLE)
+ sleepq_release(cvp);
+ WITNESS_SAVE(lock, lock_witness);
+ lock_state = class->lc_unlock(lock);
+ if (class->lc_flags & LC_SLEEPABLE)
+ sleepq_lock(cvp);
+ }
+ rval = sleepq_timedwait_sig(cvp, 0);
+
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_CSW))
+ ktrcsw(0, 0);
+#endif
+ PICKUP_GIANT();
+ if (lock != &Giant.lock_object) {
+ class->lc_lock(lock, lock_state);
+ WITNESS_RESTORE(lock, lock_witness);
+ }
+
+ return (rval);
+}
+
+/*
+ * Signal a condition variable, waking up one waiting thread.  Will also wake
+ * up the swapper if the process is not in memory, so that it can bring the
+ * sleeping process in. Note that this may also result in additional threads
+ * being made runnable. Should be called with the same mutex as was passed to
+ * cv_wait held.
+ */
+void
+cv_signal(struct cv *cvp)
+{
+ int wakeup_swapper;
+
+ wakeup_swapper = 0;
+ sleepq_lock(cvp);
+ if (cvp->cv_waiters > 0) {
+ cvp->cv_waiters--;
+ wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, 0);
+ }
+ sleepq_release(cvp);
+ if (wakeup_swapper)
+ kick_proc0();
+}
+
+/*
+ * Broadcast a signal to a condition variable. Wakes up all waiting threads.
+ * Should be called with the same mutex as was passed to cv_wait held.
+ */
+void
+cv_broadcastpri(struct cv *cvp, int pri)
+{
+ int wakeup_swapper;
+
+ /*
+ * XXX sleepq_broadcast pri argument changed from -1 meaning
+ * no pri to 0 meaning no pri.
+ */
+ wakeup_swapper = 0;
+ if (pri == -1)
+ pri = 0;
+ sleepq_lock(cvp);
+ if (cvp->cv_waiters > 0) {
+ cvp->cv_waiters = 0;
+ wakeup_swapper = sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0);
+ }
+ sleepq_release(cvp);
+ if (wakeup_swapper)
+ kick_proc0();
+}
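+
+/*
+ * Illustrative usage sketch (not part of the imported file; assumes a
+ * struct mtx "lock" and struct cv "cv" set up with mtx_init()/cv_init(),
+ * a flag "ready" protected by "lock", and the cv_wait()/cv_signal()
+ * wrappers from <sys/condvar.h>):
+ *
+ *	mtx_lock(&lock);
+ *	while (!ready)
+ *		cv_wait(&cv, &lock);	releases and reacquires "lock"
+ *	consume_data();
+ *	mtx_unlock(&lock);
+ *
+ * and on the signalling side:
+ *
+ *	mtx_lock(&lock);
+ *	ready = 1;
+ *	cv_signal(&cv);
+ *	mtx_unlock(&lock);
+ */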
diff --git a/freebsd/kern/kern_descrip.c b/freebsd/kern/kern_descrip.c
new file mode 100644
index 00000000..0ba063f5
--- /dev/null
+++ b/freebsd/kern/kern_descrip.c
@@ -0,0 +1,6912 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_ddb.h>
+#include <freebsd/local/opt_ktrace.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/sys/conf.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/file.h>
+#include <freebsd/sys/filedesc.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mount.h>
+#include <freebsd/sys/mqueue.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/namei.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/resourcevar.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/stat.h>
+#include <freebsd/sys/sx.h>
+#include <freebsd/sys/syscallsubr.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/sysproto.h>
+#include <freebsd/sys/tty.h>
+#include <freebsd/sys/unistd.h>
+#include <freebsd/sys/user.h>
+#include <freebsd/sys/vnode.h>
+#ifdef KTRACE
+#include <freebsd/sys/ktrace.h>
+#endif
+
+#include <freebsd/security/audit/audit.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/ddb/ddb.h>
+
+static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
+static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
+ "file desc to leader structures");
+static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
+
+static uma_zone_t file_zone;
+
+
+/* Flags for do_dup() */
+#define DUP_FIXED 0x1 /* Force fixed allocation */
+#define DUP_FCNTL 0x2 /* fcntl()-style errors */
+
+static int do_dup(struct thread *td, int flags, int old, int new,
+ register_t *retval);
+static int fd_first_free(struct filedesc *, int, int);
+static int fd_last_used(struct filedesc *, int, int);
+static void fdgrowtable(struct filedesc *, int);
+static void fdunused(struct filedesc *fdp, int fd);
+static void fdused(struct filedesc *fdp, int fd);
+
+/*
+ * A process is initially started out with NDFILE descriptors stored within
+ * this structure, selected to be enough for typical applications based on
+ * the historical limit of 20 open files (and the usage of descriptors by
+ * shells). If these descriptors are exhausted, a larger descriptor table
+ * may be allocated, up to a process' resource limit; the internal arrays
+ * are then unused.
+ */
+#define NDFILE 20
+#define NDSLOTSIZE sizeof(NDSLOTTYPE)
+#define NDENTRIES (NDSLOTSIZE * __CHAR_BIT)
+#define NDSLOT(x) ((x) / NDENTRIES)
+#define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES))
+#define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES)
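+
+/*
+ * Worked example (illustration only, assuming a 64-bit NDSLOTTYPE so that
+ * NDENTRIES == 64): descriptor 70 lives in bitmap word NDSLOT(70) == 1 as
+ * bit NDBIT(70) == (1 << 6), and a table of 70 descriptors needs
+ * NDSLOTS(70) == 2 bitmap words.
+ */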
+
+/*
+ * Storage required per open file descriptor.
+ */
+#define OFILESIZE (sizeof(struct file *) + sizeof(char))
+
+/*
+ * Storage to hold unused ofiles that need to be reclaimed.
+ */
+struct freetable {
+ struct file **ft_table;
+ SLIST_ENTRY(freetable) ft_next;
+};
+
+/*
+ * Basic allocation of descriptors:
+ * one of the above, plus arrays for NDFILE descriptors.
+ */
+struct filedesc0 {
+ struct filedesc fd_fd;
+ /*
+ * ofiles which need to be reclaimed on free.
+ */
+ SLIST_HEAD(,freetable) fd_free;
+ /*
+ * These arrays are used when the number of open files is
+ * <= NDFILE, and are then pointed to by the pointers above.
+ */
+ struct file *fd_dfiles[NDFILE];
+ char fd_dfileflags[NDFILE];
+ NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
+};
+
+/*
+ * Descriptor management.
+ */
+volatile int openfiles; /* actual number of open files */
+struct mtx sigio_lock; /* mtx to protect pointers to sigio */
+#ifndef __rtems__
+void (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
+
+/* A mutex to protect the association between a proc and filedesc. */
+static struct mtx fdesc_mtx;
+
+/*
+ * Find the first zero bit in the given bitmap, starting at low and not
+ * exceeding size - 1.
+ */
+static int
+fd_first_free(struct filedesc *fdp, int low, int size)
+{
+ NDSLOTTYPE *map = fdp->fd_map;
+ NDSLOTTYPE mask;
+ int off, maxoff;
+
+ if (low >= size)
+ return (low);
+
+ off = NDSLOT(low);
+ if (low % NDENTRIES) {
+ mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
+ if ((mask &= ~map[off]) != 0UL)
+ return (off * NDENTRIES + ffsl(mask) - 1);
+ ++off;
+ }
+ for (maxoff = NDSLOTS(size); off < maxoff; ++off)
+ if (map[off] != ~0UL)
+ return (off * NDENTRIES + ffsl(~map[off]) - 1);
+ return (size);
+}
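+
+/*
+ * (Illustration, not upstream text, with NDENTRIES == 64: if descriptors
+ * 0-3 are in use, map[0] == 0xf, and fd_first_free(fdp, 3, 20) masks off
+ * bits below 3, finds bit 4 clear via ffsl(), and returns 4.)
+ */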
+
+/*
+ * Find the highest non-zero bit in the given bitmap, starting at low and
+ * not exceeding size - 1.
+ */
+static int
+fd_last_used(struct filedesc *fdp, int low, int size)
+{
+ NDSLOTTYPE *map = fdp->fd_map;
+ NDSLOTTYPE mask;
+ int off, minoff;
+
+ if (low >= size)
+ return (-1);
+
+ off = NDSLOT(size);
+ if (size % NDENTRIES) {
+ mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
+ if ((mask &= map[off]) != 0)
+ return (off * NDENTRIES + flsl(mask) - 1);
+ --off;
+ }
+ for (minoff = NDSLOT(low); off >= minoff; --off)
+ if (map[off] != 0)
+ return (off * NDENTRIES + flsl(map[off]) - 1);
+ return (low - 1);
+}
+
+static int
+fdisused(struct filedesc *fdp, int fd)
+{
+ KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
+ ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
+ return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
+}
+
+/*
+ * Mark a file descriptor as used.
+ */
+static void
+fdused(struct filedesc *fdp, int fd)
+{
+
+ FILEDESC_XLOCK_ASSERT(fdp);
+ KASSERT(!fdisused(fdp, fd),
+ ("fd already used"));
+
+ fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
+ if (fd > fdp->fd_lastfile)
+ fdp->fd_lastfile = fd;
+ if (fd == fdp->fd_freefile)
+ fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
+}
+
+/*
+ * Mark a file descriptor as unused.
+ */
+static void
+fdunused(struct filedesc *fdp, int fd)
+{
+
+ FILEDESC_XLOCK_ASSERT(fdp);
+ KASSERT(fdisused(fdp, fd),
+ ("fd is already unused"));
+ KASSERT(fdp->fd_ofiles[fd] == NULL,
+ ("fd is still in use"));
+
+ fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
+ if (fd < fdp->fd_freefile)
+ fdp->fd_freefile = fd;
+ if (fd == fdp->fd_lastfile)
+ fdp->fd_lastfile = fd_last_used(fdp, 0, fd);
+}
+
+/*
+ * System calls on descriptors.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct getdtablesize_args {
+ int dummy;
+};
+#endif
+/* ARGSUSED */
+int
+getdtablesize(struct thread *td, struct getdtablesize_args *uap)
+{
+ struct proc *p = td->td_proc;
+
+ PROC_LOCK(p);
+ td->td_retval[0] =
+ min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+ PROC_UNLOCK(p);
+ return (0);
+}
+
+/*
+ * Duplicate a file descriptor to a particular value.
+ *
+ * Note: keep in mind that a potential race condition exists when closing
+ * descriptors from a shared descriptor table (via rfork).
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct dup2_args {
+ u_int from;
+ u_int to;
+};
+#endif
+/* ARGSUSED */
+int
+dup2(struct thread *td, struct dup2_args *uap)
+{
+
+ return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
+ td->td_retval));
+}
+
+/*
+ * Duplicate a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct dup_args {
+ u_int fd;
+};
+#endif
+/* ARGSUSED */
+int
+dup(struct thread *td, struct dup_args *uap)
+{
+
+ return (do_dup(td, 0, (int)uap->fd, 0, td->td_retval));
+}
+
+/*
+ * The file control system call.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fcntl_args {
+ int fd;
+ int cmd;
+ long arg;
+};
+#endif
+/* ARGSUSED */
+int
+fcntl(struct thread *td, struct fcntl_args *uap)
+{
+ struct flock fl;
+ struct oflock ofl;
+ intptr_t arg;
+ int error;
+ int cmd;
+
+ error = 0;
+ cmd = uap->cmd;
+ switch (uap->cmd) {
+ case F_OGETLK:
+ case F_OSETLK:
+ case F_OSETLKW:
+ /*
+ * Convert old flock structure to new.
+ */
+ error = copyin((void *)(intptr_t)uap->arg, &ofl, sizeof(ofl));
+ fl.l_start = ofl.l_start;
+ fl.l_len = ofl.l_len;
+ fl.l_pid = ofl.l_pid;
+ fl.l_type = ofl.l_type;
+ fl.l_whence = ofl.l_whence;
+ fl.l_sysid = 0;
+
+ switch (uap->cmd) {
+ case F_OGETLK:
+ cmd = F_GETLK;
+ break;
+ case F_OSETLK:
+ cmd = F_SETLK;
+ break;
+ case F_OSETLKW:
+ cmd = F_SETLKW;
+ break;
+ }
+ arg = (intptr_t)&fl;
+ break;
+ case F_GETLK:
+ case F_SETLK:
+ case F_SETLKW:
+ case F_SETLK_REMOTE:
+ error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
+ arg = (intptr_t)&fl;
+ break;
+ default:
+ arg = uap->arg;
+ break;
+ }
+ if (error)
+ return (error);
+ error = kern_fcntl(td, uap->fd, cmd, arg);
+ if (error)
+ return (error);
+ if (uap->cmd == F_OGETLK) {
+ ofl.l_start = fl.l_start;
+ ofl.l_len = fl.l_len;
+ ofl.l_pid = fl.l_pid;
+ ofl.l_type = fl.l_type;
+ ofl.l_whence = fl.l_whence;
+ error = copyout(&ofl, (void *)(intptr_t)uap->arg, sizeof(ofl));
+ } else if (uap->cmd == F_GETLK) {
+ error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
+ }
+ return (error);
+}
+
+static inline struct file *
+fdtofp(int fd, struct filedesc *fdp)
+{
+ struct file *fp;
+
+ FILEDESC_LOCK_ASSERT(fdp);
+ if ((unsigned)fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[fd]) == NULL)
+ return (NULL);
+ return (fp);
+}
+
+int
+kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
+{
+ struct filedesc *fdp;
+ struct flock *flp;
+ struct file *fp;
+ struct proc *p;
+ char *pop;
+ struct vnode *vp;
+ int error, flg, tmp;
+ int vfslocked;
+ u_int old, new;
+ uint64_t bsize;
+
+ vfslocked = 0;
+ error = 0;
+ flg = F_POSIX;
+ p = td->td_proc;
+ fdp = p->p_fd;
+
+ switch (cmd) {
+ case F_DUPFD:
+ tmp = arg;
+ error = do_dup(td, DUP_FCNTL, fd, tmp, td->td_retval);
+ break;
+
+ case F_DUP2FD:
+ tmp = arg;
+ error = do_dup(td, DUP_FIXED, fd, tmp, td->td_retval);
+ break;
+
+ case F_GETFD:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ pop = &fdp->fd_ofileflags[fd];
+ td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
+ FILEDESC_SUNLOCK(fdp);
+ break;
+
+ case F_SETFD:
+ FILEDESC_XLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ pop = &fdp->fd_ofileflags[fd];
+ *pop = (*pop &~ UF_EXCLOSE) |
+ (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
+ FILEDESC_XUNLOCK(fdp);
+ break;
+
+ case F_GETFL:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ td->td_retval[0] = OFLAGS(fp->f_flag);
+ FILEDESC_SUNLOCK(fdp);
+ break;
+
+ case F_SETFL:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ do {
+ tmp = flg = fp->f_flag;
+ tmp &= ~FCNTLFLAGS;
+ tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
+ } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
+ tmp = fp->f_flag & FNONBLOCK;
+ error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
+ if (error) {
+ fdrop(fp, td);
+ break;
+ }
+ tmp = fp->f_flag & FASYNC;
+ error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
+ if (error == 0) {
+ fdrop(fp, td);
+ break;
+ }
+ atomic_clear_int(&fp->f_flag, FNONBLOCK);
+ tmp = 0;
+ (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
+ fdrop(fp, td);
+ break;
+
+ case F_GETOWN:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
+ if (error == 0)
+ td->td_retval[0] = tmp;
+ fdrop(fp, td);
+ break;
+
+ case F_SETOWN:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ tmp = arg;
+ error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
+ fdrop(fp, td);
+ break;
+
+ case F_SETLK_REMOTE:
+ error = priv_check(td, PRIV_NFS_LOCKD);
+ if (error)
+ return (error);
+ flg = F_REMOTE;
+ goto do_setlk;
+
+ case F_SETLKW:
+ flg |= F_WAIT;
+ /* FALLTHROUGH F_SETLK */
+
+ case F_SETLK:
+ do_setlk:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ if (fp->f_type != DTYPE_VNODE) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ flp = (struct flock *)arg;
+ if (flp->l_whence == SEEK_CUR) {
+ if (fp->f_offset < 0 ||
+ (flp->l_start > 0 &&
+ fp->f_offset > OFF_MAX - flp->l_start)) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EOVERFLOW;
+ break;
+ }
+ flp->l_start += fp->f_offset;
+ }
+
+ /*
+ * VOP_ADVLOCK() may block.
+ */
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ vp = fp->f_vnode;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ switch (flp->l_type) {
+ case F_RDLCK:
+ if ((fp->f_flag & FREAD) == 0) {
+ error = EBADF;
+ break;
+ }
+ PROC_LOCK(p->p_leader);
+ p->p_leader->p_flag |= P_ADVLOCK;
+ PROC_UNLOCK(p->p_leader);
+ error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
+ flp, flg);
+ break;
+ case F_WRLCK:
+ if ((fp->f_flag & FWRITE) == 0) {
+ error = EBADF;
+ break;
+ }
+ PROC_LOCK(p->p_leader);
+ p->p_leader->p_flag |= P_ADVLOCK;
+ PROC_UNLOCK(p->p_leader);
+ error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
+ flp, flg);
+ break;
+ case F_UNLCK:
+ error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
+ flp, flg);
+ break;
+ case F_UNLCKSYS:
+ /*
+ * Temporary api for testing remote lock
+ * infrastructure.
+ */
+ if (flg != F_REMOTE) {
+ error = EINVAL;
+ break;
+ }
+ error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
+ F_UNLCKSYS, flp, flg);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ VFS_UNLOCK_GIANT(vfslocked);
+ vfslocked = 0;
+ /* Check for race with close */
+ FILEDESC_SLOCK(fdp);
+ if ((unsigned) fd >= fdp->fd_nfiles ||
+ fp != fdp->fd_ofiles[fd]) {
+ FILEDESC_SUNLOCK(fdp);
+ flp->l_whence = SEEK_SET;
+ flp->l_start = 0;
+ flp->l_len = 0;
+ flp->l_type = F_UNLCK;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
+ F_UNLCK, flp, F_POSIX);
+ VFS_UNLOCK_GIANT(vfslocked);
+ vfslocked = 0;
+ } else
+ FILEDESC_SUNLOCK(fdp);
+ fdrop(fp, td);
+ break;
+
+ case F_GETLK:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ if (fp->f_type != DTYPE_VNODE) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ flp = (struct flock *)arg;
+ if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
+ flp->l_type != F_UNLCK) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EINVAL;
+ break;
+ }
+ if (flp->l_whence == SEEK_CUR) {
+ if ((flp->l_start > 0 &&
+ fp->f_offset > OFF_MAX - flp->l_start) ||
+ (flp->l_start < 0 &&
+ fp->f_offset < OFF_MIN - flp->l_start)) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EOVERFLOW;
+ break;
+ }
+ flp->l_start += fp->f_offset;
+ }
+ /*
+ * VOP_ADVLOCK() may block.
+ */
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ vp = fp->f_vnode;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
+ F_POSIX);
+ VFS_UNLOCK_GIANT(vfslocked);
+ vfslocked = 0;
+ fdrop(fp, td);
+ break;
+
+ case F_RDAHEAD:
+ arg = arg ? 128 * 1024: 0;
+ /* FALLTHROUGH */
+ case F_READAHEAD:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ if (fp->f_type != DTYPE_VNODE) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ if (arg != 0) {
+ vp = fp->f_vnode;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ error = vn_lock(vp, LK_SHARED);
+ if (error != 0)
+ goto readahead_vnlock_fail;
+ bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize;
+ VOP_UNLOCK(vp, 0);
+ fp->f_seqcount = (arg + bsize - 1) / bsize;
+ do {
+ new = old = fp->f_flag;
+ new |= FRDAHEAD;
+ } while (!atomic_cmpset_rel_int(&fp->f_flag, old, new));
+readahead_vnlock_fail:
+ VFS_UNLOCK_GIANT(vfslocked);
+ vfslocked = 0;
+ } else {
+ do {
+ new = old = fp->f_flag;
+ new &= ~FRDAHEAD;
+ } while (!atomic_cmpset_rel_int(&fp->f_flag, old, new));
+ }
+ fdrop(fp, td);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ VFS_UNLOCK_GIANT(vfslocked);
+ return (error);
+}
+
+/*
+ * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
+ */
+static int
+do_dup(struct thread *td, int flags, int old, int new,
+ register_t *retval)
+{
+ struct filedesc *fdp;
+ struct proc *p;
+ struct file *fp;
+ struct file *delfp;
+ int error, holdleaders, maxfd;
+
+ p = td->td_proc;
+ fdp = p->p_fd;
+
+ /*
+ * Verify we have a valid descriptor to dup from and possibly to
+ * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
+ * return EINVAL when the new descriptor is out of bounds.
+ */
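+	/*
+	 * (Illustration, not upstream text: with the DUP_FIXED/DUP_FCNTL
+	 * distinction below, dup2(fd, -1) fails with EBADF while
+	 * fcntl(fd, F_DUPFD, -1) fails with EINVAL, and a target beyond the
+	 * RLIMIT_NOFILE/maxfilesperproc limit yields EMFILE vs. EINVAL
+	 * respectively.)
+	 */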
+ if (old < 0)
+ return (EBADF);
+ if (new < 0)
+ return (flags & DUP_FCNTL ? EINVAL : EBADF);
+ PROC_LOCK(p);
+ maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+ PROC_UNLOCK(p);
+ if (new >= maxfd)
+ return (flags & DUP_FCNTL ? EINVAL : EMFILE);
+
+ FILEDESC_XLOCK(fdp);
+ if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EBADF);
+ }
+ if (flags & DUP_FIXED && old == new) {
+ *retval = new;
+ FILEDESC_XUNLOCK(fdp);
+ return (0);
+ }
+ fp = fdp->fd_ofiles[old];
+ fhold(fp);
+
+ /*
+ * If the caller specified a file descriptor, make sure the file
+ * table is large enough to hold it, and grab it. Otherwise, just
+ * allocate a new descriptor the usual way. Since the filedesc
+ * lock may be temporarily dropped in the process, we have to look
+ * out for a race.
+ */
+ if (flags & DUP_FIXED) {
+ if (new >= fdp->fd_nfiles)
+ fdgrowtable(fdp, new + 1);
+ if (fdp->fd_ofiles[new] == NULL)
+ fdused(fdp, new);
+ } else {
+ if ((error = fdalloc(td, new, &new)) != 0) {
+ FILEDESC_XUNLOCK(fdp);
+ fdrop(fp, td);
+ return (error);
+ }
+ }
+
+ /*
+ * If the old file changed out from under us then treat it as a
+ * bad file descriptor. Userland should do its own locking to
+ * avoid this case.
+ */
+ if (fdp->fd_ofiles[old] != fp) {
+ /* we've allocated a descriptor which we won't use */
+ if (fdp->fd_ofiles[new] == NULL)
+ fdunused(fdp, new);
+ FILEDESC_XUNLOCK(fdp);
+ fdrop(fp, td);
+ return (EBADF);
+ }
+ KASSERT(old != new,
+ ("new fd is same as old"));
+
+ /*
+ * Save info on the descriptor being overwritten. We cannot close
+ * it without introducing an ownership race for the slot, since we
+ * need to drop the filedesc lock to call closef().
+ *
+ * XXX this duplicates parts of close().
+ */
+ delfp = fdp->fd_ofiles[new];
+ holdleaders = 0;
+ if (delfp != NULL) {
+ if (td->td_proc->p_fdtol != NULL) {
+ /*
+ * Ask fdfree() to sleep to ensure that all relevant
+ * process leaders can be traversed in closef().
+ */
+ fdp->fd_holdleaderscount++;
+ holdleaders = 1;
+ }
+ }
+
+ /*
+ * Duplicate the source descriptor
+ */
+ fdp->fd_ofiles[new] = fp;
+ fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
+ if (new > fdp->fd_lastfile)
+ fdp->fd_lastfile = new;
+ *retval = new;
+
+ /*
+ * If we dup'd over a valid file, we now own the reference to it
+ * and must dispose of it using closef() semantics (as if a
+ * close() were performed on it).
+ *
+ * XXX this duplicates parts of close().
+ */
+ if (delfp != NULL) {
+ knote_fdclose(td, new);
+ if (delfp->f_type == DTYPE_MQUEUE)
+ mq_fdclose(td, new, delfp);
+ FILEDESC_XUNLOCK(fdp);
+ (void) closef(delfp, td);
+ if (holdleaders) {
+ FILEDESC_XLOCK(fdp);
+ fdp->fd_holdleaderscount--;
+ if (fdp->fd_holdleaderscount == 0 &&
+ fdp->fd_holdleaderswakeup != 0) {
+ fdp->fd_holdleaderswakeup = 0;
+ wakeup(&fdp->fd_holdleaderscount);
+ }
+ FILEDESC_XUNLOCK(fdp);
+ }
+ } else {
+ FILEDESC_XUNLOCK(fdp);
+ }
+ return (0);
+}
+
+/*
+ * If sigio is on the list associated with a process or process group,
+ * disable signalling from the device, remove sigio from the list and
+ * free sigio.
+ */
+void
+funsetown(struct sigio **sigiop)
+{
+ struct sigio *sigio;
+
+ SIGIO_LOCK();
+ sigio = *sigiop;
+ if (sigio == NULL) {
+ SIGIO_UNLOCK();
+ return;
+ }
+ *(sigio->sio_myref) = NULL;
+ if ((sigio)->sio_pgid < 0) {
+ struct pgrp *pg = (sigio)->sio_pgrp;
+ PGRP_LOCK(pg);
+ SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
+ sigio, sio_pgsigio);
+ PGRP_UNLOCK(pg);
+ } else {
+ struct proc *p = (sigio)->sio_proc;
+ PROC_LOCK(p);
+ SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
+ sigio, sio_pgsigio);
+ PROC_UNLOCK(p);
+ }
+ SIGIO_UNLOCK();
+ crfree(sigio->sio_ucred);
+ free(sigio, M_SIGIO);
+}
+
+/*
+ * Free a list of sigio structures.
+ * We only need to lock the SIGIO_LOCK because we have made ourselves
+ * inaccessible to callers of fsetown and therefore do not need to lock
+ * the proc or pgrp struct for the list manipulation.
+ */
+void
+funsetownlst(struct sigiolst *sigiolst)
+{
+ struct proc *p;
+ struct pgrp *pg;
+ struct sigio *sigio;
+
+ sigio = SLIST_FIRST(sigiolst);
+ if (sigio == NULL)
+ return;
+ p = NULL;
+ pg = NULL;
+
+ /*
+ * Every entry of the list should belong
+ * to a single proc or pgrp.
+ */
+ if (sigio->sio_pgid < 0) {
+ pg = sigio->sio_pgrp;
+ PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
+ } else /* if (sigio->sio_pgid > 0) */ {
+ p = sigio->sio_proc;
+ PROC_LOCK_ASSERT(p, MA_NOTOWNED);
+ }
+
+ SIGIO_LOCK();
+ while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
+ *(sigio->sio_myref) = NULL;
+ if (pg != NULL) {
+ KASSERT(sigio->sio_pgid < 0,
+ ("Proc sigio in pgrp sigio list"));
+ KASSERT(sigio->sio_pgrp == pg,
+ ("Bogus pgrp in sigio list"));
+ PGRP_LOCK(pg);
+ SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
+ sio_pgsigio);
+ PGRP_UNLOCK(pg);
+ } else /* if (p != NULL) */ {
+ KASSERT(sigio->sio_pgid > 0,
+ ("Pgrp sigio in proc sigio list"));
+ KASSERT(sigio->sio_proc == p,
+ ("Bogus proc in sigio list"));
+ PROC_LOCK(p);
+ SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
+ sio_pgsigio);
+ PROC_UNLOCK(p);
+ }
+ SIGIO_UNLOCK();
+ crfree(sigio->sio_ucred);
+ free(sigio, M_SIGIO);
+ SIGIO_LOCK();
+ }
+ SIGIO_UNLOCK();
+}
+
+/*
+ * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
+ *
+ * After permission checking, add a sigio structure to the sigio list for
+ * the process or process group.
+ */
+int
+fsetown(pid_t pgid, struct sigio **sigiop)
+{
+ struct proc *proc;
+ struct pgrp *pgrp;
+ struct sigio *sigio;
+ int ret;
+
+ if (pgid == 0) {
+ funsetown(sigiop);
+ return (0);
+ }
+
+ ret = 0;
+
+ /* Allocate and fill in the new sigio out of locks. */
+ sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK);
+ sigio->sio_pgid = pgid;
+ sigio->sio_ucred = crhold(curthread->td_ucred);
+ sigio->sio_myref = sigiop;
+
+ sx_slock(&proctree_lock);
+ if (pgid > 0) {
+ proc = pfind(pgid);
+ if (proc == NULL) {
+ ret = ESRCH;
+ goto fail;
+ }
+
+ /*
+ * Policy - Don't allow a process to FSETOWN a process
+ * in another session.
+ *
+ * Remove this test to allow maximum flexibility or
+ * restrict FSETOWN to the current process or process
+ * group for maximum safety.
+ */
+ PROC_UNLOCK(proc);
+ if (proc->p_session != curthread->td_proc->p_session) {
+ ret = EPERM;
+ goto fail;
+ }
+
+ pgrp = NULL;
+ } else /* if (pgid < 0) */ {
+ pgrp = pgfind(-pgid);
+ if (pgrp == NULL) {
+ ret = ESRCH;
+ goto fail;
+ }
+ PGRP_UNLOCK(pgrp);
+
+ /*
+ * Policy - Don't allow a process to FSETOWN a process
+ * in another session.
+ *
+ * Remove this test to allow maximum flexibility or
+ * restrict FSETOWN to the current process or process
+ * group for maximum safety.
+ */
+ if (pgrp->pg_session != curthread->td_proc->p_session) {
+ ret = EPERM;
+ goto fail;
+ }
+
+ proc = NULL;
+ }
+ funsetown(sigiop);
+ if (pgid > 0) {
+ PROC_LOCK(proc);
+ /*
+ * Since funsetownlst() is called without the proctree
+ * locked, we need to check for P_WEXIT.
+ * XXX: is ESRCH correct?
+ */
+ if ((proc->p_flag & P_WEXIT) != 0) {
+ PROC_UNLOCK(proc);
+ ret = ESRCH;
+ goto fail;
+ }
+ SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
+ sigio->sio_proc = proc;
+ PROC_UNLOCK(proc);
+ } else {
+ PGRP_LOCK(pgrp);
+ SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
+ sigio->sio_pgrp = pgrp;
+ PGRP_UNLOCK(pgrp);
+ }
+ sx_sunlock(&proctree_lock);
+ SIGIO_LOCK();
+ *sigiop = sigio;
+ SIGIO_UNLOCK();
+ return (0);
+
+fail:
+ sx_sunlock(&proctree_lock);
+ crfree(sigio->sio_ucred);
+ free(sigio, M_SIGIO);
+ return (ret);
+}
+
+/*
+ * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
+ */
+pid_t
+fgetown(sigiop)
+ struct sigio **sigiop;
+{
+ pid_t pgid;
+
+ SIGIO_LOCK();
+ pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
+ SIGIO_UNLOCK();
+ return (pgid);
+}
+
+/*
+ * Close a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct close_args {
+ int fd;
+};
+#endif
+/* ARGSUSED */
+int
+close(td, uap)
+ struct thread *td;
+ struct close_args *uap;
+{
+
+ return (kern_close(td, uap->fd));
+}
+
+int
+kern_close(td, fd)
+ struct thread *td;
+ int fd;
+{
+ struct filedesc *fdp;
+ struct file *fp;
+ int error;
+ int holdleaders;
+
+ error = 0;
+ holdleaders = 0;
+ fdp = td->td_proc->p_fd;
+
+ AUDIT_SYSCLOSE(td, fd);
+
+ FILEDESC_XLOCK(fdp);
+ if ((unsigned)fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[fd]) == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EBADF);
+ }
+ fdp->fd_ofiles[fd] = NULL;
+ fdp->fd_ofileflags[fd] = 0;
+ fdunused(fdp, fd);
+ if (td->td_proc->p_fdtol != NULL) {
+ /*
+ * Ask fdfree() to sleep to ensure that all relevant
+ * process leaders can be traversed in closef().
+ */
+ fdp->fd_holdleaderscount++;
+ holdleaders = 1;
+ }
+
+ /*
+ * We now hold the fp reference that used to be owned by the
+ * descriptor array. We have to unlock the FILEDESC *AFTER*
+ * knote_fdclose to prevent a race of the fd getting opened, a knote
+	 * added, and deleting a knote for the new fd.
+ */
+ knote_fdclose(td, fd);
+ if (fp->f_type == DTYPE_MQUEUE)
+ mq_fdclose(td, fd, fp);
+ FILEDESC_XUNLOCK(fdp);
+
+ error = closef(fp, td);
+ if (holdleaders) {
+ FILEDESC_XLOCK(fdp);
+ fdp->fd_holdleaderscount--;
+ if (fdp->fd_holdleaderscount == 0 &&
+ fdp->fd_holdleaderswakeup != 0) {
+ fdp->fd_holdleaderswakeup = 0;
+ wakeup(&fdp->fd_holdleaderscount);
+ }
+ FILEDESC_XUNLOCK(fdp);
+ }
+ return (error);
+}
+
+/*
+ * Close open file descriptors.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct closefrom_args {
+ int lowfd;
+};
+#endif
+/* ARGSUSED */
+int
+closefrom(struct thread *td, struct closefrom_args *uap)
+{
+ struct filedesc *fdp;
+ int fd;
+
+ fdp = td->td_proc->p_fd;
+ AUDIT_ARG_FD(uap->lowfd);
+
+ /*
+	 * Treat negative starting file descriptor values identically to
+	 * closefrom(0), which closes all files.
+ */
+ if (uap->lowfd < 0)
+ uap->lowfd = 0;
+ FILEDESC_SLOCK(fdp);
+ for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) {
+ if (fdp->fd_ofiles[fd] != NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ (void)kern_close(td, fd);
+ FILEDESC_SLOCK(fdp);
+ }
+ }
+ FILEDESC_SUNLOCK(fdp);
+ return (0);
+}
+
+#if defined(COMPAT_43)
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct ofstat_args {
+ int fd;
+ struct ostat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+ofstat(struct thread *td, struct ofstat_args *uap)
+{
+ struct ostat oub;
+ struct stat ub;
+ int error;
+
+ error = kern_fstat(td, uap->fd, &ub);
+ if (error == 0) {
+ cvtstat(&ub, &oub);
+ error = copyout(&oub, uap->sb, sizeof(oub));
+ }
+ return (error);
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fstat_args {
+ int fd;
+ struct stat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+fstat(struct thread *td, struct fstat_args *uap)
+{
+ struct stat ub;
+ int error;
+
+ error = kern_fstat(td, uap->fd, &ub);
+ if (error == 0)
+ error = copyout(&ub, uap->sb, sizeof(ub));
+ return (error);
+}
+
+int
+kern_fstat(struct thread *td, int fd, struct stat *sbp)
+{
+ struct file *fp;
+ int error;
+
+ AUDIT_ARG_FD(fd);
+
+ if ((error = fget(td, fd, &fp)) != 0)
+ return (error);
+
+ AUDIT_ARG_FILE(td->td_proc, fp);
+
+ error = fo_stat(fp, sbp, td->td_ucred, td);
+ fdrop(fp, td);
+#ifdef KTRACE
+ if (error == 0 && KTRPOINT(td, KTR_STRUCT))
+ ktrstat(sbp);
+#endif
+ return (error);
+}
+
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct nfstat_args {
+ int fd;
+ struct nstat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+nfstat(struct thread *td, struct nfstat_args *uap)
+{
+ struct nstat nub;
+ struct stat ub;
+ int error;
+
+ error = kern_fstat(td, uap->fd, &ub);
+ if (error == 0) {
+ cvtnstat(&ub, &nub);
+ error = copyout(&nub, uap->sb, sizeof(nub));
+ }
+ return (error);
+}
+
+/*
+ * Return pathconf information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fpathconf_args {
+ int fd;
+ int name;
+};
+#endif
+/* ARGSUSED */
+int
+fpathconf(struct thread *td, struct fpathconf_args *uap)
+{
+ struct file *fp;
+ struct vnode *vp;
+ int error;
+
+ if ((error = fget(td, uap->fd, &fp)) != 0)
+ return (error);
+
+ /* If asynchronous I/O is available, it works for all descriptors. */
+ if (uap->name == _PC_ASYNC_IO) {
+ td->td_retval[0] = async_io_version;
+ goto out;
+ }
+ vp = fp->f_vnode;
+ if (vp != NULL) {
+ int vfslocked;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vn_lock(vp, LK_SHARED | LK_RETRY);
+ error = VOP_PATHCONF(vp, uap->name, td->td_retval);
+ VOP_UNLOCK(vp, 0);
+ VFS_UNLOCK_GIANT(vfslocked);
+ } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
+ if (uap->name != _PC_PIPE_BUF) {
+ error = EINVAL;
+ } else {
+ td->td_retval[0] = PIPE_BUF;
+ error = 0;
+ }
+ } else {
+ error = EOPNOTSUPP;
+ }
+out:
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Grow the file table to accommodate (at least) nfd descriptors.  This may
+ * block and drop the filedesc lock, but it will reacquire it before
+ * returning.
+ */
+static void
+fdgrowtable(struct filedesc *fdp, int nfd)
+{
+ struct filedesc0 *fdp0;
+ struct freetable *fo;
+ struct file **ntable;
+ struct file **otable;
+ char *nfileflags;
+ int nnfiles, onfiles;
+ NDSLOTTYPE *nmap;
+
+ FILEDESC_XLOCK_ASSERT(fdp);
+
+ KASSERT(fdp->fd_nfiles > 0,
+ ("zero-length file table"));
+
+ /* compute the size of the new table */
+ onfiles = fdp->fd_nfiles;
+ nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
+ if (nnfiles <= onfiles)
+ /* the table is already large enough */
+ return;
+
+ /* allocate a new table and (if required) new bitmaps */
+ FILEDESC_XUNLOCK(fdp);
+ ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable),
+ M_FILEDESC, M_ZERO | M_WAITOK);
+ nfileflags = (char *)&ntable[nnfiles];
+ if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
+ nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE,
+ M_FILEDESC, M_ZERO | M_WAITOK);
+ else
+ nmap = NULL;
+ FILEDESC_XLOCK(fdp);
+
+ /*
+ * We now have new tables ready to go. Since we dropped the
+ * filedesc lock to call malloc(), watch out for a race.
+ */
+ onfiles = fdp->fd_nfiles;
+ if (onfiles >= nnfiles) {
+ /* we lost the race, but that's OK */
+ free(ntable, M_FILEDESC);
+ if (nmap != NULL)
+ free(nmap, M_FILEDESC);
+ return;
+ }
+ bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
+ bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
+ otable = fdp->fd_ofiles;
+ fdp->fd_ofileflags = nfileflags;
+ fdp->fd_ofiles = ntable;
+ /*
+ * We must preserve ofiles until the process exits because we can't
+ * be certain that no threads have references to the old table via
+ * _fget().
+ */
+ if (onfiles > NDFILE) {
+ fo = (struct freetable *)&otable[onfiles];
+ fdp0 = (struct filedesc0 *)fdp;
+ fo->ft_table = otable;
+ SLIST_INSERT_HEAD(&fdp0->fd_free, fo, ft_next);
+ }
+ if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
+ bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
+ if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
+ free(fdp->fd_map, M_FILEDESC);
+ fdp->fd_map = nmap;
+ }
+ fdp->fd_nfiles = nnfiles;
+}
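+
+/*
+ * (Illustration, not upstream text: the old ofiles array is parked on the
+ * filedesc0 fd_free list above instead of being freed immediately because
+ * fget_unlocked() may still be walking it without holding the filedesc
+ * lock; fddrop() frees the accumulated tables once the last hold on the
+ * filedesc is released.)
+ */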
+
+/*
+ * Allocate a file descriptor for the process.
+ */
+int
+fdalloc(struct thread *td, int minfd, int *result)
+{
+ struct proc *p = td->td_proc;
+ struct filedesc *fdp = p->p_fd;
+ int fd = -1, maxfd;
+
+ FILEDESC_XLOCK_ASSERT(fdp);
+
+ if (fdp->fd_freefile > minfd)
+ minfd = fdp->fd_freefile;
+
+ PROC_LOCK(p);
+ maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+ PROC_UNLOCK(p);
+
+ /*
+ * Search the bitmap for a free descriptor. If none is found, try
+ * to grow the file table. Keep at it until we either get a file
+ * descriptor or run into process or system limits; fdgrowtable()
+ * may drop the filedesc lock, so we're in a race.
+ */
+ for (;;) {
+ fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
+ if (fd >= maxfd)
+ return (EMFILE);
+ if (fd < fdp->fd_nfiles)
+ break;
+ fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd));
+ }
+
+ /*
+ * Perform some sanity checks, then mark the file descriptor as
+ * used and return it to the caller.
+ */
+ KASSERT(!fdisused(fdp, fd),
+ ("fd_first_free() returned non-free descriptor"));
+ KASSERT(fdp->fd_ofiles[fd] == NULL,
+ ("free descriptor isn't"));
+ fdp->fd_ofileflags[fd] = 0; /* XXX needed? */
+ fdused(fdp, fd);
+ *result = fd;
+ return (0);
+}
+
+/*
+ * Check to see whether n user file descriptors are available to the process
+ * p.
+ */
+int
+fdavail(struct thread *td, int n)
+{
+ struct proc *p = td->td_proc;
+ struct filedesc *fdp = td->td_proc->p_fd;
+ struct file **fpp;
+ int i, lim, last;
+
+ FILEDESC_LOCK_ASSERT(fdp);
+
+ PROC_LOCK(p);
+ lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+ PROC_UNLOCK(p);
+ if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
+ return (1);
+ last = min(fdp->fd_nfiles, lim);
+ fpp = &fdp->fd_ofiles[fdp->fd_freefile];
+ for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
+ if (*fpp == NULL && --n <= 0)
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * Create a new open file structure and allocate a file descriptor for the
+ * process that refers to it.  We add one reference to the file for the
+ * descriptor table and one reference for resultfp.  This is to prevent us
+ * from being preempted and the entry in the descriptor table being closed
+ * after we release the FILEDESC lock.
+ */
+int
+falloc(struct thread *td, struct file **resultfp, int *resultfd)
+{
+ struct proc *p = td->td_proc;
+ struct file *fp;
+ int error, i;
+ int maxuserfiles = maxfiles - (maxfiles / 20);
+ static struct timeval lastfail;
+ static int curfail;
+
+ fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
+ if ((openfiles >= maxuserfiles &&
+ priv_check(td, PRIV_MAXFILES) != 0) ||
+ openfiles >= maxfiles) {
+ if (ppsratecheck(&lastfail, &curfail, 1)) {
+ printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
+ td->td_ucred->cr_ruid);
+ }
+ uma_zfree(file_zone, fp);
+ return (ENFILE);
+ }
+ atomic_add_int(&openfiles, 1);
+
+ /*
+ * If the process has file descriptor zero open, add the new file
+ * descriptor to the list of open files at that point, otherwise
+ * put it at the front of the list of open files.
+ */
+ refcount_init(&fp->f_count, 1);
+ if (resultfp)
+ fhold(fp);
+ fp->f_cred = crhold(td->td_ucred);
+ fp->f_ops = &badfileops;
+ fp->f_data = NULL;
+ fp->f_vnode = NULL;
+ FILEDESC_XLOCK(p->p_fd);
+ if ((error = fdalloc(td, 0, &i))) {
+ FILEDESC_XUNLOCK(p->p_fd);
+
+ fdrop(fp, td);
+ if (resultfp)
+ fdrop(fp, td);
+ return (error);
+ }
+ p->p_fd->fd_ofiles[i] = fp;
+ FILEDESC_XUNLOCK(p->p_fd);
+ if (resultfp)
+ *resultfp = fp;
+ if (resultfd)
+ *resultfd = i;
+ return (0);
+}
+
+/*
+ * Build a new filedesc structure from another.
+ * Copy the current, root, and jail root vnode references.
+ */
+struct filedesc *
+fdinit(struct filedesc *fdp)
+{
+ struct filedesc0 *newfdp;
+
+ newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
+ FILEDESC_LOCK_INIT(&newfdp->fd_fd);
+ if (fdp != NULL) {
+ FILEDESC_XLOCK(fdp);
+ newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
+ if (newfdp->fd_fd.fd_cdir)
+ VREF(newfdp->fd_fd.fd_cdir);
+ newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
+ if (newfdp->fd_fd.fd_rdir)
+ VREF(newfdp->fd_fd.fd_rdir);
+ newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
+ if (newfdp->fd_fd.fd_jdir)
+ VREF(newfdp->fd_fd.fd_jdir);
+ FILEDESC_XUNLOCK(fdp);
+ }
+
+ /* Create the file descriptor table. */
+ newfdp->fd_fd.fd_refcnt = 1;
+ newfdp->fd_fd.fd_holdcnt = 1;
+ newfdp->fd_fd.fd_cmask = CMASK;
+ newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
+ newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
+ newfdp->fd_fd.fd_nfiles = NDFILE;
+ newfdp->fd_fd.fd_map = newfdp->fd_dmap;
+ newfdp->fd_fd.fd_lastfile = -1;
+ return (&newfdp->fd_fd);
+}
+
+static struct filedesc *
+fdhold(struct proc *p)
+{
+ struct filedesc *fdp;
+
+ mtx_lock(&fdesc_mtx);
+ fdp = p->p_fd;
+ if (fdp != NULL)
+ fdp->fd_holdcnt++;
+ mtx_unlock(&fdesc_mtx);
+ return (fdp);
+}
+
+static void
+fddrop(struct filedesc *fdp)
+{
+ struct filedesc0 *fdp0;
+ struct freetable *ft;
+ int i;
+
+ mtx_lock(&fdesc_mtx);
+ i = --fdp->fd_holdcnt;
+ mtx_unlock(&fdesc_mtx);
+ if (i > 0)
+ return;
+
+ FILEDESC_LOCK_DESTROY(fdp);
+ fdp0 = (struct filedesc0 *)fdp;
+ while ((ft = SLIST_FIRST(&fdp0->fd_free)) != NULL) {
+ SLIST_REMOVE_HEAD(&fdp0->fd_free, ft_next);
+ free(ft->ft_table, M_FILEDESC);
+ }
+ free(fdp, M_FILEDESC);
+}
+
+/*
+ * Share a filedesc structure.
+ */
+struct filedesc *
+fdshare(struct filedesc *fdp)
+{
+
+ FILEDESC_XLOCK(fdp);
+ fdp->fd_refcnt++;
+ FILEDESC_XUNLOCK(fdp);
+ return (fdp);
+}
+
+/*
+ * Unshare a filedesc structure, if necessary by making a copy
+ */
+void
+fdunshare(struct proc *p, struct thread *td)
+{
+
+ FILEDESC_XLOCK(p->p_fd);
+ if (p->p_fd->fd_refcnt > 1) {
+ struct filedesc *tmp;
+
+ FILEDESC_XUNLOCK(p->p_fd);
+ tmp = fdcopy(p->p_fd);
+ fdfree(td);
+ p->p_fd = tmp;
+ } else
+ FILEDESC_XUNLOCK(p->p_fd);
+}
+
+/*
+ * Copy a filedesc structure.  A NULL pointer as input returns a NULL
+ * reference; this is to ease callers, not to catch errors.
+ */
+struct filedesc *
+fdcopy(struct filedesc *fdp)
+{
+ struct filedesc *newfdp;
+ int i;
+
+ /* Certain daemons might not have file descriptors. */
+ if (fdp == NULL)
+ return (NULL);
+
+ newfdp = fdinit(fdp);
+ FILEDESC_SLOCK(fdp);
+ while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
+ FILEDESC_SUNLOCK(fdp);
+ FILEDESC_XLOCK(newfdp);
+ fdgrowtable(newfdp, fdp->fd_lastfile + 1);
+ FILEDESC_XUNLOCK(newfdp);
+ FILEDESC_SLOCK(fdp);
+ }
+ /* copy everything except kqueue descriptors */
+ newfdp->fd_freefile = -1;
+ for (i = 0; i <= fdp->fd_lastfile; ++i) {
+ if (fdisused(fdp, i) &&
+ fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE &&
+ fdp->fd_ofiles[i]->f_ops != &badfileops) {
+ newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
+ newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
+ fhold(newfdp->fd_ofiles[i]);
+ newfdp->fd_lastfile = i;
+ } else {
+ if (newfdp->fd_freefile == -1)
+ newfdp->fd_freefile = i;
+ }
+ }
+ newfdp->fd_cmask = fdp->fd_cmask;
+ FILEDESC_SUNLOCK(fdp);
+ FILEDESC_XLOCK(newfdp);
+ for (i = 0; i <= newfdp->fd_lastfile; ++i)
+ if (newfdp->fd_ofiles[i] != NULL)
+ fdused(newfdp, i);
+ if (newfdp->fd_freefile == -1)
+ newfdp->fd_freefile = i;
+ FILEDESC_XUNLOCK(newfdp);
+ return (newfdp);
+}
+
+/*
+ * Release a filedesc structure.
+ */
+void
+fdfree(struct thread *td)
+{
+ struct filedesc *fdp;
+ struct file **fpp;
+ int i, locked;
+ struct filedesc_to_leader *fdtol;
+ struct file *fp;
+ struct vnode *cdir, *jdir, *rdir, *vp;
+ struct flock lf;
+
+ /* Certain daemons might not have file descriptors. */
+ fdp = td->td_proc->p_fd;
+ if (fdp == NULL)
+ return;
+
+ /* Check for special need to clear POSIX style locks */
+ fdtol = td->td_proc->p_fdtol;
+ if (fdtol != NULL) {
+ FILEDESC_XLOCK(fdp);
+ KASSERT(fdtol->fdl_refcount > 0,
+ ("filedesc_to_refcount botch: fdl_refcount=%d",
+ fdtol->fdl_refcount));
+ if (fdtol->fdl_refcount == 1 &&
+ (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+ for (i = 0, fpp = fdp->fd_ofiles;
+ i <= fdp->fd_lastfile;
+ i++, fpp++) {
+ if (*fpp == NULL ||
+ (*fpp)->f_type != DTYPE_VNODE)
+ continue;
+ fp = *fpp;
+ fhold(fp);
+ FILEDESC_XUNLOCK(fdp);
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+ vp = fp->f_vnode;
+ locked = VFS_LOCK_GIANT(vp->v_mount);
+ (void) VOP_ADVLOCK(vp,
+ (caddr_t)td->td_proc->
+ p_leader,
+ F_UNLCK,
+ &lf,
+ F_POSIX);
+ VFS_UNLOCK_GIANT(locked);
+ FILEDESC_XLOCK(fdp);
+ fdrop(fp, td);
+ fpp = fdp->fd_ofiles + i;
+ }
+ }
+ retry:
+ if (fdtol->fdl_refcount == 1) {
+ if (fdp->fd_holdleaderscount > 0 &&
+ (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+ /*
+ * close() or do_dup() has cleared a reference
+ * in a shared file descriptor table.
+ */
+ fdp->fd_holdleaderswakeup = 1;
+ sx_sleep(&fdp->fd_holdleaderscount,
+ FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
+ goto retry;
+ }
+ if (fdtol->fdl_holdcount > 0) {
+ /*
+ * Ensure that fdtol->fdl_leader remains
+ * valid in closef().
+ */
+ fdtol->fdl_wakeup = 1;
+ sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
+ "fdlhold", 0);
+ goto retry;
+ }
+ }
+ fdtol->fdl_refcount--;
+ if (fdtol->fdl_refcount == 0 &&
+ fdtol->fdl_holdcount == 0) {
+ fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
+ fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
+ } else
+ fdtol = NULL;
+ td->td_proc->p_fdtol = NULL;
+ FILEDESC_XUNLOCK(fdp);
+ if (fdtol != NULL)
+ free(fdtol, M_FILEDESC_TO_LEADER);
+ }
+ FILEDESC_XLOCK(fdp);
+ i = --fdp->fd_refcnt;
+ FILEDESC_XUNLOCK(fdp);
+ if (i > 0)
+ return;
+
+ fpp = fdp->fd_ofiles;
+ for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
+ if (*fpp) {
+ FILEDESC_XLOCK(fdp);
+ fp = *fpp;
+ *fpp = NULL;
+ FILEDESC_XUNLOCK(fdp);
+ (void) closef(fp, td);
+ }
+ }
+ FILEDESC_XLOCK(fdp);
+
+ /* XXX This should happen earlier. */
+ mtx_lock(&fdesc_mtx);
+ td->td_proc->p_fd = NULL;
+ mtx_unlock(&fdesc_mtx);
+
+ if (fdp->fd_nfiles > NDFILE)
+ free(fdp->fd_ofiles, M_FILEDESC);
+ if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
+ free(fdp->fd_map, M_FILEDESC);
+
+ fdp->fd_nfiles = 0;
+
+ cdir = fdp->fd_cdir;
+ fdp->fd_cdir = NULL;
+ rdir = fdp->fd_rdir;
+ fdp->fd_rdir = NULL;
+ jdir = fdp->fd_jdir;
+ fdp->fd_jdir = NULL;
+ FILEDESC_XUNLOCK(fdp);
+
+ if (cdir) {
+ locked = VFS_LOCK_GIANT(cdir->v_mount);
+ vrele(cdir);
+ VFS_UNLOCK_GIANT(locked);
+ }
+ if (rdir) {
+ locked = VFS_LOCK_GIANT(rdir->v_mount);
+ vrele(rdir);
+ VFS_UNLOCK_GIANT(locked);
+ }
+ if (jdir) {
+ locked = VFS_LOCK_GIANT(jdir->v_mount);
+ vrele(jdir);
+ VFS_UNLOCK_GIANT(locked);
+ }
+
+ fddrop(fdp);
+}
+
+/*
+ * For setugid programs, we don't want people to use that setugidness
+ * to generate error messages which write to a file that would otherwise
+ * be off-limits to the process.  We check for filesystems where
+ * the vnode can change out from under us after execve (like [lin]procfs).
+ *
+ * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
+ * sufficient. We also don't check for setugidness since we know we are.
+ */
+static int
+is_unsafe(struct file *fp)
+{
+ if (fp->f_type == DTYPE_VNODE) {
+ struct vnode *vp = fp->f_vnode;
+
+ if ((vp->v_vflag & VV_PROCDEP) != 0)
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * Make this setugid thing safe, if at all possible.
+ */
+void
+setugidsafety(struct thread *td)
+{
+ struct filedesc *fdp;
+ int i;
+
+ /* Certain daemons might not have file descriptors. */
+ fdp = td->td_proc->p_fd;
+ if (fdp == NULL)
+ return;
+
+ /*
+ * Note: fdp->fd_ofiles may be reallocated out from under us while
+ * we are blocked in a close. Be careful!
+ */
+ FILEDESC_XLOCK(fdp);
+ for (i = 0; i <= fdp->fd_lastfile; i++) {
+ if (i > 2)
+ break;
+ if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
+ struct file *fp;
+
+ knote_fdclose(td, i);
+ /*
+ * NULL-out descriptor prior to close to avoid
+ * a race while close blocks.
+ */
+ fp = fdp->fd_ofiles[i];
+ fdp->fd_ofiles[i] = NULL;
+ fdp->fd_ofileflags[i] = 0;
+ fdunused(fdp, i);
+ FILEDESC_XUNLOCK(fdp);
+ (void) closef(fp, td);
+ FILEDESC_XLOCK(fdp);
+ }
+ }
+ FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * If a specific file object occupies a specific file descriptor, close the
+ * file descriptor entry and drop a reference on the file object. This is a
+ * convenience function for handling a subsequent error in a function that
+ * calls falloc(); it handles the race in which another thread might have
+ * closed the file descriptor out from under the thread creating the file
+ * object.
+ */
+void
+fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
+{
+
+ FILEDESC_XLOCK(fdp);
+ if (fdp->fd_ofiles[idx] == fp) {
+ fdp->fd_ofiles[idx] = NULL;
+ fdunused(fdp, idx);
+ FILEDESC_XUNLOCK(fdp);
+ fdrop(fp, td);
+ } else
+ FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * Close any files on exec?
+ */
+void
+fdcloseexec(struct thread *td)
+{
+ struct filedesc *fdp;
+ int i;
+
+ /* Certain daemons might not have file descriptors. */
+ fdp = td->td_proc->p_fd;
+ if (fdp == NULL)
+ return;
+
+ FILEDESC_XLOCK(fdp);
+
+ /*
+ * We cannot cache fd_ofiles or fd_ofileflags since operations
+ * may block and rip them out from under us.
+ */
+ for (i = 0; i <= fdp->fd_lastfile; i++) {
+ if (fdp->fd_ofiles[i] != NULL &&
+ (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE ||
+ (fdp->fd_ofileflags[i] & UF_EXCLOSE))) {
+ struct file *fp;
+
+ knote_fdclose(td, i);
+ /*
+ * NULL-out descriptor prior to close to avoid
+ * a race while close blocks.
+ */
+ fp = fdp->fd_ofiles[i];
+ fdp->fd_ofiles[i] = NULL;
+ fdp->fd_ofileflags[i] = 0;
+ fdunused(fdp, i);
+ if (fp->f_type == DTYPE_MQUEUE)
+ mq_fdclose(td, i, fp);
+ FILEDESC_XUNLOCK(fdp);
+ (void) closef(fp, td);
+ FILEDESC_XLOCK(fdp);
+ }
+ }
+ FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * It is unsafe for set[ug]id processes to be started with file
+ * descriptors 0..2 closed, as these descriptors are given implicit
+ * significance in the Standard C library. fdcheckstd() will create a
+ * descriptor referencing /dev/null for each of stdin, stdout, and
+ * stderr that is not already open.
+ */
+int
+fdcheckstd(struct thread *td)
+{
+ struct filedesc *fdp;
+ register_t retval, save;
+ int i, error, devnull;
+
+ fdp = td->td_proc->p_fd;
+ if (fdp == NULL)
+ return (0);
+ KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
+ devnull = -1;
+ error = 0;
+ for (i = 0; i < 3; i++) {
+ if (fdp->fd_ofiles[i] != NULL)
+ continue;
+ if (devnull < 0) {
+ save = td->td_retval[0];
+ error = kern_open(td, "/dev/null", UIO_SYSSPACE,
+ O_RDWR, 0);
+ devnull = td->td_retval[0];
+ KASSERT(devnull == i, ("oof, we didn't get our fd"));
+ td->td_retval[0] = save;
+ if (error)
+ break;
+ } else {
+ error = do_dup(td, DUP_FIXED, devnull, i, &retval);
+ if (error != 0)
+ break;
+ }
+ }
+ return (error);
+}
+
+/*
+ * Internal form of close. Decrement reference count on file structure.
+ * Note: td may be NULL when closing a file that was being passed in a
+ * message.
+ *
+ * XXXRW: Giant is not required for the caller, but often will be held; this
+ * makes it moderately likely the Giant will be recursed in the VFS case.
+ */
+int
+closef(struct file *fp, struct thread *td)
+{
+ struct vnode *vp;
+ struct flock lf;
+ struct filedesc_to_leader *fdtol;
+ struct filedesc *fdp;
+
+ /*
+ * POSIX record locking dictates that any close releases ALL
+ * locks owned by this process. This is handled by setting
+ * a flag in the unlock to free ONLY locks obeying POSIX
+ * semantics, and not to free BSD-style file locks.
+ * If the descriptor was in a message, POSIX-style locks
+ * aren't passed with the descriptor, and the thread pointer
+ * will be NULL. Callers should be careful only to pass a
+ * NULL thread pointer when there really is no owning
+ * context that might have locks, or the locks will be
+ * leaked.
+ */
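+	/*
+	 * (Illustration, not upstream text: if a process holds a POSIX lock
+	 * on a file that it has open through two descriptors, closing either
+	 * descriptor releases that lock; passing F_POSIX below keeps
+	 * flock()-style BSD locks, which belong to the open file, intact.)
+	 */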
+ if (fp->f_type == DTYPE_VNODE && td != NULL) {
+ int vfslocked;
+
+ vp = fp->f_vnode;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+ (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
+ F_UNLCK, &lf, F_POSIX);
+ }
+ fdtol = td->td_proc->p_fdtol;
+ if (fdtol != NULL) {
+ /*
+ * Handle special case where file descriptor table is
+ * shared between multiple process leaders.
+ */
+ fdp = td->td_proc->p_fd;
+ FILEDESC_XLOCK(fdp);
+ for (fdtol = fdtol->fdl_next;
+ fdtol != td->td_proc->p_fdtol;
+ fdtol = fdtol->fdl_next) {
+ if ((fdtol->fdl_leader->p_flag &
+ P_ADVLOCK) == 0)
+ continue;
+ fdtol->fdl_holdcount++;
+ FILEDESC_XUNLOCK(fdp);
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+ vp = fp->f_vnode;
+ (void) VOP_ADVLOCK(vp,
+ (caddr_t)fdtol->fdl_leader,
+ F_UNLCK, &lf, F_POSIX);
+ FILEDESC_XLOCK(fdp);
+ fdtol->fdl_holdcount--;
+ if (fdtol->fdl_holdcount == 0 &&
+ fdtol->fdl_wakeup != 0) {
+ fdtol->fdl_wakeup = 0;
+ wakeup(fdtol);
+ }
+ }
+ FILEDESC_XUNLOCK(fdp);
+ }
+ VFS_UNLOCK_GIANT(vfslocked);
+ }
+ return (fdrop(fp, td));
+}
+
+/*
+ * Initialize the file pointer with the specified properties.
+ *
+ * The ops are set with release semantics to be certain that the flags, type,
+ * and data are visible when ops is. This is to prevent ops methods from being
+ * called with bad data.
+ */
+void
+finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
+{
+ fp->f_data = data;
+ fp->f_flag = flag;
+ fp->f_type = type;
+ atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
+}
+#endif /* __rtems__ */
+
+struct file *
+fget_unlocked(struct filedesc *fdp, int fd)
+{
+ struct file *fp;
+ u_int count;
+
+ if (fd < 0 || fd >= fdp->fd_nfiles)
+ return (NULL);
+ /*
+	 * Fetch the descriptor locklessly.  We avoid fdrop() races by
+	 * never raising a refcount that has already dropped to 0.  To
+	 * accomplish this we have to use a cmpset loop rather than an
+	 * atomic_add.  The descriptor
+ * must be re-verified once we acquire a reference to be certain
+ * that the identity is still correct and we did not lose a race
+ * due to preemption.
+ */
+ for (;;) {
+ fp = fdp->fd_ofiles[fd];
+ if (fp == NULL)
+ break;
+ count = fp->f_count;
+ if (count == 0)
+ continue;
+ /*
+ * Use an acquire barrier to prevent caching of fd_ofiles
+ * so it is refreshed for verification.
+ */
+ if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1)
+ continue;
+ if (fp == fdp->fd_ofiles[fd])
+ break;
+ fdrop(fp, curthread);
+ }
+
+ return (fp);
+}
+
+/*
+ * Extract the file pointer associated with the specified descriptor for the
+ * current user process.
+ *
+ * If the descriptor doesn't exist or doesn't match 'flags', EBADF is
+ * returned.
+ *
+ * If an error occurred, the non-zero error is returned and *fpp is set to
+ * NULL. Otherwise *fpp is held and set and zero is returned. Caller is
+ * responsible for fdrop().
+ */
+static __inline int
+_fget(struct thread *td, int fd, struct file **fpp, int flags)
+{
+ struct filedesc *fdp;
+ struct file *fp;
+
+ *fpp = NULL;
+ if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
+ return (EBADF);
+ if ((fp = fget_unlocked(fdp, fd)) == NULL)
+ return (EBADF);
+ if (fp->f_ops == &badfileops) {
+ fdrop(fp, td);
+ return (EBADF);
+ }
+ /*
+ * FREAD and FWRITE failure return EBADF as per POSIX.
+ *
+ * Only one flag, or 0, may be specified.
+ */
+ if ((flags == FREAD && (fp->f_flag & FREAD) == 0) ||
+ (flags == FWRITE && (fp->f_flag & FWRITE) == 0)) {
+ fdrop(fp, td);
+ return (EBADF);
+ }
+ *fpp = fp;
+ return (0);
+}
+
+int
+fget(struct thread *td, int fd, struct file **fpp)
+{
+
+ return(_fget(td, fd, fpp, 0));
+}
+
+int
+fget_read(struct thread *td, int fd, struct file **fpp)
+{
+
+ return(_fget(td, fd, fpp, FREAD));
+}
+
+#ifndef __rtems__
+int
+fget_write(struct thread *td, int fd, struct file **fpp)
+{
+
+ return(_fget(td, fd, fpp, FWRITE));
+}
+
+/*
+ * Like fget() but loads the underlying vnode, or returns an error if the
+ * descriptor does not represent a vnode. Note that pipes use vnodes but
+ * never have VM objects. The returned vnode will be vref()'d.
+ *
+ * XXX: what about the unused flags?
+ */
+static __inline int
+_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
+{
+ struct file *fp;
+ int error;
+
+ *vpp = NULL;
+ if ((error = _fget(td, fd, &fp, flags)) != 0)
+ return (error);
+ if (fp->f_vnode == NULL) {
+ error = EINVAL;
+ } else {
+ *vpp = fp->f_vnode;
+ vref(*vpp);
+ }
+ fdrop(fp, td);
+
+ return (error);
+}
+
+int
+fgetvp(struct thread *td, int fd, struct vnode **vpp)
+{
+
+ return (_fgetvp(td, fd, vpp, 0));
+}
+
+int
+fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
+{
+
+ return (_fgetvp(td, fd, vpp, FREAD));
+}
+
+#ifdef notyet
+int
+fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
+{
+
+ return (_fgetvp(td, fd, vpp, FWRITE));
+}
+#endif
+
+/*
+ * Like fget() but loads the underlying socket, or returns an error if the
+ * descriptor does not represent a socket.
+ *
+ * We bump the ref count on the returned socket. XXX Also obtain the SX lock
+ * in the future.
+ *
+ * Note: fgetsock() and fputsock() are deprecated, as consumers should rely
+ * on their file descriptor reference to prevent the socket from being free'd
+ * during use.
+ */
+int
+fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
+{
+ struct file *fp;
+ int error;
+
+ *spp = NULL;
+ if (fflagp != NULL)
+ *fflagp = 0;
+ if ((error = _fget(td, fd, &fp, 0)) != 0)
+ return (error);
+ if (fp->f_type != DTYPE_SOCKET) {
+ error = ENOTSOCK;
+ } else {
+ *spp = fp->f_data;
+ if (fflagp)
+ *fflagp = fp->f_flag;
+ SOCK_LOCK(*spp);
+ soref(*spp);
+ SOCK_UNLOCK(*spp);
+ }
+ fdrop(fp, td);
+
+ return (error);
+}
+
+/*
+ * Drop the reference count on the socket and XXX release the SX lock in the
+ * future. The last reference closes the socket.
+ *
+ * Note: fputsock() is deprecated, see comment for fgetsock().
+ */
+void
+fputsock(struct socket *so)
+{
+
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ sorele(so);
+}
+#endif /* __rtems__ */
+
+/*
+ * Handle the last reference to a file being closed.
+ */
+int
+_fdrop(struct file *fp, struct thread *td)
+{
+#ifdef __rtems__
+ panic("fdrop: RTEMS unsupported");
+
+#else /* __rtems__ */
+ int error;
+
+ error = 0;
+ if (fp->f_count != 0)
+ panic("fdrop: count %d", fp->f_count);
+ if (fp->f_ops != &badfileops)
+ error = fo_close(fp, td);
+ /*
+ * The f_cdevpriv cannot be assigned non-NULL value while we
+ * are destroying the file.
+ */
+ if (fp->f_cdevpriv != NULL)
+ devfs_fpdrop(fp);
+ atomic_subtract_int(&openfiles, 1);
+ crfree(fp->f_cred);
+ uma_zfree(file_zone, fp);
+
+ return (error);
+#endif /* __rtems__ */
+}
+
+#ifndef __rtems__
+/*
+ * Apply an advisory lock on a file descriptor.
+ *
+ * Just attempt to get a record lock of the requested type on the entire file
+ * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct flock_args {
+ int fd;
+ int how;
+};
+#endif
+/* ARGSUSED */
+int
+flock(struct thread *td, struct flock_args *uap)
+{
+ struct file *fp;
+ struct vnode *vp;
+ struct flock lf;
+ int vfslocked;
+ int error;
+
+ if ((error = fget(td, uap->fd, &fp)) != 0)
+ return (error);
+ if (fp->f_type != DTYPE_VNODE) {
+ fdrop(fp, td);
+ return (EOPNOTSUPP);
+ }
+
+ vp = fp->f_vnode;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (uap->how & LOCK_UN) {
+ lf.l_type = F_UNLCK;
+ atomic_clear_int(&fp->f_flag, FHASLOCK);
+ error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
+ goto done2;
+ }
+ if (uap->how & LOCK_EX)
+ lf.l_type = F_WRLCK;
+ else if (uap->how & LOCK_SH)
+ lf.l_type = F_RDLCK;
+ else {
+ error = EBADF;
+ goto done2;
+ }
+ atomic_set_int(&fp->f_flag, FHASLOCK);
+ error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
+ (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
+done2:
+ fdrop(fp, td);
+ VFS_UNLOCK_GIANT(vfslocked);
+ return (error);
+}
+/*
+ * Duplicate the specified descriptor to a free descriptor.
+ */
+int
+dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error)
+{
+ struct file *wfp;
+ struct file *fp;
+
+ /*
+ * If the to-be-dup'd fd number is greater than the allowed number
+ * of file descriptors, or the fd to be dup'd has already been
+ * closed, then reject.
+ */
+ FILEDESC_XLOCK(fdp);
+ if (dfd < 0 || dfd >= fdp->fd_nfiles ||
+ (wfp = fdp->fd_ofiles[dfd]) == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EBADF);
+ }
+
+ /*
+ * There are two cases of interest here.
+ *
+ * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
+ *
+ * For ENXIO steal away the file structure from (dfd) and store it in
+ * (indx). (dfd) is effectively closed by this operation.
+ *
+ * Any other error code is just returned.
+ */
+ switch (error) {
+ case ENODEV:
+ /*
+ * Check that the mode the file is being opened for is a
+ * subset of the mode of the existing descriptor.
+ */
+ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EACCES);
+ }
+ fp = fdp->fd_ofiles[indx];
+ fdp->fd_ofiles[indx] = wfp;
+ fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+ if (fp == NULL)
+ fdused(fdp, indx);
+ fhold(wfp);
+ FILEDESC_XUNLOCK(fdp);
+ if (fp != NULL)
+ /*
+ * We now own the reference to fp that the ofiles[]
+ * array used to own. Release it.
+ */
+ fdrop(fp, td);
+ return (0);
+
+ case ENXIO:
+ /*
+ * Steal away the file pointer from dfd and stuff it into indx.
+ */
+ fp = fdp->fd_ofiles[indx];
+ fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
+ fdp->fd_ofiles[dfd] = NULL;
+ fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+ fdp->fd_ofileflags[dfd] = 0;
+ fdunused(fdp, dfd);
+ if (fp == NULL)
+ fdused(fdp, indx);
+ FILEDESC_XUNLOCK(fdp);
+
+ /*
+ * We now own the reference to fp that the ofiles[] array
+ * used to own. Release it.
+ */
+ if (fp != NULL)
+ fdrop(fp, td);
+ return (0);
+
+ default:
+ FILEDESC_XUNLOCK(fdp);
+ return (error);
+ }
+ /* NOTREACHED */
+}
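+
+/*
+ * Illustrative use: dupfdopen() backs the /dev/fd pseudo-device described
+ * near the end of this file. When an open of /dev/fd/N fails with ENODEV
+ * or ENXIO, the open path is expected to retry through dupfdopen() so the
+ * file behind descriptor N is either dup'd (ENODEV) or moved outright
+ * (ENXIO) into the newly allocated slot.
+ */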
+
+/*
+ * Scan all active processes and prisons to see if any of them have a current
+ * or root directory of `olddp'. If so, replace them with the new mount point.
+ */
+void
+mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
+{
+ struct filedesc *fdp;
+ struct prison *pr;
+ struct proc *p;
+ int nrele;
+
+ if (vrefcnt(olddp) == 1)
+ return;
+ nrele = 0;
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ fdp = fdhold(p);
+ if (fdp == NULL)
+ continue;
+ FILEDESC_XLOCK(fdp);
+ if (fdp->fd_cdir == olddp) {
+ vref(newdp);
+ fdp->fd_cdir = newdp;
+ nrele++;
+ }
+ if (fdp->fd_rdir == olddp) {
+ vref(newdp);
+ fdp->fd_rdir = newdp;
+ nrele++;
+ }
+ if (fdp->fd_jdir == olddp) {
+ vref(newdp);
+ fdp->fd_jdir = newdp;
+ nrele++;
+ }
+ FILEDESC_XUNLOCK(fdp);
+ fddrop(fdp);
+ }
+ sx_sunlock(&allproc_lock);
+ if (rootvnode == olddp) {
+ vref(newdp);
+ rootvnode = newdp;
+ nrele++;
+ }
+ mtx_lock(&prison0.pr_mtx);
+ if (prison0.pr_root == olddp) {
+ vref(newdp);
+ prison0.pr_root = newdp;
+ nrele++;
+ }
+ mtx_unlock(&prison0.pr_mtx);
+ sx_slock(&allprison_lock);
+ TAILQ_FOREACH(pr, &allprison, pr_list) {
+ mtx_lock(&pr->pr_mtx);
+ if (pr->pr_root == olddp) {
+ vref(newdp);
+ pr->pr_root = newdp;
+ nrele++;
+ }
+ mtx_unlock(&pr->pr_mtx);
+ }
+ sx_sunlock(&allprison_lock);
+ while (nrele--)
+ vrele(olddp);
+}
+
+struct filedesc_to_leader *
+filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
+{
+ struct filedesc_to_leader *fdtol;
+
+ fdtol = malloc(sizeof(struct filedesc_to_leader),
+ M_FILEDESC_TO_LEADER,
+ M_WAITOK);
+ fdtol->fdl_refcount = 1;
+ fdtol->fdl_holdcount = 0;
+ fdtol->fdl_wakeup = 0;
+ fdtol->fdl_leader = leader;
+ if (old != NULL) {
+ FILEDESC_XLOCK(fdp);
+ fdtol->fdl_next = old->fdl_next;
+ fdtol->fdl_prev = old;
+ old->fdl_next = fdtol;
+ fdtol->fdl_next->fdl_prev = fdtol;
+ FILEDESC_XUNLOCK(fdp);
+ } else {
+ fdtol->fdl_next = fdtol;
+ fdtol->fdl_prev = fdtol;
+ }
+ return (fdtol);
+}
+
+/*
+ * Get file structures globally.
+ */
+static int
+sysctl_kern_file(SYSCTL_HANDLER_ARGS)
+{
+ struct xfile xf;
+ struct filedesc *fdp;
+ struct file *fp;
+ struct proc *p;
+ int error, n;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ if (req->oldptr == NULL) {
+ n = 0;
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ fdp = fdhold(p);
+ if (fdp == NULL)
+ continue;
+ /* overestimates sparse tables. */
+ if (fdp->fd_lastfile > 0)
+ n += fdp->fd_lastfile;
+ fddrop(fdp);
+ }
+ sx_sunlock(&allproc_lock);
+ return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
+ }
+ error = 0;
+ bzero(&xf, sizeof(xf));
+ xf.xf_size = sizeof(xf);
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ PROC_LOCK(p);
+ if (p_cansee(req->td, p) != 0) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ xf.xf_pid = p->p_pid;
+ xf.xf_uid = p->p_ucred->cr_uid;
+ PROC_UNLOCK(p);
+ fdp = fdhold(p);
+ if (fdp == NULL)
+ continue;
+ FILEDESC_SLOCK(fdp);
+ for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
+ if ((fp = fdp->fd_ofiles[n]) == NULL)
+ continue;
+ xf.xf_fd = n;
+ xf.xf_file = fp;
+ xf.xf_data = fp->f_data;
+ xf.xf_vnode = fp->f_vnode;
+ xf.xf_type = fp->f_type;
+ xf.xf_count = fp->f_count;
+ xf.xf_msgcount = 0;
+ xf.xf_offset = fp->f_offset;
+ xf.xf_flag = fp->f_flag;
+ error = SYSCTL_OUT(req, &xf, sizeof(xf));
+ if (error)
+ break;
+ }
+ FILEDESC_SUNLOCK(fdp);
+ fddrop(fdp);
+ if (error)
+ break;
+ }
+ sx_sunlock(&allproc_lock);
+ return (error);
+}
+
+SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
+ 0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
+
+#ifdef KINFO_OFILE_SIZE
+CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE);
+#endif
+
+#ifdef COMPAT_FREEBSD7
+static int
+export_vnode_for_osysctl(struct vnode *vp, int type,
+ struct kinfo_ofile *kif, struct filedesc *fdp, struct sysctl_req *req)
+{
+ int error;
+ char *fullpath, *freepath;
+ int vfslocked;
+
+ bzero(kif, sizeof(*kif));
+ kif->kf_structsize = sizeof(*kif);
+
+ vref(vp);
+ kif->kf_fd = type;
+ kif->kf_type = KF_TYPE_VNODE;
+ /* This function only handles directories. */
+ if (vp->v_type != VDIR) {
+ vrele(vp);
+ return (ENOTDIR);
+ }
+ kif->kf_vnode_type = KF_VTYPE_VDIR;
+
+ /*
+ * This is not a true file descriptor, so we set a bogus refcount
+ * and offset to indicate these fields should be ignored.
+ */
+ kif->kf_ref_count = -1;
+ kif->kf_offset = -1;
+
+ freepath = NULL;
+ fullpath = "-";
+ FILEDESC_SUNLOCK(fdp);
+ vn_fullpath(curthread, vp, &fullpath, &freepath);
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vrele(vp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
+ if (freepath != NULL)
+ free(freepath, M_TEMP);
+ error = SYSCTL_OUT(req, kif, sizeof(*kif));
+ FILEDESC_SLOCK(fdp);
+ return (error);
+}
+
+/*
+ * Get per-process file descriptors for use by procstat(1), et al.
+ */
+static int
+sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
+{
+ char *fullpath, *freepath;
+ struct kinfo_ofile *kif;
+ struct filedesc *fdp;
+ int error, i, *name;
+ struct socket *so;
+ struct vnode *vp;
+ struct file *fp;
+ struct proc *p;
+ struct tty *tp;
+ int vfslocked;
+
+ name = (int *)arg1;
+ if ((p = pfind((pid_t)name[0])) == NULL)
+ return (ESRCH);
+ if ((error = p_candebug(curthread, p))) {
+ PROC_UNLOCK(p);
+ return (error);
+ }
+ fdp = fdhold(p);
+ PROC_UNLOCK(p);
+ if (fdp == NULL)
+ return (ENOENT);
+ kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
+ FILEDESC_SLOCK(fdp);
+ if (fdp->fd_cdir != NULL)
+ export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
+ fdp, req);
+ if (fdp->fd_rdir != NULL)
+ export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
+ fdp, req);
+ if (fdp->fd_jdir != NULL)
+ export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
+ fdp, req);
+ for (i = 0; i < fdp->fd_nfiles; i++) {
+ if ((fp = fdp->fd_ofiles[i]) == NULL)
+ continue;
+ bzero(kif, sizeof(*kif));
+ kif->kf_structsize = sizeof(*kif);
+ vp = NULL;
+ so = NULL;
+ tp = NULL;
+ kif->kf_fd = i;
+ switch (fp->f_type) {
+ case DTYPE_VNODE:
+ kif->kf_type = KF_TYPE_VNODE;
+ vp = fp->f_vnode;
+ break;
+
+ case DTYPE_SOCKET:
+ kif->kf_type = KF_TYPE_SOCKET;
+ so = fp->f_data;
+ break;
+
+ case DTYPE_PIPE:
+ kif->kf_type = KF_TYPE_PIPE;
+ break;
+
+ case DTYPE_FIFO:
+ kif->kf_type = KF_TYPE_FIFO;
+ vp = fp->f_vnode;
+ break;
+
+ case DTYPE_KQUEUE:
+ kif->kf_type = KF_TYPE_KQUEUE;
+ break;
+
+ case DTYPE_CRYPTO:
+ kif->kf_type = KF_TYPE_CRYPTO;
+ break;
+
+ case DTYPE_MQUEUE:
+ kif->kf_type = KF_TYPE_MQUEUE;
+ break;
+
+ case DTYPE_SHM:
+ kif->kf_type = KF_TYPE_SHM;
+ break;
+
+ case DTYPE_SEM:
+ kif->kf_type = KF_TYPE_SEM;
+ break;
+
+ case DTYPE_PTS:
+ kif->kf_type = KF_TYPE_PTS;
+ tp = fp->f_data;
+ break;
+
+ default:
+ kif->kf_type = KF_TYPE_UNKNOWN;
+ break;
+ }
+ kif->kf_ref_count = fp->f_count;
+ if (fp->f_flag & FREAD)
+ kif->kf_flags |= KF_FLAG_READ;
+ if (fp->f_flag & FWRITE)
+ kif->kf_flags |= KF_FLAG_WRITE;
+ if (fp->f_flag & FAPPEND)
+ kif->kf_flags |= KF_FLAG_APPEND;
+ if (fp->f_flag & FASYNC)
+ kif->kf_flags |= KF_FLAG_ASYNC;
+ if (fp->f_flag & FFSYNC)
+ kif->kf_flags |= KF_FLAG_FSYNC;
+ if (fp->f_flag & FNONBLOCK)
+ kif->kf_flags |= KF_FLAG_NONBLOCK;
+ if (fp->f_flag & O_DIRECT)
+ kif->kf_flags |= KF_FLAG_DIRECT;
+ if (fp->f_flag & FHASLOCK)
+ kif->kf_flags |= KF_FLAG_HASLOCK;
+ kif->kf_offset = fp->f_offset;
+ if (vp != NULL) {
+ vref(vp);
+ switch (vp->v_type) {
+ case VNON:
+ kif->kf_vnode_type = KF_VTYPE_VNON;
+ break;
+ case VREG:
+ kif->kf_vnode_type = KF_VTYPE_VREG;
+ break;
+ case VDIR:
+ kif->kf_vnode_type = KF_VTYPE_VDIR;
+ break;
+ case VBLK:
+ kif->kf_vnode_type = KF_VTYPE_VBLK;
+ break;
+ case VCHR:
+ kif->kf_vnode_type = KF_VTYPE_VCHR;
+ break;
+ case VLNK:
+ kif->kf_vnode_type = KF_VTYPE_VLNK;
+ break;
+ case VSOCK:
+ kif->kf_vnode_type = KF_VTYPE_VSOCK;
+ break;
+ case VFIFO:
+ kif->kf_vnode_type = KF_VTYPE_VFIFO;
+ break;
+ case VBAD:
+ kif->kf_vnode_type = KF_VTYPE_VBAD;
+ break;
+ default:
+ kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
+ break;
+ }
+ /*
+ * It is OK to drop the filedesc lock here as we will
+ * re-validate and re-evaluate its properties when
+ * the loop continues.
+ */
+ freepath = NULL;
+ fullpath = "-";
+ FILEDESC_SUNLOCK(fdp);
+ vn_fullpath(curthread, vp, &fullpath, &freepath);
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vrele(vp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ strlcpy(kif->kf_path, fullpath,
+ sizeof(kif->kf_path));
+ if (freepath != NULL)
+ free(freepath, M_TEMP);
+ FILEDESC_SLOCK(fdp);
+ }
+ if (so != NULL) {
+ struct sockaddr *sa;
+
+ if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
+ == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
+ bcopy(sa, &kif->kf_sa_local, sa->sa_len);
+ free(sa, M_SONAME);
+ }
+ if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
+ == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
+ bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
+ free(sa, M_SONAME);
+ }
+ kif->kf_sock_domain =
+ so->so_proto->pr_domain->dom_family;
+ kif->kf_sock_type = so->so_type;
+ kif->kf_sock_protocol = so->so_proto->pr_protocol;
+ }
+ if (tp != NULL) {
+ strlcpy(kif->kf_path, tty_devname(tp),
+ sizeof(kif->kf_path));
+ }
+ error = SYSCTL_OUT(req, kif, sizeof(*kif));
+ if (error)
+ break;
+ }
+ FILEDESC_SUNLOCK(fdp);
+ fddrop(fdp);
+ free(kif, M_TEMP);
+ return (0);
+}
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, CTLFLAG_RD,
+ sysctl_kern_proc_ofiledesc, "Process ofiledesc entries");
+#endif /* COMPAT_FREEBSD7 */
+
+#ifdef KINFO_FILE_SIZE
+CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
+#endif
+
+static int
+export_vnode_for_sysctl(struct vnode *vp, int type,
+ struct kinfo_file *kif, struct filedesc *fdp, struct sysctl_req *req)
+{
+ int error;
+ char *fullpath, *freepath;
+ int vfslocked;
+
+ bzero(kif, sizeof(*kif));
+
+ vref(vp);
+ kif->kf_fd = type;
+ kif->kf_type = KF_TYPE_VNODE;
+ /* This function only handles directories. */
+ if (vp->v_type != VDIR) {
+ vrele(vp);
+ return (ENOTDIR);
+ }
+ kif->kf_vnode_type = KF_VTYPE_VDIR;
+
+ /*
+ * This is not a true file descriptor, so we set a bogus refcount
+ * and offset to indicate these fields should be ignored.
+ */
+ kif->kf_ref_count = -1;
+ kif->kf_offset = -1;
+
+ freepath = NULL;
+ fullpath = "-";
+ FILEDESC_SUNLOCK(fdp);
+ vn_fullpath(curthread, vp, &fullpath, &freepath);
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vrele(vp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
+ if (freepath != NULL)
+ free(freepath, M_TEMP);
+ /* Pack record size down */
+ kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
+ strlen(kif->kf_path) + 1;
+ kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
+ error = SYSCTL_OUT(req, kif, kif->kf_structsize);
+ FILEDESC_SLOCK(fdp);
+ return (error);
+}
+
+/*
+ * Get per-process file descriptors for use by procstat(1), et al.
+ */
+static int
+sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
+{
+ char *fullpath, *freepath;
+ struct kinfo_file *kif;
+ struct filedesc *fdp;
+ int error, i, *name;
+ struct socket *so;
+ struct vnode *vp;
+ struct file *fp;
+ struct proc *p;
+ struct tty *tp;
+ int vfslocked;
+ size_t oldidx;
+
+ name = (int *)arg1;
+ if ((p = pfind((pid_t)name[0])) == NULL)
+ return (ESRCH);
+ if ((error = p_candebug(curthread, p))) {
+ PROC_UNLOCK(p);
+ return (error);
+ }
+ fdp = fdhold(p);
+ PROC_UNLOCK(p);
+ if (fdp == NULL)
+ return (ENOENT);
+ kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
+ FILEDESC_SLOCK(fdp);
+ if (fdp->fd_cdir != NULL)
+ export_vnode_for_sysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
+ fdp, req);
+ if (fdp->fd_rdir != NULL)
+ export_vnode_for_sysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
+ fdp, req);
+ if (fdp->fd_jdir != NULL)
+ export_vnode_for_sysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
+ fdp, req);
+ for (i = 0; i < fdp->fd_nfiles; i++) {
+ if ((fp = fdp->fd_ofiles[i]) == NULL)
+ continue;
+ bzero(kif, sizeof(*kif));
+ vp = NULL;
+ so = NULL;
+ tp = NULL;
+ kif->kf_fd = i;
+ switch (fp->f_type) {
+ case DTYPE_VNODE:
+ kif->kf_type = KF_TYPE_VNODE;
+ vp = fp->f_vnode;
+ break;
+
+ case DTYPE_SOCKET:
+ kif->kf_type = KF_TYPE_SOCKET;
+ so = fp->f_data;
+ break;
+
+ case DTYPE_PIPE:
+ kif->kf_type = KF_TYPE_PIPE;
+ break;
+
+ case DTYPE_FIFO:
+ kif->kf_type = KF_TYPE_FIFO;
+ vp = fp->f_vnode;
+ break;
+
+ case DTYPE_KQUEUE:
+ kif->kf_type = KF_TYPE_KQUEUE;
+ break;
+
+ case DTYPE_CRYPTO:
+ kif->kf_type = KF_TYPE_CRYPTO;
+ break;
+
+ case DTYPE_MQUEUE:
+ kif->kf_type = KF_TYPE_MQUEUE;
+ break;
+
+ case DTYPE_SHM:
+ kif->kf_type = KF_TYPE_SHM;
+ break;
+
+ case DTYPE_SEM:
+ kif->kf_type = KF_TYPE_SEM;
+ break;
+
+ case DTYPE_PTS:
+ kif->kf_type = KF_TYPE_PTS;
+ tp = fp->f_data;
+ break;
+
+ default:
+ kif->kf_type = KF_TYPE_UNKNOWN;
+ break;
+ }
+ kif->kf_ref_count = fp->f_count;
+ if (fp->f_flag & FREAD)
+ kif->kf_flags |= KF_FLAG_READ;
+ if (fp->f_flag & FWRITE)
+ kif->kf_flags |= KF_FLAG_WRITE;
+ if (fp->f_flag & FAPPEND)
+ kif->kf_flags |= KF_FLAG_APPEND;
+ if (fp->f_flag & FASYNC)
+ kif->kf_flags |= KF_FLAG_ASYNC;
+ if (fp->f_flag & FFSYNC)
+ kif->kf_flags |= KF_FLAG_FSYNC;
+ if (fp->f_flag & FNONBLOCK)
+ kif->kf_flags |= KF_FLAG_NONBLOCK;
+ if (fp->f_flag & O_DIRECT)
+ kif->kf_flags |= KF_FLAG_DIRECT;
+ if (fp->f_flag & FHASLOCK)
+ kif->kf_flags |= KF_FLAG_HASLOCK;
+ kif->kf_offset = fp->f_offset;
+ if (vp != NULL) {
+ vref(vp);
+ switch (vp->v_type) {
+ case VNON:
+ kif->kf_vnode_type = KF_VTYPE_VNON;
+ break;
+ case VREG:
+ kif->kf_vnode_type = KF_VTYPE_VREG;
+ break;
+ case VDIR:
+ kif->kf_vnode_type = KF_VTYPE_VDIR;
+ break;
+ case VBLK:
+ kif->kf_vnode_type = KF_VTYPE_VBLK;
+ break;
+ case VCHR:
+ kif->kf_vnode_type = KF_VTYPE_VCHR;
+ break;
+ case VLNK:
+ kif->kf_vnode_type = KF_VTYPE_VLNK;
+ break;
+ case VSOCK:
+ kif->kf_vnode_type = KF_VTYPE_VSOCK;
+ break;
+ case VFIFO:
+ kif->kf_vnode_type = KF_VTYPE_VFIFO;
+ break;
+ case VBAD:
+ kif->kf_vnode_type = KF_VTYPE_VBAD;
+ break;
+ default:
+ kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
+ break;
+ }
+ /*
+ * It is OK to drop the filedesc lock here as we will
+ * re-validate and re-evaluate its properties when
+ * the loop continues.
+ */
+ freepath = NULL;
+ fullpath = "-";
+ FILEDESC_SUNLOCK(fdp);
+ vn_fullpath(curthread, vp, &fullpath, &freepath);
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vrele(vp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ strlcpy(kif->kf_path, fullpath,
+ sizeof(kif->kf_path));
+ if (freepath != NULL)
+ free(freepath, M_TEMP);
+ FILEDESC_SLOCK(fdp);
+ }
+ if (so != NULL) {
+ struct sockaddr *sa;
+
+ if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
+ == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
+ bcopy(sa, &kif->kf_sa_local, sa->sa_len);
+ free(sa, M_SONAME);
+ }
+ if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
+ == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
+ bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
+ free(sa, M_SONAME);
+ }
+ kif->kf_sock_domain =
+ so->so_proto->pr_domain->dom_family;
+ kif->kf_sock_type = so->so_type;
+ kif->kf_sock_protocol = so->so_proto->pr_protocol;
+ }
+ if (tp != NULL) {
+ strlcpy(kif->kf_path, tty_devname(tp),
+ sizeof(kif->kf_path));
+ }
+ /* Pack record size down */
+ kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
+ strlen(kif->kf_path) + 1;
+ kif->kf_structsize = roundup(kif->kf_structsize,
+ sizeof(uint64_t));
+ oldidx = req->oldidx;
+ error = SYSCTL_OUT(req, kif, kif->kf_structsize);
+ if (error) {
+ if (error == ENOMEM) {
+ /*
+				 * This hack keeps the ABI of the
+				 * kern.proc.filedesc sysctl intact while
+				 * not accounting a partially copied
+				 * kinfo_file in oldidx.
+ */
+ req->oldidx = oldidx;
+ error = 0;
+ }
+ break;
+ }
+ }
+ FILEDESC_SUNLOCK(fdp);
+ fddrop(fdp);
+ free(kif, M_TEMP);
+ return (error);
+}
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD,
+ sysctl_kern_proc_filedesc, "Process filedesc entries");
+
+#ifdef DDB
+/*
+ * For the purposes of debugging, generate a human-readable string for the
+ * file type.
+ */
+static const char *
+file_type_to_name(short type)
+{
+
+ switch (type) {
+ case 0:
+ return ("zero");
+ case DTYPE_VNODE:
+ return ("vnod");
+ case DTYPE_SOCKET:
+ return ("sock");
+ case DTYPE_PIPE:
+ return ("pipe");
+ case DTYPE_FIFO:
+ return ("fifo");
+ case DTYPE_KQUEUE:
+ return ("kque");
+ case DTYPE_CRYPTO:
+ return ("crpt");
+ case DTYPE_MQUEUE:
+ return ("mque");
+ case DTYPE_SHM:
+ return ("shm");
+ case DTYPE_SEM:
+ return ("ksem");
+ default:
+ return ("unkn");
+ }
+}
+
+/*
+ * For the purposes of debugging, identify a process (if any, perhaps one of
+ * many) that references the passed file in its file descriptor array. Return
+ * NULL if none.
+ */
+static struct proc *
+file_to_first_proc(struct file *fp)
+{
+ struct filedesc *fdp;
+ struct proc *p;
+ int n;
+
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ fdp = p->p_fd;
+ if (fdp == NULL)
+ continue;
+ for (n = 0; n < fdp->fd_nfiles; n++) {
+ if (fp == fdp->fd_ofiles[n])
+ return (p);
+ }
+ }
+ return (NULL);
+}
+
+static void
+db_print_file(struct file *fp, int header)
+{
+ struct proc *p;
+
+ if (header)
+ db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n",
+ "File", "Type", "Data", "Flag", "GCFl", "Count",
+ "MCount", "Vnode", "FPID", "FCmd");
+ p = file_to_first_proc(fp);
+ db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
+ file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
+ 0, fp->f_count, 0, fp->f_vnode,
+ p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
+}
+
+DB_SHOW_COMMAND(file, db_show_file)
+{
+ struct file *fp;
+
+ if (!have_addr) {
+ db_printf("usage: show file <addr>\n");
+ return;
+ }
+ fp = (struct file *)addr;
+ db_print_file(fp, 1);
+}
+
+DB_SHOW_COMMAND(files, db_show_files)
+{
+ struct filedesc *fdp;
+ struct file *fp;
+ struct proc *p;
+ int header;
+ int n;
+
+ header = 1;
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ if ((fdp = p->p_fd) == NULL)
+ continue;
+ for (n = 0; n < fdp->fd_nfiles; ++n) {
+ if ((fp = fdp->fd_ofiles[n]) == NULL)
+ continue;
+ db_print_file(fp, header);
+ header = 0;
+ }
+ }
+}
+#endif
+
+SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
+ &maxfilesperproc, 0, "Maximum files allowed open per process");
+
+SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
+ &maxfiles, 0, "Maximum number of files");
+
+SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
+ __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files");
+
+/* ARGSUSED*/
+static void
+filelistinit(void *dummy)
+{
+
+ file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
+ mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
+}
+SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
+#endif /* __rtems__ */
+
+/*-------------------------------------------------------------------*/
+
+static int
+badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td)
+{
+
+ return (EBADF);
+}
+
+static int
+badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td)
+{
+
+ return (EINVAL);
+}
+
+static int
+badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td)
+{
+
+ return (EBADF);
+}
+
+static int
+badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td)
+{
+
+ return (0);
+}
+
+static int
+badfo_kqfilter(struct file *fp, struct knote *kn)
+{
+
+ return (EBADF);
+}
+
+static int
+badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td)
+{
+
+ return (EBADF);
+}
+
+static int
+badfo_close(struct file *fp, struct thread *td)
+{
+
+ return (EBADF);
+}
+
+struct fileops badfileops = {
+ .fo_read = badfo_readwrite,
+ .fo_write = badfo_readwrite,
+ .fo_truncate = badfo_truncate,
+ .fo_ioctl = badfo_ioctl,
+ .fo_poll = badfo_poll,
+ .fo_kqfilter = badfo_kqfilter,
+ .fo_stat = badfo_stat,
+ .fo_close = badfo_close,
+};
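+
+/*
+ * badfileops is the fallback method table for descriptors that are not
+ * (or are no longer) fully set up: _fget() above returns EBADF for a file
+ * whose f_ops still point here, and _fdrop() skips fo_close() for such a
+ * file.
+ */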
+
+#ifndef __rtems__
+/*-------------------------------------------------------------------*/
+
+/*
+ * File Descriptor pseudo-device driver (/dev/fd/).
+ *
+ * Opening minor device N dup()s the file (if any) connected to file
+ * descriptor N belonging to the calling process. Note that this driver
+ * consists of only the ``open()'' routine, because all subsequent
+ * references to this file will be direct to the other driver.
+ *
+ * XXX: we could give this one a cloning event handler if necessary.
+ */
+
+/* ARGSUSED */
+static int
+fdopen(struct cdev *dev, int mode, int type, struct thread *td)
+{
+
+ /*
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_ddb.h>
+#include <freebsd/local/opt_ktrace.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+
+#include <freebsd/sys/conf.h>
+#include <freebsd/sys/domain.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/file.h>
+#include <freebsd/sys/filedesc.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/jail.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mount.h>
+#include <freebsd/sys/mqueue.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/namei.h>
+#include <freebsd/sys/priv.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/protosw.h>
+#include <freebsd/sys/resourcevar.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/stat.h>
+#include <freebsd/sys/sx.h>
+#include <freebsd/sys/syscallsubr.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/sysproto.h>
+#include <freebsd/sys/tty.h>
+#include <freebsd/sys/unistd.h>
+#include <freebsd/sys/user.h>
+#include <freebsd/sys/vnode.h>
+#ifdef KTRACE
+#include <freebsd/sys/ktrace.h>
+#endif
+
+#include <freebsd/security/audit/audit.h>
+
+#include <freebsd/vm/uma.h>
+
+#include <freebsd/ddb/ddb.h>
+
+static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
+static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
+ "file desc to leader structures");
+static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
+
+static uma_zone_t file_zone;
+
+
+/* Flags for do_dup() */
+#define DUP_FIXED 0x1 /* Force fixed allocation */
+#define DUP_FCNTL 0x2 /* fcntl()-style errors */
+
+static int do_dup(struct thread *td, int flags, int old, int new,
+ register_t *retval);
+static int fd_first_free(struct filedesc *, int, int);
+static int fd_last_used(struct filedesc *, int, int);
+static void fdgrowtable(struct filedesc *, int);
+static void fdunused(struct filedesc *fdp, int fd);
+static void fdused(struct filedesc *fdp, int fd);
+
+/*
+ * A process is initially started out with NDFILE descriptors stored within
+ * this structure, selected to be enough for typical applications based on
+ * the historical limit of 20 open files (and the usage of descriptors by
+ * shells). If these descriptors are exhausted, a larger descriptor table
+ * may be allocated, up to a process' resource limit; the internal arrays
+ * are then unused.
+ */
+#define NDFILE 20
+#define NDSLOTSIZE sizeof(NDSLOTTYPE)
+#define NDENTRIES (NDSLOTSIZE * __CHAR_BIT)
+#define NDSLOT(x) ((x) / NDENTRIES)
+#define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES))
+#define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES)
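+
+/*
+ * Worked example (assuming NDSLOTTYPE is an 8-byte u_long, as on LP64
+ * targets, so NDENTRIES is 64): descriptor 70 is tracked in map word
+ * NDSLOT(70) == 1 at bit NDBIT(70) == (1 << 6), and NDSLOTS(70) == 2
+ * words are enough to cover descriptors 0..69.
+ */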
+
+/*
+ * Storage required per open file descriptor.
+ */
+#define OFILESIZE (sizeof(struct file *) + sizeof(char))
+
+/*
+ * Storage to hold unused ofiles that need to be reclaimed.
+ */
+struct freetable {
+ struct file **ft_table;
+ SLIST_ENTRY(freetable) ft_next;
+};
+
+/*
+ * Basic allocation of descriptors:
+ * one of the above, plus arrays for NDFILE descriptors.
+ */
+struct filedesc0 {
+ struct filedesc fd_fd;
+ /*
+ * ofiles which need to be reclaimed on free.
+ */
+ SLIST_HEAD(,freetable) fd_free;
+ /*
+ * These arrays are used when the number of open files is
+ * <= NDFILE, and are then pointed to by the pointers above.
+ */
+ struct file *fd_dfiles[NDFILE];
+ char fd_dfileflags[NDFILE];
+ NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
+};
+
+/*
+ * Descriptor management.
+ */
+volatile int openfiles; /* actual number of open files */
+struct mtx sigio_lock; /* mtx to protect pointers to sigio */
+#ifndef __rtems__
+void (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
+
+/* A mutex to protect the association between a proc and filedesc. */
+static struct mtx fdesc_mtx;
+
+/*
+ * Find the first zero bit in the given bitmap, starting at low and not
+ * exceeding size - 1.
+ */
+static int
+fd_first_free(struct filedesc *fdp, int low, int size)
+{
+ NDSLOTTYPE *map = fdp->fd_map;
+ NDSLOTTYPE mask;
+ int off, maxoff;
+
+ if (low >= size)
+ return (low);
+
+ off = NDSLOT(low);
+ if (low % NDENTRIES) {
+ mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
+ if ((mask &= ~map[off]) != 0UL)
+ return (off * NDENTRIES + ffsl(mask) - 1);
+ ++off;
+ }
+ for (maxoff = NDSLOTS(size); off < maxoff; ++off)
+ if (map[off] != ~0UL)
+ return (off * NDENTRIES + ffsl(~map[off]) - 1);
+ return (size);
+}
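+
+/*
+ * For example (assuming 64-bit map words), a call with low == 3 first
+ * masks off bits 0..2 of map[0] and looks for a clear bit among positions
+ * 3..63; if all of those descriptors are in use it scans the remaining
+ * words, and it returns 'size' when no tracked descriptor is free.
+ */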
+
+/*
+ * Find the highest non-zero bit in the given bitmap, starting at low and
+ * not exceeding size - 1.
+ */
+static int
+fd_last_used(struct filedesc *fdp, int low, int size)
+{
+ NDSLOTTYPE *map = fdp->fd_map;
+ NDSLOTTYPE mask;
+ int off, minoff;
+
+ if (low >= size)
+ return (-1);
+
+ off = NDSLOT(size);
+ if (size % NDENTRIES) {
+ mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
+ if ((mask &= map[off]) != 0)
+ return (off * NDENTRIES + flsl(mask) - 1);
+ --off;
+ }
+ for (minoff = NDSLOT(low); off >= minoff; --off)
+ if (map[off] != 0)
+ return (off * NDENTRIES + flsl(map[off]) - 1);
+ return (low - 1);
+}
+
+static int
+fdisused(struct filedesc *fdp, int fd)
+{
+ KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
+ ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
+ return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
+}
+
+/*
+ * Mark a file descriptor as used.
+ */
+static void
+fdused(struct filedesc *fdp, int fd)
+{
+
+ FILEDESC_XLOCK_ASSERT(fdp);
+ KASSERT(!fdisused(fdp, fd),
+ ("fd already used"));
+
+ fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
+ if (fd > fdp->fd_lastfile)
+ fdp->fd_lastfile = fd;
+ if (fd == fdp->fd_freefile)
+ fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
+}
+
+/*
+ * Mark a file descriptor as unused.
+ */
+static void
+fdunused(struct filedesc *fdp, int fd)
+{
+
+ FILEDESC_XLOCK_ASSERT(fdp);
+ KASSERT(fdisused(fdp, fd),
+ ("fd is already unused"));
+ KASSERT(fdp->fd_ofiles[fd] == NULL,
+ ("fd is still in use"));
+
+ fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
+ if (fd < fdp->fd_freefile)
+ fdp->fd_freefile = fd;
+ if (fd == fdp->fd_lastfile)
+ fdp->fd_lastfile = fd_last_used(fdp, 0, fd);
+}
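+
+/*
+ * Together these two helpers keep the allocation hints coherent:
+ * fd_freefile, the lowest free descriptor (advanced via fd_first_free()
+ * when that slot is taken), and fd_lastfile, the highest used descriptor
+ * (walked back via fd_last_used() when it is released).
+ */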
+
+/*
+ * System calls on descriptors.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct getdtablesize_args {
+ int dummy;
+};
+#endif
+/* ARGSUSED */
+int
+getdtablesize(struct thread *td, struct getdtablesize_args *uap)
+{
+ struct proc *p = td->td_proc;
+
+ PROC_LOCK(p);
+ td->td_retval[0] =
+ min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+ PROC_UNLOCK(p);
+ return (0);
+}
+
+/*
+ * Duplicate a file descriptor to a particular value.
+ *
+ * Note: keep in mind that a potential race condition exists when closing
+ * descriptors from a shared descriptor table (via rfork).
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct dup2_args {
+ u_int from;
+ u_int to;
+};
+#endif
+/* ARGSUSED */
+int
+dup2(struct thread *td, struct dup2_args *uap)
+{
+
+ return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
+ td->td_retval));
+}
+
+/*
+ * Duplicate a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct dup_args {
+ u_int fd;
+};
+#endif
+/* ARGSUSED */
+int
+dup(struct thread *td, struct dup_args *uap)
+{
+
+ return (do_dup(td, 0, (int)uap->fd, 0, td->td_retval));
+}
+
+/*
+ * The file control system call.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fcntl_args {
+ int fd;
+ int cmd;
+ long arg;
+};
+#endif
+/* ARGSUSED */
+int
+fcntl(struct thread *td, struct fcntl_args *uap)
+{
+ struct flock fl;
+ struct oflock ofl;
+ intptr_t arg;
+ int error;
+ int cmd;
+
+ error = 0;
+ cmd = uap->cmd;
+ switch (uap->cmd) {
+ case F_OGETLK:
+ case F_OSETLK:
+ case F_OSETLKW:
+ /*
+ * Convert old flock structure to new.
+ */
+ error = copyin((void *)(intptr_t)uap->arg, &ofl, sizeof(ofl));
+ fl.l_start = ofl.l_start;
+ fl.l_len = ofl.l_len;
+ fl.l_pid = ofl.l_pid;
+ fl.l_type = ofl.l_type;
+ fl.l_whence = ofl.l_whence;
+ fl.l_sysid = 0;
+
+ switch (uap->cmd) {
+ case F_OGETLK:
+ cmd = F_GETLK;
+ break;
+ case F_OSETLK:
+ cmd = F_SETLK;
+ break;
+ case F_OSETLKW:
+ cmd = F_SETLKW;
+ break;
+ }
+ arg = (intptr_t)&fl;
+ break;
+ case F_GETLK:
+ case F_SETLK:
+ case F_SETLKW:
+ case F_SETLK_REMOTE:
+ error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
+ arg = (intptr_t)&fl;
+ break;
+ default:
+ arg = uap->arg;
+ break;
+ }
+ if (error)
+ return (error);
+ error = kern_fcntl(td, uap->fd, cmd, arg);
+ if (error)
+ return (error);
+ if (uap->cmd == F_OGETLK) {
+ ofl.l_start = fl.l_start;
+ ofl.l_len = fl.l_len;
+ ofl.l_pid = fl.l_pid;
+ ofl.l_type = fl.l_type;
+ ofl.l_whence = fl.l_whence;
+ error = copyout(&ofl, (void *)(intptr_t)uap->arg, sizeof(ofl));
+ } else if (uap->cmd == F_GETLK) {
+ error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
+ }
+ return (error);
+}
+
+static inline struct file *
+fdtofp(int fd, struct filedesc *fdp)
+{
+ struct file *fp;
+
+ FILEDESC_LOCK_ASSERT(fdp);
+ if ((unsigned)fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[fd]) == NULL)
+ return (NULL);
+ return (fp);
+}
+
+int
+kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
+{
+ struct filedesc *fdp;
+ struct flock *flp;
+ struct file *fp;
+ struct proc *p;
+ char *pop;
+ struct vnode *vp;
+ int error, flg, tmp;
+ int vfslocked;
+ u_int old, new;
+ uint64_t bsize;
+
+ vfslocked = 0;
+ error = 0;
+ flg = F_POSIX;
+ p = td->td_proc;
+ fdp = p->p_fd;
+
+ switch (cmd) {
+ case F_DUPFD:
+ tmp = arg;
+ error = do_dup(td, DUP_FCNTL, fd, tmp, td->td_retval);
+ break;
+
+ case F_DUP2FD:
+ tmp = arg;
+ error = do_dup(td, DUP_FIXED, fd, tmp, td->td_retval);
+ break;
+
+ case F_GETFD:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ pop = &fdp->fd_ofileflags[fd];
+ td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
+ FILEDESC_SUNLOCK(fdp);
+ break;
+
+ case F_SETFD:
+ FILEDESC_XLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ pop = &fdp->fd_ofileflags[fd];
+ *pop = (*pop &~ UF_EXCLOSE) |
+ (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
+ FILEDESC_XUNLOCK(fdp);
+ break;
+
+ case F_GETFL:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ td->td_retval[0] = OFLAGS(fp->f_flag);
+ FILEDESC_SUNLOCK(fdp);
+ break;
+
+ case F_SETFL:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ do {
+ tmp = flg = fp->f_flag;
+ tmp &= ~FCNTLFLAGS;
+ tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
+ } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
+ tmp = fp->f_flag & FNONBLOCK;
+ error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
+ if (error) {
+ fdrop(fp, td);
+ break;
+ }
+ tmp = fp->f_flag & FASYNC;
+ error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
+ if (error == 0) {
+ fdrop(fp, td);
+ break;
+ }
+ atomic_clear_int(&fp->f_flag, FNONBLOCK);
+ tmp = 0;
+ (void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
+ fdrop(fp, td);
+ break;
+
+ case F_GETOWN:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
+ if (error == 0)
+ td->td_retval[0] = tmp;
+ fdrop(fp, td);
+ break;
+
+ case F_SETOWN:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ tmp = arg;
+ error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
+ fdrop(fp, td);
+ break;
+
+ case F_SETLK_REMOTE:
+ error = priv_check(td, PRIV_NFS_LOCKD);
+ if (error)
+ return (error);
+ flg = F_REMOTE;
+ goto do_setlk;
+
+ case F_SETLKW:
+ flg |= F_WAIT;
+ /* FALLTHROUGH F_SETLK */
+
+ case F_SETLK:
+ do_setlk:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ if (fp->f_type != DTYPE_VNODE) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ flp = (struct flock *)arg;
+ if (flp->l_whence == SEEK_CUR) {
+ if (fp->f_offset < 0 ||
+ (flp->l_start > 0 &&
+ fp->f_offset > OFF_MAX - flp->l_start)) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EOVERFLOW;
+ break;
+ }
+ flp->l_start += fp->f_offset;
+ }
+
+ /*
+ * VOP_ADVLOCK() may block.
+ */
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ vp = fp->f_vnode;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ switch (flp->l_type) {
+ case F_RDLCK:
+ if ((fp->f_flag & FREAD) == 0) {
+ error = EBADF;
+ break;
+ }
+ PROC_LOCK(p->p_leader);
+ p->p_leader->p_flag |= P_ADVLOCK;
+ PROC_UNLOCK(p->p_leader);
+ error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
+ flp, flg);
+ break;
+ case F_WRLCK:
+ if ((fp->f_flag & FWRITE) == 0) {
+ error = EBADF;
+ break;
+ }
+ PROC_LOCK(p->p_leader);
+ p->p_leader->p_flag |= P_ADVLOCK;
+ PROC_UNLOCK(p->p_leader);
+ error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
+ flp, flg);
+ break;
+ case F_UNLCK:
+ error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
+ flp, flg);
+ break;
+ case F_UNLCKSYS:
+ /*
+ * Temporary api for testing remote lock
+ * infrastructure.
+ */
+ if (flg != F_REMOTE) {
+ error = EINVAL;
+ break;
+ }
+ error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
+ F_UNLCKSYS, flp, flg);
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ VFS_UNLOCK_GIANT(vfslocked);
+ vfslocked = 0;
+ /* Check for race with close */
+ FILEDESC_SLOCK(fdp);
+ if ((unsigned) fd >= fdp->fd_nfiles ||
+ fp != fdp->fd_ofiles[fd]) {
+ FILEDESC_SUNLOCK(fdp);
+ flp->l_whence = SEEK_SET;
+ flp->l_start = 0;
+ flp->l_len = 0;
+ flp->l_type = F_UNLCK;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
+ F_UNLCK, flp, F_POSIX);
+ VFS_UNLOCK_GIANT(vfslocked);
+ vfslocked = 0;
+ } else
+ FILEDESC_SUNLOCK(fdp);
+ fdrop(fp, td);
+ break;
+
+ case F_GETLK:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ if (fp->f_type != DTYPE_VNODE) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ flp = (struct flock *)arg;
+ if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
+ flp->l_type != F_UNLCK) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EINVAL;
+ break;
+ }
+ if (flp->l_whence == SEEK_CUR) {
+ if ((flp->l_start > 0 &&
+ fp->f_offset > OFF_MAX - flp->l_start) ||
+ (flp->l_start < 0 &&
+ fp->f_offset < OFF_MIN - flp->l_start)) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EOVERFLOW;
+ break;
+ }
+ flp->l_start += fp->f_offset;
+ }
+ /*
+ * VOP_ADVLOCK() may block.
+ */
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ vp = fp->f_vnode;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
+ F_POSIX);
+ VFS_UNLOCK_GIANT(vfslocked);
+ vfslocked = 0;
+ fdrop(fp, td);
+ break;
+
+ case F_RDAHEAD:
+ arg = arg ? 128 * 1024: 0;
+ /* FALLTHROUGH */
+ case F_READAHEAD:
+ FILEDESC_SLOCK(fdp);
+ if ((fp = fdtofp(fd, fdp)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ if (fp->f_type != DTYPE_VNODE) {
+ FILEDESC_SUNLOCK(fdp);
+ error = EBADF;
+ break;
+ }
+ fhold(fp);
+ FILEDESC_SUNLOCK(fdp);
+ if (arg != 0) {
+ vp = fp->f_vnode;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ error = vn_lock(vp, LK_SHARED);
+ if (error != 0)
+ goto readahead_vnlock_fail;
+ bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize;
+ VOP_UNLOCK(vp, 0);
+ fp->f_seqcount = (arg + bsize - 1) / bsize;
+ do {
+ new = old = fp->f_flag;
+ new |= FRDAHEAD;
+ } while (!atomic_cmpset_rel_int(&fp->f_flag, old, new));
+readahead_vnlock_fail:
+ VFS_UNLOCK_GIANT(vfslocked);
+ vfslocked = 0;
+ } else {
+ do {
+ new = old = fp->f_flag;
+ new &= ~FRDAHEAD;
+ } while (!atomic_cmpset_rel_int(&fp->f_flag, old, new));
+ }
+ fdrop(fp, td);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+ VFS_UNLOCK_GIANT(vfslocked);
+ return (error);
+}
+
+/*
+ * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
+ */
+static int
+do_dup(struct thread *td, int flags, int old, int new,
+ register_t *retval)
+{
+ struct filedesc *fdp;
+ struct proc *p;
+ struct file *fp;
+ struct file *delfp;
+ int error, holdleaders, maxfd;
+
+ p = td->td_proc;
+ fdp = p->p_fd;
+
+ /*
+ * Verify we have a valid descriptor to dup from and possibly to
+ * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
+ * return EINVAL when the new descriptor is out of bounds.
+ */
+ if (old < 0)
+ return (EBADF);
+ if (new < 0)
+ return (flags & DUP_FCNTL ? EINVAL : EBADF);
+ PROC_LOCK(p);
+ maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+ PROC_UNLOCK(p);
+ if (new >= maxfd)
+ return (flags & DUP_FCNTL ? EINVAL : EMFILE);
+
+ FILEDESC_XLOCK(fdp);
+ if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EBADF);
+ }
+ if (flags & DUP_FIXED && old == new) {
+ *retval = new;
+ FILEDESC_XUNLOCK(fdp);
+ return (0);
+ }
+ fp = fdp->fd_ofiles[old];
+ fhold(fp);
+
+ /*
+ * If the caller specified a file descriptor, make sure the file
+ * table is large enough to hold it, and grab it. Otherwise, just
+ * allocate a new descriptor the usual way. Since the filedesc
+ * lock may be temporarily dropped in the process, we have to look
+ * out for a race.
+ */
+ if (flags & DUP_FIXED) {
+ if (new >= fdp->fd_nfiles)
+ fdgrowtable(fdp, new + 1);
+ if (fdp->fd_ofiles[new] == NULL)
+ fdused(fdp, new);
+ } else {
+ if ((error = fdalloc(td, new, &new)) != 0) {
+ FILEDESC_XUNLOCK(fdp);
+ fdrop(fp, td);
+ return (error);
+ }
+ }
+
+ /*
+ * If the old file changed out from under us then treat it as a
+ * bad file descriptor. Userland should do its own locking to
+ * avoid this case.
+ */
+ if (fdp->fd_ofiles[old] != fp) {
+ /* we've allocated a descriptor which we won't use */
+ if (fdp->fd_ofiles[new] == NULL)
+ fdunused(fdp, new);
+ FILEDESC_XUNLOCK(fdp);
+ fdrop(fp, td);
+ return (EBADF);
+ }
+ KASSERT(old != new,
+ ("new fd is same as old"));
+
+ /*
+ * Save info on the descriptor being overwritten. We cannot close
+ * it without introducing an ownership race for the slot, since we
+ * need to drop the filedesc lock to call closef().
+ *
+ * XXX this duplicates parts of close().
+ */
+ delfp = fdp->fd_ofiles[new];
+ holdleaders = 0;
+ if (delfp != NULL) {
+ if (td->td_proc->p_fdtol != NULL) {
+ /*
+ * Ask fdfree() to sleep to ensure that all relevant
+ * process leaders can be traversed in closef().
+ */
+ fdp->fd_holdleaderscount++;
+ holdleaders = 1;
+ }
+ }
+
+ /*
+ * Duplicate the source descriptor
+ */
+ fdp->fd_ofiles[new] = fp;
+ fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
+ if (new > fdp->fd_lastfile)
+ fdp->fd_lastfile = new;
+ *retval = new;
+
+ /*
+ * If we dup'd over a valid file, we now own the reference to it
+ * and must dispose of it using closef() semantics (as if a
+ * close() were performed on it).
+ *
+ * XXX this duplicates parts of close().
+ */
+ if (delfp != NULL) {
+ knote_fdclose(td, new);
+ if (delfp->f_type == DTYPE_MQUEUE)
+ mq_fdclose(td, new, delfp);
+ FILEDESC_XUNLOCK(fdp);
+ (void) closef(delfp, td);
+ if (holdleaders) {
+ FILEDESC_XLOCK(fdp);
+ fdp->fd_holdleaderscount--;
+ if (fdp->fd_holdleaderscount == 0 &&
+ fdp->fd_holdleaderswakeup != 0) {
+ fdp->fd_holdleaderswakeup = 0;
+ wakeup(&fdp->fd_holdleaderscount);
+ }
+ FILEDESC_XUNLOCK(fdp);
+ }
+ } else {
+ FILEDESC_XUNLOCK(fdp);
+ }
+ return (0);
+}
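+
+/*
+ * Illustrative callers: dup() uses do_dup(td, 0, fd, 0, ...) and lets
+ * fdalloc() pick the slot, dup2() and fcntl(F_DUP2FD) pass DUP_FIXED to
+ * force the requested slot, and fcntl(F_DUPFD) passes DUP_FCNTL so an
+ * out-of-range new descriptor yields EINVAL rather than EBADF or EMFILE.
+ */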
+
+/*
+ * If sigio is on the list associated with a process or process group,
+ * disable signalling from the device, remove sigio from the list and
+ * free sigio.
+ */
+void
+funsetown(struct sigio **sigiop)
+{
+ struct sigio *sigio;
+
+ SIGIO_LOCK();
+ sigio = *sigiop;
+ if (sigio == NULL) {
+ SIGIO_UNLOCK();
+ return;
+ }
+ *(sigio->sio_myref) = NULL;
+ if ((sigio)->sio_pgid < 0) {
+ struct pgrp *pg = (sigio)->sio_pgrp;
+ PGRP_LOCK(pg);
+ SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
+ sigio, sio_pgsigio);
+ PGRP_UNLOCK(pg);
+ } else {
+ struct proc *p = (sigio)->sio_proc;
+ PROC_LOCK(p);
+ SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
+ sigio, sio_pgsigio);
+ PROC_UNLOCK(p);
+ }
+ SIGIO_UNLOCK();
+ crfree(sigio->sio_ucred);
+ free(sigio, M_SIGIO);
+}
+
+/*
+ * Free a list of sigio structures.
+ * We only need to lock the SIGIO_LOCK because we have made ourselves
+ * inaccessible to callers of fsetown and therefore do not need to lock
+ * the proc or pgrp struct for the list manipulation.
+ */
+void
+funsetownlst(struct sigiolst *sigiolst)
+{
+ struct proc *p;
+ struct pgrp *pg;
+ struct sigio *sigio;
+
+ sigio = SLIST_FIRST(sigiolst);
+ if (sigio == NULL)
+ return;
+ p = NULL;
+ pg = NULL;
+
+ /*
+ * Every entry of the list should belong
+ * to a single proc or pgrp.
+ */
+ if (sigio->sio_pgid < 0) {
+ pg = sigio->sio_pgrp;
+ PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
+ } else /* if (sigio->sio_pgid > 0) */ {
+ p = sigio->sio_proc;
+ PROC_LOCK_ASSERT(p, MA_NOTOWNED);
+ }
+
+ SIGIO_LOCK();
+ while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
+ *(sigio->sio_myref) = NULL;
+ if (pg != NULL) {
+ KASSERT(sigio->sio_pgid < 0,
+ ("Proc sigio in pgrp sigio list"));
+ KASSERT(sigio->sio_pgrp == pg,
+ ("Bogus pgrp in sigio list"));
+ PGRP_LOCK(pg);
+ SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
+ sio_pgsigio);
+ PGRP_UNLOCK(pg);
+ } else /* if (p != NULL) */ {
+ KASSERT(sigio->sio_pgid > 0,
+ ("Pgrp sigio in proc sigio list"));
+ KASSERT(sigio->sio_proc == p,
+ ("Bogus proc in sigio list"));
+ PROC_LOCK(p);
+ SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
+ sio_pgsigio);
+ PROC_UNLOCK(p);
+ }
+ SIGIO_UNLOCK();
+ crfree(sigio->sio_ucred);
+ free(sigio, M_SIGIO);
+ SIGIO_LOCK();
+ }
+ SIGIO_UNLOCK();
+}
+
+/*
+ * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
+ *
+ * After permission checking, add a sigio structure to the sigio list for
+ * the process or process group.
+ */
+int
+fsetown(pid_t pgid, struct sigio **sigiop)
+{
+ struct proc *proc;
+ struct pgrp *pgrp;
+ struct sigio *sigio;
+ int ret;
+
+ if (pgid == 0) {
+ funsetown(sigiop);
+ return (0);
+ }
+
+ ret = 0;
+
+ /* Allocate and fill in the new sigio out of locks. */
+ sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK);
+ sigio->sio_pgid = pgid;
+ sigio->sio_ucred = crhold(curthread->td_ucred);
+ sigio->sio_myref = sigiop;
+
+ sx_slock(&proctree_lock);
+ if (pgid > 0) {
+ proc = pfind(pgid);
+ if (proc == NULL) {
+ ret = ESRCH;
+ goto fail;
+ }
+
+ /*
+ * Policy - Don't allow a process to FSETOWN a process
+ * in another session.
+ *
+ * Remove this test to allow maximum flexibility or
+ * restrict FSETOWN to the current process or process
+ * group for maximum safety.
+ */
+ PROC_UNLOCK(proc);
+ if (proc->p_session != curthread->td_proc->p_session) {
+ ret = EPERM;
+ goto fail;
+ }
+
+ pgrp = NULL;
+ } else /* if (pgid < 0) */ {
+ pgrp = pgfind(-pgid);
+ if (pgrp == NULL) {
+ ret = ESRCH;
+ goto fail;
+ }
+ PGRP_UNLOCK(pgrp);
+
+ /*
+ * Policy - Don't allow a process to FSETOWN a process
+ * in another session.
+ *
+ * Remove this test to allow maximum flexibility or
+ * restrict FSETOWN to the current process or process
+ * group for maximum safety.
+ */
+ if (pgrp->pg_session != curthread->td_proc->p_session) {
+ ret = EPERM;
+ goto fail;
+ }
+
+ proc = NULL;
+ }
+ funsetown(sigiop);
+ if (pgid > 0) {
+ PROC_LOCK(proc);
+ /*
+ * Since funsetownlst() is called without the proctree
+ * locked, we need to check for P_WEXIT.
+ * XXX: is ESRCH correct?
+ */
+ if ((proc->p_flag & P_WEXIT) != 0) {
+ PROC_UNLOCK(proc);
+ ret = ESRCH;
+ goto fail;
+ }
+ SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
+ sigio->sio_proc = proc;
+ PROC_UNLOCK(proc);
+ } else {
+ PGRP_LOCK(pgrp);
+ SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
+ sigio->sio_pgrp = pgrp;
+ PGRP_UNLOCK(pgrp);
+ }
+ sx_sunlock(&proctree_lock);
+ SIGIO_LOCK();
+ *sigiop = sigio;
+ SIGIO_UNLOCK();
+ return (0);
+
+fail:
+ sx_sunlock(&proctree_lock);
+ crfree(sigio->sio_ucred);
+ free(sigio, M_SIGIO);
+ return (ret);
+}
+
+/*
+ * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
+ */
+pid_t
+fgetown(sigiop)
+ struct sigio **sigiop;
+{
+ pid_t pgid;
+
+ SIGIO_LOCK();
+ pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
+ SIGIO_UNLOCK();
+ return (pgid);
+}
+
+/*
+ * Close a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct close_args {
+ int fd;
+};
+#endif
+/* ARGSUSED */
+int
+close(td, uap)
+ struct thread *td;
+ struct close_args *uap;
+{
+
+ return (kern_close(td, uap->fd));
+}
+
+int
+kern_close(td, fd)
+ struct thread *td;
+ int fd;
+{
+ struct filedesc *fdp;
+ struct file *fp;
+ int error;
+ int holdleaders;
+
+ error = 0;
+ holdleaders = 0;
+ fdp = td->td_proc->p_fd;
+
+ AUDIT_SYSCLOSE(td, fd);
+
+ FILEDESC_XLOCK(fdp);
+ if ((unsigned)fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[fd]) == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EBADF);
+ }
+ fdp->fd_ofiles[fd] = NULL;
+ fdp->fd_ofileflags[fd] = 0;
+ fdunused(fdp, fd);
+ if (td->td_proc->p_fdtol != NULL) {
+ /*
+ * Ask fdfree() to sleep to ensure that all relevant
+ * process leaders can be traversed in closef().
+ */
+ fdp->fd_holdleaderscount++;
+ holdleaders = 1;
+ }
+
+ /*
+ * We now hold the fp reference that used to be owned by the
+ * descriptor array. We have to unlock the FILEDESC *AFTER*
+ * knote_fdclose to prevent a race of the fd getting opened, a knote
+	 * added, and deleting a knote for the new fd.
+ */
+ knote_fdclose(td, fd);
+ if (fp->f_type == DTYPE_MQUEUE)
+ mq_fdclose(td, fd, fp);
+ FILEDESC_XUNLOCK(fdp);
+
+ error = closef(fp, td);
+ if (holdleaders) {
+ FILEDESC_XLOCK(fdp);
+ fdp->fd_holdleaderscount--;
+ if (fdp->fd_holdleaderscount == 0 &&
+ fdp->fd_holdleaderswakeup != 0) {
+ fdp->fd_holdleaderswakeup = 0;
+ wakeup(&fdp->fd_holdleaderscount);
+ }
+ FILEDESC_XUNLOCK(fdp);
+ }
+ return (error);
+}
+
+/*
+ * Close open file descriptors.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct closefrom_args {
+ int lowfd;
+};
+#endif
+/* ARGSUSED */
+int
+closefrom(struct thread *td, struct closefrom_args *uap)
+{
+ struct filedesc *fdp;
+ int fd;
+
+ fdp = td->td_proc->p_fd;
+ AUDIT_ARG_FD(uap->lowfd);
+
+ /*
+	 * Treat negative starting file descriptor values identically to
+	 * closefrom(0), which closes all files.
+ */
+ if (uap->lowfd < 0)
+ uap->lowfd = 0;
+ FILEDESC_SLOCK(fdp);
+ for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) {
+ if (fdp->fd_ofiles[fd] != NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ (void)kern_close(td, fd);
+ FILEDESC_SLOCK(fdp);
+ }
+ }
+ FILEDESC_SUNLOCK(fdp);
+ return (0);
+}
+
+#if defined(COMPAT_43)
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct ofstat_args {
+ int fd;
+ struct ostat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+ofstat(struct thread *td, struct ofstat_args *uap)
+{
+ struct ostat oub;
+ struct stat ub;
+ int error;
+
+ error = kern_fstat(td, uap->fd, &ub);
+ if (error == 0) {
+ cvtstat(&ub, &oub);
+ error = copyout(&oub, uap->sb, sizeof(oub));
+ }
+ return (error);
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fstat_args {
+ int fd;
+ struct stat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+fstat(struct thread *td, struct fstat_args *uap)
+{
+ struct stat ub;
+ int error;
+
+ error = kern_fstat(td, uap->fd, &ub);
+ if (error == 0)
+ error = copyout(&ub, uap->sb, sizeof(ub));
+ return (error);
+}
+
+int
+kern_fstat(struct thread *td, int fd, struct stat *sbp)
+{
+ struct file *fp;
+ int error;
+
+ AUDIT_ARG_FD(fd);
+
+ if ((error = fget(td, fd, &fp)) != 0)
+ return (error);
+
+ AUDIT_ARG_FILE(td->td_proc, fp);
+
+ error = fo_stat(fp, sbp, td->td_ucred, td);
+ fdrop(fp, td);
+#ifdef KTRACE
+ if (error == 0 && KTRPOINT(td, KTR_STRUCT))
+ ktrstat(sbp);
+#endif
+ return (error);
+}
+
+/*
+ * Return status information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct nfstat_args {
+ int fd;
+ struct nstat *sb;
+};
+#endif
+/* ARGSUSED */
+int
+nfstat(struct thread *td, struct nfstat_args *uap)
+{
+ struct nstat nub;
+ struct stat ub;
+ int error;
+
+ error = kern_fstat(td, uap->fd, &ub);
+ if (error == 0) {
+ cvtnstat(&ub, &nub);
+ error = copyout(&nub, uap->sb, sizeof(nub));
+ }
+ return (error);
+}
+
+/*
+ * Return pathconf information about a file descriptor.
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct fpathconf_args {
+ int fd;
+ int name;
+};
+#endif
+/* ARGSUSED */
+int
+fpathconf(struct thread *td, struct fpathconf_args *uap)
+{
+ struct file *fp;
+ struct vnode *vp;
+ int error;
+
+ if ((error = fget(td, uap->fd, &fp)) != 0)
+ return (error);
+
+ /* If asynchronous I/O is available, it works for all descriptors. */
+ if (uap->name == _PC_ASYNC_IO) {
+ td->td_retval[0] = async_io_version;
+ goto out;
+ }
+ vp = fp->f_vnode;
+ if (vp != NULL) {
+ int vfslocked;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vn_lock(vp, LK_SHARED | LK_RETRY);
+ error = VOP_PATHCONF(vp, uap->name, td->td_retval);
+ VOP_UNLOCK(vp, 0);
+ VFS_UNLOCK_GIANT(vfslocked);
+ } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
+ if (uap->name != _PC_PIPE_BUF) {
+ error = EINVAL;
+ } else {
+ td->td_retval[0] = PIPE_BUF;
+ error = 0;
+ }
+ } else {
+ error = EOPNOTSUPP;
+ }
+out:
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Grow the file table to accommodate (at least) nfd descriptors. This may
+ * block and drop the filedesc lock, but it will reacquire it before
+ * returning.
+ */
+static void
+fdgrowtable(struct filedesc *fdp, int nfd)
+{
+ struct filedesc0 *fdp0;
+ struct freetable *fo;
+ struct file **ntable;
+ struct file **otable;
+ char *nfileflags;
+ int nnfiles, onfiles;
+ NDSLOTTYPE *nmap;
+
+ FILEDESC_XLOCK_ASSERT(fdp);
+
+ KASSERT(fdp->fd_nfiles > 0,
+ ("zero-length file table"));
+
+ /* compute the size of the new table */
+ onfiles = fdp->fd_nfiles;
+ nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
+ if (nnfiles <= onfiles)
+ /* the table is already large enough */
+ return;
+
+ /* allocate a new table and (if required) new bitmaps */
+ FILEDESC_XUNLOCK(fdp);
+ ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable),
+ M_FILEDESC, M_ZERO | M_WAITOK);
+ nfileflags = (char *)&ntable[nnfiles];
+ if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
+ nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE,
+ M_FILEDESC, M_ZERO | M_WAITOK);
+ else
+ nmap = NULL;
+ FILEDESC_XLOCK(fdp);
+
+ /*
+ * We now have new tables ready to go. Since we dropped the
+ * filedesc lock to call malloc(), watch out for a race.
+ */
+ onfiles = fdp->fd_nfiles;
+ if (onfiles >= nnfiles) {
+ /* we lost the race, but that's OK */
+ free(ntable, M_FILEDESC);
+ if (nmap != NULL)
+ free(nmap, M_FILEDESC);
+ return;
+ }
+ bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
+ bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
+ otable = fdp->fd_ofiles;
+ fdp->fd_ofileflags = nfileflags;
+ fdp->fd_ofiles = ntable;
+ /*
+ * We must preserve ofiles until the process exits because we can't
+ * be certain that no threads have references to the old table via
+ * _fget().
+ */
+ if (onfiles > NDFILE) {
+ fo = (struct freetable *)&otable[onfiles];
+ fdp0 = (struct filedesc0 *)fdp;
+ fo->ft_table = otable;
+ SLIST_INSERT_HEAD(&fdp0->fd_free, fo, ft_next);
+ }
+ if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
+ bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
+ if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
+ free(fdp->fd_map, M_FILEDESC);
+ fdp->fd_map = nmap;
+ }
+ fdp->fd_nfiles = nnfiles;
+}
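+
+/*
+ * Illustrative sizing note (editorial sketch, not part of the imported
+ * FreeBSD sources): assuming NDENTRIES is 64 (one bit per descriptor in
+ * a 64-bit NDSLOTTYPE), a call such as
+ *
+ *	fdgrowtable(fdp, 70);
+ *
+ * rounds the request up to NDSLOTS(70) * NDENTRIES = 2 * 64 = 128 slots,
+ * so the new table always covers whole bitmap words.
+ */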
+
+/*
+ * Allocate a file descriptor for the process.
+ */
+int
+fdalloc(struct thread *td, int minfd, int *result)
+{
+ struct proc *p = td->td_proc;
+ struct filedesc *fdp = p->p_fd;
+ int fd = -1, maxfd;
+
+ FILEDESC_XLOCK_ASSERT(fdp);
+
+ if (fdp->fd_freefile > minfd)
+ minfd = fdp->fd_freefile;
+
+ PROC_LOCK(p);
+ maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+ PROC_UNLOCK(p);
+
+ /*
+ * Search the bitmap for a free descriptor. If none is found, try
+ * to grow the file table. Keep at it until we either get a file
+ * descriptor or run into process or system limits; fdgrowtable()
+ * may drop the filedesc lock, so we're in a race.
+ */
+ for (;;) {
+ fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
+ if (fd >= maxfd)
+ return (EMFILE);
+ if (fd < fdp->fd_nfiles)
+ break;
+ fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd));
+ }
+
+ /*
+ * Perform some sanity checks, then mark the file descriptor as
+ * used and return it to the caller.
+ */
+ KASSERT(!fdisused(fdp, fd),
+ ("fd_first_free() returned non-free descriptor"));
+ KASSERT(fdp->fd_ofiles[fd] == NULL,
+ ("free descriptor isn't"));
+ fdp->fd_ofileflags[fd] = 0; /* XXX needed? */
+ fdused(fdp, fd);
+ *result = fd;
+ return (0);
+}
+
+/*
+ * Check to see whether n user file descriptors are available to the process
+ * p.
+ */
+int
+fdavail(struct thread *td, int n)
+{
+ struct proc *p = td->td_proc;
+ struct filedesc *fdp = td->td_proc->p_fd;
+ struct file **fpp;
+ int i, lim, last;
+
+ FILEDESC_LOCK_ASSERT(fdp);
+
+ PROC_LOCK(p);
+ lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
+ PROC_UNLOCK(p);
+ if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
+ return (1);
+ last = min(fdp->fd_nfiles, lim);
+ fpp = &fdp->fd_ofiles[fdp->fd_freefile];
+ for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
+ if (*fpp == NULL && --n <= 0)
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * Create a new open file structure and allocate a file descriptor for the
+ * process that refers to it. We add one reference to the file for the
+ * descriptor table and one reference for resultfp. This is to prevent us
+ * being preempted and the entry in the descriptor table closed after we
+ * release the FILEDESC lock.
+ */
+int
+falloc(struct thread *td, struct file **resultfp, int *resultfd)
+{
+ struct proc *p = td->td_proc;
+ struct file *fp;
+ int error, i;
+ int maxuserfiles = maxfiles - (maxfiles / 20);
+ static struct timeval lastfail;
+ static int curfail;
+
+ fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
+ if ((openfiles >= maxuserfiles &&
+ priv_check(td, PRIV_MAXFILES) != 0) ||
+ openfiles >= maxfiles) {
+ if (ppsratecheck(&lastfail, &curfail, 1)) {
+ printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
+ td->td_ucred->cr_ruid);
+ }
+ uma_zfree(file_zone, fp);
+ return (ENFILE);
+ }
+ atomic_add_int(&openfiles, 1);
+
+ /*
+ * If the process has file descriptor zero open, add the new file
+ * descriptor to the list of open files at that point, otherwise
+ * put it at the front of the list of open files.
+ */
+ refcount_init(&fp->f_count, 1);
+ if (resultfp)
+ fhold(fp);
+ fp->f_cred = crhold(td->td_ucred);
+ fp->f_ops = &badfileops;
+ fp->f_data = NULL;
+ fp->f_vnode = NULL;
+ FILEDESC_XLOCK(p->p_fd);
+ if ((error = fdalloc(td, 0, &i))) {
+ FILEDESC_XUNLOCK(p->p_fd);
+
+ fdrop(fp, td);
+ if (resultfp)
+ fdrop(fp, td);
+ return (error);
+ }
+ p->p_fd->fd_ofiles[i] = fp;
+ FILEDESC_XUNLOCK(p->p_fd);
+ if (resultfp)
+ *resultfp = fp;
+ if (resultfd)
+ *resultfd = i;
+ return (0);
+}
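+
+/*
+ * Minimal usage sketch (editorial illustration, not part of the imported
+ * FreeBSD sources; "data" and "example_fileops" are placeholders): a
+ * typical caller allocates the file and descriptor, wires the file up
+ * with finit(), and then drops the extra reference that falloc() took on
+ * behalf of resultfp:
+ *
+ *	struct file *fp;
+ *	int fd, error;
+ *
+ *	error = falloc(td, &fp, &fd);
+ *	if (error != 0)
+ *		return (error);
+ *	finit(fp, FREAD | FWRITE, DTYPE_PIPE, data, &example_fileops);
+ *	fdrop(fp, td);
+ *	td->td_retval[0] = fd;
+ */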
+
+/*
+ * Build a new filedesc structure from another.
+ * Copy the current, root, and jail root vnode references.
+ */
+struct filedesc *
+fdinit(struct filedesc *fdp)
+{
+ struct filedesc0 *newfdp;
+
+ newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
+ FILEDESC_LOCK_INIT(&newfdp->fd_fd);
+ if (fdp != NULL) {
+ FILEDESC_XLOCK(fdp);
+ newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
+ if (newfdp->fd_fd.fd_cdir)
+ VREF(newfdp->fd_fd.fd_cdir);
+ newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
+ if (newfdp->fd_fd.fd_rdir)
+ VREF(newfdp->fd_fd.fd_rdir);
+ newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
+ if (newfdp->fd_fd.fd_jdir)
+ VREF(newfdp->fd_fd.fd_jdir);
+ FILEDESC_XUNLOCK(fdp);
+ }
+
+ /* Create the file descriptor table. */
+ newfdp->fd_fd.fd_refcnt = 1;
+ newfdp->fd_fd.fd_holdcnt = 1;
+ newfdp->fd_fd.fd_cmask = CMASK;
+ newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
+ newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
+ newfdp->fd_fd.fd_nfiles = NDFILE;
+ newfdp->fd_fd.fd_map = newfdp->fd_dmap;
+ newfdp->fd_fd.fd_lastfile = -1;
+ return (&newfdp->fd_fd);
+}
+
+static struct filedesc *
+fdhold(struct proc *p)
+{
+ struct filedesc *fdp;
+
+ mtx_lock(&fdesc_mtx);
+ fdp = p->p_fd;
+ if (fdp != NULL)
+ fdp->fd_holdcnt++;
+ mtx_unlock(&fdesc_mtx);
+ return (fdp);
+}
+
+static void
+fddrop(struct filedesc *fdp)
+{
+ struct filedesc0 *fdp0;
+ struct freetable *ft;
+ int i;
+
+ mtx_lock(&fdesc_mtx);
+ i = --fdp->fd_holdcnt;
+ mtx_unlock(&fdesc_mtx);
+ if (i > 0)
+ return;
+
+ FILEDESC_LOCK_DESTROY(fdp);
+ fdp0 = (struct filedesc0 *)fdp;
+ while ((ft = SLIST_FIRST(&fdp0->fd_free)) != NULL) {
+ SLIST_REMOVE_HEAD(&fdp0->fd_free, ft_next);
+ free(ft->ft_table, M_FILEDESC);
+ }
+ free(fdp, M_FILEDESC);
+}
+
+/*
+ * Share a filedesc structure.
+ */
+struct filedesc *
+fdshare(struct filedesc *fdp)
+{
+
+ FILEDESC_XLOCK(fdp);
+ fdp->fd_refcnt++;
+ FILEDESC_XUNLOCK(fdp);
+ return (fdp);
+}
+
+/*
+ * Unshare a filedesc structure, if necessary by making a copy
+ */
+void
+fdunshare(struct proc *p, struct thread *td)
+{
+
+ FILEDESC_XLOCK(p->p_fd);
+ if (p->p_fd->fd_refcnt > 1) {
+ struct filedesc *tmp;
+
+ FILEDESC_XUNLOCK(p->p_fd);
+ tmp = fdcopy(p->p_fd);
+ fdfree(td);
+ p->p_fd = tmp;
+ } else
+ FILEDESC_XUNLOCK(p->p_fd);
+}
+
+/*
+ * Copy a filedesc structure. A NULL pointer argument returns a NULL
+ * reference; this is to ease callers, not catch errors.
+ */
+struct filedesc *
+fdcopy(struct filedesc *fdp)
+{
+ struct filedesc *newfdp;
+ int i;
+
+ /* Certain daemons might not have file descriptors. */
+ if (fdp == NULL)
+ return (NULL);
+
+ newfdp = fdinit(fdp);
+ FILEDESC_SLOCK(fdp);
+ while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
+ FILEDESC_SUNLOCK(fdp);
+ FILEDESC_XLOCK(newfdp);
+ fdgrowtable(newfdp, fdp->fd_lastfile + 1);
+ FILEDESC_XUNLOCK(newfdp);
+ FILEDESC_SLOCK(fdp);
+ }
+ /* copy everything except kqueue descriptors */
+ newfdp->fd_freefile = -1;
+ for (i = 0; i <= fdp->fd_lastfile; ++i) {
+ if (fdisused(fdp, i) &&
+ fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE &&
+ fdp->fd_ofiles[i]->f_ops != &badfileops) {
+ newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
+ newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
+ fhold(newfdp->fd_ofiles[i]);
+ newfdp->fd_lastfile = i;
+ } else {
+ if (newfdp->fd_freefile == -1)
+ newfdp->fd_freefile = i;
+ }
+ }
+ newfdp->fd_cmask = fdp->fd_cmask;
+ FILEDESC_SUNLOCK(fdp);
+ FILEDESC_XLOCK(newfdp);
+ for (i = 0; i <= newfdp->fd_lastfile; ++i)
+ if (newfdp->fd_ofiles[i] != NULL)
+ fdused(newfdp, i);
+ if (newfdp->fd_freefile == -1)
+ newfdp->fd_freefile = i;
+ FILEDESC_XUNLOCK(newfdp);
+ return (newfdp);
+}
+
+/*
+ * Release a filedesc structure.
+ */
+void
+fdfree(struct thread *td)
+{
+ struct filedesc *fdp;
+ struct file **fpp;
+ int i, locked;
+ struct filedesc_to_leader *fdtol;
+ struct file *fp;
+ struct vnode *cdir, *jdir, *rdir, *vp;
+ struct flock lf;
+
+ /* Certain daemons might not have file descriptors. */
+ fdp = td->td_proc->p_fd;
+ if (fdp == NULL)
+ return;
+
+ /* Check for special need to clear POSIX style locks */
+ fdtol = td->td_proc->p_fdtol;
+ if (fdtol != NULL) {
+ FILEDESC_XLOCK(fdp);
+ KASSERT(fdtol->fdl_refcount > 0,
+ ("filedesc_to_refcount botch: fdl_refcount=%d",
+ fdtol->fdl_refcount));
+ if (fdtol->fdl_refcount == 1 &&
+ (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+ for (i = 0, fpp = fdp->fd_ofiles;
+ i <= fdp->fd_lastfile;
+ i++, fpp++) {
+ if (*fpp == NULL ||
+ (*fpp)->f_type != DTYPE_VNODE)
+ continue;
+ fp = *fpp;
+ fhold(fp);
+ FILEDESC_XUNLOCK(fdp);
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+ vp = fp->f_vnode;
+ locked = VFS_LOCK_GIANT(vp->v_mount);
+ (void) VOP_ADVLOCK(vp,
+ (caddr_t)td->td_proc->
+ p_leader,
+ F_UNLCK,
+ &lf,
+ F_POSIX);
+ VFS_UNLOCK_GIANT(locked);
+ FILEDESC_XLOCK(fdp);
+ fdrop(fp, td);
+ fpp = fdp->fd_ofiles + i;
+ }
+ }
+ retry:
+ if (fdtol->fdl_refcount == 1) {
+ if (fdp->fd_holdleaderscount > 0 &&
+ (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+ /*
+ * close() or do_dup() has cleared a reference
+ * in a shared file descriptor table.
+ */
+ fdp->fd_holdleaderswakeup = 1;
+ sx_sleep(&fdp->fd_holdleaderscount,
+ FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
+ goto retry;
+ }
+ if (fdtol->fdl_holdcount > 0) {
+ /*
+ * Ensure that fdtol->fdl_leader remains
+ * valid in closef().
+ */
+ fdtol->fdl_wakeup = 1;
+ sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
+ "fdlhold", 0);
+ goto retry;
+ }
+ }
+ fdtol->fdl_refcount--;
+ if (fdtol->fdl_refcount == 0 &&
+ fdtol->fdl_holdcount == 0) {
+ fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
+ fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
+ } else
+ fdtol = NULL;
+ td->td_proc->p_fdtol = NULL;
+ FILEDESC_XUNLOCK(fdp);
+ if (fdtol != NULL)
+ free(fdtol, M_FILEDESC_TO_LEADER);
+ }
+ FILEDESC_XLOCK(fdp);
+ i = --fdp->fd_refcnt;
+ FILEDESC_XUNLOCK(fdp);
+ if (i > 0)
+ return;
+
+ fpp = fdp->fd_ofiles;
+ for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
+ if (*fpp) {
+ FILEDESC_XLOCK(fdp);
+ fp = *fpp;
+ *fpp = NULL;
+ FILEDESC_XUNLOCK(fdp);
+ (void) closef(fp, td);
+ }
+ }
+ FILEDESC_XLOCK(fdp);
+
+ /* XXX This should happen earlier. */
+ mtx_lock(&fdesc_mtx);
+ td->td_proc->p_fd = NULL;
+ mtx_unlock(&fdesc_mtx);
+
+ if (fdp->fd_nfiles > NDFILE)
+ free(fdp->fd_ofiles, M_FILEDESC);
+ if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
+ free(fdp->fd_map, M_FILEDESC);
+
+ fdp->fd_nfiles = 0;
+
+ cdir = fdp->fd_cdir;
+ fdp->fd_cdir = NULL;
+ rdir = fdp->fd_rdir;
+ fdp->fd_rdir = NULL;
+ jdir = fdp->fd_jdir;
+ fdp->fd_jdir = NULL;
+ FILEDESC_XUNLOCK(fdp);
+
+ if (cdir) {
+ locked = VFS_LOCK_GIANT(cdir->v_mount);
+ vrele(cdir);
+ VFS_UNLOCK_GIANT(locked);
+ }
+ if (rdir) {
+ locked = VFS_LOCK_GIANT(rdir->v_mount);
+ vrele(rdir);
+ VFS_UNLOCK_GIANT(locked);
+ }
+ if (jdir) {
+ locked = VFS_LOCK_GIANT(jdir->v_mount);
+ vrele(jdir);
+ VFS_UNLOCK_GIANT(locked);
+ }
+
+ fddrop(fdp);
+}
+
+/*
+ * For setugid programs, we don't want people to use that setugidness
+ * to generate error messages which write to a file which would otherwise
+ * be off-limits to the process. We check for filesystems where
+ * the vnode can change out from under us after execve (like [lin]procfs).
+ *
+ * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
+ * sufficient. We also don't check for setugidness since we know we are.
+ */
+static int
+is_unsafe(struct file *fp)
+{
+ if (fp->f_type == DTYPE_VNODE) {
+ struct vnode *vp = fp->f_vnode;
+
+ if ((vp->v_vflag & VV_PROCDEP) != 0)
+ return (1);
+ }
+ return (0);
+}
+
+/*
+ * Make this setugid thing safe, if at all possible.
+ */
+void
+setugidsafety(struct thread *td)
+{
+ struct filedesc *fdp;
+ int i;
+
+ /* Certain daemons might not have file descriptors. */
+ fdp = td->td_proc->p_fd;
+ if (fdp == NULL)
+ return;
+
+ /*
+ * Note: fdp->fd_ofiles may be reallocated out from under us while
+ * we are blocked in a close. Be careful!
+ */
+ FILEDESC_XLOCK(fdp);
+ for (i = 0; i <= fdp->fd_lastfile; i++) {
+ if (i > 2)
+ break;
+ if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
+ struct file *fp;
+
+ knote_fdclose(td, i);
+ /*
+ * NULL-out descriptor prior to close to avoid
+ * a race while close blocks.
+ */
+ fp = fdp->fd_ofiles[i];
+ fdp->fd_ofiles[i] = NULL;
+ fdp->fd_ofileflags[i] = 0;
+ fdunused(fdp, i);
+ FILEDESC_XUNLOCK(fdp);
+ (void) closef(fp, td);
+ FILEDESC_XLOCK(fdp);
+ }
+ }
+ FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * If a specific file object occupies a specific file descriptor, close the
+ * file descriptor entry and drop a reference on the file object. This is a
+ * convenience function for handling a subsequent error in a function that
+ * calls falloc(); it handles the race where another thread might have closed
+ * the file descriptor out from under the thread creating the file object.
+ */
+void
+fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
+{
+
+ FILEDESC_XLOCK(fdp);
+ if (fdp->fd_ofiles[idx] == fp) {
+ fdp->fd_ofiles[idx] = NULL;
+ fdunused(fdp, idx);
+ FILEDESC_XUNLOCK(fdp);
+ fdrop(fp, td);
+ } else
+ FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * Close any files on exec?
+ */
+void
+fdcloseexec(struct thread *td)
+{
+ struct filedesc *fdp;
+ int i;
+
+ /* Certain daemons might not have file descriptors. */
+ fdp = td->td_proc->p_fd;
+ if (fdp == NULL)
+ return;
+
+ FILEDESC_XLOCK(fdp);
+
+ /*
+ * We cannot cache fd_ofiles or fd_ofileflags since operations
+ * may block and rip them out from under us.
+ */
+ for (i = 0; i <= fdp->fd_lastfile; i++) {
+ if (fdp->fd_ofiles[i] != NULL &&
+ (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE ||
+ (fdp->fd_ofileflags[i] & UF_EXCLOSE))) {
+ struct file *fp;
+
+ knote_fdclose(td, i);
+ /*
+ * NULL-out descriptor prior to close to avoid
+ * a race while close blocks.
+ */
+ fp = fdp->fd_ofiles[i];
+ fdp->fd_ofiles[i] = NULL;
+ fdp->fd_ofileflags[i] = 0;
+ fdunused(fdp, i);
+ if (fp->f_type == DTYPE_MQUEUE)
+ mq_fdclose(td, i, fp);
+ FILEDESC_XUNLOCK(fdp);
+ (void) closef(fp, td);
+ FILEDESC_XLOCK(fdp);
+ }
+ }
+ FILEDESC_XUNLOCK(fdp);
+}
+
+/*
+ * It is unsafe for set[ug]id processes to be started with file
+ * descriptors 0..2 closed, as these descriptors are given implicit
+ * significance in the Standard C library. fdcheckstd() will create a
+ * descriptor referencing /dev/null for each of stdin, stdout, and
+ * stderr that is not already open.
+ */
+int
+fdcheckstd(struct thread *td)
+{
+ struct filedesc *fdp;
+ register_t retval, save;
+ int i, error, devnull;
+
+ fdp = td->td_proc->p_fd;
+ if (fdp == NULL)
+ return (0);
+ KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
+ devnull = -1;
+ error = 0;
+ for (i = 0; i < 3; i++) {
+ if (fdp->fd_ofiles[i] != NULL)
+ continue;
+ if (devnull < 0) {
+ save = td->td_retval[0];
+ error = kern_open(td, "/dev/null", UIO_SYSSPACE,
+ O_RDWR, 0);
+ devnull = td->td_retval[0];
+ KASSERT(devnull == i, ("oof, we didn't get our fd"));
+ td->td_retval[0] = save;
+ if (error)
+ break;
+ } else {
+ error = do_dup(td, DUP_FIXED, devnull, i, &retval);
+ if (error != 0)
+ break;
+ }
+ }
+ return (error);
+}
+
+/*
+ * Internal form of close. Decrement reference count on file structure.
+ * Note: td may be NULL when closing a file that was being passed in a
+ * message.
+ *
+ * XXXRW: Giant is not required for the caller, but often will be held; this
+ * makes it moderately likely the Giant will be recursed in the VFS case.
+ */
+int
+closef(struct file *fp, struct thread *td)
+{
+ struct vnode *vp;
+ struct flock lf;
+ struct filedesc_to_leader *fdtol;
+ struct filedesc *fdp;
+
+ /*
+ * POSIX record locking dictates that any close releases ALL
+ * locks owned by this process. This is handled by setting
+ * a flag in the unlock to free ONLY locks obeying POSIX
+ * semantics, and not to free BSD-style file locks.
+ * If the descriptor was in a message, POSIX-style locks
+ * aren't passed with the descriptor, and the thread pointer
+ * will be NULL. Callers should be careful only to pass a
+ * NULL thread pointer when there really is no owning
+ * context that might have locks, or the locks will be
+ * leaked.
+ */
+ if (fp->f_type == DTYPE_VNODE && td != NULL) {
+ int vfslocked;
+
+ vp = fp->f_vnode;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+ (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
+ F_UNLCK, &lf, F_POSIX);
+ }
+ fdtol = td->td_proc->p_fdtol;
+ if (fdtol != NULL) {
+ /*
+ * Handle special case where file descriptor table is
+ * shared between multiple process leaders.
+ */
+ fdp = td->td_proc->p_fd;
+ FILEDESC_XLOCK(fdp);
+ for (fdtol = fdtol->fdl_next;
+ fdtol != td->td_proc->p_fdtol;
+ fdtol = fdtol->fdl_next) {
+ if ((fdtol->fdl_leader->p_flag &
+ P_ADVLOCK) == 0)
+ continue;
+ fdtol->fdl_holdcount++;
+ FILEDESC_XUNLOCK(fdp);
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+ vp = fp->f_vnode;
+ (void) VOP_ADVLOCK(vp,
+ (caddr_t)fdtol->fdl_leader,
+ F_UNLCK, &lf, F_POSIX);
+ FILEDESC_XLOCK(fdp);
+ fdtol->fdl_holdcount--;
+ if (fdtol->fdl_holdcount == 0 &&
+ fdtol->fdl_wakeup != 0) {
+ fdtol->fdl_wakeup = 0;
+ wakeup(fdtol);
+ }
+ }
+ FILEDESC_XUNLOCK(fdp);
+ }
+ VFS_UNLOCK_GIANT(vfslocked);
+ }
+ return (fdrop(fp, td));
+}
+
+/*
+ * Initialize the file pointer with the specified properties.
+ *
+ * The ops are set with release semantics to be certain that the flags, type,
+ * and data are visible when ops is. This is to prevent ops methods from being
+ * called with bad data.
+ */
+void
+finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
+{
+ fp->f_data = data;
+ fp->f_flag = flag;
+ fp->f_type = type;
+ atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
+}
+#endif /* __rtems__ */
+
+struct file *
+fget_unlocked(struct filedesc *fdp, int fd)
+{
+ struct file *fp;
+ u_int count;
+
+ if (fd < 0 || fd >= fdp->fd_nfiles)
+ return (NULL);
+ /*
+ * Fetch the descriptor locklessly. We avoid fdrop() races by
+ * never raising a refcount above 0. To accomplish this we have
+ * to use a cmpset loop rather than an atomic_add. The descriptor
+ * must be re-verified once we acquire a reference to be certain
+ * that the identity is still correct and we did not lose a race
+ * due to preemption.
+ */
+ for (;;) {
+ fp = fdp->fd_ofiles[fd];
+ if (fp == NULL)
+ break;
+ count = fp->f_count;
+ if (count == 0)
+ continue;
+ /*
+ * Use an acquire barrier to prevent caching of fd_ofiles
+ * so it is refreshed for verification.
+ */
+ if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1)
+ continue;
+ if (fp == fdp->fd_ofiles[fd])
+ break;
+ fdrop(fp, curthread);
+ }
+
+ return (fp);
+}
+
+/*
+ * Extract the file pointer associated with the specified descriptor for the
+ * current user process.
+ *
+ * If the descriptor doesn't exist or doesn't match 'flags', EBADF is
+ * returned.
+ *
+ * If an error occurred, the non-zero error is returned and *fpp is set to
+ * NULL. Otherwise *fpp is held and set and zero is returned. Caller is
+ * responsible for fdrop().
+ */
+static __inline int
+_fget(struct thread *td, int fd, struct file **fpp, int flags)
+{
+ struct filedesc *fdp;
+ struct file *fp;
+
+ *fpp = NULL;
+ if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
+ return (EBADF);
+ if ((fp = fget_unlocked(fdp, fd)) == NULL)
+ return (EBADF);
+ if (fp->f_ops == &badfileops) {
+ fdrop(fp, td);
+ return (EBADF);
+ }
+ /*
+ * FREAD and FWRITE failure return EBADF as per POSIX.
+ *
+ * Only one flag, or 0, may be specified.
+ */
+ if ((flags == FREAD && (fp->f_flag & FREAD) == 0) ||
+ (flags == FWRITE && (fp->f_flag & FWRITE) == 0)) {
+ fdrop(fp, td);
+ return (EBADF);
+ }
+ *fpp = fp;
+ return (0);
+}
+
+int
+fget(struct thread *td, int fd, struct file **fpp)
+{
+
+ return(_fget(td, fd, fpp, 0));
+}
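+
+/*
+ * Illustrative call pattern (editorial sketch, not part of the imported
+ * FreeBSD sources): every successful fget() must be paired with fdrop()
+ * once the caller is done with the file pointer, as kern_fstat() above
+ * does:
+ *
+ *	struct file *fp;
+ *	int error;
+ *
+ *	if ((error = fget(td, fd, &fp)) != 0)
+ *		return (error);
+ *	error = fo_stat(fp, sbp, td->td_ucred, td);
+ *	fdrop(fp, td);
+ *
+ * A failed fget() returns EBADF for a bad or stale descriptor and leaves
+ * fp NULL, so no fdrop() is needed on the error path.
+ */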
+
+int
+fget_read(struct thread *td, int fd, struct file **fpp)
+{
+
+ return(_fget(td, fd, fpp, FREAD));
+}
+
+#ifndef __rtems__
+int
+fget_write(struct thread *td, int fd, struct file **fpp)
+{
+
+ return(_fget(td, fd, fpp, FWRITE));
+}
+
+/*
+ * Like fget() but loads the underlying vnode, or returns an error if the
+ * descriptor does not represent a vnode. Note that pipes use vnodes but
+ * never have VM objects. The returned vnode will be vref()'d.
+ *
+ * XXX: what about the unused flags ?
+ */
+static __inline int
+_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
+{
+ struct file *fp;
+ int error;
+
+ *vpp = NULL;
+ if ((error = _fget(td, fd, &fp, flags)) != 0)
+ return (error);
+ if (fp->f_vnode == NULL) {
+ error = EINVAL;
+ } else {
+ *vpp = fp->f_vnode;
+ vref(*vpp);
+ }
+ fdrop(fp, td);
+
+ return (error);
+}
+
+int
+fgetvp(struct thread *td, int fd, struct vnode **vpp)
+{
+
+ return (_fgetvp(td, fd, vpp, 0));
+}
+
+int
+fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
+{
+
+ return (_fgetvp(td, fd, vpp, FREAD));
+}
+
+#ifdef notyet
+int
+fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
+{
+
+ return (_fgetvp(td, fd, vpp, FWRITE));
+}
+#endif
+
+/*
+ * Like fget() but loads the underlying socket, or returns an error if the
+ * descriptor does not represent a socket.
+ *
+ * We bump the ref count on the returned socket. XXX Also obtain the SX lock
+ * in the future.
+ *
+ * Note: fgetsock() and fputsock() are deprecated, as consumers should rely
+ * on their file descriptor reference to prevent the socket from being free'd
+ * during use.
+ */
+int
+fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
+{
+ struct file *fp;
+ int error;
+
+ *spp = NULL;
+ if (fflagp != NULL)
+ *fflagp = 0;
+ if ((error = _fget(td, fd, &fp, 0)) != 0)
+ return (error);
+ if (fp->f_type != DTYPE_SOCKET) {
+ error = ENOTSOCK;
+ } else {
+ *spp = fp->f_data;
+ if (fflagp)
+ *fflagp = fp->f_flag;
+ SOCK_LOCK(*spp);
+ soref(*spp);
+ SOCK_UNLOCK(*spp);
+ }
+ fdrop(fp, td);
+
+ return (error);
+}
+
+/*
+ * Drop the reference count on the socket and XXX release the SX lock in the
+ * future. The last reference closes the socket.
+ *
+ * Note: fputsock() is deprecated, see comment for fgetsock().
+ */
+void
+fputsock(struct socket *so)
+{
+
+ ACCEPT_LOCK();
+ SOCK_LOCK(so);
+ sorele(so);
+}
+#endif /* __rtems__ */
+
+/*
+ * Handle the last reference to a file being closed.
+ */
+int
+_fdrop(struct file *fp, struct thread *td)
+{
+#ifdef __rtems__
+ panic("fdrop: RTEMS unsupported");
+
+#else /* __rtems__ */
+ int error;
+
+ error = 0;
+ if (fp->f_count != 0)
+ panic("fdrop: count %d", fp->f_count);
+ if (fp->f_ops != &badfileops)
+ error = fo_close(fp, td);
+ /*
+ * The f_cdevpriv cannot be assigned non-NULL value while we
+ * are destroying the file.
+ */
+ if (fp->f_cdevpriv != NULL)
+ devfs_fpdrop(fp);
+ atomic_subtract_int(&openfiles, 1);
+ crfree(fp->f_cred);
+ uma_zfree(file_zone, fp);
+
+ return (error);
+#endif /* __rtems__ */
+}
+
+#ifndef __rtems__
+/*
+ * Apply an advisory lock on a file descriptor.
+ *
+ * Just attempt to get a record lock of the requested type on the entire file
+ * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
+ */
+#ifndef _SYS_SYSPROTO_HH_
+struct flock_args {
+ int fd;
+ int how;
+};
+#endif
+/* ARGSUSED */
+int
+flock(struct thread *td, struct flock_args *uap)
+{
+ struct file *fp;
+ struct vnode *vp;
+ struct flock lf;
+ int vfslocked;
+ int error;
+
+ if ((error = fget(td, uap->fd, &fp)) != 0)
+ return (error);
+ if (fp->f_type != DTYPE_VNODE) {
+ fdrop(fp, td);
+ return (EOPNOTSUPP);
+ }
+
+ vp = fp->f_vnode;
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (uap->how & LOCK_UN) {
+ lf.l_type = F_UNLCK;
+ atomic_clear_int(&fp->f_flag, FHASLOCK);
+ error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
+ goto done2;
+ }
+ if (uap->how & LOCK_EX)
+ lf.l_type = F_WRLCK;
+ else if (uap->how & LOCK_SH)
+ lf.l_type = F_RDLCK;
+ else {
+ error = EBADF;
+ goto done2;
+ }
+ atomic_set_int(&fp->f_flag, FHASLOCK);
+ error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
+ (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
+done2:
+ fdrop(fp, td);
+ VFS_UNLOCK_GIANT(vfslocked);
+ return (error);
+}
+/*
+ * Duplicate the specified descriptor to a free descriptor.
+ */
+int
+dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error)
+{
+ struct file *wfp;
+ struct file *fp;
+
+ /*
+ * If the to-be-dup'd fd number is greater than the allowed number
+ * of file descriptors, or the fd to be dup'd has already been
+ * closed, then reject.
+ */
+ FILEDESC_XLOCK(fdp);
+ if (dfd < 0 || dfd >= fdp->fd_nfiles ||
+ (wfp = fdp->fd_ofiles[dfd]) == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EBADF);
+ }
+
+ /*
+ * There are two cases of interest here.
+ *
+ * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
+ *
+ * For ENXIO steal away the file structure from (dfd) and store it in
+ * (indx). (dfd) is effectively closed by this operation.
+ *
+ * Any other error code is just returned.
+ */
+ switch (error) {
+ case ENODEV:
+ /*
+ * Check that the mode the file is being opened for is a
+ * subset of the mode of the existing descriptor.
+ */
+ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EACCES);
+ }
+ fp = fdp->fd_ofiles[indx];
+ fdp->fd_ofiles[indx] = wfp;
+ fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+ if (fp == NULL)
+ fdused(fdp, indx);
+ fhold(wfp);
+ FILEDESC_XUNLOCK(fdp);
+ if (fp != NULL)
+ /*
+ * We now own the reference to fp that the ofiles[]
+ * array used to own. Release it.
+ */
+ fdrop(fp, td);
+ return (0);
+
+ case ENXIO:
+ /*
+ * Steal away the file pointer from dfd and stuff it into indx.
+ */
+ fp = fdp->fd_ofiles[indx];
+ fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
+ fdp->fd_ofiles[dfd] = NULL;
+ fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
+ fdp->fd_ofileflags[dfd] = 0;
+ fdunused(fdp, dfd);
+ if (fp == NULL)
+ fdused(fdp, indx);
+ FILEDESC_XUNLOCK(fdp);
+
+ /*
+ * We now own the reference to fp that the ofiles[] array
+ * used to own. Release it.
+ */
+ if (fp != NULL)
+ fdrop(fp, td);
+ return (0);
+
+ default:
+ FILEDESC_XUNLOCK(fdp);
+ return (error);
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Scan all active processes and prisons to see if any of them have a current
+ * or root directory of `olddp'. If so, replace them with the new mount point.
+ */
+void
+mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
+{
+ struct filedesc *fdp;
+ struct prison *pr;
+ struct proc *p;
+ int nrele;
+
+ if (vrefcnt(olddp) == 1)
+ return;
+ nrele = 0;
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ fdp = fdhold(p);
+ if (fdp == NULL)
+ continue;
+ FILEDESC_XLOCK(fdp);
+ if (fdp->fd_cdir == olddp) {
+ vref(newdp);
+ fdp->fd_cdir = newdp;
+ nrele++;
+ }
+ if (fdp->fd_rdir == olddp) {
+ vref(newdp);
+ fdp->fd_rdir = newdp;
+ nrele++;
+ }
+ if (fdp->fd_jdir == olddp) {
+ vref(newdp);
+ fdp->fd_jdir = newdp;
+ nrele++;
+ }
+ FILEDESC_XUNLOCK(fdp);
+ fddrop(fdp);
+ }
+ sx_sunlock(&allproc_lock);
+ if (rootvnode == olddp) {
+ vref(newdp);
+ rootvnode = newdp;
+ nrele++;
+ }
+ mtx_lock(&prison0.pr_mtx);
+ if (prison0.pr_root == olddp) {
+ vref(newdp);
+ prison0.pr_root = newdp;
+ nrele++;
+ }
+ mtx_unlock(&prison0.pr_mtx);
+ sx_slock(&allprison_lock);
+ TAILQ_FOREACH(pr, &allprison, pr_list) {
+ mtx_lock(&pr->pr_mtx);
+ if (pr->pr_root == olddp) {
+ vref(newdp);
+ pr->pr_root = newdp;
+ nrele++;
+ }
+ mtx_unlock(&pr->pr_mtx);
+ }
+ sx_sunlock(&allprison_lock);
+ while (nrele--)
+ vrele(olddp);
+}
+
+struct filedesc_to_leader *
+filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
+{
+ struct filedesc_to_leader *fdtol;
+
+ fdtol = malloc(sizeof(struct filedesc_to_leader),
+ M_FILEDESC_TO_LEADER,
+ M_WAITOK);
+ fdtol->fdl_refcount = 1;
+ fdtol->fdl_holdcount = 0;
+ fdtol->fdl_wakeup = 0;
+ fdtol->fdl_leader = leader;
+ if (old != NULL) {
+ FILEDESC_XLOCK(fdp);
+ fdtol->fdl_next = old->fdl_next;
+ fdtol->fdl_prev = old;
+ old->fdl_next = fdtol;
+ fdtol->fdl_next->fdl_prev = fdtol;
+ FILEDESC_XUNLOCK(fdp);
+ } else {
+ fdtol->fdl_next = fdtol;
+ fdtol->fdl_prev = fdtol;
+ }
+ return (fdtol);
+}
+
+/*
+ * Get file structures globally.
+ */
+static int
+sysctl_kern_file(SYSCTL_HANDLER_ARGS)
+{
+ struct xfile xf;
+ struct filedesc *fdp;
+ struct file *fp;
+ struct proc *p;
+ int error, n;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ if (req->oldptr == NULL) {
+ n = 0;
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ fdp = fdhold(p);
+ if (fdp == NULL)
+ continue;
+ /* overestimates sparse tables. */
+ if (fdp->fd_lastfile > 0)
+ n += fdp->fd_lastfile;
+ fddrop(fdp);
+ }
+ sx_sunlock(&allproc_lock);
+ return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
+ }
+ error = 0;
+ bzero(&xf, sizeof(xf));
+ xf.xf_size = sizeof(xf);
+ sx_slock(&allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ PROC_LOCK(p);
+ if (p_cansee(req->td, p) != 0) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ xf.xf_pid = p->p_pid;
+ xf.xf_uid = p->p_ucred->cr_uid;
+ PROC_UNLOCK(p);
+ fdp = fdhold(p);
+ if (fdp == NULL)
+ continue;
+ FILEDESC_SLOCK(fdp);
+ for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
+ if ((fp = fdp->fd_ofiles[n]) == NULL)
+ continue;
+ xf.xf_fd = n;
+ xf.xf_file = fp;
+ xf.xf_data = fp->f_data;
+ xf.xf_vnode = fp->f_vnode;
+ xf.xf_type = fp->f_type;
+ xf.xf_count = fp->f_count;
+ xf.xf_msgcount = 0;
+ xf.xf_offset = fp->f_offset;
+ xf.xf_flag = fp->f_flag;
+ error = SYSCTL_OUT(req, &xf, sizeof(xf));
+ if (error)
+ break;
+ }
+ FILEDESC_SUNLOCK(fdp);
+ fddrop(fdp);
+ if (error)
+ break;
+ }
+ sx_sunlock(&allproc_lock);
+ return (error);
+}
+
+SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
+ 0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
+
+#ifdef KINFO_OFILE_SIZE
+CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE);
+#endif
+
+#ifdef COMPAT_FREEBSD7
+static int
+export_vnode_for_osysctl(struct vnode *vp, int type,
+ struct kinfo_ofile *kif, struct filedesc *fdp, struct sysctl_req *req)
+{
+ int error;
+ char *fullpath, *freepath;
+ int vfslocked;
+
+ bzero(kif, sizeof(*kif));
+ kif->kf_structsize = sizeof(*kif);
+
+ vref(vp);
+ kif->kf_fd = type;
+ kif->kf_type = KF_TYPE_VNODE;
+ /* This function only handles directories. */
+ if (vp->v_type != VDIR) {
+ vrele(vp);
+ return (ENOTDIR);
+ }
+ kif->kf_vnode_type = KF_VTYPE_VDIR;
+
+ /*
+ * This is not a true file descriptor, so we set a bogus refcount
+ * and offset to indicate these fields should be ignored.
+ */
+ kif->kf_ref_count = -1;
+ kif->kf_offset = -1;
+
+ freepath = NULL;
+ fullpath = "-";
+ FILEDESC_SUNLOCK(fdp);
+ vn_fullpath(curthread, vp, &fullpath, &freepath);
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vrele(vp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
+ if (freepath != NULL)
+ free(freepath, M_TEMP);
+ error = SYSCTL_OUT(req, kif, sizeof(*kif));
+ FILEDESC_SLOCK(fdp);
+ return (error);
+}
+
+/*
+ * Get per-process file descriptors for use by procstat(1), et al.
+ */
+static int
+sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
+{
+ char *fullpath, *freepath;
+ struct kinfo_ofile *kif;
+ struct filedesc *fdp;
+ int error, i, *name;
+ struct socket *so;
+ struct vnode *vp;
+ struct file *fp;
+ struct proc *p;
+ struct tty *tp;
+ int vfslocked;
+
+ name = (int *)arg1;
+ if ((p = pfind((pid_t)name[0])) == NULL)
+ return (ESRCH);
+ if ((error = p_candebug(curthread, p))) {
+ PROC_UNLOCK(p);
+ return (error);
+ }
+ fdp = fdhold(p);
+ PROC_UNLOCK(p);
+ if (fdp == NULL)
+ return (ENOENT);
+ kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
+ FILEDESC_SLOCK(fdp);
+ if (fdp->fd_cdir != NULL)
+ export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
+ fdp, req);
+ if (fdp->fd_rdir != NULL)
+ export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
+ fdp, req);
+ if (fdp->fd_jdir != NULL)
+ export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
+ fdp, req);
+ for (i = 0; i < fdp->fd_nfiles; i++) {
+ if ((fp = fdp->fd_ofiles[i]) == NULL)
+ continue;
+ bzero(kif, sizeof(*kif));
+ kif->kf_structsize = sizeof(*kif);
+ vp = NULL;
+ so = NULL;
+ tp = NULL;
+ kif->kf_fd = i;
+ switch (fp->f_type) {
+ case DTYPE_VNODE:
+ kif->kf_type = KF_TYPE_VNODE;
+ vp = fp->f_vnode;
+ break;
+
+ case DTYPE_SOCKET:
+ kif->kf_type = KF_TYPE_SOCKET;
+ so = fp->f_data;
+ break;
+
+ case DTYPE_PIPE:
+ kif->kf_type = KF_TYPE_PIPE;
+ break;
+
+ case DTYPE_FIFO:
+ kif->kf_type = KF_TYPE_FIFO;
+ vp = fp->f_vnode;
+ break;
+
+ case DTYPE_KQUEUE:
+ kif->kf_type = KF_TYPE_KQUEUE;
+ break;
+
+ case DTYPE_CRYPTO:
+ kif->kf_type = KF_TYPE_CRYPTO;
+ break;
+
+ case DTYPE_MQUEUE:
+ kif->kf_type = KF_TYPE_MQUEUE;
+ break;
+
+ case DTYPE_SHM:
+ kif->kf_type = KF_TYPE_SHM;
+ break;
+
+ case DTYPE_SEM:
+ kif->kf_type = KF_TYPE_SEM;
+ break;
+
+ case DTYPE_PTS:
+ kif->kf_type = KF_TYPE_PTS;
+ tp = fp->f_data;
+ break;
+
+ default:
+ kif->kf_type = KF_TYPE_UNKNOWN;
+ break;
+ }
+ kif->kf_ref_count = fp->f_count;
+ if (fp->f_flag & FREAD)
+ kif->kf_flags |= KF_FLAG_READ;
+ if (fp->f_flag & FWRITE)
+ kif->kf_flags |= KF_FLAG_WRITE;
+ if (fp->f_flag & FAPPEND)
+ kif->kf_flags |= KF_FLAG_APPEND;
+ if (fp->f_flag & FASYNC)
+ kif->kf_flags |= KF_FLAG_ASYNC;
+ if (fp->f_flag & FFSYNC)
+ kif->kf_flags |= KF_FLAG_FSYNC;
+ if (fp->f_flag & FNONBLOCK)
+ kif->kf_flags |= KF_FLAG_NONBLOCK;
+ if (fp->f_flag & O_DIRECT)
+ kif->kf_flags |= KF_FLAG_DIRECT;
+ if (fp->f_flag & FHASLOCK)
+ kif->kf_flags |= KF_FLAG_HASLOCK;
+ kif->kf_offset = fp->f_offset;
+ if (vp != NULL) {
+ vref(vp);
+ switch (vp->v_type) {
+ case VNON:
+ kif->kf_vnode_type = KF_VTYPE_VNON;
+ break;
+ case VREG:
+ kif->kf_vnode_type = KF_VTYPE_VREG;
+ break;
+ case VDIR:
+ kif->kf_vnode_type = KF_VTYPE_VDIR;
+ break;
+ case VBLK:
+ kif->kf_vnode_type = KF_VTYPE_VBLK;
+ break;
+ case VCHR:
+ kif->kf_vnode_type = KF_VTYPE_VCHR;
+ break;
+ case VLNK:
+ kif->kf_vnode_type = KF_VTYPE_VLNK;
+ break;
+ case VSOCK:
+ kif->kf_vnode_type = KF_VTYPE_VSOCK;
+ break;
+ case VFIFO:
+ kif->kf_vnode_type = KF_VTYPE_VFIFO;
+ break;
+ case VBAD:
+ kif->kf_vnode_type = KF_VTYPE_VBAD;
+ break;
+ default:
+ kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
+ break;
+ }
+ /*
+ * It is OK to drop the filedesc lock here as we will
+ * re-validate and re-evaluate its properties when
+ * the loop continues.
+ */
+ freepath = NULL;
+ fullpath = "-";
+ FILEDESC_SUNLOCK(fdp);
+ vn_fullpath(curthread, vp, &fullpath, &freepath);
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vrele(vp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ strlcpy(kif->kf_path, fullpath,
+ sizeof(kif->kf_path));
+ if (freepath != NULL)
+ free(freepath, M_TEMP);
+ FILEDESC_SLOCK(fdp);
+ }
+ if (so != NULL) {
+ struct sockaddr *sa;
+
+ if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
+ == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
+ bcopy(sa, &kif->kf_sa_local, sa->sa_len);
+ free(sa, M_SONAME);
+ }
+ if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
+ == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
+ bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
+ free(sa, M_SONAME);
+ }
+ kif->kf_sock_domain =
+ so->so_proto->pr_domain->dom_family;
+ kif->kf_sock_type = so->so_type;
+ kif->kf_sock_protocol = so->so_proto->pr_protocol;
+ }
+ if (tp != NULL) {
+ strlcpy(kif->kf_path, tty_devname(tp),
+ sizeof(kif->kf_path));
+ }
+ error = SYSCTL_OUT(req, kif, sizeof(*kif));
+ if (error)
+ break;
+ }
+ FILEDESC_SUNLOCK(fdp);
+ fddrop(fdp);
+ free(kif, M_TEMP);
+ return (0);
+}
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, CTLFLAG_RD,
+ sysctl_kern_proc_ofiledesc, "Process ofiledesc entries");
+#endif /* COMPAT_FREEBSD7 */
+
+#ifdef KINFO_FILE_SIZE
+CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
+#endif
+
+static int
+export_vnode_for_sysctl(struct vnode *vp, int type,
+ struct kinfo_file *kif, struct filedesc *fdp, struct sysctl_req *req)
+{
+ int error;
+ char *fullpath, *freepath;
+ int vfslocked;
+
+ bzero(kif, sizeof(*kif));
+
+ vref(vp);
+ kif->kf_fd = type;
+ kif->kf_type = KF_TYPE_VNODE;
+ /* This function only handles directories. */
+ if (vp->v_type != VDIR) {
+ vrele(vp);
+ return (ENOTDIR);
+ }
+ kif->kf_vnode_type = KF_VTYPE_VDIR;
+
+ /*
+ * This is not a true file descriptor, so we set a bogus refcount
+ * and offset to indicate these fields should be ignored.
+ */
+ kif->kf_ref_count = -1;
+ kif->kf_offset = -1;
+
+ freepath = NULL;
+ fullpath = "-";
+ FILEDESC_SUNLOCK(fdp);
+ vn_fullpath(curthread, vp, &fullpath, &freepath);
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vrele(vp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
+ if (freepath != NULL)
+ free(freepath, M_TEMP);
+ /* Pack record size down */
+ kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
+ strlen(kif->kf_path) + 1;
+ kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
+ error = SYSCTL_OUT(req, kif, kif->kf_structsize);
+ FILEDESC_SLOCK(fdp);
+ return (error);
+}
+
+/*
+ * Get per-process file descriptors for use by procstat(1), et al.
+ */
+static int
+sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
+{
+ char *fullpath, *freepath;
+ struct kinfo_file *kif;
+ struct filedesc *fdp;
+ int error, i, *name;
+ struct socket *so;
+ struct vnode *vp;
+ struct file *fp;
+ struct proc *p;
+ struct tty *tp;
+ int vfslocked;
+ size_t oldidx;
+
+ name = (int *)arg1;
+ if ((p = pfind((pid_t)name[0])) == NULL)
+ return (ESRCH);
+ if ((error = p_candebug(curthread, p))) {
+ PROC_UNLOCK(p);
+ return (error);
+ }
+ fdp = fdhold(p);
+ PROC_UNLOCK(p);
+ if (fdp == NULL)
+ return (ENOENT);
+ kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
+ FILEDESC_SLOCK(fdp);
+ if (fdp->fd_cdir != NULL)
+ export_vnode_for_sysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
+ fdp, req);
+ if (fdp->fd_rdir != NULL)
+ export_vnode_for_sysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
+ fdp, req);
+ if (fdp->fd_jdir != NULL)
+ export_vnode_for_sysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
+ fdp, req);
+ for (i = 0; i < fdp->fd_nfiles; i++) {
+ if ((fp = fdp->fd_ofiles[i]) == NULL)
+ continue;
+ bzero(kif, sizeof(*kif));
+ vp = NULL;
+ so = NULL;
+ tp = NULL;
+ kif->kf_fd = i;
+ switch (fp->f_type) {
+ case DTYPE_VNODE:
+ kif->kf_type = KF_TYPE_VNODE;
+ vp = fp->f_vnode;
+ break;
+
+ case DTYPE_SOCKET:
+ kif->kf_type = KF_TYPE_SOCKET;
+ so = fp->f_data;
+ break;
+
+ case DTYPE_PIPE:
+ kif->kf_type = KF_TYPE_PIPE;
+ break;
+
+ case DTYPE_FIFO:
+ kif->kf_type = KF_TYPE_FIFO;
+ vp = fp->f_vnode;
+ break;
+
+ case DTYPE_KQUEUE:
+ kif->kf_type = KF_TYPE_KQUEUE;
+ break;
+
+ case DTYPE_CRYPTO:
+ kif->kf_type = KF_TYPE_CRYPTO;
+ break;
+
+ case DTYPE_MQUEUE:
+ kif->kf_type = KF_TYPE_MQUEUE;
+ break;
+
+ case DTYPE_SHM:
+ kif->kf_type = KF_TYPE_SHM;
+ break;
+
+ case DTYPE_SEM:
+ kif->kf_type = KF_TYPE_SEM;
+ break;
+
+ case DTYPE_PTS:
+ kif->kf_type = KF_TYPE_PTS;
+ tp = fp->f_data;
+ break;
+
+ default:
+ kif->kf_type = KF_TYPE_UNKNOWN;
+ break;
+ }
+ kif->kf_ref_count = fp->f_count;
+ if (fp->f_flag & FREAD)
+ kif->kf_flags |= KF_FLAG_READ;
+ if (fp->f_flag & FWRITE)
+ kif->kf_flags |= KF_FLAG_WRITE;
+ if (fp->f_flag & FAPPEND)
+ kif->kf_flags |= KF_FLAG_APPEND;
+ if (fp->f_flag & FASYNC)
+ kif->kf_flags |= KF_FLAG_ASYNC;
+ if (fp->f_flag & FFSYNC)
+ kif->kf_flags |= KF_FLAG_FSYNC;
+ if (fp->f_flag & FNONBLOCK)
+ kif->kf_flags |= KF_FLAG_NONBLOCK;
+ if (fp->f_flag & O_DIRECT)
+ kif->kf_flags |= KF_FLAG_DIRECT;
+ if (fp->f_flag & FHASLOCK)
+ kif->kf_flags |= KF_FLAG_HASLOCK;
+ kif->kf_offset = fp->f_offset;
+ if (vp != NULL) {
+ vref(vp);
+ switch (vp->v_type) {
+ case VNON:
+ kif->kf_vnode_type = KF_VTYPE_VNON;
+ break;
+ case VREG:
+ kif->kf_vnode_type = KF_VTYPE_VREG;
+ break;
+ case VDIR:
+ kif->kf_vnode_type = KF_VTYPE_VDIR;
+ break;
+ case VBLK:
+ kif->kf_vnode_type = KF_VTYPE_VBLK;
+ break;
+ case VCHR:
+ kif->kf_vnode_type = KF_VTYPE_VCHR;
+ break;
+ case VLNK:
+ kif->kf_vnode_type = KF_VTYPE_VLNK;
+ break;
+ case VSOCK:
+ kif->kf_vnode_type = KF_VTYPE_VSOCK;
+ break;
+ case VFIFO:
+ kif->kf_vnode_type = KF_VTYPE_VFIFO;
+ break;
+ case VBAD:
+ kif->kf_vnode_type = KF_VTYPE_VBAD;
+ break;
+ default:
+ kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
+ break;
+ }
+ /*
+ * It is OK to drop the filedesc lock here as we will
+ * re-validate and re-evaluate its properties when
+ * the loop continues.
+ */
+ freepath = NULL;
+ fullpath = "-";
+ FILEDESC_SUNLOCK(fdp);
+ vn_fullpath(curthread, vp, &fullpath, &freepath);
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vrele(vp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ strlcpy(kif->kf_path, fullpath,
+ sizeof(kif->kf_path));
+ if (freepath != NULL)
+ free(freepath, M_TEMP);
+ FILEDESC_SLOCK(fdp);
+ }
+ if (so != NULL) {
+ struct sockaddr *sa;
+
+ if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
+ == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
+ bcopy(sa, &kif->kf_sa_local, sa->sa_len);
+ free(sa, M_SONAME);
+ }
+ if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
+ == 0 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
+ bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
+ free(sa, M_SONAME);
+ }
+ kif->kf_sock_domain =
+ so->so_proto->pr_domain->dom_family;
+ kif->kf_sock_type = so->so_type;
+ kif->kf_sock_protocol = so->so_proto->pr_protocol;
+ }
+ if (tp != NULL) {
+ strlcpy(kif->kf_path, tty_devname(tp),
+ sizeof(kif->kf_path));
+ }
+ /* Pack record size down */
+ kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
+ strlen(kif->kf_path) + 1;
+ kif->kf_structsize = roundup(kif->kf_structsize,
+ sizeof(uint64_t));
+ oldidx = req->oldidx;
+ error = SYSCTL_OUT(req, kif, kif->kf_structsize);
+ if (error) {
+ if (error == ENOMEM) {
+ /*
+				 * This hack keeps the ABI of the
+				 * kern.proc.filedesc sysctl intact
+				 * while not accounting a partially
+				 * copied kinfo_file in oldidx.
+ */
+ req->oldidx = oldidx;
+ error = 0;
+ }
+ break;
+ }
+ }
+ FILEDESC_SUNLOCK(fdp);
+ fddrop(fdp);
+ free(kif, M_TEMP);
+ return (error);
+}
+
+static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD,
+ sysctl_kern_proc_filedesc, "Process filedesc entries");
+
+#ifdef DDB
+/*
+ * For the purposes of debugging, generate a human-readable string for the
+ * file type.
+ */
+static const char *
+file_type_to_name(short type)
+{
+
+ switch (type) {
+ case 0:
+ return ("zero");
+ case DTYPE_VNODE:
+ return ("vnod");
+ case DTYPE_SOCKET:
+ return ("sock");
+ case DTYPE_PIPE:
+ return ("pipe");
+ case DTYPE_FIFO:
+ return ("fifo");
+ case DTYPE_KQUEUE:
+ return ("kque");
+ case DTYPE_CRYPTO:
+ return ("crpt");
+ case DTYPE_MQUEUE:
+ return ("mque");
+ case DTYPE_SHM:
+ return ("shm");
+ case DTYPE_SEM:
+ return ("ksem");
+ default:
+ return ("unkn");
+ }
+}
+
+/*
+ * For the purposes of debugging, identify a process (if any, perhaps one of
+ * many) that references the passed file in its file descriptor array. Return
+ * NULL if none.
+ */
+static struct proc *
+file_to_first_proc(struct file *fp)
+{
+ struct filedesc *fdp;
+ struct proc *p;
+ int n;
+
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ fdp = p->p_fd;
+ if (fdp == NULL)
+ continue;
+ for (n = 0; n < fdp->fd_nfiles; n++) {
+ if (fp == fdp->fd_ofiles[n])
+ return (p);
+ }
+ }
+ return (NULL);
+}
+
+static void
+db_print_file(struct file *fp, int header)
+{
+ struct proc *p;
+
+ if (header)
+ db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n",
+ "File", "Type", "Data", "Flag", "GCFl", "Count",
+ "MCount", "Vnode", "FPID", "FCmd");
+ p = file_to_first_proc(fp);
+ db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
+ file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
+ 0, fp->f_count, 0, fp->f_vnode,
+ p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
+}
+
+DB_SHOW_COMMAND(file, db_show_file)
+{
+ struct file *fp;
+
+ if (!have_addr) {
+ db_printf("usage: show file <addr>\n");
+ return;
+ }
+ fp = (struct file *)addr;
+ db_print_file(fp, 1);
+}
+
+DB_SHOW_COMMAND(files, db_show_files)
+{
+ struct filedesc *fdp;
+ struct file *fp;
+ struct proc *p;
+ int header;
+ int n;
+
+ header = 1;
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ if ((fdp = p->p_fd) == NULL)
+ continue;
+ for (n = 0; n < fdp->fd_nfiles; ++n) {
+ if ((fp = fdp->fd_ofiles[n]) == NULL)
+ continue;
+ db_print_file(fp, header);
+ header = 0;
+ }
+ }
+}
+#endif
+
+SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
+ &maxfilesperproc, 0, "Maximum files allowed open per process");
+
+SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
+ &maxfiles, 0, "Maximum number of files");
+
+SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
+ __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files");
+
+/* ARGSUSED*/
+static void
+filelistinit(void *dummy)
+{
+
+ file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
+ mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
+}
+SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
+#endif /* __rtems__ */
+
+/*-------------------------------------------------------------------*/
+
+static int
+badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td)
+{
+
+ return (EBADF);
+}
+
+static int
+badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td)
+{
+
+ return (EINVAL);
+}
+
+static int
+badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td)
+{
+
+ return (EBADF);
+}
+
+static int
+badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td)
+{
+
+ return (0);
+}
+
+static int
+badfo_kqfilter(struct file *fp, struct knote *kn)
+{
+
+ return (EBADF);
+}
+
+static int
+badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, struct thread *td)
+{
+
+ return (EBADF);
+}
+
+static int
+badfo_close(struct file *fp, struct thread *td)
+{
+
+ return (EBADF);
+}
+
+struct fileops badfileops = {
+ .fo_read = badfo_readwrite,
+ .fo_write = badfo_readwrite,
+ .fo_truncate = badfo_truncate,
+ .fo_ioctl = badfo_ioctl,
+ .fo_poll = badfo_poll,
+ .fo_kqfilter = badfo_kqfilter,
+ .fo_stat = badfo_stat,
+ .fo_close = badfo_close,
+};
+
+#ifndef __rtems__
+/*-------------------------------------------------------------------*/
+
+/*
+ * File Descriptor pseudo-device driver (/dev/fd/).
+ *
+ * Opening minor device N dup()s the file (if any) connected to file
+ * descriptor N belonging to the calling process. Note that this driver
+ * consists of only the ``open()'' routine, because all subsequent
+ * references to this file will be direct to the other driver.
+ *
+ * XXX: we could give this one a cloning event handler if necessary.
+ */
+
+/* ARGSUSED */
+static int
+fdopen(struct cdev *dev, int mode, int type, struct thread *td)
+{
+
+ /*
+	 * XXX Kludge: set curthread->td_dupfd to contain the value of
+	 * the file descriptor being sought for duplication. The error
+ * return ensures that the vnode for this device will be released
+ * by vn_open. Open will detect this special error and take the
+ * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
+ * will simply report the error.
+ */
+ td->td_dupfd = dev2unit(dev);
+ return (ENODEV);
+}
+
+static struct cdevsw fildesc_cdevsw = {
+ .d_version = D_VERSION,
+ .d_open = fdopen,
+ .d_name = "FD",
+};
+
+static void
+fildesc_drvinit(void *unused)
+{
+ struct cdev *dev;
+
+ dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
+ make_dev_alias(dev, "stdin");
+ dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
+ make_dev_alias(dev, "stdout");
+ dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
+ make_dev_alias(dev, "stderr");
+}
+
+SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL);
+#endif /* __rtems__ */
diff --git a/freebsd/kern/kern_mtxpool.c b/freebsd/kern/kern_mtxpool.c
new file mode 100644
index 00000000..b2c0267a
--- /dev/null
+++ b/freebsd/kern/kern_mtxpool.c
@@ -0,0 +1,220 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2001 Matthew Dillon. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Mutex pool routines. These routines are designed to be used as short
+ * term leaf mutexes (e.g. the last mutex you might acquire other than
+ * calling msleep()). They operate using a shared pool. A mutex is chosen
+ * from the pool based on the supplied pointer (which may or may not be
+ * valid).
+ *
+ * Advantages:
+ * - no structural overhead. Mutexes can be associated with structures
+ * without adding bloat to the structures.
+ *	- mutexes can be obtained for invalid pointers, useful when using
+ * mutexes to interlock destructor ops.
+ * - no initialization/destructor overhead.
+ * - can be used with msleep.
+ *
+ * Disadvantages:
+ * - should generally only be used as leaf mutexes.
+ *	- pool/pool dependency ordering cannot be depended on.
+ *	- possible L1 cache mastership contention between CPUs.
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/ktr.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/systm.h>
+
+
+static MALLOC_DEFINE(M_MTXPOOL, "mtx_pool", "mutex pool");
+
+/* Pool sizes must be a power of two */
+#ifndef MTX_POOL_LOCKBUILDER_SIZE
+#define MTX_POOL_LOCKBUILDER_SIZE 128
+#endif
+#ifndef MTX_POOL_SLEEP_SIZE
+#define MTX_POOL_SLEEP_SIZE 128
+#endif
+
+struct mtxpool_header {
+ int mtxpool_size;
+ int mtxpool_mask;
+ int mtxpool_shift;
+ int mtxpool_next;
+};
+
+struct mtx_pool {
+ struct mtxpool_header mtx_pool_header;
+ struct mtx mtx_pool_ary[1];
+};
+
+static struct mtx_pool_lockbuilder {
+ struct mtxpool_header mtx_pool_header;
+ struct mtx mtx_pool_ary[MTX_POOL_LOCKBUILDER_SIZE];
+} lockbuilder_pool;
+
+#define mtx_pool_size mtx_pool_header.mtxpool_size
+#define mtx_pool_mask mtx_pool_header.mtxpool_mask
+#define mtx_pool_shift mtx_pool_header.mtxpool_shift
+#define mtx_pool_next mtx_pool_header.mtxpool_next
+
+struct mtx_pool *mtxpool_sleep;
+struct mtx_pool *mtxpool_lockbuilder;
+
+#if UINTPTR_MAX == UINT64_MAX /* 64 bits */
+# define POINTER_BITS 64
+# define HASH_MULTIPLIER 11400714819323198485u /* (2^64)*(sqrt(5)-1)/2 */
+#else /* assume 32 bits */
+# define POINTER_BITS 32
+# define HASH_MULTIPLIER 2654435769u /* (2^32)*(sqrt(5)-1)/2 */
+#endif
+
+/*
+ * Return the (shared) pool mutex associated with the specified address.
+ * The returned mutex is a leaf level mutex, meaning that if you obtain it
+ * you cannot obtain any other mutexes until you release it. You can
+ * legally msleep() on the mutex.
+ */
+struct mtx *
+mtx_pool_find(struct mtx_pool *pool, void *ptr)
+{
+ int p;
+
+ KASSERT(pool != NULL, ("_mtx_pool_find(): null pool"));
+ /*
+ * Fibonacci hash, see Knuth's
+ * _Art of Computer Programming, Volume 3 / Sorting and Searching_
+ */
+ p = ((HASH_MULTIPLIER * (uintptr_t)ptr) >> pool->mtx_pool_shift) &
+ pool->mtx_pool_mask;
+ return (&pool->mtx_pool_ary[p]);
+}
+
+static void
+mtx_pool_initialize(struct mtx_pool *pool, const char *mtx_name, int pool_size,
+ int opts)
+{
+ int i, maskbits;
+
+ pool->mtx_pool_size = pool_size;
+ pool->mtx_pool_mask = pool_size - 1;
+ for (i = 1, maskbits = 0; (i & pool_size) == 0; i = i << 1)
+ maskbits++;
+ pool->mtx_pool_shift = POINTER_BITS - maskbits;
+ pool->mtx_pool_next = 0;
+ for (i = 0; i < pool_size; ++i)
+ mtx_init(&pool->mtx_pool_ary[i], mtx_name, NULL, opts);
+}
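
As an illustrative aside (not part of the file), working the initialization above through for the default pool size shows where the shift and mask used by mtx_pool_find() come from:

/*
 * pool_size == 128 == 2^7  =>  maskbits == 7, mtx_pool_mask == 127,
 * and mtx_pool_shift == 64 - 7 == 57 on a 64-bit target, so
 * mtx_pool_find() indexes the pool with the top seven bits of
 * HASH_MULTIPLIER * (uintptr_t)ptr.
 */
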
+
+struct mtx_pool *
+mtx_pool_create(const char *mtx_name, int pool_size, int opts)
+{
+ struct mtx_pool *pool;
+
+ if (pool_size <= 0 || !powerof2(pool_size)) {
+ printf("WARNING: %s pool size is not a power of 2.\n",
+ mtx_name);
+ pool_size = 128;
+ }
+ pool = malloc(sizeof (struct mtx_pool) +
+ ((pool_size - 1) * sizeof (struct mtx)),
+ M_MTXPOOL, M_WAITOK | M_ZERO);
+ mtx_pool_initialize(pool, mtx_name, pool_size, opts);
+ return pool;
+}
+
+void
+mtx_pool_destroy(struct mtx_pool **poolp)
+{
+ int i;
+ struct mtx_pool *pool = *poolp;
+
+ for (i = pool->mtx_pool_size - 1; i >= 0; --i)
+ mtx_destroy(&pool->mtx_pool_ary[i]);
+ free(pool, M_MTXPOOL);
+ *poolp = NULL;
+}
+
+static void
+mtx_pool_setup_static(void *dummy __unused)
+{
+ mtx_pool_initialize((struct mtx_pool *)&lockbuilder_pool,
+ "lockbuilder mtxpool", MTX_POOL_LOCKBUILDER_SIZE,
+ MTX_DEF | MTX_NOWITNESS | MTX_QUIET);
+ mtxpool_lockbuilder = (struct mtx_pool *)&lockbuilder_pool;
+}
+
+static void
+mtx_pool_setup_dynamic(void *dummy __unused)
+{
+ mtxpool_sleep = mtx_pool_create("sleep mtxpool",
+ MTX_POOL_SLEEP_SIZE, MTX_DEF);
+}
+
+/*
+ * Obtain a (shared) mutex from the pool. The returned mutex is a leaf
+ * level mutex, meaning that if you obtain it you cannot obtain any other
+ * mutexes until you release it. You can legally msleep() on the mutex.
+ */
+struct mtx *
+mtx_pool_alloc(struct mtx_pool *pool)
+{
+ int i;
+
+ KASSERT(pool != NULL, ("mtx_pool_alloc(): null pool"));
+ /*
+ * mtx_pool_next is unprotected against multiple accesses,
+ * but simultaneous access by two CPUs should not be very
+ * harmful.
+ */
+ i = pool->mtx_pool_next;
+ pool->mtx_pool_next = (i + 1) & pool->mtx_pool_mask;
+ return (&pool->mtx_pool_ary[i]);
+}
+
+/*
+ * The lockbuilder pool must be initialized early because the lockmgr
+ * and sx locks depend on it. The sx locks are used in the kernel
+ * memory allocator. The lockmgr subsystem is initialized by
+ * SYSINIT(..., SI_SUB_LOCKMGR, ...).
+ *
+ * We can't call malloc() to dynamically allocate the sleep pool
+ * until after kmeminit() has been called, which is done by
+ * SYSINIT(..., SI_SUB_KMEM, ...).
+ */
+SYSINIT(mtxpooli1, SI_SUB_MTX_POOL_STATIC, SI_ORDER_FIRST,
+ mtx_pool_setup_static, NULL);
+SYSINIT(mtxpooli2, SI_SUB_MTX_POOL_DYNAMIC, SI_ORDER_FIRST,
+ mtx_pool_setup_dynamic, NULL);
diff --git a/freebsd/kern/kern_subr.c b/freebsd/kern/kern_subr.c
index fecb91c5..9a28a7d9 100644
--- a/freebsd/kern/kern_subr.c
+++ b/freebsd/kern/kern_subr.c
@@ -545,7 +545,6 @@ copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
return (error);
}
-#ifndef __rtems__
int
copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop)
{
@@ -582,6 +581,7 @@ copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop)
return (0);
}
+#ifndef __rtems__
struct uio *
cloneuio(struct uio *uiop)
{
diff --git a/freebsd/kern/kern_time.c b/freebsd/kern/kern_time.c
index e07abc3a..8c760b48 100644
--- a/freebsd/kern/kern_time.c
+++ b/freebsd/kern/kern_time.c
@@ -698,6 +698,7 @@ realitexpire(void *arg)
}
/*NOTREACHED*/
}
+#endif /* __rtems__ */
/*
* Check that a proposed value to load into the .it_value or
@@ -716,6 +717,7 @@ itimerfix(struct timeval *tv)
return (0);
}
+#ifndef __rtems__
/*
* Decrement an interval timer by a specified number
* of microseconds, which must be less than a second,
@@ -756,6 +758,7 @@ expire:
itp->it_value.tv_usec = 0; /* sec is already 0 */
return (0);
}
+#endif /* __rtems__ */
/*
* Add and subtract routines for timevals.
@@ -772,7 +775,6 @@ timevaladd(struct timeval *t1, const struct timeval *t2)
t1->tv_usec += t2->tv_usec;
timevalfix(t1);
}
-#endif /* __rtems__ */
void
timevalsub(struct timeval *t1, const struct timeval *t2)
diff --git a/freebsd/kern/sys_generic.c b/freebsd/kern/sys_generic.c
new file mode 100644
index 00000000..c90d632b
--- /dev/null
+++ b/freebsd/kern/sys_generic.c
@@ -0,0 +1,1665 @@
+#include <freebsd/machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <freebsd/sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+
+#include <freebsd/local/opt_compat.h>
+#include <freebsd/local/opt_ktrace.h>
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/systm.h>
+#include <freebsd/sys/sysproto.h>
+#include <freebsd/sys/filedesc.h>
+#include <freebsd/sys/filio.h>
+#include <freebsd/sys/fcntl.h>
+#include <freebsd/sys/file.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/sys/signalvar.h>
+#include <freebsd/sys/socketvar.h>
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/kernel.h>
+#include <freebsd/sys/ktr.h>
+#include <freebsd/sys/limits.h>
+#include <freebsd/sys/malloc.h>
+#include <freebsd/sys/poll.h>
+#include <freebsd/sys/resourcevar.h>
+#include <freebsd/sys/selinfo.h>
+#include <freebsd/sys/sleepqueue.h>
+#include <freebsd/sys/syscallsubr.h>
+#include <freebsd/sys/sysctl.h>
+#include <freebsd/sys/sysent.h>
+#include <freebsd/sys/vnode.h>
+#include <freebsd/sys/bio.h>
+#ifndef __rtems__
+#include <freebsd/sys/buf.h>
+#endif
+#include <freebsd/sys/condvar.h>
+#ifdef KTRACE
+#include <freebsd/sys/ktrace.h>
+#endif
+
+#include <freebsd/security/audit/audit.h>
+
+#ifdef __rtems__
+typedef long fd_mask;
+#include <freebsd/vm/uma.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/machine/rtems-bsd-symbols.h>
+#endif /* __rtems__ */
+
+static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
+static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
+#ifndef __rtems__
+MALLOC_DEFINE(M_IOV, "iov", "large iov's");
+#endif /* __rtems__ */
+
+static int pollout(struct thread *, struct pollfd *, struct pollfd *,
+ u_int);
+static int pollscan(struct thread *, struct pollfd *, u_int);
+static int pollrescan(struct thread *);
+static int selscan(struct thread *, fd_mask **, fd_mask **, int);
+static int selrescan(struct thread *, fd_mask **, fd_mask **);
+static void selfdalloc(struct thread *, void *);
+static void selfdfree(struct seltd *, struct selfd *);
+static int dofileread(struct thread *, int, struct file *, struct uio *,
+ off_t, int);
+static int dofilewrite(struct thread *, int, struct file *, struct uio *,
+ off_t, int);
+static void doselwakeup(struct selinfo *, int);
+static void seltdinit(struct thread *);
+static int seltdwait(struct thread *, int);
+static void seltdclear(struct thread *);
+
+/*
+ * One seltd per-thread allocated on demand as needed.
+ *
+ * t - protected by st_mtx
+ * k - Only accessed by curthread or read-only
+ */
+struct seltd {
+ STAILQ_HEAD(, selfd) st_selq; /* (k) List of selfds. */
+ struct selfd *st_free1; /* (k) free fd for read set. */
+ struct selfd *st_free2; /* (k) free fd for write set. */
+ struct mtx st_mtx; /* Protects struct seltd */
+ struct cv st_wait; /* (t) Wait channel. */
+ int st_flags; /* (t) SELTD_ flags. */
+};
+
+#define SELTD_PENDING 0x0001 /* We have pending events. */
+#define SELTD_RESCAN 0x0002 /* Doing a rescan. */
+
+/*
+ * One selfd allocated per-thread per-file-descriptor.
+ * f - protected by sf_mtx
+ */
+struct selfd {
+ STAILQ_ENTRY(selfd) sf_link; /* (k) fds owned by this td. */
+ TAILQ_ENTRY(selfd) sf_threads; /* (f) fds on this selinfo. */
+ struct selinfo *sf_si; /* (f) selinfo when linked. */
+ struct mtx *sf_mtx; /* Pointer to selinfo mtx. */
+ struct seltd *sf_td; /* (k) owning seltd. */
+ void *sf_cookie; /* (k) fd or pollfd. */
+};
+
+static uma_zone_t selfd_zone;
+static struct mtx_pool *mtxpool_select;
+
+#ifndef _SYS_SYSPROTO_H_
+struct read_args {
+ int fd;
+ void *buf;
+ size_t nbyte;
+};
+#endif
+#ifndef __rtems__
+int
+read(td, uap)
+ struct thread *td;
+ struct read_args *uap;
+{
+ struct uio auio;
+ struct iovec aiov;
+ int error;
+
+ if (uap->nbyte > INT_MAX)
+ return (EINVAL);
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->nbyte;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = uap->nbyte;
+ auio.uio_segflg = UIO_USERSPACE;
+ error = kern_readv(td, uap->fd, &auio);
+ return(error);
+}
+#endif
+
+/*
+ * Positioned read system call
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct pread_args {
+ int fd;
+ void *buf;
+ size_t nbyte;
+ int pad;
+ off_t offset;
+};
+#endif
+int
+pread(td, uap)
+ struct thread *td;
+ struct pread_args *uap;
+{
+ struct uio auio;
+ struct iovec aiov;
+ int error;
+
+ if (uap->nbyte > INT_MAX)
+ return (EINVAL);
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->nbyte;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = uap->nbyte;
+ auio.uio_segflg = UIO_USERSPACE;
+ error = kern_preadv(td, uap->fd, &auio, uap->offset);
+ return(error);
+}
+
+#ifndef __rtems__
+int
+freebsd6_pread(td, uap)
+ struct thread *td;
+ struct freebsd6_pread_args *uap;
+{
+ struct pread_args oargs;
+
+ oargs.fd = uap->fd;
+ oargs.buf = uap->buf;
+ oargs.nbyte = uap->nbyte;
+ oargs.offset = uap->offset;
+ return (pread(td, &oargs));
+}
+#endif /* __rtems__ */
+
+/*
+ * Scatter read system call.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct readv_args {
+ int fd;
+ struct iovec *iovp;
+ u_int iovcnt;
+};
+#endif
+int
+readv(struct thread *td, struct readv_args *uap)
+{
+ struct uio *auio;
+ int error;
+
+ error = copyinuio(uap->iovp, uap->iovcnt, &auio);
+ if (error)
+ return (error);
+ error = kern_readv(td, uap->fd, auio);
+ free(auio, M_IOV);
+ return (error);
+}
+
+int
+kern_readv(struct thread *td, int fd, struct uio *auio)
+{
+ struct file *fp;
+ int error;
+
+ error = fget_read(td, fd, &fp);
+ if (error)
+ return (error);
+ error = dofileread(td, fd, fp, auio, (off_t)-1, 0);
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Scatter positioned read system call.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct preadv_args {
+ int fd;
+ struct iovec *iovp;
+ u_int iovcnt;
+ off_t offset;
+};
+#endif
+int
+preadv(struct thread *td, struct preadv_args *uap)
+{
+ struct uio *auio;
+ int error;
+
+ error = copyinuio(uap->iovp, uap->iovcnt, &auio);
+ if (error)
+ return (error);
+ error = kern_preadv(td, uap->fd, auio, uap->offset);
+ free(auio, M_IOV);
+ return (error);
+}
+
+int
+kern_preadv(td, fd, auio, offset)
+ struct thread *td;
+ int fd;
+ struct uio *auio;
+ off_t offset;
+{
+ struct file *fp;
+ int error;
+
+ error = fget_read(td, fd, &fp);
+ if (error)
+ return (error);
+ if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
+ error = ESPIPE;
+#ifndef __rtems__
+ else if (offset < 0 && fp->f_vnode->v_type != VCHR)
+ error = EINVAL;
+#endif /* __rtems__ */
+ else
+ error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET);
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Common code for readv and preadv that reads data in
+ * from a file using the passed in uio, offset, and flags.
+ */
+static int
+dofileread(td, fd, fp, auio, offset, flags)
+ struct thread *td;
+ int fd;
+ struct file *fp;
+ struct uio *auio;
+ off_t offset;
+ int flags;
+{
+ ssize_t cnt;
+ int error;
+#ifdef KTRACE
+ struct uio *ktruio = NULL;
+#endif
+
+ /* Finish zero length reads right here */
+ if (auio->uio_resid == 0) {
+ td->td_retval[0] = 0;
+ return(0);
+ }
+ auio->uio_rw = UIO_READ;
+ auio->uio_offset = offset;
+ auio->uio_td = td;
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_GENIO))
+ ktruio = cloneuio(auio);
+#endif
+ cnt = auio->uio_resid;
+ if ((error = fo_read(fp, auio, td->td_ucred, flags, td))) {
+ if (auio->uio_resid != cnt && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ }
+ cnt -= auio->uio_resid;
+#ifdef KTRACE
+ if (ktruio != NULL) {
+ ktruio->uio_resid = cnt;
+ ktrgenio(fd, UIO_READ, ktruio, error);
+ }
+#endif
+ td->td_retval[0] = cnt;
+ return (error);
+}
+
+#ifndef __rtems__
+#ifndef _SYS_SYSPROTO_H_
+struct write_args {
+ int fd;
+ const void *buf;
+ size_t nbyte;
+};
+#endif
+int
+write(td, uap)
+ struct thread *td;
+ struct write_args *uap;
+{
+ struct uio auio;
+ struct iovec aiov;
+ int error;
+
+ if (uap->nbyte > INT_MAX)
+ return (EINVAL);
+ aiov.iov_base = (void *)(uintptr_t)uap->buf;
+ aiov.iov_len = uap->nbyte;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = uap->nbyte;
+ auio.uio_segflg = UIO_USERSPACE;
+ error = kern_writev(td, uap->fd, &auio);
+ return(error);
+}
+
+/*
+ * Positioned write system call.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct pwrite_args {
+ int fd;
+ const void *buf;
+ size_t nbyte;
+ int pad;
+ off_t offset;
+};
+#endif
+int
+pwrite(td, uap)
+ struct thread *td;
+ struct pwrite_args *uap;
+{
+ struct uio auio;
+ struct iovec aiov;
+ int error;
+
+ if (uap->nbyte > INT_MAX)
+ return (EINVAL);
+ aiov.iov_base = (void *)(uintptr_t)uap->buf;
+ aiov.iov_len = uap->nbyte;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = uap->nbyte;
+ auio.uio_segflg = UIO_USERSPACE;
+ error = kern_pwritev(td, uap->fd, &auio, uap->offset);
+ return(error);
+}
+
+int
+freebsd6_pwrite(td, uap)
+ struct thread *td;
+ struct freebsd6_pwrite_args *uap;
+{
+ struct pwrite_args oargs;
+
+ oargs.fd = uap->fd;
+ oargs.buf = uap->buf;
+ oargs.nbyte = uap->nbyte;
+ oargs.offset = uap->offset;
+ return (pwrite(td, &oargs));
+}
+
+/*
+ * Gather write system call.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct writev_args {
+ int fd;
+ struct iovec *iovp;
+ u_int iovcnt;
+};
+#endif
+int
+writev(struct thread *td, struct writev_args *uap)
+{
+ struct uio *auio;
+ int error;
+
+ error = copyinuio(uap->iovp, uap->iovcnt, &auio);
+ if (error)
+ return (error);
+ error = kern_writev(td, uap->fd, auio);
+ free(auio, M_IOV);
+ return (error);
+}
+
+int
+kern_writev(struct thread *td, int fd, struct uio *auio)
+{
+ struct file *fp;
+ int error;
+
+ error = fget_write(td, fd, &fp);
+ if (error)
+ return (error);
+ error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0);
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Gather positioned write system call.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct pwritev_args {
+ int fd;
+ struct iovec *iovp;
+ u_int iovcnt;
+ off_t offset;
+};
+#endif
+int
+pwritev(struct thread *td, struct pwritev_args *uap)
+{
+ struct uio *auio;
+ int error;
+
+ error = copyinuio(uap->iovp, uap->iovcnt, &auio);
+ if (error)
+ return (error);
+ error = kern_pwritev(td, uap->fd, auio, uap->offset);
+ free(auio, M_IOV);
+ return (error);
+}
+
+int
+kern_pwritev(td, fd, auio, offset)
+ struct thread *td;
+ struct uio *auio;
+ int fd;
+ off_t offset;
+{
+ struct file *fp;
+ int error;
+
+ error = fget_write(td, fd, &fp);
+ if (error)
+ return (error);
+ if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
+ error = ESPIPE;
+ else if (offset < 0 && fp->f_vnode->v_type != VCHR)
+ error = EINVAL;
+ else
+ error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET);
+ fdrop(fp, td);
+ return (error);
+}
+
+/*
+ * Common code for writev and pwritev that writes data to
+ * a file using the passed in uio, offset, and flags.
+ */
+static int
+dofilewrite(td, fd, fp, auio, offset, flags)
+ struct thread *td;
+ int fd;
+ struct file *fp;
+ struct uio *auio;
+ off_t offset;
+ int flags;
+{
+ ssize_t cnt;
+ int error;
+#ifdef KTRACE
+ struct uio *ktruio = NULL;
+#endif
+
+ auio->uio_rw = UIO_WRITE;
+ auio->uio_td = td;
+ auio->uio_offset = offset;
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_GENIO))
+ ktruio = cloneuio(auio);
+#endif
+ cnt = auio->uio_resid;
+ if (fp->f_type == DTYPE_VNODE)
+ bwillwrite();
+ if ((error = fo_write(fp, auio, td->td_ucred, flags, td))) {
+ if (auio->uio_resid != cnt && (error == ERESTART ||
+ error == EINTR || error == EWOULDBLOCK))
+ error = 0;
+ /* Socket layer is responsible for issuing SIGPIPE. */
+ if (fp->f_type != DTYPE_SOCKET && error == EPIPE) {
+ PROC_LOCK(td->td_proc);
+ tdksignal(td, SIGPIPE, NULL);
+ PROC_UNLOCK(td->td_proc);
+ }
+ }
+ cnt -= auio->uio_resid;
+#ifdef KTRACE
+ if (ktruio != NULL) {
+ ktruio->uio_resid = cnt;
+ ktrgenio(fd, UIO_WRITE, ktruio, error);
+ }
+#endif
+ td->td_retval[0] = cnt;
+ return (error);
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ *
+ * Can't use fget_write() here, since we must return EINVAL and not EBADF if the
+ * descriptor isn't writable.
+ */
+int
+kern_ftruncate(td, fd, length)
+ struct thread *td;
+ int fd;
+ off_t length;
+{
+ struct file *fp;
+ int error;
+
+ AUDIT_ARG_FD(fd);
+ if (length < 0)
+ return (EINVAL);
+ error = fget(td, fd, &fp);
+ if (error)
+ return (error);
+ AUDIT_ARG_FILE(td->td_proc, fp);
+ if (!(fp->f_flag & FWRITE)) {
+ fdrop(fp, td);
+ return (EINVAL);
+ }
+ error = fo_truncate(fp, length, td->td_ucred, td);
+ fdrop(fp, td);
+ return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct ftruncate_args {
+ int fd;
+ int pad;
+ off_t length;
+};
+#endif
+int
+ftruncate(td, uap)
+ struct thread *td;
+ struct ftruncate_args *uap;
+{
+
+ return (kern_ftruncate(td, uap->fd, uap->length));
+}
+
+#if defined(COMPAT_43)
+#ifndef _SYS_SYSPROTO_H_
+struct oftruncate_args {
+ int fd;
+ long length;
+};
+#endif
+int
+oftruncate(td, uap)
+ struct thread *td;
+ struct oftruncate_args *uap;
+{
+
+ return (kern_ftruncate(td, uap->fd, uap->length));
+}
+#endif /* COMPAT_43 */
+
+#ifndef _SYS_SYSPROTO_H_
+struct ioctl_args {
+ int fd;
+ u_long com;
+ caddr_t data;
+};
+#endif
+/* ARGSUSED */
+int
+ioctl(struct thread *td, struct ioctl_args *uap)
+{
+ u_long com;
+ int arg, error;
+ u_int size;
+ caddr_t data;
+
+ if (uap->com > 0xffffffff) {
+ printf(
+ "WARNING pid %d (%s): ioctl sign-extension ioctl %lx\n",
+ td->td_proc->p_pid, td->td_name, uap->com);
+ uap->com &= 0xffffffff;
+ }
+ com = uap->com;
+
+ /*
+ * Interpret high order word to find amount of data to be
+ * copied to/from the user's address space.
+ */
+ size = IOCPARM_LEN(com);
+ if ((size > IOCPARM_MAX) ||
+ ((com & (IOC_VOID | IOC_IN | IOC_OUT)) == 0) ||
+#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
+ ((com & IOC_OUT) && size == 0) ||
+#else
+ ((com & (IOC_IN | IOC_OUT)) && size == 0) ||
+#endif
+ ((com & IOC_VOID) && size > 0 && size != sizeof(int)))
+ return (ENOTTY);
+
+ if (size > 0) {
+ if (com & IOC_VOID) {
+ /* Integer argument. */
+ arg = (intptr_t)uap->data;
+ data = (void *)&arg;
+ size = 0;
+ } else
+ data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
+ } else
+ data = (void *)&uap->data;
+ if (com & IOC_IN) {
+ error = copyin(uap->data, data, (u_int)size);
+ if (error) {
+ if (size > 0)
+ free(data, M_IOCTLOPS);
+ return (error);
+ }
+ } else if (com & IOC_OUT) {
+ /*
+ * Zero the buffer so the user always
+ * gets back something deterministic.
+ */
+ bzero(data, size);
+ }
+
+ error = kern_ioctl(td, uap->fd, com, data);
+
+ if (error == 0 && (com & IOC_OUT))
+ error = copyout(data, uap->data, (u_int)size);
+
+ if (size > 0)
+ free(data, M_IOCTLOPS);
+ return (error);
+}
+
+int
+kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
+{
+ struct file *fp;
+ struct filedesc *fdp;
+ int error;
+ int tmp;
+
+ AUDIT_ARG_FD(fd);
+ AUDIT_ARG_CMD(com);
+ if ((error = fget(td, fd, &fp)) != 0)
+ return (error);
+ if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
+ fdrop(fp, td);
+ return (EBADF);
+ }
+ fdp = td->td_proc->p_fd;
+ switch (com) {
+ case FIONCLEX:
+ FILEDESC_XLOCK(fdp);
+ fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
+ FILEDESC_XUNLOCK(fdp);
+ goto out;
+ case FIOCLEX:
+ FILEDESC_XLOCK(fdp);
+ fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
+ FILEDESC_XUNLOCK(fdp);
+ goto out;
+ case FIONBIO:
+ if ((tmp = *(int *)data))
+ atomic_set_int(&fp->f_flag, FNONBLOCK);
+ else
+ atomic_clear_int(&fp->f_flag, FNONBLOCK);
+ data = (void *)&tmp;
+ break;
+ case FIOASYNC:
+ if ((tmp = *(int *)data))
+ atomic_set_int(&fp->f_flag, FASYNC);
+ else
+ atomic_clear_int(&fp->f_flag, FASYNC);
+ data = (void *)&tmp;
+ break;
+ }
+
+ error = fo_ioctl(fp, com, data, td->td_ucred, td);
+out:
+ fdrop(fp, td);
+ return (error);
+}
+
+int
+poll_no_poll(int events)
+{
+ /*
+ * Return true for read/write. If the user asked for something
+ * special, return POLLNVAL, so that clients have a way of
+ * determining reliably whether or not the extended
+ * functionality is present without hard-coding knowledge
+ * of specific filesystem implementations.
+ */
+ if (events & ~POLLSTANDARD)
+ return (POLLNVAL);
+
+ return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
+}
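
A file type whose fo_poll backend has nothing meaningful to report can simply delegate to poll_no_poll(); the handler below is a hypothetical sketch (the function name is made up, but the signature follows the fo_poll prototype used here):

/* Hedged sketch of an fo_poll method that cannot really poll. */
static int
example_nopoll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (poll_no_poll(events));
}
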
+
+int
+pselect(struct thread *td, struct pselect_args *uap)
+{
+ struct timespec ts;
+ struct timeval tv, *tvp;
+ sigset_t set, *uset;
+ int error;
+
+ if (uap->ts != NULL) {
+ error = copyin(uap->ts, &ts, sizeof(ts));
+ if (error != 0)
+ return (error);
+ TIMESPEC_TO_TIMEVAL(&tv, &ts);
+ tvp = &tv;
+ } else
+ tvp = NULL;
+ if (uap->sm != NULL) {
+ error = copyin(uap->sm, &set, sizeof(set));
+ if (error != 0)
+ return (error);
+ uset = &set;
+ } else
+ uset = NULL;
+ return (kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
+ uset, NFDBITS));
+}
+
+int
+kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex,
+ struct timeval *tvp, sigset_t *uset, int abi_nfdbits)
+{
+ int error;
+
+ if (uset != NULL) {
+ error = kern_sigprocmask(td, SIG_SETMASK, uset,
+ &td->td_oldsigmask, 0);
+ if (error != 0)
+ return (error);
+ td->td_pflags |= TDP_OLDMASK;
+ /*
+ * Make sure that ast() is called on return to
+ * usermode and TDP_OLDMASK is cleared, restoring old
+ * sigmask.
+ */
+ thread_lock(td);
+ td->td_flags |= TDF_ASTPENDING;
+ thread_unlock(td);
+ }
+ error = kern_select(td, nd, in, ou, ex, tvp, abi_nfdbits);
+ return (error);
+}
+
+#ifndef _SYS_SYSPROTO_H_
+struct select_args {
+ int nd;
+ fd_set *in, *ou, *ex;
+ struct timeval *tv;
+};
+#endif
+int
+select(struct thread *td, struct select_args *uap)
+{
+ struct timeval tv, *tvp;
+ int error;
+
+ if (uap->tv != NULL) {
+ error = copyin(uap->tv, &tv, sizeof(tv));
+ if (error)
+ return (error);
+ tvp = &tv;
+ } else
+ tvp = NULL;
+
+ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp,
+ NFDBITS));
+}
+
+int
+kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
+ fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits)
+{
+ struct filedesc *fdp;
+ /*
+ * The magic 2048 here is chosen to be just enough for FD_SETSIZE
+ * infds with the new FD_SETSIZE of 1024, and more than enough for
+ * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
+ * of 256.
+ */
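	/*
	 * Illustrative arithmetic (editorial note, not in the original
	 * source): s_selbits holds 2048 bits == 256 bytes regardless of
	 * word size.  One non-null set costs 2 * ncpbytes == 2 * 1024/8
	 * == 256 bytes with FD_SETSIZE 1024, while all three sets cost
	 * only 3 * 2 * 256/8 == 192 bytes with the old FD_SETSIZE of 256.
	 */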
+ fd_mask s_selbits[howmany(2048, NFDBITS)];
+ fd_mask *ibits[3], *obits[3], *selbits, *sbp;
+ struct timeval atv, rtv, ttv;
+ int error, timo;
+ u_int nbufbytes, ncpbytes, ncpubytes, nfdbits;
+
+ if (nd < 0)
+ return (EINVAL);
+ fdp = td->td_proc->p_fd;
+ if (nd > fdp->fd_lastfile + 1)
+ nd = fdp->fd_lastfile + 1;
+
+ /*
+ * Allocate just enough bits for the non-null fd_sets. Use the
+ * preallocated auto buffer if possible.
+ */
+ nfdbits = roundup(nd, NFDBITS);
+ ncpbytes = nfdbits / NBBY;
+ ncpubytes = roundup(nd, abi_nfdbits) / NBBY;
+ nbufbytes = 0;
+ if (fd_in != NULL)
+ nbufbytes += 2 * ncpbytes;
+ if (fd_ou != NULL)
+ nbufbytes += 2 * ncpbytes;
+ if (fd_ex != NULL)
+ nbufbytes += 2 * ncpbytes;
+ if (nbufbytes <= sizeof s_selbits)
+ selbits = &s_selbits[0];
+ else
+ selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
+
+ /*
+ * Assign pointers into the bit buffers and fetch the input bits.
+ * Put the output buffers together so that they can be bzeroed
+ * together.
+ */
+ sbp = selbits;
+#define getbits(name, x) \
+ do { \
+ if (name == NULL) { \
+ ibits[x] = NULL; \
+ obits[x] = NULL; \
+ } else { \
+ ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \
+ obits[x] = sbp; \
+ sbp += ncpbytes / sizeof *sbp; \
+ error = copyin(name, ibits[x], ncpubytes); \
+ if (error != 0) \
+ goto done; \
+ bzero((char *)ibits[x] + ncpubytes, \
+ ncpbytes - ncpubytes); \
+ } \
+ } while (0)
+ getbits(fd_in, 0);
+ getbits(fd_ou, 1);
+ getbits(fd_ex, 2);
+#undef getbits
+
+#if BYTE_ORDER == BIG_ENDIAN && defined(__LP64__)
+ /*
+ * XXX: swizzle_fdset assumes that if abi_nfdbits != NFDBITS,
+ * we are running under 32-bit emulation. This should be more
+ * generic.
+ */
+#define swizzle_fdset(bits) \
+ if (abi_nfdbits != NFDBITS && bits != NULL) { \
+ int i; \
+ for (i = 0; i < ncpbytes / sizeof *sbp; i++) \
+ bits[i] = (bits[i] >> 32) | (bits[i] << 32); \
+ }
+#else
+#define swizzle_fdset(bits)
+#endif
+
+ /* Make sure the bit order makes it through an ABI transition */
+ swizzle_fdset(ibits[0]);
+ swizzle_fdset(ibits[1]);
+ swizzle_fdset(ibits[2]);
+
+ if (nbufbytes != 0)
+ bzero(selbits, nbufbytes / 2);
+
+ if (tvp != NULL) {
+ atv = *tvp;
+ if (itimerfix(&atv)) {
+ error = EINVAL;
+ goto done;
+ }
+ getmicrouptime(&rtv);
+ timevaladd(&atv, &rtv);
+ } else {
+ atv.tv_sec = 0;
+ atv.tv_usec = 0;
+ }
+ timo = 0;
+ seltdinit(td);
+ /* Iterate until the timeout expires or descriptors become ready. */
+ for (;;) {
+ error = selscan(td, ibits, obits, nd);
+ if (error || td->td_retval[0] != 0)
+ break;
+ if (atv.tv_sec || atv.tv_usec) {
+ getmicrouptime(&rtv);
+ if (timevalcmp(&rtv, &atv, >=))
+ break;
+ ttv = atv;
+ timevalsub(&ttv, &rtv);
+ timo = ttv.tv_sec > 24 * 60 * 60 ?
+ 24 * 60 * 60 * hz : tvtohz(&ttv);
+ }
+ error = seltdwait(td, timo);
+ if (error)
+ break;
+ error = selrescan(td, ibits, obits);
+ if (error || td->td_retval[0] != 0)
+ break;
+ }
+ seltdclear(td);
+
+done:
+ /* select is not restarted after signals... */
+ if (error == ERESTART)
+ error = EINTR;
+ if (error == EWOULDBLOCK)
+ error = 0;
+
+ /* swizzle bit order back, if necessary */
+ swizzle_fdset(obits[0]);
+ swizzle_fdset(obits[1]);
+ swizzle_fdset(obits[2]);
+#undef swizzle_fdset
+
+#define putbits(name, x) \
+ if (name && (error2 = copyout(obits[x], name, ncpubytes))) \
+ error = error2;
+ if (error == 0) {
+ int error2;
+
+ putbits(fd_in, 0);
+ putbits(fd_ou, 1);
+ putbits(fd_ex, 2);
+#undef putbits
+ }
+ if (selbits != &s_selbits[0])
+ free(selbits, M_SELECT);
+
+ return (error);
+}
+/*
+ * Convert a select bit set to poll flags.
+ *
+ * The backend always returns POLLHUP/POLLERR if appropriate and we
+ * return this as a set bit in any set.
+ */
+static int select_flags[3] = {
+ POLLRDNORM | POLLHUP | POLLERR,
+ POLLWRNORM | POLLHUP | POLLERR,
+ POLLRDBAND | POLLERR
+};
+
+/*
+ * Compute the fo_poll flags required for a fd given by the index and
+ * bit position in the fd_mask array.
+ */
+static __inline int
+selflags(fd_mask **ibits, int idx, fd_mask bit)
+{
+ int flags;
+ int msk;
+
+ flags = 0;
+ for (msk = 0; msk < 3; msk++) {
+ if (ibits[msk] == NULL)
+ continue;
+ if ((ibits[msk][idx] & bit) == 0)
+ continue;
+ flags |= select_flags[msk];
+ }
+ return (flags);
+}
+
+/*
+ * Set the appropriate output bits given a mask of fired events and the
+ * input bits originally requested.
+ */
+static __inline int
+selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events)
+{
+ int msk;
+ int n;
+
+ n = 0;
+ for (msk = 0; msk < 3; msk++) {
+ if ((events & select_flags[msk]) == 0)
+ continue;
+ if (ibits[msk] == NULL)
+ continue;
+ if ((ibits[msk][idx] & bit) == 0)
+ continue;
+ /*
+ * XXX Check for a duplicate set. This can occur because a
+ * socket calls selrecord() twice for each poll() call
+ * resulting in two selfds per real fd. selrescan() will
+ * call selsetbits twice as a result.
+ */
+ if ((obits[msk][idx] & bit) != 0)
+ continue;
+ obits[msk][idx] |= bit;
+ n++;
+ }
+
+ return (n);
+}
+
+/*
+ * Traverse the list of fds attached to this thread's seltd and check for
+ * completion.
+ */
+static int
+selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
+{
+ struct filedesc *fdp;
+ struct selinfo *si;
+ struct seltd *stp;
+ struct selfd *sfp;
+ struct selfd *sfn;
+ struct file *fp;
+ fd_mask bit;
+ int fd, ev, n, idx;
+
+ fdp = td->td_proc->p_fd;
+ stp = td->td_sel;
+ n = 0;
+ STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
+ fd = (int)(uintptr_t)sfp->sf_cookie;
+ si = sfp->sf_si;
+ selfdfree(stp, sfp);
+ /* If the selinfo wasn't cleared the event didn't fire. */
+ if (si != NULL)
+ continue;
+ if ((fp = fget_unlocked(fdp, fd)) == NULL)
+ return (EBADF);
+ idx = fd / NFDBITS;
+ bit = (fd_mask)1 << (fd % NFDBITS);
+ ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td);
+ fdrop(fp, td);
+ if (ev != 0)
+ n += selsetbits(ibits, obits, idx, bit, ev);
+ }
+ stp->st_flags = 0;
+ td->td_retval[0] = n;
+ return (0);
+}
+
+/*
+ * Perform the initial filedescriptor scan and register ourselves with
+ * each selinfo.
+ */
+static int
+selscan(td, ibits, obits, nfd)
+ struct thread *td;
+ fd_mask **ibits, **obits;
+ int nfd;
+{
+ struct filedesc *fdp;
+ struct file *fp;
+ fd_mask bit;
+ int ev, flags, end, fd;
+ int n, idx;
+
+ fdp = td->td_proc->p_fd;
+ n = 0;
+ for (idx = 0, fd = 0; fd < nfd; idx++) {
+ end = imin(fd + NFDBITS, nfd);
+ for (bit = 1; fd < end; bit <<= 1, fd++) {
+ /* Compute the list of events we're interested in. */
+ flags = selflags(ibits, idx, bit);
+ if (flags == 0)
+ continue;
+ if ((fp = fget_unlocked(fdp, fd)) == NULL)
+ return (EBADF);
+ selfdalloc(td, (void *)(uintptr_t)fd);
+ ev = fo_poll(fp, flags, td->td_ucred, td);
+ fdrop(fp, td);
+ if (ev != 0)
+ n += selsetbits(ibits, obits, idx, bit, ev);
+ }
+ }
+
+ td->td_retval[0] = n;
+ return (0);
+}
+#endif /* __rtems__ */
+
+#ifndef _SYS_SYSPROTO_H_
+struct poll_args {
+ struct pollfd *fds;
+ u_int nfds;
+ int timeout;
+};
+#endif
+int
+#ifdef __rtems__
+kern_poll(td, uap)
+#else
+poll(td, uap)
+#endif /* __rtems__ */
+ struct thread *td;
+ struct poll_args *uap;
+{
+ struct pollfd *bits;
+ struct pollfd smallbits[32];
+ struct timeval atv, rtv, ttv;
+ int error = 0, timo;
+ u_int nfds;
+ size_t ni;
+
+ nfds = uap->nfds;
+ if (nfds > maxfilesperproc && nfds > FD_SETSIZE)
+ return (EINVAL);
+ ni = nfds * sizeof(struct pollfd);
+ if (ni > sizeof(smallbits))
+ bits = malloc(ni, M_TEMP, M_WAITOK);
+ else
+ bits = smallbits;
+ error = copyin(uap->fds, bits, ni);
+ if (error)
+ goto done;
+ if (uap->timeout != INFTIM) {
+ atv.tv_sec = uap->timeout / 1000;
+ atv.tv_usec = (uap->timeout % 1000) * 1000;
+ if (itimerfix(&atv)) {
+ error = EINVAL;
+ goto done;
+ }
+ getmicrouptime(&rtv);
+ timevaladd(&atv, &rtv);
+ } else {
+ atv.tv_sec = 0;
+ atv.tv_usec = 0;
+ }
+ timo = 0;
+ seltdinit(td);
+ /* Iterate until the timeout expires or descriptors become ready. */
+ for (;;) {
+ error = pollscan(td, bits, nfds);
+ if (error || td->td_retval[0] != 0)
+ break;
+ if (atv.tv_sec || atv.tv_usec) {
+ getmicrouptime(&rtv);
+ if (timevalcmp(&rtv, &atv, >=))
+ break;
+ ttv = atv;
+ timevalsub(&ttv, &rtv);
+ timo = ttv.tv_sec > 24 * 60 * 60 ?
+ 24 * 60 * 60 * hz : tvtohz(&ttv);
+ }
+ error = seltdwait(td, timo);
+ if (error)
+ break;
+ error = pollrescan(td);
+ if (error || td->td_retval[0] != 0)
+ break;
+ }
+ seltdclear(td);
+
+done:
+ /* poll is not restarted after signals... */
+ if (error == ERESTART)
+ error = EINTR;
+ if (error == EWOULDBLOCK)
+ error = 0;
+ if (error == 0) {
+ error = pollout(td, bits, uap->fds, nfds);
+ if (error)
+ goto out;
+ }
+out:
+ if (ni > sizeof(smallbits))
+ free(bits, M_TEMP);
+ return (error);
+}
+
+static int
+pollrescan(struct thread *td)
+{
+ struct seltd *stp;
+ struct selfd *sfp;
+ struct selfd *sfn;
+ struct selinfo *si;
+ struct filedesc *fdp;
+ struct file *fp;
+ struct pollfd *fd;
+ int n;
+
+ n = 0;
+ fdp = td->td_proc->p_fd;
+ stp = td->td_sel;
+ FILEDESC_SLOCK(fdp);
+ STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
+ fd = (struct pollfd *)sfp->sf_cookie;
+ si = sfp->sf_si;
+ selfdfree(stp, sfp);
+ /* If the selinfo wasn't cleared the event didn't fire. */
+ if (si != NULL)
+ continue;
+ fp = fdp->fd_ofiles[fd->fd];
+ if (fp == NULL) {
+ fd->revents = POLLNVAL;
+ n++;
+ continue;
+ }
+ /*
+ * Note: backend also returns POLLHUP and
+ * POLLERR if appropriate.
+ */
+ fd->revents = fo_poll(fp, fd->events, td->td_ucred, td);
+ if (fd->revents != 0)
+ n++;
+ }
+ FILEDESC_SUNLOCK(fdp);
+ stp->st_flags = 0;
+ td->td_retval[0] = n;
+ return (0);
+}
+
+
+static int
+pollout(td, fds, ufds, nfd)
+ struct thread *td;
+ struct pollfd *fds;
+ struct pollfd *ufds;
+ u_int nfd;
+{
+ int error = 0;
+ u_int i = 0;
+ u_int n = 0;
+
+ for (i = 0; i < nfd; i++) {
+ error = copyout(&fds->revents, &ufds->revents,
+ sizeof(ufds->revents));
+ if (error)
+ return (error);
+ if (fds->revents != 0)
+ n++;
+ fds++;
+ ufds++;
+ }
+ td->td_retval[0] = n;
+ return (0);
+}
+
+static int
+pollscan(td, fds, nfd)
+ struct thread *td;
+ struct pollfd *fds;
+ u_int nfd;
+{
+ struct filedesc *fdp = td->td_proc->p_fd;
+ int i;
+ struct file *fp;
+ int n = 0;
+
+ FILEDESC_SLOCK(fdp);
+ for (i = 0; i < nfd; i++, fds++) {
+ if (fds->fd >= fdp->fd_nfiles) {
+ fds->revents = POLLNVAL;
+ n++;
+ } else if (fds->fd < 0) {
+ fds->revents = 0;
+ } else {
+ fp = fdp->fd_ofiles[fds->fd];
+ if (fp == NULL) {
+ fds->revents = POLLNVAL;
+ n++;
+ } else {
+ /*
+ * Note: backend also returns POLLHUP and
+ * POLLERR if appropriate.
+ */
+ selfdalloc(td, fds);
+ fds->revents = fo_poll(fp, fds->events,
+ td->td_ucred, td);
+ /*
+				 * POSIX requires that POLLOUT never be
+				 * set simultaneously with POLLHUP.
+ */
+ if ((fds->revents & POLLHUP) != 0)
+ fds->revents &= ~POLLOUT;
+
+ if (fds->revents != 0)
+ n++;
+ }
+ }
+ }
+ FILEDESC_SUNLOCK(fdp);
+ td->td_retval[0] = n;
+ return (0);
+}
+
+/*
+ * OpenBSD poll system call.
+ *
+ * XXX this isn't quite a true representation.  OpenBSD uses select ops.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct openbsd_poll_args {
+ struct pollfd *fds;
+ u_int nfds;
+ int timeout;
+};
+#endif
+int
+openbsd_poll(td, uap)
+ register struct thread *td;
+ register struct openbsd_poll_args *uap;
+{
+#ifdef __rtems__
+ return (kern_poll(td, (struct poll_args *)uap));
+#else
+ return (poll(td, (struct poll_args *)uap));
+#endif
+}
+
+/*
+ * XXX This was created specifically to support netncp and netsmb. This
+ * allows the caller to specify a socket to wait for events on. It returns
+ * 0 if any events matched and an error otherwise. There is no way to
+ * determine which events fired.
+ */
+int
+selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
+{
+ struct timeval atv, rtv, ttv;
+ int error, timo;
+
+ if (tvp != NULL) {
+ atv = *tvp;
+ if (itimerfix(&atv))
+ return (EINVAL);
+ getmicrouptime(&rtv);
+ timevaladd(&atv, &rtv);
+ } else {
+ atv.tv_sec = 0;
+ atv.tv_usec = 0;
+ }
+
+ timo = 0;
+ seltdinit(td);
+ /*
+ * Iterate until the timeout expires or the socket becomes ready.
+ */
+ for (;;) {
+ selfdalloc(td, NULL);
+ error = sopoll(so, events, NULL, td);
+ /* error here is actually the ready events. */
+ if (error)
+ return (0);
+ if (atv.tv_sec || atv.tv_usec) {
+ getmicrouptime(&rtv);
+ if (timevalcmp(&rtv, &atv, >=)) {
+ seltdclear(td);
+ return (EWOULDBLOCK);
+ }
+ ttv = atv;
+ timevalsub(&ttv, &rtv);
+ timo = ttv.tv_sec > 24 * 60 * 60 ?
+ 24 * 60 * 60 * hz : tvtohz(&ttv);
+ }
+ error = seltdwait(td, timo);
+ seltdclear(td);
+ if (error)
+ break;
+ }
+ /* XXX Duplicates ncp/smb behavior. */
+ if (error == ERESTART)
+ error = 0;
+ return (error);
+}
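
A consumer such as the netsmb transport would call selsocket() along these lines; the wrapper name and the five-second timeout below are illustrative only:

/* Hedged sketch: wait up to five seconds for a socket to become readable. */
static int
example_wait_readable(struct socket *so, struct thread *td)
{
	struct timeval tv;

	tv.tv_sec = 5;
	tv.tv_usec = 0;
	/* Returns 0 once any requested event fires, EWOULDBLOCK on timeout. */
	return (selsocket(so, POLLIN | POLLRDNORM, &tv, td));
}
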
+
+/*
+ * Preallocate two selfds associated with 'cookie'. Some fo_poll routines
+ * have two select sets, one for read and another for write.
+ */
+static void
+selfdalloc(struct thread *td, void *cookie)
+{
+ struct seltd *stp;
+
+ stp = td->td_sel;
+ if (stp->st_free1 == NULL)
+ stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
+ stp->st_free1->sf_td = stp;
+ stp->st_free1->sf_cookie = cookie;
+ if (stp->st_free2 == NULL)
+ stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
+ stp->st_free2->sf_td = stp;
+ stp->st_free2->sf_cookie = cookie;
+}
+
+static void
+selfdfree(struct seltd *stp, struct selfd *sfp)
+{
+ STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link);
+ mtx_lock(sfp->sf_mtx);
+ if (sfp->sf_si)
+ TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads);
+ mtx_unlock(sfp->sf_mtx);
+ uma_zfree(selfd_zone, sfp);
+}
+
+/*
+ * Record a select request.
+ */
+void
+selrecord(selector, sip)
+ struct thread *selector;
+ struct selinfo *sip;
+{
+ struct selfd *sfp;
+ struct seltd *stp;
+ struct mtx *mtxp;
+
+ stp = selector->td_sel;
+ /*
+ * Don't record when doing a rescan.
+ */
+ if (stp->st_flags & SELTD_RESCAN)
+ return;
+ /*
+ * Grab one of the preallocated descriptors.
+ */
+ sfp = NULL;
+ if ((sfp = stp->st_free1) != NULL)
+ stp->st_free1 = NULL;
+ else if ((sfp = stp->st_free2) != NULL)
+ stp->st_free2 = NULL;
+ else
+ panic("selrecord: No free selfd on selq");
+ mtxp = sip->si_mtx;
+ if (mtxp == NULL)
+ mtxp = mtx_pool_find(mtxpool_select, sip);
+ /*
+ * Initialize the sfp and queue it in the thread.
+ */
+ sfp->sf_si = sip;
+ sfp->sf_mtx = mtxp;
+ STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link);
+ /*
+ * Now that we've locked the sip, check for initialization.
+ */
+ mtx_lock(mtxp);
+ if (sip->si_mtx == NULL) {
+ sip->si_mtx = mtxp;
+ TAILQ_INIT(&sip->si_tdlist);
+ }
+ /*
+ * Add this thread to the list of selfds listening on this selinfo.
+ */
+ TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads);
+ mtx_unlock(sip->si_mtx);
+}
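
selrecord() is normally paired with selwakeup(): a poll backend records the selecting thread when the object is not ready, and the code path that later makes it ready wakes the recorded threads. The sketch below shows that pairing; the softc layout, function names, and readiness flag are hypothetical:

/* Hedged sketch of driver-side selrecord()/selwakeup() usage. */
struct example_softc {
	struct selinfo	sc_rsel;	/* readers waiting in select/poll */
	int		sc_havedata;	/* hypothetical readiness flag */
};

static int
example_poll(struct example_softc *sc, int events, struct thread *td)
{
	int revents = 0;

	if (events & (POLLIN | POLLRDNORM)) {
		if (sc->sc_havedata)
			revents |= events & (POLLIN | POLLRDNORM);
		else
			selrecord(td, &sc->sc_rsel);	/* remember this thread */
	}
	return (revents);
}

static void
example_data_arrived(struct example_softc *sc)
{

	sc->sc_havedata = 1;
	selwakeup(&sc->sc_rsel);	/* wake any recorded selecting threads */
}
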
+
+/* Wake up a selecting thread. */
+void
+selwakeup(sip)
+ struct selinfo *sip;
+{
+ doselwakeup(sip, -1);
+}
+
+/* Wake up a selecting thread, and set its priority. */
+void
+selwakeuppri(sip, pri)
+ struct selinfo *sip;
+ int pri;
+{
+ doselwakeup(sip, pri);
+}
+
+/*
+ * Do a wakeup when a selectable event occurs.
+ */
+static void
+doselwakeup(sip, pri)
+ struct selinfo *sip;
+ int pri;
+{
+ struct selfd *sfp;
+ struct selfd *sfn;
+ struct seltd *stp;
+
+ /* If it's not initialized there can't be any waiters. */
+ if (sip->si_mtx == NULL)
+ return;
+ /*
+ * Locking the selinfo locks all selfds associated with it.
+ */
+ mtx_lock(sip->si_mtx);
+ TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) {
+ /*
+ * Once we remove this sfp from the list and clear the
+		 * sf_si, seltdclear will know to ignore this si.
+ */
+ TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads);
+ sfp->sf_si = NULL;
+ stp = sfp->sf_td;
+ mtx_lock(&stp->st_mtx);
+ stp->st_flags |= SELTD_PENDING;
+ cv_broadcastpri(&stp->st_wait, pri);
+ mtx_unlock(&stp->st_mtx);
+ }
+ mtx_unlock(sip->si_mtx);
+}
+
+static void
+seltdinit(struct thread *td)
+{
+ struct seltd *stp;
+
+ if ((stp = td->td_sel) != NULL)
+ goto out;
+ td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO);
+ mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF);
+ cv_init(&stp->st_wait, "select");
+out:
+ stp->st_flags = 0;
+ STAILQ_INIT(&stp->st_selq);
+}
+
+static int
+seltdwait(struct thread *td, int timo)
+{
+ struct seltd *stp;
+ int error;
+
+ stp = td->td_sel;
+ /*
+ * An event of interest may occur while we do not hold the seltd
+	 * locked, so check the pending flag before we sleep.
+ */
+ mtx_lock(&stp->st_mtx);
+ /*
+ * Any further calls to selrecord will be a rescan.
+ */
+ stp->st_flags |= SELTD_RESCAN;
+ if (stp->st_flags & SELTD_PENDING) {
+ mtx_unlock(&stp->st_mtx);
+ return (0);
+ }
+ if (timo > 0)
+ error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo);
+ else
+ error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
+ mtx_unlock(&stp->st_mtx);
+
+ return (error);
+}
+
+void
+seltdfini(struct thread *td)
+{
+ struct seltd *stp;
+
+ stp = td->td_sel;
+ if (stp == NULL)
+ return;
+ if (stp->st_free1)
+ uma_zfree(selfd_zone, stp->st_free1);
+ if (stp->st_free2)
+ uma_zfree(selfd_zone, stp->st_free2);
+ td->td_sel = NULL;
+ free(stp, M_SELECT);
+}
+
+/*
+ * Remove the references to the thread from all of the objects we were
+ * polling.
+ */
+static void
+seltdclear(struct thread *td)
+{
+ struct seltd *stp;
+ struct selfd *sfp;
+ struct selfd *sfn;
+
+ stp = td->td_sel;
+ STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn)
+ selfdfree(stp, sfp);
+ stp->st_flags = 0;
+}
+
+static void selectinit(void *);
+SYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL);
+static void
+selectinit(void *dummy __unused)
+{
+
+ selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, 0);
+ mtxpool_select = mtx_pool_create("select mtxpool", 128, MTX_DEF);
+}
diff --git a/freebsd/sys/buf.h b/freebsd/sys/buf.h
new file mode 100644
index 00000000..88e55d95
--- /dev/null
+++ b/freebsd/sys/buf.h
@@ -0,0 +1,526 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)buf.h 8.9 (Berkeley) 3/30/95
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_BUF_HH_
+#define _SYS_BUF_HH_
+
+#include <freebsd/sys/bufobj.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/lockmgr.h>
+
+struct bio;
+struct buf;
+struct bufobj;
+struct mount;
+struct vnode;
+struct uio;
+
+/*
+ * To avoid including <ufs/ffs/softdep.h>
+ */
+LIST_HEAD(workhead, worklist);
+/*
+ * These are currently used only by the soft dependency code, hence
+ * are stored once in a global variable. If other subsystems wanted
+ * to use these hooks, a pointer to a set of bio_ops could be added
+ * to each buffer.
+ */
+extern struct bio_ops {
+ void (*io_start)(struct buf *);
+ void (*io_complete)(struct buf *);
+ void (*io_deallocate)(struct buf *);
+ int (*io_countdeps)(struct buf *, int);
+} bioops;
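
Installing these hooks amounts to filling in the global bioops structure at subsystem initialization; the sketch below is a hypothetical illustration (the hook names are made up) and relies on the buf_start()/buf_complete() wrappers later in this header checking each pointer for NULL before calling it:

/* Hedged sketch: hypothetical initialization installing buffer I/O hooks. */
static void example_io_start(struct buf *bp);
static void example_io_complete(struct buf *bp);

static void
example_bioops_init(void)
{

	bioops.io_start = example_io_start;
	bioops.io_complete = example_io_complete;
	/* io_deallocate and io_countdeps may stay NULL; the buf_*()
	 * wrappers check each hook for NULL before invoking it. */
}
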
+
+struct vm_object;
+
+typedef unsigned char b_xflags_t;
+
+/*
+ * The buffer header describes an I/O operation in the kernel.
+ *
+ * NOTES:
+ * b_bufsize, b_bcount. b_bufsize is the allocation size of the
+ * buffer, either DEV_BSIZE or PAGE_SIZE aligned. b_bcount is the
+ * originally requested buffer size and can serve as a bounds check
+ * against EOF. For most, but not all uses, b_bcount == b_bufsize.
+ *
+ * b_dirtyoff, b_dirtyend. Buffers support piecemeal, unaligned
+ * ranges of dirty data that need to be written to backing store.
+ * The range is typically clipped at b_bcount ( not b_bufsize ).
+ *
+ * b_resid. Number of bytes remaining in I/O. After an I/O operation
+ * completes, b_resid is usually 0 indicating 100% success.
+ *
+ * All fields are protected by the buffer lock except those marked:
+ * V - Protected by owning bufobj lock
+ * Q - Protected by the buf queue lock
+ *	D - Protected by a dependency implementation specific lock
+ */
+struct buf {
+ struct bufobj *b_bufobj;
+ long b_bcount;
+ void *b_caller1;
+ caddr_t b_data;
+ int b_error;
+ uint8_t b_iocmd;
+ uint8_t b_ioflags;
+ off_t b_iooffset;
+ long b_resid;
+ void (*b_iodone)(struct buf *);
+ daddr_t b_blkno; /* Underlying physical block number. */
+ off_t b_offset; /* Offset into file. */
+ TAILQ_ENTRY(buf) b_bobufs; /* (V) Buffer's associated vnode. */
+ struct buf *b_left; /* (V) splay tree link */
+ struct buf *b_right; /* (V) splay tree link */
+ uint32_t b_vflags; /* (V) BV_* flags */
+ TAILQ_ENTRY(buf) b_freelist; /* (Q) Free list position inactive. */
+ unsigned short b_qindex; /* (Q) buffer queue index */
+ uint32_t b_flags; /* B_* flags. */
+ b_xflags_t b_xflags; /* extra flags */
+ struct lock b_lock; /* Buffer lock */
+ long b_bufsize; /* Allocated buffer size. */
+ long b_runningbufspace; /* when I/O is running, pipelining */
+ caddr_t b_kvabase; /* base kva for buffer */
+ int b_kvasize; /* size of kva for buffer */
+ daddr_t b_lblkno; /* Logical block number. */
+ struct vnode *b_vp; /* Device vnode. */
+ int b_dirtyoff; /* Offset in buffer of dirty region. */
+ int b_dirtyend; /* Offset of end of dirty region. */
+ struct ucred *b_rcred; /* Read credentials reference. */
+ struct ucred *b_wcred; /* Write credentials reference. */
+ void *b_saveaddr; /* Original b_addr for physio. */
+ union pager_info {
+ int pg_reqpage;
+ } b_pager;
+ union cluster_info {
+ TAILQ_HEAD(cluster_list_head, buf) cluster_head;
+ TAILQ_ENTRY(buf) cluster_entry;
+ } b_cluster;
+ struct vm_page *b_pages[btoc(MAXPHYS)];
+ int b_npages;
+ struct workhead b_dep; /* (D) List of filesystem dependencies. */
+ void *b_fsprivate1;
+ void *b_fsprivate2;
+ void *b_fsprivate3;
+ int b_pin_count;
+};
+
+#define b_object b_bufobj->bo_object
+
+/*
+ * These flags are kept in b_flags.
+ *
+ * Notes:
+ *
+ * B_ASYNC VOP calls on bp's are usually async whether or not
+ * B_ASYNC is set, but some subsystems, such as NFS, like
+ * to know what is best for the caller so they can
+ * optimize the I/O.
+ *
+ * B_PAGING Indicates that bp is being used by the paging system or
+ * some paging system and that the bp is not linked into
+ * the b_vp's clean/dirty linked lists or ref counts.
+ * Buffer vp reassignments are illegal in this case.
+ *
+ * B_CACHE This may only be set if the buffer is entirely valid.
+ * The situation where B_DELWRI is set and B_CACHE is
+ * clear MUST be committed to disk by getblk() so
+ * B_DELWRI can also be cleared. See the comments for
+ * getblk() in kern/vfs_bio.c. If B_CACHE is clear,
+ * the caller is expected to clear BIO_ERROR and B_INVAL,
+ * set BIO_READ, and initiate an I/O.
+ *
+ * The 'entire buffer' is defined to be the range from
+ * 0 through b_bcount.
+ *
+ * B_MALLOC Request that the buffer be allocated from the malloc
+ * pool, DEV_BSIZE aligned instead of PAGE_SIZE aligned.
+ *
+ * B_CLUSTEROK This flag is typically set for B_DELWRI buffers
+ * by filesystems that allow clustering when the buffer
+ * is fully dirty and indicates that it may be clustered
+ * with other adjacent dirty buffers. Note the clustering
+ * may not be used with the stage 1 data write under NFS
+ * but may be used for the commit rpc portion.
+ *
+ *	B_VMIO		Indicates that the buffer is tied into a VM object.
+ * The buffer's data is always PAGE_SIZE aligned even
+ * if b_bufsize and b_bcount are not. ( b_bufsize is
+ * always at least DEV_BSIZE aligned, though ).
+ *
+ * B_DIRECT Hint that we should attempt to completely free
+ * the pages underlying the buffer. B_DIRECT is
+ * sticky until the buffer is released and typically
+ * only has an effect when B_RELBUF is also set.
+ *
+ */
+
+#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
+#define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */
+#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
+#define B_DIRECT 0x00000008 /* direct I/O flag (pls free vmio) */
+#define B_DEFERRED 0x00000010 /* Skipped over for cleaning */
+#define B_CACHE 0x00000020 /* Bread found us in the cache. */
+#define B_VALIDSUSPWRT 0x00000040 /* Valid write during suspension. */
+#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
+#define B_PERSISTENT 0x00000100 /* Perm. ref'ed while EXT2FS mounted. */
+#define B_DONE 0x00000200 /* I/O completed. */
+#define B_EINTR 0x00000400 /* I/O was interrupted */
+#define B_00000800 0x00000800 /* Available flag. */
+#define B_00001000 0x00001000 /* Available flag. */
+#define B_INVAL 0x00002000 /* Does not contain valid info. */
+#define B_00004000 0x00004000 /* Available flag. */
+#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
+#define B_MALLOC 0x00010000 /* malloced b_data */
+#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */
+#define B_000400000 0x00040000 /* Available flag. */
+#define B_000800000 0x00080000 /* Available flag. */
+#define B_00100000 0x00100000 /* Available flag. */
+#define B_DIRTY 0x00200000 /* Needs writing later (in EXT2FS). */
+#define B_RELBUF 0x00400000 /* Release VMIO buffer. */
+#define B_00800000 0x00800000 /* Available flag. */
+#define B_01000000 0x01000000 /* Available flag. */
+#define B_NEEDSGIANT 0x02000000 /* Buffer's vnode needs giant. */
+#define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */
+#define B_MANAGED 0x08000000 /* Managed by FS. */
+#define B_RAM 0x10000000 /* Read ahead mark (flag) */
+#define B_VMIO 0x20000000 /* VMIO flag */
+#define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */
+#define B_REMFREE 0x80000000 /* Delayed bremfree */
+
+#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34b27" \
+ "\33paging\32b25\31b24\30b23\27relbuf\26dirty\25b20" \
+ "\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
+ "\15b12\14b11\13eintr\12done\11persist\10delwri\7validsuspwrt" \
+ "\6cache\5deferred\4direct\3async\2needcommit\1age"
+
+/*
+ * These flags are kept in b_xflags.
+ */
+#define BX_VNDIRTY 0x00000001 /* On vnode dirty list */
+#define BX_VNCLEAN 0x00000002 /* On vnode clean list */
+#define BX_BKGRDWRITE 0x00000010 /* Do writes in background */
+#define BX_BKGRDMARKER 0x00000020 /* Mark buffer for splay tree */
+#define BX_ALTDATA 0x00000040 /* Holds extended data */
+
+#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
+
+/*
+ * These flags are kept in b_vflags.
+ */
+#define BV_SCANNED 0x00000001 /* VOP_FSYNC funcs mark written bufs */
+#define BV_BKGRDINPROG 0x00000002 /* Background write in progress */
+#define BV_BKGRDWAIT 0x00000004 /* Background write waiting */
+#define BV_INFREECNT 0x80000000 /* buf is counted in numfreebufs */
+
+#ifdef _KERNEL
+/*
+ * Buffer locking
+ */
+extern const char *buf_wmesg; /* Default buffer lock message */
+#define BUF_WMESG "bufwait"
+#include <freebsd/sys/proc.h> /* XXX for curthread */
+#include <freebsd/sys/mutex.h>
+
+/*
+ * Initialize a lock.
+ */
+#define BUF_LOCKINIT(bp) \
+ lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, 0)
+/*
+ *
+ * Get a lock sleeping non-interruptibly until it becomes available.
+ */
+#define BUF_LOCK(bp, locktype, interlock) \
+ _lockmgr_args(&(bp)->b_lock, (locktype), (interlock), \
+ LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, \
+ LOCK_FILE, LOCK_LINE)
+
+/*
+ * Get a lock sleeping with specified interruptibility and timeout.
+ */
+#define BUF_TIMELOCK(bp, locktype, interlock, wmesg, catch, timo) \
+ _lockmgr_args(&(bp)->b_lock, (locktype) | LK_TIMELOCK, \
+ (interlock), (wmesg), (PRIBIO + 4) | (catch), (timo), \
+ LOCK_FILE, LOCK_LINE)
+
+/*
+ * Release a lock. Only the acquiring process may free the lock unless
+ * it has been handed off to biodone.
+ */
+#define BUF_UNLOCK(bp) do { \
+ KASSERT(((bp)->b_flags & B_REMFREE) == 0, \
+ ("BUF_UNLOCK %p while B_REMFREE is still set.", (bp))); \
+ \
+ (void)_lockmgr_args(&(bp)->b_lock, LK_RELEASE, NULL, \
+ LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, \
+ LOCK_FILE, LOCK_LINE); \
+} while (0)
+
+/*
+ * Check if a buffer lock is recursed.
+ */
+#define BUF_LOCKRECURSED(bp) \
+ lockmgr_recursed(&(bp)->b_lock)
+
+/*
+ * Check if a buffer lock is currently held.
+ */
+#define BUF_ISLOCKED(bp) \
+ lockstatus(&(bp)->b_lock)
+/*
+ * Free a buffer lock.
+ */
+#define BUF_LOCKFREE(bp) \
+ lockdestroy(&(bp)->b_lock)
+
+/*
+ * Buffer lock assertions.
+ */
+#if defined(INVARIANTS) && defined(INVARIANT_SUPPORT)
+#define BUF_ASSERT_LOCKED(bp) \
+ _lockmgr_assert(&(bp)->b_lock, KA_LOCKED, LOCK_FILE, LOCK_LINE)
+#define BUF_ASSERT_SLOCKED(bp) \
+ _lockmgr_assert(&(bp)->b_lock, KA_SLOCKED, LOCK_FILE, LOCK_LINE)
+#define BUF_ASSERT_XLOCKED(bp) \
+ _lockmgr_assert(&(bp)->b_lock, KA_XLOCKED, LOCK_FILE, LOCK_LINE)
+#define BUF_ASSERT_UNLOCKED(bp) \
+ _lockmgr_assert(&(bp)->b_lock, KA_UNLOCKED, LOCK_FILE, LOCK_LINE)
+#define BUF_ASSERT_HELD(bp)
+#define BUF_ASSERT_UNHELD(bp)
+#else
+#define BUF_ASSERT_LOCKED(bp)
+#define BUF_ASSERT_SLOCKED(bp)
+#define BUF_ASSERT_XLOCKED(bp)
+#define BUF_ASSERT_UNLOCKED(bp)
+#define BUF_ASSERT_HELD(bp)
+#define BUF_ASSERT_UNHELD(bp)
+#endif
+
+#ifdef _SYS_PROC_HH_ /* Avoid #include <freebsd/sys/proc.h> pollution */
+/*
+ * When initiating asynchronous I/O, change ownership of the lock to the
+ * kernel. Once done, the lock may legally be released by biodone. The
+ * original owning process can no longer acquire it recursively, but must
+ * wait until the I/O is completed and the lock has been freed by biodone.
+ */
+#define BUF_KERNPROC(bp) \
+ _lockmgr_disown(&(bp)->b_lock, LOCK_FILE, LOCK_LINE)
+#endif
+
+/*
+ * Find out if the lock has waiters or not.
+ */
+#define BUF_LOCKWAITERS(bp) \
+ lockmgr_waiters(&(bp)->b_lock)
+
+#endif /* _KERNEL */
+
+struct buf_queue_head {
+ TAILQ_HEAD(buf_queue, buf) queue;
+ daddr_t last_pblkno;
+ struct buf *insert_point;
+ struct buf *switch_point;
+};
+
+/*
+ * This structure describes a clustered I/O. It is stored in the b_saveaddr
+ * field of the buffer on which I/O is done. At I/O completion, cluster
+ * callback uses the structure to parcel I/Os to individual buffers, and
+ * then frees this structure.
+ */
+struct cluster_save {
+ long bs_bcount; /* Saved b_bcount. */
+ long bs_bufsize; /* Saved b_bufsize. */
+ void *bs_saveaddr; /* Saved b_addr. */
+ int bs_nchildren; /* Number of associated buffers. */
+ struct buf **bs_children; /* List of associated buffers. */
+};
+
+#ifdef _KERNEL
+
+static __inline int
+bwrite(struct buf *bp)
+{
+
+ KASSERT(bp->b_bufobj != NULL, ("bwrite: no bufobj bp=%p", bp));
+ KASSERT(bp->b_bufobj->bo_ops != NULL, ("bwrite: no bo_ops bp=%p", bp));
+ KASSERT(bp->b_bufobj->bo_ops->bop_write != NULL,
+ ("bwrite: no bop_write bp=%p", bp));
+ return (BO_WRITE(bp->b_bufobj, bp));
+}
+
+static __inline void
+bstrategy(struct buf *bp)
+{
+
+ KASSERT(bp->b_bufobj != NULL, ("bstrategy: no bufobj bp=%p", bp));
+ KASSERT(bp->b_bufobj->bo_ops != NULL,
+ ("bstrategy: no bo_ops bp=%p", bp));
+ KASSERT(bp->b_bufobj->bo_ops->bop_strategy != NULL,
+ ("bstrategy: no bop_strategy bp=%p", bp));
+ BO_STRATEGY(bp->b_bufobj, bp);
+}
+
+static __inline void
+buf_start(struct buf *bp)
+{
+ if (bioops.io_start)
+ (*bioops.io_start)(bp);
+}
+
+static __inline void
+buf_complete(struct buf *bp)
+{
+ if (bioops.io_complete)
+ (*bioops.io_complete)(bp);
+}
+
+static __inline void
+buf_deallocate(struct buf *bp)
+{
+ if (bioops.io_deallocate)
+ (*bioops.io_deallocate)(bp);
+ BUF_LOCKFREE(bp);
+}
+
+static __inline int
+buf_countdeps(struct buf *bp, int i)
+{
+ if (bioops.io_countdeps)
+ return ((*bioops.io_countdeps)(bp, i));
+ else
+ return (0);
+}
+
+#endif /* _KERNEL */
+
+/*
+ * Zero out the buffer's data area.
+ */
+#define clrbuf(bp) { \
+ bzero((bp)->b_data, (u_int)(bp)->b_bcount); \
+ (bp)->b_resid = 0; \
+}
+
+/*
+ * Flags for getblk's last parameter.
+ */
+#define GB_LOCK_NOWAIT 0x0001 /* Fail if we block on a buf lock. */
+#define GB_NOCREAT 0x0002 /* Don't create a buf if not found. */
+#define GB_NOWAIT_BD 0x0004 /* Do not wait for bufdaemon */
+
+#ifdef _KERNEL
+extern int nbuf; /* The number of buffer headers */
+extern long maxswzone; /* Max KVA for swap structures */
+extern long maxbcache; /* Max KVA for buffer cache */
+extern long runningbufspace;
+extern long hibufspace;
+extern int dirtybufthresh;
+extern int bdwriteskip;
+extern int dirtybufferflushes;
+extern int altbufferflushes;
+extern int buf_maxio; /* nominal maximum I/O for buffer */
+extern struct buf *buf; /* The buffer headers. */
+extern char *buffers; /* The buffer contents. */
+extern int bufpages; /* Number of memory pages in the buffer pool. */
+extern struct buf *swbuf; /* Swap I/O buffer headers. */
+extern int nswbuf; /* Number of swap I/O buffer headers. */
+extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */
+extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */
+
+void runningbufwakeup(struct buf *);
+void waitrunningbufspace(void);
+caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est);
+void bufinit(void);
+void bwillwrite(void);
+int buf_dirty_count_severe(void);
+void bremfree(struct buf *);
+void bremfreef(struct buf *); /* XXX Force bremfree, only for nfs. */
+int bread(struct vnode *, daddr_t, int, struct ucred *, struct buf **);
+void breada(struct vnode *, daddr_t *, int *, int, struct ucred *);
+int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
+ struct ucred *, struct buf **);
+void bdwrite(struct buf *);
+void bawrite(struct buf *);
+void bdirty(struct buf *);
+void bundirty(struct buf *);
+void bufstrategy(struct bufobj *, struct buf *);
+void brelse(struct buf *);
+void bqrelse(struct buf *);
+int vfs_bio_awrite(struct buf *);
+struct buf * getpbuf(int *);
+struct buf *incore(struct bufobj *, daddr_t);
+struct buf *gbincore(struct bufobj *, daddr_t);
+struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
+struct buf *geteblk(int, int);
+int bufwait(struct buf *);
+int bufwrite(struct buf *);
+void bufdone(struct buf *);
+void bufdone_finish(struct buf *);
+
+int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
+ struct ucred *, long, int, struct buf **);
+int cluster_wbuild(struct vnode *, long, daddr_t, int);
+void cluster_write(struct vnode *, struct buf *, u_quad_t, int);
+void vfs_bio_set_valid(struct buf *, int base, int size);
+void vfs_bio_clrbuf(struct buf *);
+void vfs_busy_pages(struct buf *, int clear_modify);
+void vfs_unbusy_pages(struct buf *);
+int vmapbuf(struct buf *);
+void vunmapbuf(struct buf *);
+void relpbuf(struct buf *, int *);
+void brelvp(struct buf *);
+void bgetvp(struct vnode *, struct buf *);
+void pbgetbo(struct bufobj *bo, struct buf *bp);
+void pbgetvp(struct vnode *, struct buf *);
+void pbrelbo(struct buf *);
+void pbrelvp(struct buf *);
+int allocbuf(struct buf *bp, int size);
+void reassignbuf(struct buf *);
+struct buf *trypbuf(int *);
+void bwait(struct buf *, u_char, const char *);
+void bdone(struct buf *);
+void bpin(struct buf *);
+void bunpin(struct buf *);
+void bunpin_wait(struct buf *);
+
+#endif /* _KERNEL */
+
+#endif /* !_SYS_BUF_HH_ */
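The buffer-cache API declared above is consumed by filesystem code rather than by applications. As a rough illustration of the common read path, a minimal sketch follows; it assumes a kernel context, and example_read_block() and its arguments are hypothetical names, not part of this patch:

#include <freebsd/sys/param.h>
#include <freebsd/sys/systm.h>
#include <freebsd/sys/ucred.h>
#include <freebsd/sys/buf.h>

/* Hypothetical helper: copy one logical block of a vnode into dst. */
static int
example_read_block(struct vnode *vp, daddr_t lblkno, int size, void *dst)
{
	struct buf *bp;
	int error;

	/* bread() returns the block from the cache or waits for the I/O. */
	error = bread(vp, lblkno, size, NOCRED, &bp);
	if (error != 0)
		return (error);

	/* b_data is only valid while the buffer is still held. */
	bcopy(bp->b_data, dst, (size_t)size);

	/* bqrelse() puts the clean buffer back on the cache queues. */
	bqrelse(bp);
	return (0);
}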
diff --git a/freebsd/sys/mqueue.h b/freebsd/sys/mqueue.h
new file mode 100644
index 00000000..80d40479
--- /dev/null
+++ b/freebsd/sys/mqueue.h
@@ -0,0 +1,45 @@
+/*-
+ * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_MQUEUE_HH_
+#define _SYS_MQUEUE_HH_
+
+struct mq_attr {
+ long mq_flags; /* Message queue flags. */
+ long mq_maxmsg; /* Maximum number of messages. */
+ long mq_msgsize; /* Maximum message size. */
+ long mq_curmsgs; /* Number of messages currently queued. */
+ long __reserved[4]; /* Ignored for input, zeroed for output */
+};
+
+#ifdef _KERNEL
+struct thread;
+struct file;
+extern void (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
+#endif
+#endif
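struct mq_attr mirrors the POSIX message-queue attribute block; in userland it is normally filled in before calling mq_open(). A minimal sketch, where the queue name and limits are purely illustrative:

#include <mqueue.h>
#include <fcntl.h>

/* Create or open a queue that holds up to 8 messages of 64 bytes each. */
static mqd_t
example_open_queue(void)
{
	struct mq_attr attr = {
		.mq_flags   = 0,	/* blocking queue */
		.mq_maxmsg  = 8,	/* maximum number of queued messages */
		.mq_msgsize = 64,	/* maximum size of a single message */
	};

	return (mq_open("/example", O_CREAT | O_RDWR, 0600, &attr));
}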
diff --git a/freebsd/sys/proc.h b/freebsd/sys/proc.h
index 39ddd782..04022ee5 100644
--- a/freebsd/sys/proc.h
+++ b/freebsd/sys/proc.h
@@ -208,7 +208,9 @@ struct thread {
TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */
TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */
struct cpuset *td_cpuset; /* (t) CPU affinity mask. */
+#endif /* __rtems__ */
struct seltd *td_sel; /* Select queue/channel. */
+#ifndef __rtems__
struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */
struct turnstile *td_turnstile; /* (k) Associated turnstile. */
struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */
@@ -478,7 +480,9 @@ struct proc {
TAILQ_HEAD(, thread) p_threads; /* (c) all threads. */
struct mtx p_slock; /* process spin lock */
struct ucred *p_ucred; /* (c) Process owner's identity. */
+#endif /* __rtems__ */
struct filedesc *p_fd; /* (b) Open files. */
+#ifndef __rtems__
struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */
struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */
struct plimit *p_limit; /* (c) Process limits. */
@@ -572,6 +576,9 @@ struct proc {
struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */
struct cv p_pwait; /* (*) wait cv for exit/exec */
#else /* __rtems__ */
+ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */
+ int p_flag; /* (c) P_* flags. */
+ struct proc *p_leader; /* (b) */
struct ucred *p_ucred; /* (c) Process owner's identity. */
struct mtx p_mtx; /* (n) Lock for this struct. */
rtems_id p_pid;
diff --git a/freebsd/sys/tty.h b/freebsd/sys/tty.h
new file mode 100644
index 00000000..a56d38fb
--- /dev/null
+++ b/freebsd/sys/tty.h
@@ -0,0 +1,217 @@
+/*-
+ * Copyright (c) 2008 Ed Schouten <ed@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Portions of this software were developed under sponsorship from Snow
+ * B.V., the Netherlands.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TTY_HH_
+#define _SYS_TTY_HH_
+
+#include <freebsd/sys/param.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/lock.h>
+#include <freebsd/sys/mutex.h>
+#include <freebsd/sys/condvar.h>
+#include <freebsd/sys/selinfo.h>
+#ifndef __rtems__
+#include <freebsd/sys/termios.h>
+#endif
+#include <freebsd/sys/ttycom.h>
+#include <freebsd/sys/ttyqueue.h>
+
+struct cdev;
+struct file;
+struct pgrp;
+struct session;
+struct ucred;
+
+struct ttydevsw;
+
+/*
+ * Per-TTY structure, containing buffers, etc.
+ *
+ * List of locks
+ * (t) locked by t_mtx
+ * (l) locked by tty_list_sx
+ * (c) const until freeing
+ */
+struct tty {
+ struct mtx *t_mtx; /* TTY lock. */
+ struct mtx t_mtxobj; /* Per-TTY lock (when not borrowing). */
+ TAILQ_ENTRY(tty) t_list; /* (l) TTY list entry. */
+ unsigned int t_flags; /* (t) Terminal option flags. */
+/* Keep flags in sync with db_show_tty and pstat(8). */
+#define TF_NOPREFIX 0x00001 /* Don't prepend "tty" to device name. */
+#define TF_INITLOCK 0x00002 /* Create init/lock state devices. */
+#define TF_CALLOUT 0x00004 /* Create "cua" devices. */
+#define TF_OPENED_IN 0x00008 /* "tty" node is in use. */
+#define TF_OPENED_OUT 0x00010 /* "cua" node is in use. */
+#define TF_OPENED_CONS 0x00020 /* Device in use as console. */
+#define TF_OPENED (TF_OPENED_IN|TF_OPENED_OUT|TF_OPENED_CONS)
+#define TF_GONE 0x00040 /* Device node is gone. */
+#define TF_OPENCLOSE 0x00080 /* Device is in open()/close(). */
+#define TF_ASYNC 0x00100 /* Asynchronous I/O enabled. */
+#define TF_LITERAL 0x00200 /* Accept the next character literally. */
+#define TF_HIWAT_IN 0x00400 /* We've reached the input watermark. */
+#define TF_HIWAT_OUT 0x00800 /* We've reached the output watermark. */
+#define TF_HIWAT (TF_HIWAT_IN|TF_HIWAT_OUT)
+#define TF_STOPPED 0x01000 /* Output flow control - stopped. */
+#define TF_EXCLUDE 0x02000 /* Exclusive access. */
+#define TF_BYPASS 0x04000 /* Optimized input path. */
+#define TF_ZOMBIE 0x08000 /* Modem disconnect received. */
+#define TF_HOOK 0x10000 /* TTY has hook attached. */
+#define TF_BUSY_IN 0x20000 /* Process busy in read() -- not supported. */
+#define TF_BUSY_OUT 0x40000 /* Process busy in write(). */
+#define TF_BUSY (TF_BUSY_IN|TF_BUSY_OUT)
+ unsigned int t_revokecnt; /* (t) revoke() count. */
+
+ /* Buffering mechanisms. */
+ struct ttyinq t_inq; /* (t) Input queue. */
+ size_t t_inlow; /* (t) Input low watermark. */
+ struct ttyoutq t_outq; /* (t) Output queue. */
+ size_t t_outlow; /* (t) Output low watermark. */
+
+ /* Sleeping mechanisms. */
+ struct cv t_inwait; /* (t) Input wait queue. */
+ struct cv t_outwait; /* (t) Output wait queue. */
+ struct cv t_outserwait; /* (t) Serial output wait queue. */
+ struct cv t_bgwait; /* (t) Background wait queue. */
+ struct cv t_dcdwait; /* (t) Carrier Detect wait queue. */
+
+ /* Polling mechanisms. */
+ struct selinfo t_inpoll; /* (t) Input poll queue. */
+ struct selinfo t_outpoll; /* (t) Output poll queue. */
+ struct sigio *t_sigio; /* (t) Asynchronous I/O. */
+
+ struct termios t_termios; /* (t) I/O processing flags. */
+ struct winsize t_winsize; /* (t) Window size. */
+ unsigned int t_column; /* (t) Current cursor position. */
+ unsigned int t_writepos; /* (t) Where input was interrupted. */
+ int t_compatflags; /* (t) COMPAT_43TTY flags. */
+
+ /* Init/lock-state devices. */
+ struct termios t_termios_init_in; /* tty%s.init. */
+ struct termios t_termios_lock_in; /* tty%s.lock. */
+ struct termios t_termios_init_out; /* cua%s.init. */
+ struct termios t_termios_lock_out; /* cua%s.lock. */
+
+ struct ttydevsw *t_devsw; /* (c) Driver hooks. */
+ struct ttyhook *t_hook; /* (t) Capture/inject hook. */
+
+ /* Process signal delivery. */
+ struct pgrp *t_pgrp; /* (t) Foreground process group. */
+ struct session *t_session; /* (t) Associated session. */
+ unsigned int t_sessioncnt; /* (t) Backpointing sessions. */
+
+ void *t_devswsoftc; /* (c) Soft config, for drivers. */
+ void *t_hooksoftc; /* (t) Soft config, for hooks. */
+ struct cdev *t_dev; /* (c) Primary character device. */
+};
+
+/*
+ * Userland version of struct tty, for sysctl kern.ttys
+ */
+struct xtty {
+ size_t xt_size; /* Structure size. */
+ size_t xt_insize; /* Input queue size. */
+ size_t xt_incc; /* Canonicalized characters. */
+	size_t	xt_inlc;	/* Input line characters. */
+ size_t xt_inlow; /* Input low watermark. */
+ size_t xt_outsize; /* Output queue size. */
+ size_t xt_outcc; /* Output queue usage. */
+ size_t xt_outlow; /* Output low watermark. */
+ unsigned int xt_column; /* Current column position. */
+ pid_t xt_pgid; /* Foreground process group. */
+ pid_t xt_sid; /* Session. */
+ unsigned int xt_flags; /* Terminal option flags. */
+ dev_t xt_dev; /* Userland device. */
+};
+
+#ifdef _KERNEL
+
+/* Allocation and deallocation. */
+struct tty *tty_alloc(struct ttydevsw *tsw, void *softc);
+struct tty *tty_alloc_mutex(struct ttydevsw *tsw, void *softc, struct mtx *mtx);
+void tty_rel_pgrp(struct tty *tp, struct pgrp *pgrp);
+void tty_rel_sess(struct tty *tp, struct session *sess);
+void tty_rel_gone(struct tty *tp);
+
+#define tty_lock(tp) mtx_lock((tp)->t_mtx)
+#define tty_unlock(tp) mtx_unlock((tp)->t_mtx)
+#define tty_lock_assert(tp,ma) mtx_assert((tp)->t_mtx, (ma))
+#define tty_getlock(tp) ((tp)->t_mtx)
+
+/* Device node creation. */
+void tty_makedev(struct tty *tp, struct ucred *cred, const char *fmt, ...)
+ __printflike(3, 4);
+#define tty_makealias(tp,fmt,...) \
+ make_dev_alias((tp)->t_dev, fmt, ## __VA_ARGS__)
+
+/* Signalling processes. */
+void tty_signal_sessleader(struct tty *tp, int signal);
+void tty_signal_pgrp(struct tty *tp, int signal);
+/* Waking up readers/writers. */
+int tty_wait(struct tty *tp, struct cv *cv);
+int tty_timedwait(struct tty *tp, struct cv *cv, int timo);
+void tty_wakeup(struct tty *tp, int flags);
+
+/* System messages. */
+int tty_checkoutq(struct tty *tp);
+int tty_putchar(struct tty *tp, char c);
+
+int tty_ioctl(struct tty *tp, u_long cmd, void *data, int fflag,
+ struct thread *td);
+int tty_ioctl_compat(struct tty *tp, u_long cmd, caddr_t data,
+ int fflag, struct thread *td);
+void tty_init_console(struct tty *tp, speed_t speed);
+void tty_flush(struct tty *tp, int flags);
+void tty_hiwat_in_block(struct tty *tp);
+void tty_hiwat_in_unblock(struct tty *tp);
+dev_t tty_udev(struct tty *tp);
+#define tty_opened(tp) ((tp)->t_flags & TF_OPENED)
+#define tty_gone(tp) ((tp)->t_flags & TF_GONE)
+#define tty_softc(tp) ((tp)->t_devswsoftc)
+#define tty_devname(tp) devtoname((tp)->t_dev)
+
+/* Status line printing. */
+void tty_info(struct tty *tp);
+
+/* /dev/console selection. */
+void ttyconsdev_select(const char *name);
+
+/* Pseudo-terminal hooks. */
+int pts_alloc_external(int fd, struct thread *td, struct file *fp,
+ struct cdev *dev, const char *name);
+
+/* Drivers and line disciplines also need to call these. */
+#include <freebsd/sys/ttydisc.h>
+#include <freebsd/sys/ttydevsw.h>
+#include <freebsd/sys/ttyhook.h>
+#endif /* _KERNEL */
+
+#endif /* !_SYS_TTY_HH_ */
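Fields marked (t) above are protected by t_mtx, which callers take through the tty_lock()/tty_unlock() wrappers, and tty_wait() sleeps on one of the TTY's condition variables with that lock held. The pattern looks roughly like the sketch below; the helper and its wait condition are hypothetical, and which cv is signalled for a given event depends on the driver and line discipline:

/* Hypothetical helper: block until the output queue drains or a signal
 * arrives.  Assumes the caller does not already hold the TTY lock. */
static int
example_drain_output(struct tty *tp)
{
	int error = 0;

	tty_lock(tp);
	while (!tty_gone(tp) && ttyoutq_bytesused(&tp->t_outq) > 0) {
		error = tty_wait(tp, &tp->t_outwait);	/* sleeps, releases t_mtx */
		if (error != 0)
			break;
	}
	tty_unlock(tp);
	return (error);
}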
diff --git a/freebsd/sys/ttydevsw.h b/freebsd/sys/ttydevsw.h
new file mode 100644
index 00000000..a4035585
--- /dev/null
+++ b/freebsd/sys/ttydevsw.h
@@ -0,0 +1,169 @@
+/*-
+ * Copyright (c) 2008 Ed Schouten <ed@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Portions of this software were developed under sponsorship from Snow
+ * B.V., the Netherlands.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TTYDEVSW_HH_
+#define _SYS_TTYDEVSW_HH_
+
+#ifndef _SYS_TTY_HH_
+#error "can only be included through <sys/tty.h>"
+#endif /* !_SYS_TTY_HH_ */
+
+/*
+ * Driver routines that are called from the line discipline to adjust
+ * hardware parameters and such.
+ */
+typedef int tsw_open_t(struct tty *tp);
+typedef void tsw_close_t(struct tty *tp);
+typedef void tsw_outwakeup_t(struct tty *tp);
+typedef void tsw_inwakeup_t(struct tty *tp);
+typedef int tsw_ioctl_t(struct tty *tp, u_long cmd, caddr_t data,
+ struct thread *td);
+typedef int tsw_param_t(struct tty *tp, struct termios *t);
+typedef int tsw_modem_t(struct tty *tp, int sigon, int sigoff);
+typedef int tsw_mmap_t(struct tty *tp, vm_offset_t offset,
+ vm_paddr_t * paddr, int nprot);
+typedef void tsw_pktnotify_t(struct tty *tp, char event);
+typedef void tsw_free_t(void *softc);
+
+struct ttydevsw {
+ unsigned int tsw_flags; /* Default TTY flags. */
+
+ tsw_open_t *tsw_open; /* Device opening. */
+ tsw_close_t *tsw_close; /* Device closure. */
+
+ tsw_outwakeup_t *tsw_outwakeup; /* Output available. */
+ tsw_inwakeup_t *tsw_inwakeup; /* Input can be stored again. */
+
+ tsw_ioctl_t *tsw_ioctl; /* ioctl() hooks. */
+ tsw_param_t *tsw_param; /* TIOCSETA device parameter setting. */
+ tsw_modem_t *tsw_modem; /* Modem sigon/sigoff. */
+
+ tsw_mmap_t *tsw_mmap; /* mmap() hooks. */
+ tsw_pktnotify_t *tsw_pktnotify; /* TIOCPKT events. */
+
+ tsw_free_t *tsw_free; /* Destructor. */
+};
+
+static __inline int
+ttydevsw_open(struct tty *tp)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ return tp->t_devsw->tsw_open(tp);
+}
+
+static __inline void
+ttydevsw_close(struct tty *tp)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ tp->t_devsw->tsw_close(tp);
+}
+
+static __inline void
+ttydevsw_outwakeup(struct tty *tp)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ /* Prevent spurious wakeups. */
+ if (ttydisc_getc_poll(tp) == 0)
+ return;
+
+ tp->t_devsw->tsw_outwakeup(tp);
+}
+
+static __inline void
+ttydevsw_inwakeup(struct tty *tp)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ /* Prevent spurious wakeups. */
+ if (tp->t_flags & TF_HIWAT_IN)
+ return;
+
+ tp->t_devsw->tsw_inwakeup(tp);
+}
+
+static __inline int
+ttydevsw_ioctl(struct tty *tp, u_long cmd, caddr_t data, struct thread *td)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ return tp->t_devsw->tsw_ioctl(tp, cmd, data, td);
+}
+
+static __inline int
+ttydevsw_param(struct tty *tp, struct termios *t)
+{
+ MPASS(!tty_gone(tp));
+
+ return tp->t_devsw->tsw_param(tp, t);
+}
+
+static __inline int
+ttydevsw_modem(struct tty *tp, int sigon, int sigoff)
+{
+ MPASS(!tty_gone(tp));
+
+ return tp->t_devsw->tsw_modem(tp, sigon, sigoff);
+}
+
+static __inline int
+ttydevsw_mmap(struct tty *tp, vm_offset_t offset, vm_paddr_t *paddr, int nprot)
+{
+ MPASS(!tty_gone(tp));
+
+ return tp->t_devsw->tsw_mmap(tp, offset, paddr, nprot);
+}
+
+static __inline void
+ttydevsw_pktnotify(struct tty *tp, char event)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ tp->t_devsw->tsw_pktnotify(tp, event);
+}
+
+static __inline void
+ttydevsw_free(struct tty *tp)
+{
+ MPASS(tty_gone(tp));
+
+ tp->t_devsw->tsw_free(tty_softc(tp));
+}
+
+#endif /* !_SYS_TTYDEVSW_HH_ */
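A serial driver supplies a struct ttydevsw, allocates its TTY with tty_alloc(), and creates the device nodes with tty_makedev(); hooks left NULL are given defaults by the TTY layer. A minimal sketch, in which the softc, example_hw_putc() and the device name are hypothetical:

#include <freebsd/sys/param.h>
#include <freebsd/sys/tty.h>

struct example_softc {
	struct tty	*sc_tty;	/* hypothetical per-device state */
};

static void	example_hw_putc(struct example_softc *sc, char c);	/* hardware output, not shown */

static void
example_outwakeup(struct tty *tp)
{
	struct example_softc *sc = tty_softc(tp);
	char c;

	/* Called with the TTY lock held whenever output becomes available. */
	while (ttydisc_getc(tp, &c, sizeof(c)) == sizeof(c))
		example_hw_putc(sc, c);
}

static struct ttydevsw example_ttydevsw = {
	.tsw_outwakeup = example_outwakeup,
};

/* Attach path: allocate the TTY and create the /dev nodes. */
static void
example_attach(struct example_softc *sc)
{
	sc->sc_tty = tty_alloc(&example_ttydevsw, sc);
	tty_makedev(sc->sc_tty, NULL, "x%u", 0u);
}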
diff --git a/freebsd/sys/ttydisc.h b/freebsd/sys/ttydisc.h
new file mode 100644
index 00000000..00194988
--- /dev/null
+++ b/freebsd/sys/ttydisc.h
@@ -0,0 +1,86 @@
+/*-
+ * Copyright (c) 2008 Ed Schouten <ed@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Portions of this software were developed under sponsorship from Snow
+ * B.V., the Netherlands.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TTYDISC_HH_
+#define _SYS_TTYDISC_HH_
+
+#ifndef _SYS_TTY_HH_
+#error "can only be included through <sys/tty.h>"
+#endif /* !_SYS_TTY_HH_ */
+
+struct cv;
+struct thread;
+struct tty;
+struct uio;
+
+/* Top half routines. */
+void ttydisc_open(struct tty *tp);
+void ttydisc_close(struct tty *tp);
+int ttydisc_read(struct tty *tp, struct uio *uio, int ioflag);
+int ttydisc_write(struct tty *tp, struct uio *uio, int ioflag);
+void ttydisc_optimize(struct tty *tp);
+
+/* Bottom half routines. */
+void ttydisc_modem(struct tty *tp, int open);
+#define ttydisc_can_bypass(tp) ((tp)->t_flags & TF_BYPASS)
+int ttydisc_rint(struct tty *tp, char c, int flags);
+size_t ttydisc_rint_bypass(struct tty *tp, const void *buf, size_t len);
+void ttydisc_rint_done(struct tty *tp);
+size_t ttydisc_rint_poll(struct tty *tp);
+size_t ttydisc_getc(struct tty *tp, void *buf, size_t len);
+int ttydisc_getc_uio(struct tty *tp, struct uio *uio);
+size_t ttydisc_getc_poll(struct tty *tp);
+
+/* Error codes for ttydisc_rint(). */
+#define TRE_FRAMING 0x01
+#define TRE_PARITY 0x02
+#define TRE_OVERRUN 0x04
+#define TRE_BREAK 0x08
+
+static __inline size_t
+ttydisc_read_poll(struct tty *tp)
+{
+
+ tty_lock_assert(tp, MA_OWNED);
+
+ return ttyinq_bytescanonicalized(&tp->t_inq);
+}
+
+static __inline size_t
+ttydisc_write_poll(struct tty *tp)
+{
+
+ tty_lock_assert(tp, MA_OWNED);
+
+ return ttyoutq_bytesleft(&tp->t_outq);
+}
+
+#endif /* !_SYS_TTYDISC_HH_ */
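On the receive side, a driver's interrupt handler pushes characters into the discipline with ttydisc_rint() and then flushes the pending wakeups with ttydisc_rint_done(). A minimal sketch; the function and its arguments are hypothetical, and the TTY lock must be held around these calls:

/* Hypothetical receive path: hand a buffer of received bytes to the
 * line discipline and wake up any readers. */
static void
example_rx(struct tty *tp, const char *buf, size_t len)
{
	size_t i;

	tty_lock(tp);
	for (i = 0; i < len; i++) {
		if (ttydisc_rint(tp, buf[i], 0) != 0)
			break;			/* input queue is full */
	}
	ttydisc_rint_done(tp);			/* wakes readers and poll/select */
	tty_unlock(tp);
}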
diff --git a/freebsd/sys/ttyhook.h b/freebsd/sys/ttyhook.h
new file mode 100644
index 00000000..a15fbbb7
--- /dev/null
+++ b/freebsd/sys/ttyhook.h
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 2008 Ed Schouten <ed@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TTYHOOK_HH_
+#define _SYS_TTYHOOK_HH_
+
+#ifndef _SYS_TTY_HH_
+#error "can only be included through <sys/tty.h>"
+#endif /* !_SYS_TTY_HH_ */
+
+struct tty;
+
+/*
+ * Hooks interface, which allows one to capture and inject traffic into the
+ * input and output paths of a TTY.
+ */
+
+typedef int th_rint_t(struct tty *tp, char c, int flags);
+typedef size_t th_rint_bypass_t(struct tty *tp, const void *buf, size_t len);
+typedef void th_rint_done_t(struct tty *tp);
+typedef size_t th_rint_poll_t(struct tty *tp);
+
+typedef size_t th_getc_inject_t(struct tty *tp, void *buf, size_t len);
+typedef void th_getc_capture_t(struct tty *tp, const void *buf, size_t len);
+typedef size_t th_getc_poll_t(struct tty *tp);
+
+typedef void th_close_t(struct tty *tp);
+
+struct ttyhook {
+ /* Character input. */
+ th_rint_t *th_rint;
+ th_rint_bypass_t *th_rint_bypass;
+ th_rint_done_t *th_rint_done;
+ th_rint_poll_t *th_rint_poll;
+
+ /* Character output. */
+ th_getc_inject_t *th_getc_inject;
+ th_getc_capture_t *th_getc_capture;
+ th_getc_poll_t *th_getc_poll;
+
+ th_close_t *th_close;
+};
+
+int ttyhook_register(struct tty **, struct proc *, int,
+ struct ttyhook *, void *);
+void ttyhook_unregister(struct tty *);
+#define ttyhook_softc(tp) ((tp)->t_hooksoftc)
+#define ttyhook_hashook(tp,hook) ((tp)->t_hook != NULL && \
+ (tp)->t_hook->th_ ## hook != NULL)
+
+static __inline int
+ttyhook_rint(struct tty *tp, char c, int flags)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ return tp->t_hook->th_rint(tp, c, flags);
+}
+
+static __inline size_t
+ttyhook_rint_bypass(struct tty *tp, const void *buf, size_t len)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ return tp->t_hook->th_rint_bypass(tp, buf, len);
+}
+
+static __inline void
+ttyhook_rint_done(struct tty *tp)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ tp->t_hook->th_rint_done(tp);
+}
+
+static __inline size_t
+ttyhook_rint_poll(struct tty *tp)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ return tp->t_hook->th_rint_poll(tp);
+}
+
+static __inline size_t
+ttyhook_getc_inject(struct tty *tp, void *buf, size_t len)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ return tp->t_hook->th_getc_inject(tp, buf, len);
+}
+
+static __inline void
+ttyhook_getc_capture(struct tty *tp, const void *buf, size_t len)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ tp->t_hook->th_getc_capture(tp, buf, len);
+}
+
+static __inline size_t
+ttyhook_getc_poll(struct tty *tp)
+{
+ tty_lock_assert(tp, MA_OWNED);
+ MPASS(!tty_gone(tp));
+
+ return tp->t_hook->th_getc_poll(tp);
+}
+
+static __inline void
+ttyhook_close(struct tty *tp)
+{
+ tty_lock_assert(tp, MA_OWNED);
+
+ tp->t_hook->th_close(tp);
+}
+
+#endif /* !_SYS_TTYHOOK_HH_ */
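A hook only needs to fill in the callbacks it cares about; ttyhook_register() then attaches it to the TTY behind a given file descriptor. A minimal sketch of a capture-only hook that counts transmitted bytes (all names are illustrative):

/* Hypothetical capture callback: count every byte handed to the driver. */
static void
example_getc_capture(struct tty *tp, const void *buf, size_t len)
{
	size_t *count = ttyhook_softc(tp);

	*count += len;
}

static struct ttyhook example_hook = {
	.th_getc_capture = example_getc_capture,
};

/* Attaching (for a kernel thread td and descriptor fd):
 *	error = ttyhook_register(&tp, td->td_proc, fd, &example_hook, &count);
 */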
diff --git a/freebsd/sys/ttyqueue.h b/freebsd/sys/ttyqueue.h
new file mode 100644
index 00000000..b9228bdc
--- /dev/null
+++ b/freebsd/sys/ttyqueue.h
@@ -0,0 +1,178 @@
+/*-
+ * Copyright (c) 2008 Ed Schouten <ed@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Portions of this software were developed under sponsorship from Snow
+ * B.V., the Netherlands.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_TTYQUEUE_HH_
+#define _SYS_TTYQUEUE_HH_
+
+#ifndef _SYS_TTY_HH_
+#error "can only be included through <sys/tty.h>"
+#endif /* !_SYS_TTY_HH_ */
+
+struct tty;
+struct ttyinq_block;
+struct ttyoutq_block;
+struct uio;
+
+/* Data input queue. */
+struct ttyinq {
+ struct ttyinq_block *ti_firstblock;
+ struct ttyinq_block *ti_startblock;
+ struct ttyinq_block *ti_reprintblock;
+ struct ttyinq_block *ti_lastblock;
+ unsigned int ti_begin;
+ unsigned int ti_linestart;
+ unsigned int ti_reprint;
+ unsigned int ti_end;
+ unsigned int ti_nblocks;
+ unsigned int ti_quota;
+};
+#define TTYINQ_DATASIZE 128
+
+/* Data output queue. */
+struct ttyoutq {
+ struct ttyoutq_block *to_firstblock;
+ struct ttyoutq_block *to_lastblock;
+ unsigned int to_begin;
+ unsigned int to_end;
+ unsigned int to_nblocks;
+ unsigned int to_quota;
+};
+#define TTYOUTQ_DATASIZE (256 - sizeof(struct ttyoutq_block *))
+
+#ifdef _KERNEL
+/* Input queue handling routines. */
+void ttyinq_setsize(struct ttyinq *ti, struct tty *tp, size_t len);
+void ttyinq_free(struct ttyinq *ti);
+int ttyinq_read_uio(struct ttyinq *ti, struct tty *tp, struct uio *uio,
+ size_t readlen, size_t flushlen);
+size_t ttyinq_write(struct ttyinq *ti, const void *buf, size_t len,
+ int quote);
+int ttyinq_write_nofrag(struct ttyinq *ti, const void *buf, size_t len,
+ int quote);
+void ttyinq_canonicalize(struct ttyinq *ti);
+size_t ttyinq_findchar(struct ttyinq *ti, const char *breakc, size_t maxlen,
+ char *lastc);
+void ttyinq_flush(struct ttyinq *ti);
+int ttyinq_peekchar(struct ttyinq *ti, char *c, int *quote);
+void ttyinq_unputchar(struct ttyinq *ti);
+void ttyinq_reprintpos_set(struct ttyinq *ti);
+void ttyinq_reprintpos_reset(struct ttyinq *ti);
+
+static __inline size_t
+ttyinq_getsize(struct ttyinq *ti)
+{
+ return (ti->ti_nblocks * TTYINQ_DATASIZE);
+}
+
+static __inline size_t
+ttyinq_getallocatedsize(struct ttyinq *ti)
+{
+
+ return (ti->ti_quota * TTYINQ_DATASIZE);
+}
+
+static __inline size_t
+ttyinq_bytesleft(struct ttyinq *ti)
+{
+ size_t len;
+
+ /* Make sure the usage never exceeds the length. */
+ len = ti->ti_nblocks * TTYINQ_DATASIZE;
+ MPASS(len >= ti->ti_end);
+
+ return (len - ti->ti_end);
+}
+
+static __inline size_t
+ttyinq_bytescanonicalized(struct ttyinq *ti)
+{
+ MPASS(ti->ti_begin <= ti->ti_linestart);
+
+ return (ti->ti_linestart - ti->ti_begin);
+}
+
+static __inline size_t
+ttyinq_bytesline(struct ttyinq *ti)
+{
+ MPASS(ti->ti_linestart <= ti->ti_end);
+
+ return (ti->ti_end - ti->ti_linestart);
+}
+
+/* Input buffer iteration. */
+typedef void ttyinq_line_iterator_t(void *data, char c, int flags);
+void ttyinq_line_iterate_from_linestart(struct ttyinq *ti,
+ ttyinq_line_iterator_t *iterator, void *data);
+void ttyinq_line_iterate_from_reprintpos(struct ttyinq *ti,
+ ttyinq_line_iterator_t *iterator, void *data);
+
+/* Output queue handling routines. */
+void ttyoutq_flush(struct ttyoutq *to);
+void ttyoutq_setsize(struct ttyoutq *to, struct tty *tp, size_t len);
+void ttyoutq_free(struct ttyoutq *to);
+size_t ttyoutq_read(struct ttyoutq *to, void *buf, size_t len);
+int ttyoutq_read_uio(struct ttyoutq *to, struct tty *tp, struct uio *uio);
+size_t ttyoutq_write(struct ttyoutq *to, const void *buf, size_t len);
+int ttyoutq_write_nofrag(struct ttyoutq *to, const void *buf, size_t len);
+
+static __inline size_t
+ttyoutq_getsize(struct ttyoutq *to)
+{
+ return (to->to_nblocks * TTYOUTQ_DATASIZE);
+}
+
+static __inline size_t
+ttyoutq_getallocatedsize(struct ttyoutq *to)
+{
+
+ return (to->to_quota * TTYOUTQ_DATASIZE);
+}
+
+static __inline size_t
+ttyoutq_bytesleft(struct ttyoutq *to)
+{
+ size_t len;
+
+ /* Make sure the usage never exceeds the length. */
+ len = to->to_nblocks * TTYOUTQ_DATASIZE;
+ MPASS(len >= to->to_end);
+
+ return (len - to->to_end);
+}
+
+static __inline size_t
+ttyoutq_bytesused(struct ttyoutq *to)
+{
+ return (to->to_end - to->to_begin);
+}
+#endif /* _KERNEL */
+
+#endif /* !_SYS_TTYQUEUE_HH_ */
diff --git a/freebsd/sys/user.h b/freebsd/sys/user.h
new file mode 100644
index 00000000..df788c06
--- /dev/null
+++ b/freebsd/sys/user.h
@@ -0,0 +1,414 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California.
+ * Copyright (c) 2007 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)user.h 8.2 (Berkeley) 9/23/93
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_USER_HH_
+#define _SYS_USER_HH_
+
+#include <freebsd/machine/pcb.h>
+#ifndef _KERNEL
+/* stuff that *used* to be included by user.h, or is now needed */
+#include <freebsd/sys/errno.h>
+#include <freebsd/sys/time.h>
+#include <freebsd/sys/resource.h>
+#include <freebsd/sys/ucred.h>
+#include <freebsd/sys/uio.h>
+#include <freebsd/sys/queue.h>
+#include <freebsd/sys/_lock.h>
+#include <freebsd/sys/_mutex.h>
+#include <freebsd/sys/proc.h>
+#include <freebsd/vm/vm.h> /* XXX */
+#include <freebsd/vm/vm_param.h> /* XXX */
+#include <freebsd/vm/pmap.h> /* XXX */
+#include <freebsd/vm/vm_map.h> /* XXX */
+#endif /* !_KERNEL */
+#ifndef _SYS_RESOURCEVAR_HH_
+#include <freebsd/sys/resourcevar.h>
+#endif
+#ifndef _SYS_SIGNALVAR_HH_
+#include <freebsd/sys/signalvar.h>
+#endif
+#ifndef _SYS_SOCKET_VAR_HH_
+#include <freebsd/sys/socket.h>
+#endif
+
+/*
+ * KERN_PROC subtype ops return arrays of selected proc structure entries:
+ *
+ * This struct includes several arrays of spare space, with different arrays
+ * for different standard C-types. When adding new variables to this struct,
+ * the space for byte-aligned data should be taken from the ki_sparestring,
+ * pointers from ki_spareptrs, word-aligned data from ki_spareints, and
+ * doubleword-aligned data from ki_sparelongs. Make sure the space for new
+ * variables comes from the array which matches the size and alignment of
+ * those variables on ALL hardware platforms, and then adjust the appropriate
+ * KI_NSPARE_* value(s) to match.
+ *
+ * Always verify that sizeof(struct kinfo_proc) == KINFO_PROC_SIZE on all
+ * platforms after you have added new variables. Note that if you change
+ * the value of KINFO_PROC_SIZE, then many userland programs will stop
+ * working until they are recompiled!
+ *
+ * Once you have added the new field, you will need to add code to initialize
+ * it in two places: function fill_kinfo_proc in sys/kern/kern_proc.c and
+ * function kvm_proclist in lib/libkvm/kvm_proc.c .
+ */
+#define KI_NSPARE_INT 9
+#define KI_NSPARE_LONG 12
+#define KI_NSPARE_PTR 6
+
+#ifndef _KERNEL
+#ifndef KINFO_PROC_SIZE
+#error "Unknown architecture"
+#endif
+#endif /* !_KERNEL */
+
+#define WMESGLEN 8 /* size of returned wchan message */
+#define LOCKNAMELEN 8 /* size of returned lock name */
+#define OCOMMLEN 16 /* size of returned thread name */
+#define COMMLEN 19 /* size of returned ki_comm name */
+#define KI_EMULNAMELEN 16 /* size of returned ki_emul */
+#define KI_NGROUPS 16 /* number of groups in ki_groups */
+#define LOGNAMELEN 17 /* size of returned ki_login */
+
+/*
+ * Steal a bit from ki_cr_flags (cr_flags is never used) to indicate
+ * that the cred had more than KI_NGROUPS groups.
+ */
+#define KI_CRF_GRP_OVERFLOW 0x80000000
+
+struct kinfo_proc {
+ int ki_structsize; /* size of this structure */
+ int ki_layout; /* reserved: layout identifier */
+ struct pargs *ki_args; /* address of command arguments */
+ struct proc *ki_paddr; /* address of proc */
+ struct user *ki_addr; /* kernel virtual addr of u-area */
+ struct vnode *ki_tracep; /* pointer to trace file */
+ struct vnode *ki_textvp; /* pointer to executable file */
+ struct filedesc *ki_fd; /* pointer to open file info */
+ struct vmspace *ki_vmspace; /* pointer to kernel vmspace struct */
+ void *ki_wchan; /* sleep address */
+ pid_t ki_pid; /* Process identifier */
+ pid_t ki_ppid; /* parent process id */
+ pid_t ki_pgid; /* process group id */
+ pid_t ki_tpgid; /* tty process group id */
+ pid_t ki_sid; /* Process session ID */
+ pid_t ki_tsid; /* Terminal session ID */
+ short ki_jobc; /* job control counter */
+ short ki_spare_short1; /* unused (just here for alignment) */
+ dev_t ki_tdev; /* controlling tty dev */
+ sigset_t ki_siglist; /* Signals arrived but not delivered */
+ sigset_t ki_sigmask; /* Current signal mask */
+ sigset_t ki_sigignore; /* Signals being ignored */
+ sigset_t ki_sigcatch; /* Signals being caught by user */
+ uid_t ki_uid; /* effective user id */
+ uid_t ki_ruid; /* Real user id */
+ uid_t ki_svuid; /* Saved effective user id */
+ gid_t ki_rgid; /* Real group id */
+ gid_t ki_svgid; /* Saved effective group id */
+ short ki_ngroups; /* number of groups */
+ short ki_spare_short2; /* unused (just here for alignment) */
+ gid_t ki_groups[KI_NGROUPS]; /* groups */
+ vm_size_t ki_size; /* virtual size */
+ segsz_t ki_rssize; /* current resident set size in pages */
+ segsz_t ki_swrss; /* resident set size before last swap */
+ segsz_t ki_tsize; /* text size (pages) XXX */
+ segsz_t ki_dsize; /* data size (pages) XXX */
+ segsz_t ki_ssize; /* stack size (pages) */
+ u_short ki_xstat; /* Exit status for wait & stop signal */
+ u_short ki_acflag; /* Accounting flags */
+ fixpt_t ki_pctcpu; /* %cpu for process during ki_swtime */
+ u_int ki_estcpu; /* Time averaged value of ki_cpticks */
+ u_int ki_slptime; /* Time since last blocked */
+ u_int ki_swtime; /* Time swapped in or out */
+ int ki_spareint1; /* unused (just here for alignment) */
+ u_int64_t ki_runtime; /* Real time in microsec */
+ struct timeval ki_start; /* starting time */
+ struct timeval ki_childtime; /* time used by process children */
+ long ki_flag; /* P_* flags */
+ long ki_kiflag; /* KI_* flags (below) */
+ int ki_traceflag; /* Kernel trace points */
+ char ki_stat; /* S* process status */
+ signed char ki_nice; /* Process "nice" value */
+ char ki_lock; /* Process lock (prevent swap) count */
+ char ki_rqindex; /* Run queue index */
+ u_char ki_oncpu; /* Which cpu we are on */
+ u_char ki_lastcpu; /* Last cpu we were on */
+ char ki_ocomm[OCOMMLEN+1]; /* thread name */
+ char ki_wmesg[WMESGLEN+1]; /* wchan message */
+ char ki_login[LOGNAMELEN+1]; /* setlogin name */
+ char ki_lockname[LOCKNAMELEN+1]; /* lock name */
+ char ki_comm[COMMLEN+1]; /* command name */
+ char ki_emul[KI_EMULNAMELEN+1]; /* emulation name */
+ /*
+ * When adding new variables, take space for char-strings from the
+ * front of ki_sparestrings, and ints from the end of ki_spareints.
+ * That way the spare room from both arrays will remain contiguous.
+ */
+ char ki_sparestrings[68]; /* spare string space */
+ int ki_spareints[KI_NSPARE_INT]; /* spare room for growth */
+ u_int ki_cr_flags; /* Credential flags */
+ int ki_jid; /* Process jail ID */
+ int ki_numthreads; /* XXXKSE number of threads in total */
+ lwpid_t ki_tid; /* XXXKSE thread id */
+ struct priority ki_pri; /* process priority */
+ struct rusage ki_rusage; /* process rusage statistics */
+ /* XXX - most fields in ki_rusage_ch are not (yet) filled in */
+ struct rusage ki_rusage_ch; /* rusage of children processes */
+ struct pcb *ki_pcb; /* kernel virtual addr of pcb */
+ void *ki_kstack; /* kernel virtual addr of stack */
+ void *ki_udata; /* User convenience pointer */
+ struct thread *ki_tdaddr; /* address of thread */
+ /*
+ * When adding new variables, take space for pointers from the
+ * front of ki_spareptrs, and longs from the end of ki_sparelongs.
+ * That way the spare room from both arrays will remain contiguous.
+ */
+ void *ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */
+ long ki_sparelongs[KI_NSPARE_LONG]; /* spare room for growth */
+ long ki_sflag; /* PS_* flags */
+ long ki_tdflags; /* XXXKSE kthread flag */
+};
+void fill_kinfo_proc(struct proc *, struct kinfo_proc *);
+/* XXX - the following two defines are temporary */
+#define ki_childstime ki_rusage_ch.ru_stime
+#define ki_childutime ki_rusage_ch.ru_utime
+
+/*
+ * Legacy PS_ flag. This moved to p_flag but is maintained for
+ * compatibility.
+ */
+#define PS_INMEM 0x00001 /* Loaded into memory. */
+
+/* ki_sessflag values */
+#define KI_CTTY 0x00000001 /* controlling tty vnode active */
+#define KI_SLEADER 0x00000002 /* session leader */
+#define KI_LOCKBLOCK 0x00000004 /* proc blocked on lock ki_lockname */
+
+/*
+ * This used to be the per-process structure containing data that
+ * isn't needed in core when the process is swapped out, but now it
+ * remains only for the benefit of a.out core dumps.
+ */
+struct user {
+ struct pstats u_stats; /* *p_stats */
+ struct kinfo_proc u_kproc; /* eproc */
+};
+
+/*
+ * The KERN_PROC_FILE sysctl allows a process to dump the file descriptor
+ * array of another process.
+ */
+#define KF_TYPE_NONE 0
+#define KF_TYPE_VNODE 1
+#define KF_TYPE_SOCKET 2
+#define KF_TYPE_PIPE 3
+#define KF_TYPE_FIFO 4
+#define KF_TYPE_KQUEUE 5
+#define KF_TYPE_CRYPTO 6
+#define KF_TYPE_MQUEUE 7
+#define KF_TYPE_SHM 8
+#define KF_TYPE_SEM 9
+#define KF_TYPE_PTS 10
+#define KF_TYPE_UNKNOWN 255
+
+#define KF_VTYPE_VNON 0
+#define KF_VTYPE_VREG 1
+#define KF_VTYPE_VDIR 2
+#define KF_VTYPE_VBLK 3
+#define KF_VTYPE_VCHR 4
+#define KF_VTYPE_VLNK 5
+#define KF_VTYPE_VSOCK 6
+#define KF_VTYPE_VFIFO 7
+#define KF_VTYPE_VBAD 8
+#define KF_VTYPE_UNKNOWN 255
+
+#define KF_FD_TYPE_CWD -1 /* Current working directory */
+#define KF_FD_TYPE_ROOT -2 /* Root directory */
+#define KF_FD_TYPE_JAIL -3 /* Jail directory */
+
+#define KF_FLAG_READ 0x00000001
+#define KF_FLAG_WRITE 0x00000002
+#define KF_FLAG_APPEND 0x00000004
+#define KF_FLAG_ASYNC 0x00000008
+#define KF_FLAG_FSYNC 0x00000010
+#define KF_FLAG_NONBLOCK 0x00000020
+#define KF_FLAG_DIRECT 0x00000040
+#define KF_FLAG_HASLOCK 0x00000080
+
+/*
+ * Old format. Has variable hidden padding due to alignment.
+ * This is a compatibility hack for packages built before 7.1.
+ */
+#if defined(__amd64__)
+#define KINFO_OFILE_SIZE 1328
+#endif
+#if defined(__i386__)
+#define KINFO_OFILE_SIZE 1324
+#endif
+
+struct kinfo_ofile {
+ int kf_structsize; /* Size of kinfo_file. */
+ int kf_type; /* Descriptor type. */
+ int kf_fd; /* Array index. */
+ int kf_ref_count; /* Reference count. */
+ int kf_flags; /* Flags. */
+ /* XXX Hidden alignment padding here on amd64 */
+ off_t kf_offset; /* Seek location. */
+ int kf_vnode_type; /* Vnode type. */
+ int kf_sock_domain; /* Socket domain. */
+ int kf_sock_type; /* Socket type. */
+ int kf_sock_protocol; /* Socket protocol. */
+ char kf_path[PATH_MAX]; /* Path to file, if any. */
+ struct sockaddr_storage kf_sa_local; /* Socket address. */
+ struct sockaddr_storage kf_sa_peer; /* Peer address. */
+};
+
+#if defined(__amd64__) || defined(__i386__)
+#define KINFO_FILE_SIZE 1392
+#endif
+
+struct kinfo_file {
+ int kf_structsize; /* Variable size of record. */
+ int kf_type; /* Descriptor type. */
+ int kf_fd; /* Array index. */
+ int kf_ref_count; /* Reference count. */
+ int kf_flags; /* Flags. */
+ int _kf_pad0; /* Round to 64 bit alignment */
+ int64_t kf_offset; /* Seek location. */
+ int kf_vnode_type; /* Vnode type. */
+ int kf_sock_domain; /* Socket domain. */
+ int kf_sock_type; /* Socket type. */
+ int kf_sock_protocol; /* Socket protocol. */
+ struct sockaddr_storage kf_sa_local; /* Socket address. */
+ struct sockaddr_storage kf_sa_peer; /* Peer address. */
+ int _kf_ispare[16]; /* Space for more stuff. */
+ /* Truncated before copyout in sysctl */
+ char kf_path[PATH_MAX]; /* Path to file, if any. */
+};
+
+/*
+ * The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of
+ * another process as a series of entries.
+ */
+#define KVME_TYPE_NONE 0
+#define KVME_TYPE_DEFAULT 1
+#define KVME_TYPE_VNODE 2
+#define KVME_TYPE_SWAP 3
+#define KVME_TYPE_DEVICE 4
+#define KVME_TYPE_PHYS 5
+#define KVME_TYPE_DEAD 6
+#define KVME_TYPE_SG 7
+#define KVME_TYPE_UNKNOWN 255
+
+#define KVME_PROT_READ 0x00000001
+#define KVME_PROT_WRITE 0x00000002
+#define KVME_PROT_EXEC 0x00000004
+
+#define KVME_FLAG_COW 0x00000001
+#define KVME_FLAG_NEEDS_COPY 0x00000002
+#define KVME_FLAG_NOCOREDUMP 0x00000004
+
+#if defined(__amd64__)
+#define KINFO_OVMENTRY_SIZE 1168
+#endif
+#if defined(__i386__)
+#define KINFO_OVMENTRY_SIZE 1128
+#endif
+
+struct kinfo_ovmentry {
+ int kve_structsize; /* Size of kinfo_vmmapentry. */
+ int kve_type; /* Type of map entry. */
+ void *kve_start; /* Starting address. */
+ void *kve_end; /* Finishing address. */
+ int kve_flags; /* Flags on map entry. */
+ int kve_resident; /* Number of resident pages. */
+ int kve_private_resident; /* Number of private pages. */
+ int kve_protection; /* Protection bitmask. */
+ int kve_ref_count; /* VM obj ref count. */
+ int kve_shadow_count; /* VM obj shadow count. */
+ char kve_path[PATH_MAX]; /* Path to VM obj, if any. */
+ void *_kve_pspare[8]; /* Space for more stuff. */
+ off_t kve_offset; /* Mapping offset in object */
+ uint64_t kve_fileid; /* inode number if vnode */
+ dev_t kve_fsid; /* dev_t of vnode location */
+ int _kve_ispare[3]; /* Space for more stuff. */
+};
+
+#if defined(__amd64__) || defined(__i386__)
+#define KINFO_VMENTRY_SIZE 1160
+#endif
+
+struct kinfo_vmentry {
+ int kve_structsize; /* Variable size of record. */
+ int kve_type; /* Type of map entry. */
+ uint64_t kve_start; /* Starting address. */
+ uint64_t kve_end; /* Finishing address. */
+ uint64_t kve_offset; /* Mapping offset in object */
+ uint64_t kve_fileid; /* inode number if vnode */
+ uint32_t kve_fsid; /* dev_t of vnode location */
+ int kve_flags; /* Flags on map entry. */
+ int kve_resident; /* Number of resident pages. */
+ int kve_private_resident; /* Number of private pages. */
+ int kve_protection; /* Protection bitmask. */
+ int kve_ref_count; /* VM obj ref count. */
+ int kve_shadow_count; /* VM obj shadow count. */
+ int _kve_pad0; /* 64bit align next field */
+ int _kve_ispare[16]; /* Space for more stuff. */
+ /* Truncated before copyout in sysctl */
+ char kve_path[PATH_MAX]; /* Path to VM obj, if any. */
+};
+
+/*
+ * The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of
+ * another process as a series of entries. Each stack is represented by a
+ * series of symbol names and offsets as generated by stack_sbuf_print(9).
+ */
+#define KKST_MAXLEN 1024
+
+#define KKST_STATE_STACKOK 0 /* Stack is valid. */
+#define KKST_STATE_SWAPPED 1 /* Stack swapped out. */
+#define KKST_STATE_RUNNING 2 /* Stack ephemeral. */
+
+#if defined(__amd64__) || defined(__i386__)
+#define KINFO_KSTACK_SIZE 1096
+#endif
+
+struct kinfo_kstack {
+ lwpid_t kkst_tid; /* ID of thread. */
+ int kkst_state; /* Validity of stack. */
+ char kkst_trace[KKST_MAXLEN]; /* String representing stack. */
+ int _kkst_ispare[16]; /* Space for more stuff. */
+};
+
+#endif
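On a regular FreeBSD host the kinfo_proc records are read back through the KERN_PROC sysctl tree; the layout above is what libkvm and ps(1) consume. A minimal userland sketch (FreeBSD, not RTEMS):

#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* Ask the kernel for our own process entry. */
	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, (int)getpid() };
	struct kinfo_proc kp;
	size_t len = sizeof(kp);

	if (sysctl(mib, 4, &kp, &len, NULL, 0) == -1)
		return (1);

	printf("pid %d comm %s threads %d\n",
	    (int)kp.ki_pid, kp.ki_comm, kp.ki_numthreads);
	return (0);
}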
diff --git a/rtemsbsd/src/rtems-bsd-condvar.c b/rtemsbsd/src/rtems-bsd-condvar.c
index 7c16940d..63f30007 100644
--- a/rtemsbsd/src/rtems-bsd-condvar.c
+++ b/rtemsbsd/src/rtems-bsd-condvar.c
@@ -183,3 +183,20 @@ cv_broadcastpri(struct cv *cv, int pri)
rv = pthread_cond_broadcast(&cv->cv_id);
BSD_ASSERT_RV(rv);
}
+int
+_cv_wait_sig(struct cv *cvp, struct lock_object *lock)
+{
+ /* XXX */
+	return _cv_wait_support(cvp, lock, 0, true);
+}
+
+int
+_cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo)
+{
+ /* XXX */
+ if (timo <= 0) {
+ timo = 1;
+ }
+
+ return _cv_wait_support(cvp, lock, timo, true);
+}
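These two wrappers back the cv_wait_sig() and cv_timedwait_sig() macros from <sys/condvar.h>, i.e. the interruptible sleeps used by code such as select(). A typical consumer looks like the sketch below; the mutex, cv and flag are hypothetical:

#include <freebsd/sys/param.h>
#include <freebsd/sys/lock.h>
#include <freebsd/sys/mutex.h>
#include <freebsd/sys/condvar.h>

/* Sleep interruptibly until *ready becomes non-zero. */
static int
example_wait_ready(struct mtx *m, struct cv *cv, int *ready)
{
	int error = 0;

	mtx_lock(m);
	while (*ready == 0) {
		error = cv_wait_sig(cv, m);	/* EINTR/ERESTART on signal */
		if (error != 0)
			break;
	}
	mtx_unlock(m);
	return (error);
}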
diff --git a/rtemsbsd/src/rtems-bsd-thread.c b/rtemsbsd/src/rtems-bsd-thread.c
index fc414114..ca1ff1f2 100644
--- a/rtemsbsd/src/rtems-bsd-thread.c
+++ b/rtemsbsd/src/rtems-bsd-thread.c
@@ -49,6 +49,7 @@
#include <freebsd/sys/mutex.h>
#include <freebsd/sys/jail.h>
#include <freebsd/sys/resourcevar.h>
+#include <freebsd/sys/filedesc.h>
RTEMS_CHAIN_DEFINE_EMPTY(rtems_bsd_thread_chain);
@@ -56,6 +57,9 @@ RTEMS_CHAIN_DEFINE_EMPTY(rtems_bsd_thread_chain);
static struct ucred FIXME_ucred = {
.cr_ref = 1 /* reference count */
};
+static struct filedesc FIXME_fd = {
+ .fd_ofiles = NULL /* file structures for open files */
+};
static struct proc FIXME_proc = {
.p_ucred = NULL /* (c) Process owner's identity. */
};
@@ -67,29 +71,34 @@ static struct prison FIXME_prison = {
static struct uidinfo FIXME_uidinfo; /* per euid resource consumption */
static struct uidinfo FIXME_ruidinfo; /* per ruid resource consumption */
+static struct thread *rtems_bsd_current_td = NULL;
+
+static void rtems_bsd_thread_descriptor_dtor(void *td)
+{
+ // XXX are there other pieces to clean up?
+ free(td, M_TEMP);
+}
+
static struct thread *
-rtems_bsd_thread_init_note( rtems_id id )
+rtems_bsd_thread_init( rtems_id id )
{
rtems_status_code sc = RTEMS_SUCCESSFUL;
unsigned index = 0;
char name [5] = "_???";
- struct thread *td = malloc(sizeof(struct thread), M_TEMP, M_WAITOK | M_ZERO);
- struct proc *proc;
-
- if ( td == NULL )
- return td;
+ struct thread *td;
+ struct proc *proc;
- sc = rtems_task_set_note( id, RTEMS_NOTEPAD_0, ( uint32_t )td );
- if (sc != RTEMS_SUCCESSFUL) {
- free(td, M_TEMP);
+ td = malloc(sizeof(struct thread), M_TEMP, M_WAITOK | M_ZERO);
+ if (td == NULL)
return NULL;
- }
+ // Initialize the thread descriptor
index = rtems_object_id_get_index(id);
snprintf(name + 1, sizeof(name) - 1, "%03u", index);
sc = rtems_object_set_name(id, name);
if (sc != RTEMS_SUCCESSFUL) {
- rtems_task_delete(id);
+ // XXX does the thread get deleted? Seems wrong
+ // rtems_task_delete(id);
free(td, M_TEMP);
return NULL;
}
@@ -98,55 +107,62 @@ rtems_bsd_thread_init_note( rtems_id id )
td->td_ucred = crhold(&FIXME_ucred);
td->td_proc = &FIXME_proc;
- if (td->td_proc->p_ucred != NULL)
- return td;
-
- if (prison_init ) {
- mtx_init(&FIXME_prison.pr_mtx, "prison lock", NULL, MTX_DEF | MTX_DUPOK);
-
- prison_init = 0;
- }
+ if (td->td_proc->p_ucred == NULL) {
+ if ( prison_init ) {
+ mtx_init(&FIXME_prison.pr_mtx, "prison lock", NULL, MTX_DEF | MTX_DUPOK);
+ prison_init = 0;
+ }
+ FIXME_ucred.cr_prison = &FIXME_prison; /* jail(2) */
+ FIXME_ucred.cr_uidinfo = uifind(0);
+ FIXME_ucred.cr_ruidinfo = uifind(0);
+ FIXME_ucred.cr_ngroups = 1; /* group 0 */
+
+ td->td_proc->p_ucred = crhold(&FIXME_ucred);
+ mtx_init(&td->td_proc->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
+ td->td_proc->p_pid = getpid();
+ td->td_proc->p_fibnum = 0;
+ td->td_proc->p_fd = &FIXME_fd;
+ sx_init_flags(&FIXME_fd.fd_sx, "config SX thread lock", SX_DUPOK);
+ }
- FIXME_ucred.cr_prison = &FIXME_prison; /* jail(2) */
- FIXME_ucred.cr_uidinfo = uifind(0);
- FIXME_ucred.cr_ruidinfo = uifind(0);
- FIXME_ucred.cr_ngroups = 1; /* group 0 */
+ // Actually set the global pointer
+ rtems_bsd_current_td = td;
- td->td_proc->p_ucred = crhold(&FIXME_ucred);
- mtx_init(&td->td_proc->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
- td->td_proc->p_pid = getpid();
- td->td_proc->p_fibnum = 0;
+	// Register the thread descriptor pointer as a per-task variable
+ sc = rtems_task_variable_add(
+ id,
+ &rtems_bsd_current_td,
+ rtems_bsd_thread_descriptor_dtor
+ );
+ if (sc != RTEMS_SUCCESSFUL) {
+ free(td, M_TEMP);
+ return NULL;
+ }
- return td;
+ return td;
}
/*
- * XXX Threads which delete themselves will leak this
- * XXX Maybe better integrated into the TCB OR a task variable.
- * XXX but this is OK for now
+ * Threads which delete themselves would otherwise leak the thread
+ * descriptor, so it is kept in a per-task variable whose destructor
+ * frees it when the task is deleted.
*/
struct thread *rtems_get_curthread(void)
{
struct thread *td;
- rtems_status_code sc;
- rtems_id id;
/*
* If we already have a struct thread associated with this thread,
- * obtain it
+ * obtain it. Otherwise, allocate and initialize one.
*/
- id = rtems_task_self();
-
- sc = rtems_task_get_note( id, RTEMS_NOTEPAD_0, (uint32_t *) &td );
- if (sc != RTEMS_SUCCESSFUL) {
- panic("rtems_get_curthread: get note Error\n");
+ td = rtems_bsd_current_td;
+ if ( td == NULL ) {
+ td = rtems_bsd_thread_init( rtems_task_self() );
+ if ( td == NULL ){
+	    panic("rtems_get_curthread: Unable to allocate thread descriptor\n");
+ }
}
- td = rtems_bsd_thread_init_note( id);
- if ( td == NULL ){
- panic("rtems_get_curthread: Unable to generate thread note\n");
- }
-
return td;
}
@@ -163,6 +179,8 @@ rtems_bsd_thread_start(struct thread **td_ptr, void (*func)(void *), void *arg,
BSD_ASSERT(pages >= 0);
+ memset( td, 0, sizeof(struct thread) );
+
sc = rtems_task_create(
rtems_build_name('_', 'T', 'S', 'K'),
BSD_TASK_PRIORITY_NORMAL,
@@ -177,8 +195,8 @@ rtems_bsd_thread_start(struct thread **td_ptr, void (*func)(void *), void *arg,
return ENOMEM;
}
- td = rtems_bsd_thread_init_note( id );
- if (!td)
+ td = rtems_bsd_thread_init( id );
+ if (!td)
return ENOMEM;
sc = rtems_task_start(id, (rtems_task_entry) func, (rtems_task_argument) arg);