Diffstat (limited to 'cpukit/libblock/src/bdbuf.c')
-rw-r--r--  cpukit/libblock/src/bdbuf.c | 2989 ++++++++++++++++++++++++++++++++
 1 file changed, 2989 insertions(+), 0 deletions(-)
diff --git a/cpukit/libblock/src/bdbuf.c b/cpukit/libblock/src/bdbuf.c
new file mode 100644
index 0000000000..841c03bdbe
--- /dev/null
+++ b/cpukit/libblock/src/bdbuf.c
@@ -0,0 +1,2989 @@
+/**
+ * @file
+ *
+ * @ingroup rtems_bdbuf
+ *
+ * Block device buffer management.
+ */
+
+/*
+ * Disk I/O buffering
+ * Buffer management
+ *
+ * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
+ * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
+ * Victor V. Vengerov <vvv@oktet.ru>
+ * Alexander Kukuta <kam@oktet.ru>
+ *
+ * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
+ * Rewritten to remove score mutex access. Fixes many performance
+ * issues.
+ *
+ * Copyright (c) 2009 embedded brains GmbH.
+ *
+ * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
+ */
+
+/**
+ * Set to 1 to enable debug tracing.
+ */
+#define RTEMS_BDBUF_TRACE 0
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include <limits.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <rtems.h>
+#include <rtems/error.h>
+#include <rtems/malloc.h>
+
+#include "rtems/bdbuf.h"
+
+#define BDBUF_INVALID_DEV ((dev_t) -1)
+
+/*
+ * Simpler label for this file.
+ */
+#define bdbuf_config rtems_bdbuf_configuration
+
+/**
+ * The swapout transfer transaction data. This data is passed to a worker
+ * thread to handle the write phase of the transfer.
+ */
+typedef struct rtems_bdbuf_swapout_transfer
+{
+ rtems_chain_control bds; /**< The transfer list of BDs. */
+ dev_t dev; /**< The device the transfer is for. */
+  bool                  syncing;    /**< The transfer is part of a sync. */
+ rtems_blkdev_request* write_req; /**< The write request array. */
+ uint32_t bufs_per_bd; /**< Number of buffers per bd. */
+} rtems_bdbuf_swapout_transfer;
+
+/**
+ * Swapout worker thread. These are available to take processing from the
+ * main swapout thread and handle the I/O operation.
+ */
+typedef struct rtems_bdbuf_swapout_worker
+{
+ rtems_chain_node link; /**< The threads sit on a chain when
+ * idle. */
+ rtems_id id; /**< The id of the task so we can wake
+ * it. */
+ volatile bool enabled; /**< The worker is enabled. */
+ rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
+ * thread. */
+} rtems_bdbuf_swapout_worker;
+
+/**
+ * Buffer waiters synchronization.
+ */
+typedef struct rtems_bdbuf_waiters {
+ volatile unsigned count;
+ rtems_id sema;
+} rtems_bdbuf_waiters;
+
+/**
+ * The BD buffer cache.
+ */
+typedef struct rtems_bdbuf_cache
+{
+ rtems_id swapout; /**< Swapout task ID */
+ volatile bool swapout_enabled; /**< Swapout is only running if
+ * enabled. Set to false to kill the
+ * swap out task. It deletes itself. */
+ rtems_chain_control swapout_workers; /**< The work threads for the swapout
+ * task. */
+
+ rtems_bdbuf_buffer* bds; /**< Pointer to table of buffer
+ * descriptors. */
+ void* buffers; /**< The buffer's memory. */
+ size_t buffer_min_count; /**< Number of minimum size buffers
+ * that fit the buffer memory. */
+ size_t max_bds_per_group; /**< The number of BDs of minimum
+ * buffer size that fit in a group. */
+ uint32_t flags; /**< Configuration flags. */
+
+ rtems_id lock; /**< The cache lock. It locks all
+ * cache data, BD and lists. */
+ rtems_id sync_lock; /**< Sync calls block writes. */
+ volatile bool sync_active; /**< True if a sync is active. */
+ volatile rtems_id sync_requester; /**< The sync requester. */
+ volatile dev_t sync_device; /**< The device to sync and
+ * BDBUF_INVALID_DEV not a device
+ * sync. */
+
+ rtems_bdbuf_buffer* tree; /**< Buffer descriptor lookup AVL tree
+ * root. There is only one. */
+ rtems_chain_control lru; /**< Least recently used list */
+ rtems_chain_control modified; /**< Modified buffers list */
+ rtems_chain_control sync; /**< Buffers to sync list */
+
+ rtems_bdbuf_waiters access_waiters; /**< Wait for a buffer in
+ * ACCESS_CACHED, ACCESS_MODIFIED or
+ * ACCESS_EMPTY
+ * state. */
+ rtems_bdbuf_waiters transfer_waiters; /**< Wait for a buffer in TRANSFER
+ * state. */
+ rtems_bdbuf_waiters buffer_waiters; /**< Wait for a buffer and no one is
+ * available. */
+
+ size_t group_count; /**< The number of groups. */
+ rtems_bdbuf_group* groups; /**< The groups. */
+
+ bool initialised; /**< Initialised state. */
+} rtems_bdbuf_cache;
+
+/**
+ * Fatal errors
+ */
+#define RTEMS_BLKDEV_FATAL_ERROR(n) \
+ (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
+
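+/*
+ * For illustration: RTEMS_BLKDEV_FATAL_ERROR(1) encodes to 0x42000001, i.e.
+ * the ASCII 'B' tag in the top byte with the error number in the low 24 bits.
+ */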
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_11 RTEMS_BLKDEV_FATAL_ERROR(1)
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4 RTEMS_BLKDEV_FATAL_ERROR(2)
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5 RTEMS_BLKDEV_FATAL_ERROR(3)
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6 RTEMS_BLKDEV_FATAL_ERROR(4)
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7 RTEMS_BLKDEV_FATAL_ERROR(5)
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8 RTEMS_BLKDEV_FATAL_ERROR(6)
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9 RTEMS_BLKDEV_FATAL_ERROR(7)
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10 RTEMS_BLKDEV_FATAL_ERROR(8)
+#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM RTEMS_BLKDEV_FATAL_ERROR(9)
+#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT RTEMS_BLKDEV_FATAL_ERROR(10)
+#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK RTEMS_BLKDEV_FATAL_ERROR(11)
+#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK RTEMS_BLKDEV_FATAL_ERROR(12)
+#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK RTEMS_BLKDEV_FATAL_ERROR(13)
+#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK RTEMS_BLKDEV_FATAL_ERROR(14)
+#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS RTEMS_BLKDEV_FATAL_ERROR(15)
+#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2 RTEMS_BLKDEV_FATAL_ERROR(16)
+#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST RTEMS_BLKDEV_FATAL_ERROR(17)
+#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
+#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE RTEMS_BLKDEV_FATAL_ERROR(19)
+#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE RTEMS_BLKDEV_FATAL_ERROR(20)
+#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM RTEMS_BLKDEV_FATAL_ERROR(21)
+#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE RTEMS_BLKDEV_FATAL_ERROR(22)
+#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START RTEMS_BLKDEV_FATAL_ERROR(23)
+#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE RTEMS_BLKDEV_FATAL_ERROR(24)
+#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS RTEMS_BLKDEV_FATAL_ERROR(25)
+#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT RTEMS_BLKDEV_FATAL_ERROR(26)
+#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE RTEMS_BLKDEV_FATAL_ERROR(27)
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0 RTEMS_BLKDEV_FATAL_ERROR(28)
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1 RTEMS_BLKDEV_FATAL_ERROR(29)
+#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2 RTEMS_BLKDEV_FATAL_ERROR(30)
+#define RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL RTEMS_BLKDEV_FATAL_ERROR(31)
+
+/**
+ * The events used in this code. These should be system events rather than
+ * application events.
+ */
+#define RTEMS_BDBUF_TRANSFER_SYNC RTEMS_EVENT_1
+#define RTEMS_BDBUF_SWAPOUT_SYNC RTEMS_EVENT_2
+
+/**
+ * The swap out task stack size. Should be more than enough for most drivers
+ * with tracing turned on.
+ */
+#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)
+
+/**
+ * Lock semaphore attributes. This is used for locking type mutexes.
+ *
+ * @warning Priority inheritance is on.
+ */
+#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
+ (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
+ RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
+
+/**
+ * Waiter semaphore attributes.
+ *
+ * @warning Do not configure as inherit priority. If a driver is in the driver
+ * initialisation table this locked semaphore will have the IDLE task
+ * as the holder and a blocking task will raise the priority of the
+ *          IDLE task which can cause unusual side effects.
+ */
+#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
+ (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
+ RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
+
+/**
+ * Waiter timeout. Remove the RTEMS_NO_TIMEOUT definition below to enable the
+ * 20 second fallback and find some info on a waiter that is waiting too long.
+ */
+#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
+#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
+#define RTEMS_BDBUF_WAIT_TIMEOUT \
+ (TOD_MICROSECONDS_TO_TICKS (20000000))
+#endif
+
+/*
+ * The swap out task.
+ */
+static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
+
+/**
+ * The Buffer Descriptor cache.
+ */
+static rtems_bdbuf_cache bdbuf_cache;
+
+#if RTEMS_BDBUF_TRACE
+/**
+ * If true output the trace message.
+ */
+bool rtems_bdbuf_tracer;
+
+/**
+ * Return the number of items on the list.
+ *
+ * @param list The chain control.
+ * @return uint32_t The number of items on the list.
+ */
+uint32_t
+rtems_bdbuf_list_count (rtems_chain_control* list)
+{
+ rtems_chain_node* node = rtems_chain_first (list);
+ uint32_t count = 0;
+ while (!rtems_chain_is_tail (list, node))
+ {
+ count++;
+ node = rtems_chain_next (node);
+ }
+ return count;
+}
+
+/**
+ * Show the usage for the bdbuf cache.
+ */
+void
+rtems_bdbuf_show_usage (void)
+{
+ uint32_t group;
+ uint32_t total = 0;
+ uint32_t val;
+
+ for (group = 0; group < bdbuf_cache.group_count; group++)
+ total += bdbuf_cache.groups[group].users;
+  printf ("bdbuf:group users=%" PRIu32, total);
+  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
+  printf (", lru=%" PRIu32, val);
+  total = val;
+  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
+  printf (", mod=%" PRIu32, val);
+  total += val;
+  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
+  printf (", sync=%" PRIu32, val);
+  total += val;
+  printf (", total=%" PRIu32 "\n", total);
+}
+
+/**
+ * Show the users for a group of a bd.
+ *
+ * @param where A label to show the context of output.
+ * @param bd The bd to show the users of.
+ */
+void
+rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
+{
+ const char* states[] =
+ { "EM", "FR", "CH", "AC", "AM", "MD", "SY", "TR" };
+
+ printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
+ where,
+ bd->block, states[bd->state],
+ bd->group - bdbuf_cache.groups,
+ bd - bdbuf_cache.bds,
+ bd->group->users,
+ bd->group->users > 8 ? "<<<<<<<" : "");
+}
+#else
+#define rtems_bdbuf_tracer (0)
+#define rtems_bdbuf_show_usage() ((void) 0)
+#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
+#endif
+
+/**
+ * The default maximum height of 32 allows for AVL trees having between
+ * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion. You may
+ * change this compile-time constant as you wish.
+ */
+#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
+#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
+#endif
+
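+/**
+ * Stop via a fatal error. The code combines the buffer state and the error,
+ * e.g. state 2 with RTEMS_BLKDEV_FATAL_ERROR(8) yields 0x42020008.
+ */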
+static void
+rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
+{
+ rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
+}
+
+/**
+ * Searches for the node with specified dev/block.
+ *
+ * @param root pointer to the root node of the AVL-Tree
+ * @param dev device search key
+ * @param block block search key
+ * @retval NULL node with the specified dev/block is not found
+ * @return pointer to the node with specified dev/block
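+ *
+ * Nodes are ordered by the (dev, block) pair with dev as the major key.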
+ */
+static rtems_bdbuf_buffer *
+rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
+ dev_t dev,
+ rtems_blkdev_bnum block)
+{
+ rtems_bdbuf_buffer* p = *root;
+
+ while ((p != NULL) && ((p->dev != dev) || (p->block != block)))
+ {
+ if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
+ {
+ p = p->avl.right;
+ }
+ else
+ {
+ p = p->avl.left;
+ }
+ }
+
+ return p;
+}
+
+/**
+ * Inserts the specified node into the AVL tree.
+ *
+ * @param root pointer to the root node of the AVL-Tree
+ * @param node Pointer to the node to add.
+ * @retval 0 The node added successfully
+ * @retval -1 An error occurred
+ */
+static int
+rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
+ rtems_bdbuf_buffer* node)
+{
+ dev_t dev = node->dev;
+ rtems_blkdev_bnum block = node->block;
+
+ rtems_bdbuf_buffer* p = *root;
+ rtems_bdbuf_buffer* q;
+ rtems_bdbuf_buffer* p1;
+ rtems_bdbuf_buffer* p2;
+ rtems_bdbuf_buffer* buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
+ rtems_bdbuf_buffer** buf_prev = buf_stack;
+
+ bool modified = false;
+
+ if (p == NULL)
+ {
+ *root = node;
+ node->avl.left = NULL;
+ node->avl.right = NULL;
+ node->avl.bal = 0;
+ return 0;
+ }
+
+ while (p != NULL)
+ {
+ *buf_prev++ = p;
+
+ if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
+ {
+ p->avl.cache = 1;
+ q = p->avl.right;
+ if (q == NULL)
+ {
+        q = node;
+        p->avl.right = q;
+ break;
+ }
+ }
+ else if ((p->dev != dev) || (p->block != block))
+ {
+ p->avl.cache = -1;
+ q = p->avl.left;
+ if (q == NULL)
+ {
+ q = node;
+ p->avl.left = q;
+ break;
+ }
+ }
+ else
+ {
+ return -1;
+ }
+
+ p = q;
+ }
+
+ q->avl.left = q->avl.right = NULL;
+ q->avl.bal = 0;
+ modified = true;
+ buf_prev--;
+
+ while (modified)
+ {
+ if (p->avl.cache == -1)
+ {
+ switch (p->avl.bal)
+ {
+ case 1:
+ p->avl.bal = 0;
+ modified = false;
+ break;
+
+ case 0:
+ p->avl.bal = -1;
+ break;
+
+ case -1:
+ p1 = p->avl.left;
+ if (p1->avl.bal == -1) /* simple LL-turn */
+ {
+ p->avl.left = p1->avl.right;
+ p1->avl.right = p;
+ p->avl.bal = 0;
+ p = p1;
+ }
+ else /* double LR-turn */
+ {
+ p2 = p1->avl.right;
+ p1->avl.right = p2->avl.left;
+ p2->avl.left = p1;
+ p->avl.left = p2->avl.right;
+ p2->avl.right = p;
+ if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
+ if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
+ p = p2;
+ }
+ p->avl.bal = 0;
+ modified = false;
+ break;
+
+ default:
+ break;
+ }
+ }
+ else
+ {
+ switch (p->avl.bal)
+ {
+ case -1:
+ p->avl.bal = 0;
+ modified = false;
+ break;
+
+ case 0:
+ p->avl.bal = 1;
+ break;
+
+ case 1:
+ p1 = p->avl.right;
+ if (p1->avl.bal == 1) /* simple RR-turn */
+ {
+ p->avl.right = p1->avl.left;
+ p1->avl.left = p;
+ p->avl.bal = 0;
+ p = p1;
+ }
+ else /* double RL-turn */
+ {
+ p2 = p1->avl.left;
+ p1->avl.left = p2->avl.right;
+ p2->avl.right = p1;
+ p->avl.right = p2->avl.left;
+ p2->avl.left = p;
+ if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
+ if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
+ p = p2;
+ }
+ p->avl.bal = 0;
+ modified = false;
+ break;
+
+ default:
+ break;
+ }
+ }
+ q = p;
+ if (buf_prev > buf_stack)
+ {
+ p = *--buf_prev;
+
+ if (p->avl.cache == -1)
+ {
+ p->avl.left = q;
+ }
+ else
+ {
+ p->avl.right = q;
+ }
+ }
+ else
+ {
+ *root = p;
+ break;
+ }
+  }
+
+ return 0;
+}
+
+
+/**
+ * Removes the node from the tree.
+ *
+ * @param root Pointer to pointer to the root node
+ * @param node Pointer to the node to remove
+ * @retval 0 Item removed
+ * @retval -1 No such item found
+ */
+static int
+rtems_bdbuf_avl_remove(rtems_bdbuf_buffer** root,
+ const rtems_bdbuf_buffer* node)
+{
+ dev_t dev = node->dev;
+ rtems_blkdev_bnum block = node->block;
+
+ rtems_bdbuf_buffer* p = *root;
+ rtems_bdbuf_buffer* q;
+ rtems_bdbuf_buffer* r;
+ rtems_bdbuf_buffer* s;
+ rtems_bdbuf_buffer* p1;
+ rtems_bdbuf_buffer* p2;
+ rtems_bdbuf_buffer* buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
+ rtems_bdbuf_buffer** buf_prev = buf_stack;
+
+ bool modified = false;
+
+ memset (buf_stack, 0, sizeof(buf_stack));
+
+ while (p != NULL)
+ {
+ *buf_prev++ = p;
+
+ if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
+ {
+ p->avl.cache = 1;
+ p = p->avl.right;
+ }
+ else if ((p->dev != dev) || (p->block != block))
+ {
+ p->avl.cache = -1;
+ p = p->avl.left;
+ }
+ else
+ {
+ /* node found */
+ break;
+ }
+ }
+
+ if (p == NULL)
+ {
+ /* there is no such node */
+ return -1;
+ }
+
+ q = p;
+
+ buf_prev--;
+ if (buf_prev > buf_stack)
+ {
+ p = *(buf_prev - 1);
+ }
+ else
+ {
+ p = NULL;
+ }
+
+ /* at this moment q - is a node to delete, p is q's parent */
+ if (q->avl.right == NULL)
+ {
+ r = q->avl.left;
+ if (r != NULL)
+ {
+ r->avl.bal = 0;
+ }
+ q = r;
+ }
+ else
+ {
+ rtems_bdbuf_buffer **t;
+
+ r = q->avl.right;
+
+ if (r->avl.left == NULL)
+ {
+ r->avl.left = q->avl.left;
+ r->avl.bal = q->avl.bal;
+ r->avl.cache = 1;
+ *buf_prev++ = q = r;
+ }
+ else
+ {
+ t = buf_prev++;
+ s = r;
+
+ while (s->avl.left != NULL)
+ {
+ *buf_prev++ = r = s;
+ s = r->avl.left;
+ r->avl.cache = -1;
+ }
+
+ s->avl.left = q->avl.left;
+ r->avl.left = s->avl.right;
+ s->avl.right = q->avl.right;
+ s->avl.bal = q->avl.bal;
+ s->avl.cache = 1;
+
+ *t = q = s;
+ }
+ }
+
+ if (p != NULL)
+ {
+ if (p->avl.cache == -1)
+ {
+ p->avl.left = q;
+ }
+ else
+ {
+ p->avl.right = q;
+ }
+ }
+ else
+ {
+ *root = q;
+ }
+
+ modified = true;
+
+ while (modified)
+ {
+ if (buf_prev > buf_stack)
+ {
+ p = *--buf_prev;
+ }
+ else
+ {
+ break;
+ }
+
+ if (p->avl.cache == -1)
+ {
+ /* rebalance left branch */
+ switch (p->avl.bal)
+ {
+ case -1:
+ p->avl.bal = 0;
+ break;
+ case 0:
+ p->avl.bal = 1;
+ modified = false;
+ break;
+
+ case +1:
+ p1 = p->avl.right;
+
+ if (p1->avl.bal >= 0) /* simple RR-turn */
+ {
+ p->avl.right = p1->avl.left;
+ p1->avl.left = p;
+
+ if (p1->avl.bal == 0)
+ {
+ p1->avl.bal = -1;
+ modified = false;
+ }
+ else
+ {
+ p->avl.bal = 0;
+ p1->avl.bal = 0;
+ }
+ p = p1;
+ }
+ else /* double RL-turn */
+ {
+ p2 = p1->avl.left;
+
+ p1->avl.left = p2->avl.right;
+ p2->avl.right = p1;
+ p->avl.right = p2->avl.left;
+ p2->avl.left = p;
+
+ if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
+ if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;
+
+ p = p2;
+ p2->avl.bal = 0;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ else
+ {
+ /* rebalance right branch */
+ switch (p->avl.bal)
+ {
+ case +1:
+ p->avl.bal = 0;
+ break;
+
+ case 0:
+ p->avl.bal = -1;
+ modified = false;
+ break;
+
+ case -1:
+ p1 = p->avl.left;
+
+ if (p1->avl.bal <= 0) /* simple LL-turn */
+ {
+ p->avl.left = p1->avl.right;
+ p1->avl.right = p;
+ if (p1->avl.bal == 0)
+ {
+ p1->avl.bal = 1;
+ modified = false;
+ }
+ else
+ {
+ p->avl.bal = 0;
+ p1->avl.bal = 0;
+ }
+ p = p1;
+ }
+ else /* double LR-turn */
+ {
+ p2 = p1->avl.right;
+
+ p1->avl.right = p2->avl.left;
+ p2->avl.left = p1;
+ p->avl.left = p2->avl.right;
+ p2->avl.right = p;
+
+ if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
+ if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
+
+ p = p2;
+ p2->avl.bal = 0;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (buf_prev > buf_stack)
+ {
+ q = *(buf_prev - 1);
+
+ if (q->avl.cache == -1)
+ {
+ q->avl.left = p;
+ }
+ else
+ {
+ q->avl.right = p;
+ }
+ }
+ else
+ {
+ *root = p;
+ break;
+ }
+
+ }
+
+ return 0;
+}
+
+static void
+rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
+{
+ bd->state = state;
+}
+
+/**
+ * Convert a block number in units of the block size to the corresponding
+ * block number in units of the media block size. We have to use 64bit maths
+ * as there is no short cut here.
+ *
+ * @param block The logical block number in the block size terms.
+ * @param block_size The block size.
+ * @param media_block_size The block size of the media.
+ * @return rtems_blkdev_bnum The media block number.
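+ *
+ * For example, with a 2048 byte block size and a 512 byte media block size,
+ * logical block 3 maps to media block 12.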
+ */
+static rtems_blkdev_bnum
+rtems_bdbuf_media_block (rtems_blkdev_bnum block,
+ size_t block_size,
+ size_t media_block_size)
+{
+ return (rtems_blkdev_bnum)
+ ((((uint64_t) block) * block_size) / media_block_size);
+}
+
+/**
+ * Lock the mutex. A single task can nest calls.
+ *
+ * @param lock The mutex to lock.
+ * @param fatal_error_code The error code if the call fails.
+ */
+static void
+rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
+{
+ rtems_status_code sc = rtems_semaphore_obtain (lock,
+ RTEMS_WAIT,
+ RTEMS_NO_TIMEOUT);
+ if (sc != RTEMS_SUCCESSFUL)
+ rtems_fatal_error_occurred (fatal_error_code);
+}
+
+/**
+ * Unlock the mutex.
+ *
+ * @param lock The mutex to unlock.
+ * @param fatal_error_code The error code if the call fails.
+ */
+static void
+rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
+{
+ rtems_status_code sc = rtems_semaphore_release (lock);
+ if (sc != RTEMS_SUCCESSFUL)
+ rtems_fatal_error_occurred (fatal_error_code);
+}
+
+/**
+ * Lock the cache. A single task can nest calls.
+ */
+static void
+rtems_bdbuf_lock_cache (void)
+{
+ rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
+}
+
+/**
+ * Unlock the cache.
+ */
+static void
+rtems_bdbuf_unlock_cache (void)
+{
+ rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
+}
+
+/**
+ * Lock the cache's sync. A single task can nest calls.
+ */
+static void
+rtems_bdbuf_lock_sync (void)
+{
+ rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
+}
+
+/**
+ * Unlock the cache's sync lock. Any blocked writers are woken.
+ */
+static void
+rtems_bdbuf_unlock_sync (void)
+{
+ rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
+ RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
+}
+
+static void
+rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
+{
+ ++bd->group->users;
+}
+
+static void
+rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
+{
+ --bd->group->users;
+}
+
+static rtems_mode
+rtems_bdbuf_disable_preemption (void)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+ rtems_mode prev_mode = 0;
+
+ sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
+ if (sc != RTEMS_SUCCESSFUL)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
+
+ return prev_mode;
+}
+
+static void
+rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+
+ sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
+ if (sc != RTEMS_SUCCESSFUL)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
+}
+
+/**
+ * Wait until woken. Semaphores are used so a number of tasks can wait and can
+ * be woken at once. Task events would require us to maintain a list of tasks
+ * to be woken, which would require storage, and we do not know the number of
+ * tasks that could be waiting.
+ *
+ * While we have the cache locked we can try and claim the semaphore and
+ * therefore know when we release the lock to the cache we will block until the
+ * semaphore is released. This may even happen before we get to block.
+ *
+ * A counter is used to avoid the flush call when no one is waiting.
+ *
+ * The function assumes the cache is locked on entry and it will be locked on
+ * exit.
+ */
+static void
+rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
+{
+ rtems_status_code sc;
+ rtems_mode prev_mode;
+
+ /*
+ * Indicate we are waiting.
+ */
+ ++waiters->count;
+
+ /*
+ * Disable preemption then unlock the cache and block. There is no POSIX
+ * condition variable in the core API so this is a work around.
+ *
+   * The issue is a task could preempt after the cache is unlocked, because it
+   * is blocking or just hits that window, and before this task has blocked on
+   * the semaphore. If the preempting task flushes the queue this task will
+   * not see the flush and may block forever or until another transaction
+   * flushes this semaphore.
+ */
+ prev_mode = rtems_bdbuf_disable_preemption ();
+
+ /*
+ * Unlock the cache, wait, and lock the cache when we return.
+ */
+ rtems_bdbuf_unlock_cache ();
+
+ sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);
+
+ if (sc == RTEMS_TIMEOUT)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);
+
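+  /*
+   * The waiter semaphore is created locked and is only ever flushed, never
+   * released, so a normal wake-up returns RTEMS_UNSATISFIED. Anything else
+   * indicates a broken cache.
+   */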
+ if (sc != RTEMS_UNSATISFIED)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);
+
+ rtems_bdbuf_lock_cache ();
+
+ rtems_bdbuf_restore_preemption (prev_mode);
+
+ --waiters->count;
+}
+
+static void
+rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
+{
+ rtems_bdbuf_group_obtain (bd);
+ ++bd->waiters;
+ rtems_bdbuf_anonymous_wait (waiters);
+ --bd->waiters;
+ rtems_bdbuf_group_release (bd);
+}
+
+/**
+ * Wake the waiters on a resource. The waiter counter lets us know if there
+ * are any waiters.
+ */
+static void
+rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+
+ if (waiters->count > 0)
+ {
+ sc = rtems_semaphore_flush (waiters->sema);
+ if (sc != RTEMS_SUCCESSFUL)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
+ }
+}
+
+static void
+rtems_bdbuf_wake_swapper (void)
+{
+ rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
+ RTEMS_BDBUF_SWAPOUT_SYNC);
+ if (sc != RTEMS_SUCCESSFUL)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
+}
+
+static bool
+rtems_bdbuf_has_buffer_waiters (void)
+{
+ return bdbuf_cache.buffer_waiters.count;
+}
+
+static void
+rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
+{
+ if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM);
+}
+
+static void
+rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
+{
+ switch (bd->state)
+ {
+ case RTEMS_BDBUF_STATE_FREE:
+ break;
+ case RTEMS_BDBUF_STATE_CACHED:
+ rtems_bdbuf_remove_from_tree (bd);
+ break;
+ default:
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
+ }
+
+ rtems_chain_extract (&bd->link);
+}
+
+static void
+rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
+{
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
+ rtems_chain_prepend (&bdbuf_cache.lru, &bd->link);
+}
+
+static void
+rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
+{
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
+}
+
+static void
+rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
+{
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
+ rtems_chain_append (&bdbuf_cache.lru, &bd->link);
+}
+
+static void
+rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
+{
+ rtems_bdbuf_make_empty (bd);
+
+ if (bd->waiters == 0)
+ {
+ rtems_bdbuf_remove_from_tree (bd);
+ rtems_bdbuf_make_free_and_add_to_lru_list (bd);
+ }
+}
+
+static void
+rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
+{
+ if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dev)
+ {
+ rtems_bdbuf_unlock_cache ();
+
+ /*
+ * Wait for the sync lock.
+ */
+ rtems_bdbuf_lock_sync ();
+
+ rtems_bdbuf_unlock_sync ();
+ rtems_bdbuf_lock_cache ();
+ }
+
+ /*
+   * Only the first modified release sets the timer and any further user
+   * accesses do not change the timer value which should move down. This
+   * assumes the user's hold of the buffer is much less than the time on the
+   * modified list. Resetting the timer on each access could result in a
+   * buffer never reaching 0 and never being forced onto disk. This raises a
+   * difficult question. Is a snapshot of a block that is changing better than
+   * nothing being written? We have tended to think we should hold changes for
+   * only a specific period of time, even if still changing, get them onto
+   * disk, and let the file system try to recover this position if it can.
+ */
+ if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
+ || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
+ bd->hold_timer = bdbuf_config.swap_block_hold;
+
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
+ rtems_chain_append (&bdbuf_cache.modified, &bd->link);
+
+ if (bd->waiters)
+ rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
+ else if (rtems_bdbuf_has_buffer_waiters ())
+ rtems_bdbuf_wake_swapper ();
+}
+
+static void
+rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
+{
+ rtems_bdbuf_group_release (bd);
+ rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
+
+ if (bd->waiters)
+ rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
+ else
+ rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
+}
+
+/**
+ * Compute the number of BDs per group for a given buffer size.
+ *
+ * @param size The buffer size. It can be any size and we scale up.
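+ *
+ * For example, with a buffer_min of 512 and a buffer_max of 4096 the cache
+ * has a max_bds_per_group of 8. A size of 1024 needs 2 minimum size buffers,
+ * which rounds up to a power of two of 2, giving 4 BDs per group.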
+ */
+static size_t
+rtems_bdbuf_bds_per_group (size_t size)
+{
+ size_t bufs_per_size;
+ size_t bds_per_size;
+
+ if (size > bdbuf_config.buffer_max)
+ return 0;
+
+ bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
+
+ for (bds_per_size = 1;
+ bds_per_size < bufs_per_size;
+ bds_per_size <<= 1)
+ ;
+
+ return bdbuf_cache.max_bds_per_group / bds_per_size;
+}
+
+static void
+rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
+{
+ rtems_bdbuf_group_release (bd);
+ rtems_bdbuf_discard_buffer (bd);
+
+ if (bd->waiters)
+ rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
+ else
+ rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
+}
+
+/**
+ * Reallocate a group. The BDs currently allocated in the group are removed
+ * from the AVL tree and any lists, then the new BDs are prepended to the LRU
+ * list of the cache.
+ *
+ * @param group The group to reallocate.
+ * @param new_bds_per_group The new count of BDs per group.
+ * @return A buffer of this group.
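+ *
+ * For example, with a max_bds_per_group of 8, reallocating a group from 1 BD
+ * per group to 4 BDs per group removes the single large BD, frees the BDs at
+ * offsets 2, 4 and 6 onto the LRU list, and returns the BD at offset 0.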
+ */
+static rtems_bdbuf_buffer *
+rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
+{
+ rtems_bdbuf_buffer* bd;
+ size_t b;
+ size_t bufs_per_bd;
+
+ if (rtems_bdbuf_tracer)
+ printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
+ group - bdbuf_cache.groups, group->bds_per_group,
+ new_bds_per_group);
+
+ bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
+
+ for (b = 0, bd = group->bdbuf;
+ b < group->bds_per_group;
+ b++, bd += bufs_per_bd)
+ rtems_bdbuf_remove_from_tree_and_lru_list (bd);
+
+ group->bds_per_group = new_bds_per_group;
+ bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
+
+ for (b = 1, bd = group->bdbuf + bufs_per_bd;
+ b < group->bds_per_group;
+ b++, bd += bufs_per_bd)
+ rtems_bdbuf_make_free_and_add_to_lru_list (bd);
+
+ if (b > 1)
+ rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
+
+ return group->bdbuf;
+}
+
+static void
+rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
+ dev_t dev,
+ rtems_blkdev_bnum block)
+{
+ bd->dev = dev;
+ bd->block = block;
+ bd->avl.left = NULL;
+ bd->avl.right = NULL;
+ bd->waiters = 0;
+
+ if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
+
+ rtems_bdbuf_make_empty (bd);
+}
+
+static rtems_bdbuf_buffer *
+rtems_bdbuf_get_buffer_from_lru_list (dev_t dev,
+ rtems_blkdev_bnum block,
+ size_t bds_per_group)
+{
+ rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
+
+ while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
+ {
+ rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
+ rtems_bdbuf_buffer *empty_bd = NULL;
+
+ if (rtems_bdbuf_tracer)
+ printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
+ bd - bdbuf_cache.bds,
+ bd->group - bdbuf_cache.groups, bd->group->users,
+ bd->group->bds_per_group, bds_per_group);
+
+ /*
+ * If nobody waits for this BD, we may recycle it.
+ */
+ if (bd->waiters == 0)
+ {
+ if (bd->group->bds_per_group == bds_per_group)
+ {
+ rtems_bdbuf_remove_from_tree_and_lru_list (bd);
+
+ empty_bd = bd;
+ }
+ else if (bd->group->users == 0)
+ empty_bd = rtems_bdbuf_group_realloc (bd->group, bds_per_group);
+ }
+
+ if (empty_bd != NULL)
+ {
+ rtems_bdbuf_setup_empty_buffer (empty_bd, dev, block);
+
+ return empty_bd;
+ }
+
+ node = rtems_chain_next (node);
+ }
+
+ return NULL;
+}
+
+/**
+ * Initialise the cache.
+ *
+ * @return rtems_status_code The initialisation status.
+ */
+rtems_status_code
+rtems_bdbuf_init (void)
+{
+ rtems_bdbuf_group* group;
+ rtems_bdbuf_buffer* bd;
+ uint8_t* buffer;
+ size_t b;
+  size_t              cache_alignment;
+ rtems_status_code sc;
+ rtems_mode prev_mode;
+
+ if (rtems_bdbuf_tracer)
+ printf ("bdbuf:init\n");
+
+ if (rtems_interrupt_is_in_progress())
+ return RTEMS_CALLED_FROM_ISR;
+
+ /*
+ * Check the configuration table values.
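+   * The maximum buffer size must be an integral multiple of the minimum, so
+   * for example 512 and 4096 are valid while 512 and 3000 are not.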
+ */
+ if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
+ return RTEMS_INVALID_NUMBER;
+
+ /*
+   * We use a special variable to manage the initialisation in case we have
+   * competing threads doing this. You may get errors if another thread makes
+   * a call and we have not finished initialisation.
+ */
+ prev_mode = rtems_bdbuf_disable_preemption ();
+ if (bdbuf_cache.initialised)
+ {
+ rtems_bdbuf_restore_preemption (prev_mode);
+ return RTEMS_RESOURCE_IN_USE;
+ }
+
+ memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
+ bdbuf_cache.initialised = true;
+ rtems_bdbuf_restore_preemption (prev_mode);
+
+ /*
+ * For unspecified cache alignments we use the CPU alignment.
+ */
+  cache_alignment = 32; /* FIXME rtems_cache_get_data_line_size() */
+  if (cache_alignment <= 0)
+    cache_alignment = CPU_ALIGNMENT;
+
+ bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
+
+ rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
+ rtems_chain_initialize_empty (&bdbuf_cache.lru);
+ rtems_chain_initialize_empty (&bdbuf_cache.modified);
+ rtems_chain_initialize_empty (&bdbuf_cache.sync);
+
+ /*
+ * Create the locks for the cache.
+ */
+ sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
+ 1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
+ &bdbuf_cache.lock);
+ if (sc != RTEMS_SUCCESSFUL)
+ goto error;
+
+ rtems_bdbuf_lock_cache ();
+
+ sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
+ 1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
+ &bdbuf_cache.sync_lock);
+ if (sc != RTEMS_SUCCESSFUL)
+ goto error;
+
+ sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
+ 0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
+ &bdbuf_cache.access_waiters.sema);
+ if (sc != RTEMS_SUCCESSFUL)
+ goto error;
+
+ sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
+ 0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
+ &bdbuf_cache.transfer_waiters.sema);
+ if (sc != RTEMS_SUCCESSFUL)
+ goto error;
+
+ sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
+ 0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
+ &bdbuf_cache.buffer_waiters.sema);
+ if (sc != RTEMS_SUCCESSFUL)
+ goto error;
+
+ /*
+ * Compute the various number of elements in the cache.
+ */
+ bdbuf_cache.buffer_min_count =
+ bdbuf_config.size / bdbuf_config.buffer_min;
+ bdbuf_cache.max_bds_per_group =
+ bdbuf_config.buffer_max / bdbuf_config.buffer_min;
+ bdbuf_cache.group_count =
+ bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
+
+ /*
+ * Allocate the memory for the buffer descriptors.
+ */
+ bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
+ bdbuf_cache.buffer_min_count);
+ if (!bdbuf_cache.bds)
+ goto error;
+
+ /*
+   * Allocate the memory for the buffer groups.
+ */
+ bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
+ bdbuf_cache.group_count);
+ if (!bdbuf_cache.groups)
+ goto error;
+
+ /*
+ * Allocate memory for buffer memory. The buffer memory will be cache
+ * aligned. It is possible to free the memory allocated by rtems_memalign()
+   * with free(). rtems_memalign() returns 0 if the memory was allocated.
+   */
+ if (rtems_memalign ((void **) &bdbuf_cache.buffers,
+                      cache_alignment,
+ bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
+ goto error;
+
+ /*
+ * The cache is empty after opening so we need to add all the buffers to it
+ * and initialise the groups.
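+   * For example, with a buffer_min_count of 16 and a max_bds_per_group of 8
+   * there are 2 groups, with group 0 owning BDs 0 to 7 and group 1 owning
+   * BDs 8 to 15.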
+ */
+ for (b = 0, group = bdbuf_cache.groups,
+ bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
+ b < bdbuf_cache.buffer_min_count;
+ b++, bd++, buffer += bdbuf_config.buffer_min)
+ {
+ bd->dev = BDBUF_INVALID_DEV;
+ bd->group = group;
+ bd->buffer = buffer;
+
+ rtems_chain_append (&bdbuf_cache.lru, &bd->link);
+
+ if ((b % bdbuf_cache.max_bds_per_group) ==
+ (bdbuf_cache.max_bds_per_group - 1))
+ group++;
+ }
+
+ for (b = 0,
+ group = bdbuf_cache.groups,
+ bd = bdbuf_cache.bds;
+ b < bdbuf_cache.group_count;
+ b++,
+ group++,
+ bd += bdbuf_cache.max_bds_per_group)
+ {
+ group->bds_per_group = bdbuf_cache.max_bds_per_group;
+ group->bdbuf = bd;
+ }
+
+ /*
+ * Create and start swapout task. This task will create and manage the worker
+ * threads.
+ */
+ bdbuf_cache.swapout_enabled = true;
+
+ sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
+ bdbuf_config.swapout_priority ?
+ bdbuf_config.swapout_priority :
+ RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
+ SWAPOUT_TASK_STACK_SIZE,
+ RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
+ RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
+ &bdbuf_cache.swapout);
+ if (sc != RTEMS_SUCCESSFUL)
+ goto error;
+
+ sc = rtems_task_start (bdbuf_cache.swapout,
+ rtems_bdbuf_swapout_task,
+ (rtems_task_argument) &bdbuf_cache);
+ if (sc != RTEMS_SUCCESSFUL)
+ goto error;
+
+ rtems_bdbuf_unlock_cache ();
+
+ return RTEMS_SUCCESSFUL;
+
+error:
+
+ if (bdbuf_cache.swapout != 0)
+ rtems_task_delete (bdbuf_cache.swapout);
+
+ free (bdbuf_cache.buffers);
+ free (bdbuf_cache.groups);
+ free (bdbuf_cache.bds);
+
+ rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
+ rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
+ rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
+ rtems_semaphore_delete (bdbuf_cache.sync_lock);
+
+ if (bdbuf_cache.lock != 0)
+ {
+ rtems_bdbuf_unlock_cache ();
+ rtems_semaphore_delete (bdbuf_cache.lock);
+ }
+
+ bdbuf_cache.initialised = false;
+
+ return RTEMS_UNSATISFIED;
+}
+
+static void
+rtems_bdbuf_wait_for_event (rtems_event_set event)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+ rtems_event_set out = 0;
+
+ sc = rtems_event_receive (event,
+ RTEMS_EVENT_ALL | RTEMS_WAIT,
+ RTEMS_NO_TIMEOUT,
+ &out);
+
+ if (sc != RTEMS_SUCCESSFUL || out != event)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
+}
+
+static void
+rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
+{
+ while (true)
+ {
+ switch (bd->state)
+ {
+ case RTEMS_BDBUF_STATE_MODIFIED:
+ rtems_bdbuf_group_release (bd);
+ /* Fall through */
+ case RTEMS_BDBUF_STATE_CACHED:
+ rtems_chain_extract (&bd->link);
+ /* Fall through */
+ case RTEMS_BDBUF_STATE_EMPTY:
+ return;
+ case RTEMS_BDBUF_STATE_ACCESS_CACHED:
+ case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
+ case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
+ case RTEMS_BDBUF_STATE_ACCESS_PURGED:
+ rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
+ break;
+ case RTEMS_BDBUF_STATE_SYNC:
+ case RTEMS_BDBUF_STATE_TRANSFER:
+ case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
+ rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
+ break;
+ default:
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
+ }
+ }
+}
+
+static void
+rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
+{
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
+ rtems_chain_extract (&bd->link);
+ rtems_chain_append (&bdbuf_cache.sync, &bd->link);
+ rtems_bdbuf_wake_swapper ();
+}
+
+/**
+ * @brief Waits until the buffer is ready for recycling.
+ *
+ * @retval @c true Buffer is valid and may be recycled.
+ * @retval @c false Buffer is invalid and has to be searched again.
+ */
+static bool
+rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
+{
+ while (true)
+ {
+ switch (bd->state)
+ {
+ case RTEMS_BDBUF_STATE_FREE:
+ return true;
+ case RTEMS_BDBUF_STATE_MODIFIED:
+ rtems_bdbuf_request_sync_for_modified_buffer (bd);
+ break;
+ case RTEMS_BDBUF_STATE_CACHED:
+ case RTEMS_BDBUF_STATE_EMPTY:
+ if (bd->waiters == 0)
+ return true;
+ else
+ {
+ /*
+ * It is essential that we wait here without a special wait count and
+ * without the group in use. Otherwise we could trigger a wait ping
+ * pong with another recycle waiter. The state of the buffer is
+ * arbitrary afterwards.
+ */
+ rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
+ return false;
+ }
+ case RTEMS_BDBUF_STATE_ACCESS_CACHED:
+ case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
+ case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
+ case RTEMS_BDBUF_STATE_ACCESS_PURGED:
+ rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
+ break;
+ case RTEMS_BDBUF_STATE_SYNC:
+ case RTEMS_BDBUF_STATE_TRANSFER:
+ case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
+ rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
+ break;
+ default:
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
+ }
+ }
+}
+
+static void
+rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
+{
+ while (true)
+ {
+ switch (bd->state)
+ {
+ case RTEMS_BDBUF_STATE_CACHED:
+ case RTEMS_BDBUF_STATE_EMPTY:
+ case RTEMS_BDBUF_STATE_MODIFIED:
+ case RTEMS_BDBUF_STATE_ACCESS_CACHED:
+ case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
+ case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
+ case RTEMS_BDBUF_STATE_ACCESS_PURGED:
+ return;
+ case RTEMS_BDBUF_STATE_SYNC:
+ case RTEMS_BDBUF_STATE_TRANSFER:
+ case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
+ rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
+ break;
+ default:
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
+ }
+ }
+}
+
+static void
+rtems_bdbuf_wait_for_buffer (void)
+{
+ if (!rtems_chain_is_empty (&bdbuf_cache.modified))
+ rtems_bdbuf_wake_swapper ();
+
+ rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
+}
+
+static void
+rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
+{
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
+
+ rtems_chain_append (&bdbuf_cache.sync, &bd->link);
+
+ if (bd->waiters)
+ rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
+
+ rtems_bdbuf_wake_swapper ();
+ rtems_bdbuf_wait_for_sync_done (bd);
+
+ /*
+ * We may have created a cached or empty buffer which may be recycled.
+ */
+ if (bd->waiters == 0
+ && (bd->state == RTEMS_BDBUF_STATE_CACHED
+ || bd->state == RTEMS_BDBUF_STATE_EMPTY))
+ {
+ if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
+ {
+ rtems_bdbuf_remove_from_tree (bd);
+ rtems_bdbuf_make_free_and_add_to_lru_list (bd);
+ }
+ rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
+ }
+}
+
+static rtems_bdbuf_buffer *
+rtems_bdbuf_get_buffer_for_read_ahead (dev_t dev,
+ rtems_blkdev_bnum block,
+ size_t bds_per_group)
+{
+ rtems_bdbuf_buffer *bd = NULL;
+
+ bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);
+
+ if (bd == NULL)
+ {
+ bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);
+
+ if (bd != NULL)
+ rtems_bdbuf_group_obtain (bd);
+ }
+ else
+ /*
+ * The buffer is in the cache. So it is already available or in use, and
+ * thus no need for a read ahead.
+ */
+ bd = NULL;
+
+ return bd;
+}
+
+static rtems_bdbuf_buffer *
+rtems_bdbuf_get_buffer_for_access (dev_t dev,
+ rtems_blkdev_bnum block,
+ size_t bds_per_group)
+{
+ rtems_bdbuf_buffer *bd = NULL;
+
+ do
+ {
+ bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);
+
+ if (bd != NULL)
+ {
+ if (bd->group->bds_per_group != bds_per_group)
+ {
+ if (rtems_bdbuf_wait_for_recycle (bd))
+ {
+ rtems_bdbuf_remove_from_tree_and_lru_list (bd);
+ rtems_bdbuf_make_free_and_add_to_lru_list (bd);
+ rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
+ }
+ bd = NULL;
+ }
+ }
+ else
+ {
+ bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);
+
+ if (bd == NULL)
+ rtems_bdbuf_wait_for_buffer ();
+ }
+ }
+ while (bd == NULL);
+
+ rtems_bdbuf_wait_for_access (bd);
+ rtems_bdbuf_group_obtain (bd);
+
+ return bd;
+}
+
+static rtems_status_code
+rtems_bdbuf_obtain_disk (dev_t dev,
+ rtems_blkdev_bnum block,
+ rtems_disk_device **dd_ptr,
+ rtems_blkdev_bnum *media_block_ptr,
+ size_t *bds_per_group_ptr)
+{
+ rtems_disk_device *dd = NULL;
+
+ if (!bdbuf_cache.initialised)
+ return RTEMS_NOT_CONFIGURED;
+
+ /*
+ * Do not hold the cache lock when obtaining the disk table.
+ */
+ dd = rtems_disk_obtain (dev);
+ if (dd == NULL)
+ return RTEMS_INVALID_ID;
+
+ *dd_ptr = dd;
+
+ if (media_block_ptr != NULL)
+ {
+ /*
+ * Compute the media block number. Drivers work with media block number not
+ * the block number a BD may have as this depends on the block size set by
+ * the user.
+ */
+ rtems_blkdev_bnum mb = rtems_bdbuf_media_block (block,
+ dd->block_size,
+ dd->media_block_size);
+ if (mb >= dd->size)
+ {
+ rtems_disk_release(dd);
+ return RTEMS_INVALID_NUMBER;
+ }
+
+ *media_block_ptr = mb + dd->start;
+ }
+
+ if (bds_per_group_ptr != NULL)
+ {
+ size_t bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
+
+ if (bds_per_group == 0)
+ {
+ rtems_disk_release (dd);
+ return RTEMS_INVALID_NUMBER;
+ }
+
+ *bds_per_group_ptr = bds_per_group;
+ }
+
+ return RTEMS_SUCCESSFUL;
+}
+
+static void
+rtems_bdbuf_release_disk (rtems_disk_device *dd)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+
+ sc = rtems_disk_release (dd);
+ if (sc != RTEMS_SUCCESSFUL)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL);
+}
+
+rtems_status_code
+rtems_bdbuf_get (dev_t dev,
+ rtems_blkdev_bnum block,
+ rtems_bdbuf_buffer **bd_ptr)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+ rtems_disk_device *dd = NULL;
+ rtems_bdbuf_buffer *bd = NULL;
+ rtems_blkdev_bnum media_block = 0;
+ size_t bds_per_group = 0;
+
+ sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
+ if (sc != RTEMS_SUCCESSFUL)
+ return sc;
+
+ rtems_bdbuf_lock_cache ();
+
+ /*
+ * Print the block index relative to the physical disk.
+ */
+ if (rtems_bdbuf_tracer)
+ printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
+ media_block, block, (unsigned) dev);
+
+ bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);
+
+ switch (bd->state)
+ {
+ case RTEMS_BDBUF_STATE_CACHED:
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
+ break;
+ case RTEMS_BDBUF_STATE_EMPTY:
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
+ break;
+ case RTEMS_BDBUF_STATE_MODIFIED:
+ /*
+       * Getting a modified buffer could be considered a bug in the caller
+       * because you should not be getting an already modified buffer. However,
+       * a user may have modified a byte in a block, then decided to seek to
+       * the start and write the whole block, and the file system will have no
+       * record of this, so it just gets the block to fill.
+ */
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
+ break;
+ default:
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
+ break;
+ }
+
+ if (rtems_bdbuf_tracer)
+ {
+ rtems_bdbuf_show_users ("get", bd);
+ rtems_bdbuf_show_usage ();
+ }
+
+ rtems_bdbuf_unlock_cache ();
+
+ rtems_bdbuf_release_disk (dd);
+
+ *bd_ptr = bd;
+
+ return RTEMS_SUCCESSFUL;
+}
+
+/**
+ * Call back handler called by the low level driver when the transfer has
+ * completed. This function may be invoked from interrupt handler.
+ *
+ * @param arg Arbitrary argument specified in block device request
+ * structure (in this case - pointer to the appropriate
+ * block device request structure).
+ * @param status I/O completion status
+ */
+static void
+rtems_bdbuf_transfer_done (void* arg, rtems_status_code status)
+{
+ rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
+
+ req->status = status;
+
+ rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
+}
+
+static void
+rtems_bdbuf_create_read_request (const rtems_disk_device *dd,
+ rtems_blkdev_bnum media_block,
+ size_t bds_per_group,
+ rtems_blkdev_request *req,
+ rtems_bdbuf_buffer **bd_ptr)
+{
+ rtems_bdbuf_buffer *bd = NULL;
+ rtems_blkdev_bnum media_block_end = dd->start + dd->size;
+ rtems_blkdev_bnum media_block_count = dd->block_size / dd->media_block_size;
+ dev_t dev = dd->dev;
+ uint32_t block_size = dd->block_size;
+ uint32_t transfer_index = 1;
+ uint32_t transfer_count = bdbuf_config.max_read_ahead_blocks + 1;
+
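+  /*
+   * For example, with max_read_ahead_blocks set to 1 a read fetches the
+   * requested block plus at most one read ahead block, clipped at the end
+   * of the disk.
+   */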
+ if (media_block_end - media_block < transfer_count)
+ transfer_count = media_block_end - media_block;
+
+ req->req = RTEMS_BLKDEV_REQ_READ;
+ req->req_done = rtems_bdbuf_transfer_done;
+ req->done_arg = req;
+ req->io_task = rtems_task_self ();
+ req->status = RTEMS_RESOURCE_IN_USE;
+ req->bufnum = 0;
+
+ bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);
+
+ *bd_ptr = bd;
+
+ req->bufs [0].user = bd;
+ req->bufs [0].block = media_block;
+ req->bufs [0].length = block_size;
+ req->bufs [0].buffer = bd->buffer;
+
+ if (rtems_bdbuf_tracer)
+ rtems_bdbuf_show_users ("read", bd);
+
+ switch (bd->state)
+ {
+ case RTEMS_BDBUF_STATE_CACHED:
+ case RTEMS_BDBUF_STATE_MODIFIED:
+ return;
+ case RTEMS_BDBUF_STATE_EMPTY:
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
+ break;
+ default:
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_1);
+ break;
+ }
+
+ while (transfer_index < transfer_count)
+ {
+ media_block += media_block_count;
+
+ bd = rtems_bdbuf_get_buffer_for_read_ahead (dev, media_block,
+ bds_per_group);
+
+ if (bd == NULL)
+ break;
+
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
+
+ req->bufs [transfer_index].user = bd;
+ req->bufs [transfer_index].block = media_block;
+ req->bufs [transfer_index].length = block_size;
+ req->bufs [transfer_index].buffer = bd->buffer;
+
+ if (rtems_bdbuf_tracer)
+ rtems_bdbuf_show_users ("read-ahead", bd);
+
+ ++transfer_index;
+ }
+
+ req->bufnum = transfer_index;
+}
+
+static rtems_status_code
+rtems_bdbuf_execute_transfer_request (const rtems_disk_device *dd,
+ rtems_blkdev_request *req,
+ bool cache_locked)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+ int result = 0;
+ uint32_t transfer_index = 0;
+ bool wake_transfer_waiters = false;
+ bool wake_buffer_waiters = false;
+
+ if (cache_locked)
+ rtems_bdbuf_unlock_cache ();
+
+ result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);
+
+ if (result == 0)
+ {
+ rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
+ sc = req->status;
+ }
+ else
+ sc = RTEMS_IO_ERROR;
+
+ rtems_bdbuf_lock_cache ();
+
+ for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
+ {
+ rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
+ bool waiters = bd->waiters;
+
+ if (waiters)
+ wake_transfer_waiters = true;
+ else
+ wake_buffer_waiters = true;
+
+ rtems_bdbuf_group_release (bd);
+
+ if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
+ rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
+ else
+ rtems_bdbuf_discard_buffer (bd);
+
+ if (rtems_bdbuf_tracer)
+ rtems_bdbuf_show_users ("transfer", bd);
+ }
+
+ if (wake_transfer_waiters)
+ rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
+
+ if (wake_buffer_waiters)
+ rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
+
+ if (!cache_locked)
+ rtems_bdbuf_unlock_cache ();
+
+ if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
+ return sc;
+ else
+ return RTEMS_IO_ERROR;
+}
+
+rtems_status_code
+rtems_bdbuf_read (dev_t dev,
+ rtems_blkdev_bnum block,
+ rtems_bdbuf_buffer **bd_ptr)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+ rtems_disk_device *dd = NULL;
+ rtems_blkdev_request *req = NULL;
+ rtems_bdbuf_buffer *bd = NULL;
+ rtems_blkdev_bnum media_block = 0;
+ size_t bds_per_group = 0;
+
+ sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
+ if (sc != RTEMS_SUCCESSFUL)
+ return sc;
+
+ /*
+ * TODO: This type of request structure is wrong and should be removed.
+ */
+#define bdbuf_alloc(size) __builtin_alloca (size)
+
+ req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
+ sizeof (rtems_blkdev_sg_buffer) *
+ (bdbuf_config.max_read_ahead_blocks + 1));
+
+ if (rtems_bdbuf_tracer)
+ printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
+ media_block + dd->start, block, (unsigned) dev);
+
+ rtems_bdbuf_lock_cache ();
+ rtems_bdbuf_create_read_request (dd, media_block, bds_per_group, req, &bd);
+
+ if (req->bufnum > 0)
+ {
+ sc = rtems_bdbuf_execute_transfer_request (dd, req, true);
+ if (sc == RTEMS_SUCCESSFUL)
+ {
+ rtems_chain_extract (&bd->link);
+ rtems_bdbuf_group_obtain (bd);
+ }
+ }
+
+ if (sc == RTEMS_SUCCESSFUL)
+ {
+ switch (bd->state)
+ {
+ case RTEMS_BDBUF_STATE_CACHED:
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
+ break;
+ case RTEMS_BDBUF_STATE_MODIFIED:
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
+ break;
+ default:
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
+ break;
+ }
+
+ if (rtems_bdbuf_tracer)
+ {
+ rtems_bdbuf_show_users ("read", bd);
+ rtems_bdbuf_show_usage ();
+ }
+
+ *bd_ptr = bd;
+ }
+ else
+ *bd_ptr = NULL;
+
+ rtems_bdbuf_unlock_cache ();
+ rtems_bdbuf_release_disk (dd);
+
+ return sc;
+}
+
+static rtems_status_code
+rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
+{
+ if (!bdbuf_cache.initialised)
+ return RTEMS_NOT_CONFIGURED;
+ if (bd == NULL)
+ return RTEMS_INVALID_ADDRESS;
+ if (rtems_bdbuf_tracer)
+ {
+ printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
+ rtems_bdbuf_show_users (kind, bd);
+ }
+ rtems_bdbuf_lock_cache();
+
+ return RTEMS_SUCCESSFUL;
+}
+
+rtems_status_code
+rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+
+ sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
+ if (sc != RTEMS_SUCCESSFUL)
+ return sc;
+
+ switch (bd->state)
+ {
+ case RTEMS_BDBUF_STATE_ACCESS_CACHED:
+ rtems_bdbuf_add_to_lru_list_after_access (bd);
+ break;
+ case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
+ case RTEMS_BDBUF_STATE_ACCESS_PURGED:
+ rtems_bdbuf_discard_buffer_after_access (bd);
+ break;
+ case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
+ rtems_bdbuf_add_to_modified_list_after_access (bd);
+ break;
+ default:
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
+ break;
+ }
+
+ if (rtems_bdbuf_tracer)
+ rtems_bdbuf_show_usage ();
+
+ rtems_bdbuf_unlock_cache ();
+
+ return RTEMS_SUCCESSFUL;
+}
+
+rtems_status_code
+rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+
+ sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
+ if (sc != RTEMS_SUCCESSFUL)
+ return sc;
+
+ switch (bd->state)
+ {
+ case RTEMS_BDBUF_STATE_ACCESS_CACHED:
+ case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
+ case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
+ rtems_bdbuf_add_to_modified_list_after_access (bd);
+ break;
+ case RTEMS_BDBUF_STATE_ACCESS_PURGED:
+ rtems_bdbuf_discard_buffer_after_access (bd);
+ break;
+ default:
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
+ break;
+ }
+
+ if (rtems_bdbuf_tracer)
+ rtems_bdbuf_show_usage ();
+
+ rtems_bdbuf_unlock_cache ();
+
+ return RTEMS_SUCCESSFUL;
+}
+
+rtems_status_code
+rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+
+ sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
+ if (sc != RTEMS_SUCCESSFUL)
+ return sc;
+
+ switch (bd->state)
+ {
+ case RTEMS_BDBUF_STATE_ACCESS_CACHED:
+ case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
+ case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
+ rtems_bdbuf_sync_after_access (bd);
+ break;
+ case RTEMS_BDBUF_STATE_ACCESS_PURGED:
+ rtems_bdbuf_discard_buffer_after_access (bd);
+ break;
+ default:
+ rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
+ break;
+ }
+
+ if (rtems_bdbuf_tracer)
+ rtems_bdbuf_show_usage ();
+
+ rtems_bdbuf_unlock_cache ();
+
+ return RTEMS_SUCCESSFUL;
+}
+
+rtems_status_code
+rtems_bdbuf_syncdev (dev_t dev)
+{
+ rtems_status_code sc = RTEMS_SUCCESSFUL;
+ rtems_disk_device *dd = NULL;
+
+ if (rtems_bdbuf_tracer)
+ printf ("bdbuf:syncdev: %08x\n", (unsigned) dev);
+
+ sc = rtems_bdbuf_obtain_disk (dev, 0, &dd, NULL, NULL);
+ if (sc != RTEMS_SUCCESSFUL)
+ return sc;
+
+ /*
+ * Take the sync lock before locking the cache. Once we have the sync lock we
+ * can lock the cache. If another thread has the sync lock it will cause this
+ * thread to block until it owns the sync lock then it can own the cache. The
+ * sync lock can only be obtained with the cache unlocked.
+ */
+ rtems_bdbuf_lock_sync ();
+ rtems_bdbuf_lock_cache ();
+
+ /*
+ * Set the cache to have a sync active for a specific device and let the swap
+ * out task know the id of the requester to wake when done.
+ *
+ * The swap out task will negate the sync active flag when no more buffers
+ * for the device are held on the "modified for sync" queues.
+ */
+ bdbuf_cache.sync_active = true;
+ bdbuf_cache.sync_requester = rtems_task_self ();
+ bdbuf_cache.sync_device = dev;
+
+ rtems_bdbuf_wake_swapper ();
+ rtems_bdbuf_unlock_cache ();
+ rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
+ rtems_bdbuf_unlock_sync ();
+ rtems_bdbuf_release_disk (dd);
+
+ return RTEMS_SUCCESSFUL;
+}
+
+static int
+rtems_bdbuf_null_disk_ioctl (rtems_disk_device *dd, uint32_t req, void *arg)
+{
+ return -1;
+}
+
+/**
+ * Swapout transfer to the driver. The driver will break this I/O into groups
+ * of consecutive write requests if multiple consecutive buffers are required
+ * by the driver. The cache is not locked.
+ *
+ * @param transfer The transfer transaction.
+ */
+static void
+rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
+{
+ rtems_chain_node *node;
+ static rtems_disk_device null_disk = {
+ .phys_dev = &null_disk,
+ .capabilities = 0,
+ .ioctl = rtems_bdbuf_null_disk_ioctl
+ };
+
+ if (rtems_bdbuf_tracer)
+ printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dev);
+
+ /*
+ * If there are buffers to transfer to the media transfer them.
+ */
+ if (!rtems_chain_is_empty (&transfer->bds))
+ {
+ /*
+ * The last block number used when the driver only supports
+ * continuous blocks in a single request.
+ */
+ uint32_t last_block = 0;
+
+ /*
+ * Number of buffers per bd. This is used to detect the next
+ * block.
+ */
+ uint32_t bufs_per_bd = 0;
+
+ /*
+ * Obtain the disk device. The cache's mutex has been released to avoid a
+ * dead lock.
+ */
+ rtems_disk_device *dd = rtems_disk_obtain (transfer->dev);
+
+ if (dd == NULL)
+ dd = &null_disk;
+
+ bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
+
+ /*
+     * Take as many buffers as configured and pass them to the driver. Note,
+     * the driver API takes an array of buffers; if it took a chain we could
+     * have passed the list directly. If the driver API is updated it should
+     * be possible to make this change with little effect in this code. The
+     * array that is passed is broken in design and should be removed.
+     * Merging members of a struct into the first member is trouble waiting
+     * to happen.
+ */
+ transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
+ transfer->write_req->bufnum = 0;
+
+ while ((node = rtems_chain_get(&transfer->bds)) != NULL)
+ {
+ rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
+ bool write = false;
+
+ /*
+       * If the device only accepts sequential buffers and this is not the
+       * first buffer (the first is always sequential) and the buffer is not
+       * sequential, then put the buffer back on the transfer chain and write
+       * the committed buffers.
+ */
+
+ if (rtems_bdbuf_tracer)
+ printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
+ bd->block, transfer->write_req->bufnum,
+ dd->phys_dev->capabilities &
+                  RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULTI" : "SCAT");
+
+ if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
+ transfer->write_req->bufnum &&
+ (bd->block != (last_block + bufs_per_bd)))
+ {
+ rtems_chain_prepend (&transfer->bds, &bd->link);
+ write = true;
+ }
+ else
+ {
+ rtems_blkdev_sg_buffer* buf;
+ buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
+ transfer->write_req->bufnum++;
+ buf->user = bd;
+ buf->block = bd->block;
+ buf->length = dd->block_size;
+ buf->buffer = bd->buffer;
+ last_block = bd->block;
+ }
+
+ /*
+ * Perform the transfer if there are no more buffers, or the transfer
+ * size has reached the configured max. value.
+ */
+
+ if (rtems_chain_is_empty (&transfer->bds) ||
+ (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
+ write = true;
+
+ if (write)
+ {
+ rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false);
+
+ transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
+ transfer->write_req->bufnum = 0;
+ }
+ }
+
+ if (dd != &null_disk)
+ {
+ /*
+       * If sync'ing and the device is capable of handling a sync IO control
+       * call, perform the call.
+ */
+ if (transfer->syncing &&
+ (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
+ {
+ /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
+ /* How should the error be handled ? */
+ }
+
+ rtems_disk_release (dd);
+ }
+ }
+}
+
+/**
+ * Process the modified list of buffers. There is a sync or modified list that
+ * needs to be handled so we have a common function to do the work.
+ *
+ * @param dev The device to handle. If BDBUF_INVALID_DEV no device is selected
+ * so select the device of the first buffer to be written to disk.
+ * @param chain The modified chain to process.
+ * @param transfer The chain to append buffers to be written to.
+ * @param sync_active If true this is a sync operation so expire all timers.
+ * @param update_timers If true update the timers.
+ * @param timer_delta If update_timers is true update the timers by this
+ * amount.
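+ *
+ * For example, with update_timers true and a timer_delta of 250, a buffer
+ * whose hold timer starts at 1000 becomes eligible for transfer after four
+ * timer updates.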
+ */
+static void
+rtems_bdbuf_swapout_modified_processing (dev_t* dev,
+ rtems_chain_control* chain,
+ rtems_chain_control* transfer,
+ bool sync_active,
+ bool update_timers,
+ uint32_t timer_delta)
+{
+ if (!rtems_chain_is_empty (chain))
+ {
+ rtems_chain_node* node = rtems_chain_head (chain);
+ bool sync_all;
+
+ node = node->next;
+
+ /*
+ * A sync active with no valid dev means sync all.
+ */
+ if (sync_active && (*dev == BDBUF_INVALID_DEV))
+ sync_all = true;
+ else
+ sync_all = false;
+
+ while (!rtems_chain_is_tail (chain, node))
+ {
+ rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
+
+ /*
+       * Check if the buffer's hold timer has reached 0. If a sync is active
+       * or someone is waiting for a buffer to be written, force all the
+       * timers to 0.
+       *
+       * @note Lots of sync requests will skew this timer. It should be based
+       * on TOD to be accurate. Does it matter?
+ */
+ if (sync_all || (sync_active && (*dev == bd->dev))
+ || rtems_bdbuf_has_buffer_waiters ())
+ bd->hold_timer = 0;
+
+ if (bd->hold_timer)
+ {
+ if (update_timers)
+ {
+ if (bd->hold_timer > timer_delta)
+ bd->hold_timer -= timer_delta;
+ else
+ bd->hold_timer = 0;
+ }
+
+ if (bd->hold_timer)
+ {
+ node = node->next;
+ continue;
+ }
+ }
+
+ /*
+       * This assumes a dev_t can be set to BDBUF_INVALID_DEV, which is just
+       * an assumption. We cannot use the transfer list being empty as the
+       * test because a sync dev call sets the dev to use.
+ */
+ if (*dev == BDBUF_INVALID_DEV)
+ *dev = bd->dev;
+
+ if (bd->dev == *dev)
+ {
+ rtems_chain_node* next_node = node->next;
+ rtems_chain_node* tnode = rtems_chain_tail (transfer);
+
+ /*
+ * The blocks on the transfer list are sorted in block order. This
+ * means multi-block transfers for drivers that require consecutive
+ * blocks perform better with sorted blocks and for real disks it may
+ * help lower head movement.
+ */
+
+ rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
+
+ rtems_chain_extract (node);
+
+ tnode = tnode->previous;
+
+ while (node && !rtems_chain_is_head (transfer, tnode))
+ {
+ rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
+
+ if (bd->block > tbd->block)
+ {
+ rtems_chain_insert (tnode, node);
+ node = NULL;
+ }
+ else
+ tnode = tnode->previous;
+ }
+
+ if (node)
+ rtems_chain_prepend (transfer, node);
+
+ node = next_node;
+ }
+ else
+ {
+ node = node->next;
+ }
+ }
+ }
+}
+
+/**
+ * Process the cache's modified buffers. Check the sync list first then the
+ * modified list, extracting the buffers suitable to be written to disk. We
+ * handle one device at a time. The task level loop will repeat this
+ * operation while there are buffers to be written. If the transfer fails
+ * place the buffers back on the modified list and try again later. The
+ * cache is unlocked while the buffers are being written to disk.
+ *
+ * @param timer_delta If update_timers is true update the timers by this
+ * amount.
+ * @param update_timers If true update the timers.
+ * @param transfer The transfer transaction data.
+ *
+ * @retval true Buffers were written to disk so scan again.
+ * @retval false No buffers were written to disk.
+ */
+static bool
+rtems_bdbuf_swapout_processing (unsigned long timer_delta,
+ bool update_timers,
+ rtems_bdbuf_swapout_transfer* transfer)
+{
+ rtems_bdbuf_swapout_worker* worker;
+ bool transfered_buffers = false;
+
+ rtems_bdbuf_lock_cache ();
+
+ /*
+   * If a sync is active do not use a worker because the current code does
+   * not clean up after it. We need to know the buffers have been written
+   * when syncing to release the sync lock and currently worker threads do
+   * not return to here. We do not know the worker is the last in a sequence
+   * of sync writes until after we have it running, so we cannot tell it to
+   * release the lock. The simplest solution is to have the main swap out
+   * task perform all sync operations.
+ */
+ if (bdbuf_cache.sync_active)
+ worker = NULL;
+ else
+ {
+ worker = (rtems_bdbuf_swapout_worker*)
+ rtems_chain_get (&bdbuf_cache.swapout_workers);
+ if (worker)
+ transfer = &worker->transfer;
+ }
+
+ rtems_chain_initialize_empty (&transfer->bds);
+ transfer->dev = BDBUF_INVALID_DEV;
+ transfer->syncing = bdbuf_cache.sync_active;
+
+ /*
+   * When the sync is for a device limit the sync to that device. If the
+   * sync is for a buffer handle, process the devices in the order they
+   * appear on the sync list. In that case the dev is BDBUF_INVALID_DEV.
+ */
+ if (bdbuf_cache.sync_active)
+ transfer->dev = bdbuf_cache.sync_device;
+
+ /*
+ * If we have any buffers in the sync queue move them to the modified
+ * list. The first sync buffer will select the device we use.
+ */
+ rtems_bdbuf_swapout_modified_processing (&transfer->dev,
+ &bdbuf_cache.sync,
+ &transfer->bds,
+ true, false,
+ timer_delta);
+
+ /*
+ * Process the cache's modified list.
+ */
+ rtems_bdbuf_swapout_modified_processing (&transfer->dev,
+ &bdbuf_cache.modified,
+ &transfer->bds,
+ bdbuf_cache.sync_active,
+ update_timers,
+ timer_delta);
+
+ /*
+ * We have all the buffers that have been modified for this device so the
+ * cache can be unlocked because the state of each buffer has been set to
+ * TRANSFER.
+ */
+ rtems_bdbuf_unlock_cache ();
+
+ /*
+   * If there are buffers to transfer to the media, transfer them.
+ */
+ if (!rtems_chain_is_empty (&transfer->bds))
+ {
+ if (worker)
+ {
+ rtems_status_code sc = rtems_event_send (worker->id,
+ RTEMS_BDBUF_SWAPOUT_SYNC);
+ if (sc != RTEMS_SUCCESSFUL)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
+ }
+ else
+ {
+ rtems_bdbuf_swapout_write (transfer);
+ }
+
+ transfered_buffers = true;
+ }
+
+ if (bdbuf_cache.sync_active && !transfered_buffers)
+ {
+ rtems_id sync_requester;
+ rtems_bdbuf_lock_cache ();
+ sync_requester = bdbuf_cache.sync_requester;
+ bdbuf_cache.sync_active = false;
+ bdbuf_cache.sync_requester = 0;
+ rtems_bdbuf_unlock_cache ();
+ if (sync_requester)
+ rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
+ }
+
+ return transfered_buffers;
+}
+
+/**
+ * Allocate the write request and initialise it for good measure.
+ *
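+ * The scatter/gather buffer array is placed in the same allocation
+ * immediately after the request header, for example with a max_write_blocks
+ * of 2:
+ *
+ *   [rtems_blkdev_request][sg_buffer 0][sg_buffer 1]
+ *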
+ * @return rtems_blkdev_request* The write request memory.
+ */
+static rtems_blkdev_request*
+rtems_bdbuf_swapout_writereq_alloc (void)
+{
+ /*
+ * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
+   * I am disappointed at finding code like this in RTEMS. The request should
+ * have been a rtems_chain_control. Simple, fast and less storage as the node
+ * is already part of the buffer structure.
+ */
+ rtems_blkdev_request* write_req =
+ malloc (sizeof (rtems_blkdev_request) +
+ (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
+
+ if (!write_req)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
+
+ write_req->req = RTEMS_BLKDEV_REQ_WRITE;
+ write_req->req_done = rtems_bdbuf_transfer_done;
+ write_req->done_arg = write_req;
+ write_req->io_task = rtems_task_self ();
+
+ return write_req;
+}
+
+/**
+ * The swapout worker thread body.
+ *
+ * @param arg A pointer to the worker thread's private data.
+ * @return rtems_task Not used.
+ */
+static rtems_task
+rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
+{
+ rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
+
+ while (worker->enabled)
+ {
+ rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
+
+ rtems_bdbuf_swapout_write (&worker->transfer);
+
+ rtems_bdbuf_lock_cache ();
+
+ rtems_chain_initialize_empty (&worker->transfer.bds);
+ worker->transfer.dev = BDBUF_INVALID_DEV;
+
+ rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
+
+ rtems_bdbuf_unlock_cache ();
+ }
+
+ free (worker->transfer.write_req);
+ free (worker);
+
+ rtems_task_delete (RTEMS_SELF);
+}
+
+/**
+ * Open the swapout worker threads.
+ */
+static void
+rtems_bdbuf_swapout_workers_open (void)
+{
+ rtems_status_code sc;
+ size_t w;
+
+ rtems_bdbuf_lock_cache ();
+
+ for (w = 0; w < bdbuf_config.swapout_workers; w++)
+ {
+ rtems_bdbuf_swapout_worker* worker;
+
+ worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
+ if (!worker)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
+
+ rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
+ worker->enabled = true;
+ worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
+
+ rtems_chain_initialize_empty (&worker->transfer.bds);
+ worker->transfer.dev = BDBUF_INVALID_DEV;
+
+ sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
+ (bdbuf_config.swapout_priority ?
+ bdbuf_config.swapout_priority :
+ RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
+ SWAPOUT_TASK_STACK_SIZE,
+ RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
+ RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
+ &worker->id);
+ if (sc != RTEMS_SUCCESSFUL)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
+
+ sc = rtems_task_start (worker->id,
+ rtems_bdbuf_swapout_worker_task,
+ (rtems_task_argument) worker);
+ if (sc != RTEMS_SUCCESSFUL)
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
+ }
+
+ rtems_bdbuf_unlock_cache ();
+}
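+
+/*
+ * A configuration sketch (values are illustrative only): with
+ * bdbuf_config.swapout_workers set to 2 the tasks BDoa and BDob are created,
+ * and a bdbuf_config.swapout_priority of 0 selects
+ * RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT.
+ */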
+
+/**
+ * Close the swapout worker threads.
+ */
+static void
+rtems_bdbuf_swapout_workers_close (void)
+{
+ rtems_chain_node* node;
+
+ rtems_bdbuf_lock_cache ();
+
+ node = rtems_chain_first (&bdbuf_cache.swapout_workers);
+ while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
+ {
+ rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
+ worker->enabled = false;
+ rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
+ node = rtems_chain_next (node);
+ }
+
+ rtems_bdbuf_unlock_cache ();
+}
+
+/**
+ * Body of the task which takes care of flushing modified buffers to disk.
+ *
+ * @param arg A pointer to the global cache data. Use the global variable and
+ * not this.
+ * @return rtems_task Not used.
+ */
+static rtems_task
+rtems_bdbuf_swapout_task (rtems_task_argument arg)
+{
+ rtems_bdbuf_swapout_transfer transfer;
+ uint32_t period_in_ticks;
+  const uint32_t period_in_msecs = bdbuf_config.swapout_period;
+ uint32_t timer_delta;
+
+ transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
+ rtems_chain_initialize_empty (&transfer.bds);
+ transfer.dev = BDBUF_INVALID_DEV;
+ transfer.syncing = false;
+
+ /*
+ * Localise the period.
+ */
+ period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
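+
+  /*
+   * For example, assuming a swapout_period of 250 milliseconds and a 10
+   * millisecond clock tick, this works out to 25 ticks.
+   */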
+
+ /*
+ * This is temporary. Needs to be changed to use the real time clock.
+ */
+ timer_delta = period_in_msecs;
+
+ /*
+ * Create the worker threads.
+ */
+ rtems_bdbuf_swapout_workers_open ();
+
+ while (bdbuf_cache.swapout_enabled)
+ {
+ rtems_event_set out;
+ rtems_status_code sc;
+
+ /*
+ * Only update the timers once in the processing cycle.
+ */
+ bool update_timers = true;
+
+ /*
+ * If we write buffers to any disk perform a check again. We only write a
+ * single device at a time and the cache may have more than one device's
+ * buffers modified waiting to be written.
+ */
+ bool transfered_buffers;
+
+ do
+ {
+ transfered_buffers = false;
+
+ /*
+       * Extract all the buffers we find for a specific device. The device is
+ * the first one we find on a modified list. Process the sync queue of
+ * buffers first.
+ */
+ if (rtems_bdbuf_swapout_processing (timer_delta,
+ update_timers,
+ &transfer))
+ {
+ transfered_buffers = true;
+ }
+
+ /*
+ * Only update the timers once.
+ */
+ update_timers = false;
+ }
+ while (transfered_buffers);
+
+ sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
+ RTEMS_EVENT_ALL | RTEMS_WAIT,
+ period_in_ticks,
+ &out);
+
+ if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
+ rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
+ }
+
+ rtems_bdbuf_swapout_workers_close ();
+
+ free (transfer.write_req);
+
+ rtems_task_delete (RTEMS_SELF);
+}
+
+static void
+rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
+{
+ bool wake_buffer_waiters = false;
+ rtems_chain_node *node = NULL;
+
+ while ((node = rtems_chain_get (purge_list)) != NULL)
+ {
+ rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
+
+ if (bd->waiters == 0)
+ wake_buffer_waiters = true;
+
+ rtems_bdbuf_discard_buffer (bd);
+ }
+
+ if (wake_buffer_waiters)
+ rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
+}
+
+typedef bool (*rtems_bdbuf_purge_compare)(dev_t a, dev_t b);
+
+static void
+rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
+ rtems_bdbuf_purge_compare compare,
+ dev_t dev)
+{
+ rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
+ rtems_bdbuf_buffer **prev = stack;
+ rtems_bdbuf_buffer *cur = bdbuf_cache.tree;
+
+ *prev = NULL;
+
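+  /*
+   * A non-recursive walk of the AVL tree: inspect each node on the way
+   * down, descend left before right, and use the explicit stack of parent
+   * pointers to climb back up. The stack depth is bounded by
+   * RTEMS_BDBUF_AVL_MAX_HEIGHT.
+   */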
+ while (cur != NULL)
+ {
+ if ((*compare) (cur->dev, dev))
+ {
+ switch (cur->state)
+ {
+ case RTEMS_BDBUF_STATE_FREE:
+ case RTEMS_BDBUF_STATE_EMPTY:
+ case RTEMS_BDBUF_STATE_ACCESS_PURGED:
+ case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
+ break;
+ case RTEMS_BDBUF_STATE_SYNC:
+ rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
+ /* Fall through */
+ case RTEMS_BDBUF_STATE_MODIFIED:
+ rtems_bdbuf_group_release (cur);
+ /* Fall through */
+ case RTEMS_BDBUF_STATE_CACHED:
+ rtems_chain_extract (&cur->link);
+ rtems_chain_append (purge_list, &cur->link);
+ break;
+ case RTEMS_BDBUF_STATE_TRANSFER:
+ rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
+ break;
+ case RTEMS_BDBUF_STATE_ACCESS_CACHED:
+ case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
+ case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
+ rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
+ break;
+ default:
+ rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_STATE_11);
+ }
+ }
+
+ if (cur->avl.left != NULL)
+ {
+ /* Left */
+ ++prev;
+ *prev = cur;
+ cur = cur->avl.left;
+ }
+ else if (cur->avl.right != NULL)
+ {
+ /* Right */
+ ++prev;
+ *prev = cur;
+ cur = cur->avl.right;
+ }
+ else
+ {
+      /*
+       * Climb while coming from a right subtree or while the parent has no
+       * right subtree, otherwise an ancestor's right subtree is skipped.
+       */
+      while (*prev != NULL
+             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
+ {
+ /* Up */
+ cur = *prev;
+ --prev;
+ }
+ if (*prev != NULL)
+ /* Right */
+ cur = (*prev)->avl.right;
+ else
+ /* Finished */
+ cur = NULL;
+ }
+ }
+}
+
+static void
+rtems_bdbuf_purge (rtems_bdbuf_purge_compare compare, dev_t dev)
+{
+ rtems_chain_control purge_list;
+
+ rtems_chain_initialize_empty (&purge_list);
+ rtems_bdbuf_lock_cache ();
+ rtems_bdbuf_gather_for_purge (&purge_list, compare, dev);
+ rtems_bdbuf_purge_list (&purge_list);
+ rtems_bdbuf_unlock_cache ();
+}
+
+static bool
+rtems_bdbuf_purge_compare_dev (dev_t a, dev_t b)
+{
+ return a == b;
+}
+
+void
+rtems_bdbuf_purge_dev (dev_t dev)
+{
+ rtems_bdbuf_purge (rtems_bdbuf_purge_compare_dev, dev);
+}
+
+static bool
+rtems_bdbuf_purge_compare_major (dev_t a, dev_t b)
+{
+ return rtems_filesystem_dev_major_t (a) == rtems_filesystem_dev_major_t (b);
+}
+
+void
+rtems_bdbuf_purge_major (rtems_device_major_number major)
+{
+ dev_t dev = rtems_filesystem_make_dev_t (major, 0);
+
+ rtems_bdbuf_purge (rtems_bdbuf_purge_compare_major, dev);
+}
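+
+/*
+ * A usage sketch (major and minor are hypothetical values): purging
+ * discards every buffer held for a device, for example before the media is
+ * removed:
+ *
+ *   dev_t dev = rtems_filesystem_make_dev_t (major, minor);
+ *   rtems_bdbuf_purge_dev (dev);
+ */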