diff options
Diffstat (limited to 'cpukit/libblock/src/bdbuf.c')
-rw-r--r-- | cpukit/libblock/src/bdbuf.c | 2989 |
1 files changed, 2989 insertions, 0 deletions
diff --git a/cpukit/libblock/src/bdbuf.c b/cpukit/libblock/src/bdbuf.c new file mode 100644 index 0000000000..841c03bdbe --- /dev/null +++ b/cpukit/libblock/src/bdbuf.c @@ -0,0 +1,2989 @@ +/** + * @file + * + * @ingroup rtems_bdbuf + * + * Block device buffer management. + */ + +/* + * Disk I/O buffering + * Buffer managment + * + * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia + * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru> + * Victor V. Vengerov <vvv@oktet.ru> + * Alexander Kukuta <kam@oktet.ru> + * + * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org> + * Rewritten to remove score mutex access. Fixes many performance + * issues. + * + * Copyright (c) 2009 embedded brains GmbH. + * + * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp + */ + +/** + * Set to 1 to enable debug tracing. + */ +#define RTEMS_BDBUF_TRACE 0 + +#if HAVE_CONFIG_H +#include "config.h" +#endif +#include <limits.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> + +#include <rtems.h> +#include <rtems/error.h> +#include <rtems/malloc.h> + +#include "rtems/bdbuf.h" + +#define BDBUF_INVALID_DEV ((dev_t) -1) + +/* + * Simpler label for this file. + */ +#define bdbuf_config rtems_bdbuf_configuration + +/** + * A swapout transfer transaction data. This data is passed to a worked thread + * to handle the write phase of the transfer. + */ +typedef struct rtems_bdbuf_swapout_transfer +{ + rtems_chain_control bds; /**< The transfer list of BDs. */ + dev_t dev; /**< The device the transfer is for. */ + bool syncing; /**< The data is a sync'ing. */ + rtems_blkdev_request* write_req; /**< The write request array. */ + uint32_t bufs_per_bd; /**< Number of buffers per bd. */ +} rtems_bdbuf_swapout_transfer; + +/** + * Swapout worker thread. These are available to take processing from the + * main swapout thread and handle the I/O operation. 
+ */ +typedef struct rtems_bdbuf_swapout_worker +{ + rtems_chain_node link; /**< The threads sit on a chain when + * idle. */ + rtems_id id; /**< The id of the task so we can wake + * it. */ + volatile bool enabled; /**< The worker is enabled. */ + rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this + * thread. */ +} rtems_bdbuf_swapout_worker; + +/** + * Buffer waiters synchronization. + */ +typedef struct rtems_bdbuf_waiters { + volatile unsigned count; + rtems_id sema; +} rtems_bdbuf_waiters; + +/** + * The BD buffer cache. + */ +typedef struct rtems_bdbuf_cache +{ + rtems_id swapout; /**< Swapout task ID */ + volatile bool swapout_enabled; /**< Swapout is only running if + * enabled. Set to false to kill the + * swap out task. It deletes itself. */ + rtems_chain_control swapout_workers; /**< The work threads for the swapout + * task. */ + + rtems_bdbuf_buffer* bds; /**< Pointer to table of buffer + * descriptors. */ + void* buffers; /**< The buffer's memory. */ + size_t buffer_min_count; /**< Number of minimum size buffers + * that fit the buffer memory. */ + size_t max_bds_per_group; /**< The number of BDs of minimum + * buffer size that fit in a group. */ + uint32_t flags; /**< Configuration flags. */ + + rtems_id lock; /**< The cache lock. It locks all + * cache data, BD and lists. */ + rtems_id sync_lock; /**< Sync calls block writes. */ + volatile bool sync_active; /**< True if a sync is active. */ + volatile rtems_id sync_requester; /**< The sync requester. */ + volatile dev_t sync_device; /**< The device to sync and + * BDBUF_INVALID_DEV not a device + * sync. */ + + rtems_bdbuf_buffer* tree; /**< Buffer descriptor lookup AVL tree + * root. There is only one. 
*/ + rtems_chain_control lru; /**< Least recently used list */ + rtems_chain_control modified; /**< Modified buffers list */ + rtems_chain_control sync; /**< Buffers to sync list */ + + rtems_bdbuf_waiters access_waiters; /**< Wait for a buffer in + * ACCESS_CACHED, ACCESS_MODIFIED or + * ACCESS_EMPTY + * state. */ + rtems_bdbuf_waiters transfer_waiters; /**< Wait for a buffer in TRANSFER + * state. */ + rtems_bdbuf_waiters buffer_waiters; /**< Wait for a buffer and no one is + * available. */ + + size_t group_count; /**< The number of groups. */ + rtems_bdbuf_group* groups; /**< The groups. */ + + bool initialised; /**< Initialised state. */ +} rtems_bdbuf_cache; + +/** + * Fatal errors + */ +#define RTEMS_BLKDEV_FATAL_ERROR(n) \ + (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF)) + +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_11 RTEMS_BLKDEV_FATAL_ERROR(1) +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4 RTEMS_BLKDEV_FATAL_ERROR(2) +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5 RTEMS_BLKDEV_FATAL_ERROR(3) +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6 RTEMS_BLKDEV_FATAL_ERROR(4) +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7 RTEMS_BLKDEV_FATAL_ERROR(5) +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8 RTEMS_BLKDEV_FATAL_ERROR(6) +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9 RTEMS_BLKDEV_FATAL_ERROR(7) +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10 RTEMS_BLKDEV_FATAL_ERROR(8) +#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM RTEMS_BLKDEV_FATAL_ERROR(9) +#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT RTEMS_BLKDEV_FATAL_ERROR(10) +#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK RTEMS_BLKDEV_FATAL_ERROR(11) +#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK RTEMS_BLKDEV_FATAL_ERROR(12) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK RTEMS_BLKDEV_FATAL_ERROR(13) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK RTEMS_BLKDEV_FATAL_ERROR(14) +#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS RTEMS_BLKDEV_FATAL_ERROR(15) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2 RTEMS_BLKDEV_FATAL_ERROR(16) +#define 
RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST RTEMS_BLKDEV_FATAL_ERROR(17) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE RTEMS_BLKDEV_FATAL_ERROR(19) +#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE RTEMS_BLKDEV_FATAL_ERROR(20) +#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM RTEMS_BLKDEV_FATAL_ERROR(21) +#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE RTEMS_BLKDEV_FATAL_ERROR(22) +#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START RTEMS_BLKDEV_FATAL_ERROR(23) +#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE RTEMS_BLKDEV_FATAL_ERROR(24) +#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS RTEMS_BLKDEV_FATAL_ERROR(25) +#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT RTEMS_BLKDEV_FATAL_ERROR(26) +#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE RTEMS_BLKDEV_FATAL_ERROR(27) +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0 RTEMS_BLKDEV_FATAL_ERROR(28) +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1 RTEMS_BLKDEV_FATAL_ERROR(29) +#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2 RTEMS_BLKDEV_FATAL_ERROR(30) +#define RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL RTEMS_BLKDEV_FATAL_ERROR(31) + +/** + * The events used in this code. These should be system events rather than + * application events. + */ +#define RTEMS_BDBUF_TRANSFER_SYNC RTEMS_EVENT_1 +#define RTEMS_BDBUF_SWAPOUT_SYNC RTEMS_EVENT_2 + +/** + * The swap out task size. Should be more than enough for most drivers with + * tracing turned on. + */ +#define SWAPOUT_TASK_STACK_SIZE (8 * 1024) + +/** + * Lock semaphore attributes. This is used for locking type mutexes. + * + * @warning Priority inheritance is on. + */ +#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \ + (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \ + RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL) + +/** + * Waiter semaphore attributes. + * + * @warning Do not configure as inherit priority. 
If a driver is in the driver + * initialisation table this locked semaphore will have the IDLE task + * as the holder and a blocking task will raise the priority of the + * IDLE task which can cause unsual side effects. + */ +#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \ + (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \ + RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL) + +/** + * Waiter timeout. Set to non-zero to find some info on a waiter that is + * waiting too long. + */ +#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT +#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT) +#define RTEMS_BDBUF_WAIT_TIMEOUT \ + (TOD_MICROSECONDS_TO_TICKS (20000000)) +#endif + +/* + * The swap out task. + */ +static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg); + +/** + * The Buffer Descriptor cache. + */ +static rtems_bdbuf_cache bdbuf_cache; + +#if RTEMS_BDBUF_TRACE +/** + * If true output the trace message. + */ +bool rtems_bdbuf_tracer; + +/** + * Return the number of items on the list. + * + * @param list The chain control. + * @return uint32_t The number of items on the list. + */ +uint32_t +rtems_bdbuf_list_count (rtems_chain_control* list) +{ + rtems_chain_node* node = rtems_chain_first (list); + uint32_t count = 0; + while (!rtems_chain_is_tail (list, node)) + { + count++; + node = rtems_chain_next (node); + } + return count; +} + +/** + * Show the usage for the bdbuf cache. 
+ */ +void +rtems_bdbuf_show_usage (void) +{ + uint32_t group; + uint32_t total = 0; + uint32_t val; + + for (group = 0; group < bdbuf_cache.group_count; group++) + total += bdbuf_cache.groups[group].users; + printf ("bdbuf:group users=%lu", total); + val = rtems_bdbuf_list_count (&bdbuf_cache.lru); + printf (", lru=%lu", val); + total = val; + val = rtems_bdbuf_list_count (&bdbuf_cache.modified); + printf (", mod=%lu", val); + total += val; + val = rtems_bdbuf_list_count (&bdbuf_cache.sync); + printf (", sync=%lu", val); + total += val; + printf (", total=%lu\n", total); +} + +/** + * Show the users for a group of a bd. + * + * @param where A label to show the context of output. + * @param bd The bd to show the users of. + */ +void +rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd) +{ + const char* states[] = + { "EM", "FR", "CH", "AC", "AM", "MD", "SY", "TR" }; + + printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n", + where, + bd->block, states[bd->state], + bd->group - bdbuf_cache.groups, + bd - bdbuf_cache.bds, + bd->group->users, + bd->group->users > 8 ? "<<<<<<<" : ""); +} +#else +#define rtems_bdbuf_tracer (0) +#define rtems_bdbuf_show_usage() ((void) 0) +#define rtems_bdbuf_show_users(_w, _b) ((void) 0) +#endif + +/** + * The default maximum height of 32 allows for AVL trees having between + * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion. You may + * change this compile-time constant as you wish. + */ +#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT +#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32) +#endif + +static void +rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error) +{ + rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error); +} + +/** + * Searches for the node with specified dev/block. 
+ * + * @param root pointer to the root node of the AVL-Tree + * @param dev device search key + * @param block block search key + * @retval NULL node with the specified dev/block is not found + * @return pointer to the node with specified dev/block + */ +static rtems_bdbuf_buffer * +rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root, + dev_t dev, + rtems_blkdev_bnum block) +{ + rtems_bdbuf_buffer* p = *root; + + while ((p != NULL) && ((p->dev != dev) || (p->block != block))) + { + if ((p->dev < dev) || ((p->dev == dev) && (p->block < block))) + { + p = p->avl.right; + } + else + { + p = p->avl.left; + } + } + + return p; +} + +/** + * Inserts the specified node to the AVl-Tree. + * + * @param root pointer to the root node of the AVL-Tree + * @param node Pointer to the node to add. + * @retval 0 The node added successfully + * @retval -1 An error occured + */ +static int +rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root, + rtems_bdbuf_buffer* node) +{ + dev_t dev = node->dev; + rtems_blkdev_bnum block = node->block; + + rtems_bdbuf_buffer* p = *root; + rtems_bdbuf_buffer* q; + rtems_bdbuf_buffer* p1; + rtems_bdbuf_buffer* p2; + rtems_bdbuf_buffer* buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT]; + rtems_bdbuf_buffer** buf_prev = buf_stack; + + bool modified = false; + + if (p == NULL) + { + *root = node; + node->avl.left = NULL; + node->avl.right = NULL; + node->avl.bal = 0; + return 0; + } + + while (p != NULL) + { + *buf_prev++ = p; + + if ((p->dev < dev) || ((p->dev == dev) && (p->block < block))) + { + p->avl.cache = 1; + q = p->avl.right; + if (q == NULL) + { + q = node; + p->avl.right = q = node; + break; + } + } + else if ((p->dev != dev) || (p->block != block)) + { + p->avl.cache = -1; + q = p->avl.left; + if (q == NULL) + { + q = node; + p->avl.left = q; + break; + } + } + else + { + return -1; + } + + p = q; + } + + q->avl.left = q->avl.right = NULL; + q->avl.bal = 0; + modified = true; + buf_prev--; + + while (modified) + { + if (p->avl.cache == -1) + { + switch 
(p->avl.bal) + { + case 1: + p->avl.bal = 0; + modified = false; + break; + + case 0: + p->avl.bal = -1; + break; + + case -1: + p1 = p->avl.left; + if (p1->avl.bal == -1) /* simple LL-turn */ + { + p->avl.left = p1->avl.right; + p1->avl.right = p; + p->avl.bal = 0; + p = p1; + } + else /* double LR-turn */ + { + p2 = p1->avl.right; + p1->avl.right = p2->avl.left; + p2->avl.left = p1; + p->avl.left = p2->avl.right; + p2->avl.right = p; + if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0; + if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0; + p = p2; + } + p->avl.bal = 0; + modified = false; + break; + + default: + break; + } + } + else + { + switch (p->avl.bal) + { + case -1: + p->avl.bal = 0; + modified = false; + break; + + case 0: + p->avl.bal = 1; + break; + + case 1: + p1 = p->avl.right; + if (p1->avl.bal == 1) /* simple RR-turn */ + { + p->avl.right = p1->avl.left; + p1->avl.left = p; + p->avl.bal = 0; + p = p1; + } + else /* double RL-turn */ + { + p2 = p1->avl.left; + p1->avl.left = p2->avl.right; + p2->avl.right = p1; + p->avl.right = p2->avl.left; + p2->avl.left = p; + if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0; + if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0; + p = p2; + } + p->avl.bal = 0; + modified = false; + break; + + default: + break; + } + } + q = p; + if (buf_prev > buf_stack) + { + p = *--buf_prev; + + if (p->avl.cache == -1) + { + p->avl.left = q; + } + else + { + p->avl.right = q; + } + } + else + { + *root = p; + break; + } + }; + + return 0; +} + + +/** + * Removes the node from the tree. 
+ * + * @param root Pointer to pointer to the root node + * @param node Pointer to the node to remove + * @retval 0 Item removed + * @retval -1 No such item found + */ +static int +rtems_bdbuf_avl_remove(rtems_bdbuf_buffer** root, + const rtems_bdbuf_buffer* node) +{ + dev_t dev = node->dev; + rtems_blkdev_bnum block = node->block; + + rtems_bdbuf_buffer* p = *root; + rtems_bdbuf_buffer* q; + rtems_bdbuf_buffer* r; + rtems_bdbuf_buffer* s; + rtems_bdbuf_buffer* p1; + rtems_bdbuf_buffer* p2; + rtems_bdbuf_buffer* buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT]; + rtems_bdbuf_buffer** buf_prev = buf_stack; + + bool modified = false; + + memset (buf_stack, 0, sizeof(buf_stack)); + + while (p != NULL) + { + *buf_prev++ = p; + + if ((p->dev < dev) || ((p->dev == dev) && (p->block < block))) + { + p->avl.cache = 1; + p = p->avl.right; + } + else if ((p->dev != dev) || (p->block != block)) + { + p->avl.cache = -1; + p = p->avl.left; + } + else + { + /* node found */ + break; + } + } + + if (p == NULL) + { + /* there is no such node */ + return -1; + } + + q = p; + + buf_prev--; + if (buf_prev > buf_stack) + { + p = *(buf_prev - 1); + } + else + { + p = NULL; + } + + /* at this moment q - is a node to delete, p is q's parent */ + if (q->avl.right == NULL) + { + r = q->avl.left; + if (r != NULL) + { + r->avl.bal = 0; + } + q = r; + } + else + { + rtems_bdbuf_buffer **t; + + r = q->avl.right; + + if (r->avl.left == NULL) + { + r->avl.left = q->avl.left; + r->avl.bal = q->avl.bal; + r->avl.cache = 1; + *buf_prev++ = q = r; + } + else + { + t = buf_prev++; + s = r; + + while (s->avl.left != NULL) + { + *buf_prev++ = r = s; + s = r->avl.left; + r->avl.cache = -1; + } + + s->avl.left = q->avl.left; + r->avl.left = s->avl.right; + s->avl.right = q->avl.right; + s->avl.bal = q->avl.bal; + s->avl.cache = 1; + + *t = q = s; + } + } + + if (p != NULL) + { + if (p->avl.cache == -1) + { + p->avl.left = q; + } + else + { + p->avl.right = q; + } + } + else + { + *root = q; + } + + modified = true; 
+ + while (modified) + { + if (buf_prev > buf_stack) + { + p = *--buf_prev; + } + else + { + break; + } + + if (p->avl.cache == -1) + { + /* rebalance left branch */ + switch (p->avl.bal) + { + case -1: + p->avl.bal = 0; + break; + case 0: + p->avl.bal = 1; + modified = false; + break; + + case +1: + p1 = p->avl.right; + + if (p1->avl.bal >= 0) /* simple RR-turn */ + { + p->avl.right = p1->avl.left; + p1->avl.left = p; + + if (p1->avl.bal == 0) + { + p1->avl.bal = -1; + modified = false; + } + else + { + p->avl.bal = 0; + p1->avl.bal = 0; + } + p = p1; + } + else /* double RL-turn */ + { + p2 = p1->avl.left; + + p1->avl.left = p2->avl.right; + p2->avl.right = p1; + p->avl.right = p2->avl.left; + p2->avl.left = p; + + if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0; + if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0; + + p = p2; + p2->avl.bal = 0; + } + break; + + default: + break; + } + } + else + { + /* rebalance right branch */ + switch (p->avl.bal) + { + case +1: + p->avl.bal = 0; + break; + + case 0: + p->avl.bal = -1; + modified = false; + break; + + case -1: + p1 = p->avl.left; + + if (p1->avl.bal <= 0) /* simple LL-turn */ + { + p->avl.left = p1->avl.right; + p1->avl.right = p; + if (p1->avl.bal == 0) + { + p1->avl.bal = 1; + modified = false; + } + else + { + p->avl.bal = 0; + p1->avl.bal = 0; + } + p = p1; + } + else /* double LR-turn */ + { + p2 = p1->avl.right; + + p1->avl.right = p2->avl.left; + p2->avl.left = p1; + p->avl.left = p2->avl.right; + p2->avl.right = p; + + if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0; + if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0; + + p = p2; + p2->avl.bal = 0; + } + break; + + default: + break; + } + } + + if (buf_prev > buf_stack) + { + q = *(buf_prev - 1); + + if (q->avl.cache == -1) + { + q->avl.left = p; + } + else + { + q->avl.right = p; + } + } + else + { + *root = p; + break; + } + + } + + return 0; +} + +static void +rtems_bdbuf_set_state (rtems_bdbuf_buffer 
*bd, rtems_bdbuf_buf_state state) +{ + bd->state = state; +} + +/** + * Change the block number for the block size to the block number for the media + * block size. We have to use 64bit maths. There is no short cut here. + * + * @param block The logical block number in the block size terms. + * @param block_size The block size. + * @param media_block_size The block size of the media. + * @return rtems_blkdev_bnum The media block number. + */ +static rtems_blkdev_bnum +rtems_bdbuf_media_block (rtems_blkdev_bnum block, + size_t block_size, + size_t media_block_size) +{ + return (rtems_blkdev_bnum) + ((((uint64_t) block) * block_size) / media_block_size); +} + +/** + * Lock the mutex. A single task can nest calls. + * + * @param lock The mutex to lock. + * @param fatal_error_code The error code if the call fails. + */ +static void +rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code) +{ + rtems_status_code sc = rtems_semaphore_obtain (lock, + RTEMS_WAIT, + RTEMS_NO_TIMEOUT); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (fatal_error_code); +} + +/** + * Unlock the mutex. + * + * @param lock The mutex to unlock. + * @param fatal_error_code The error code if the call fails. + */ +static void +rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code) +{ + rtems_status_code sc = rtems_semaphore_release (lock); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (fatal_error_code); +} + +/** + * Lock the cache. A single task can nest calls. + */ +static void +rtems_bdbuf_lock_cache (void) +{ + rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK); +} + +/** + * Unlock the cache. + */ +static void +rtems_bdbuf_unlock_cache (void) +{ + rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK); +} + +/** + * Lock the cache's sync. A single task can nest calls. 
+ */ +static void +rtems_bdbuf_lock_sync (void) +{ + rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK); +} + +/** + * Unlock the cache's sync lock. Any blocked writers are woken. + */ +static void +rtems_bdbuf_unlock_sync (void) +{ + rtems_bdbuf_unlock (bdbuf_cache.sync_lock, + RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK); +} + +static void +rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd) +{ + ++bd->group->users; +} + +static void +rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd) +{ + --bd->group->users; +} + +static rtems_mode +rtems_bdbuf_disable_preemption (void) +{ + rtems_status_code sc = RTEMS_SUCCESSFUL; + rtems_mode prev_mode = 0; + + sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS); + + return prev_mode; +} + +static void +rtems_bdbuf_restore_preemption (rtems_mode prev_mode) +{ + rtems_status_code sc = RTEMS_SUCCESSFUL; + + sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST); +} + +/** + * Wait until woken. Semaphores are used so a number of tasks can wait and can + * be woken at once. Task events would require we maintain a list of tasks to + * be woken and this would require storage and we do not know the number of + * tasks that could be waiting. + * + * While we have the cache locked we can try and claim the semaphore and + * therefore know when we release the lock to the cache we will block until the + * semaphore is released. This may even happen before we get to block. + * + * A counter is used to save the release call when no one is waiting. + * + * The function assumes the cache is locked on entry and it will be locked on + * exit. 
+ */ +static void +rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters) +{ + rtems_status_code sc; + rtems_mode prev_mode; + + /* + * Indicate we are waiting. + */ + ++waiters->count; + + /* + * Disable preemption then unlock the cache and block. There is no POSIX + * condition variable in the core API so this is a work around. + * + * The issue is a task could preempt after the cache is unlocked because it is + * blocking or just hits that window, and before this task has blocked on the + * semaphore. If the preempting task flushes the queue this task will not see + * the flush and may block for ever or until another transaction flushes this + * semaphore. + */ + prev_mode = rtems_bdbuf_disable_preemption (); + + /* + * Unlock the cache, wait, and lock the cache when we return. + */ + rtems_bdbuf_unlock_cache (); + + sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT); + + if (sc == RTEMS_TIMEOUT) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO); + + if (sc != RTEMS_UNSATISFIED) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2); + + rtems_bdbuf_lock_cache (); + + rtems_bdbuf_restore_preemption (prev_mode); + + --waiters->count; +} + +static void +rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters) +{ + rtems_bdbuf_group_obtain (bd); + ++bd->waiters; + rtems_bdbuf_anonymous_wait (waiters); + --bd->waiters; + rtems_bdbuf_group_release (bd); +} + +/** + * Wake a blocked resource. The resource has a counter that lets us know if + * there are any waiters. 
+ */ +static void +rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters) +{ + rtems_status_code sc = RTEMS_SUCCESSFUL; + + if (waiters->count > 0) + { + sc = rtems_semaphore_flush (waiters->sema); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE); + } +} + +static void +rtems_bdbuf_wake_swapper (void) +{ + rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout, + RTEMS_BDBUF_SWAPOUT_SYNC); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE); +} + +static bool +rtems_bdbuf_has_buffer_waiters (void) +{ + return bdbuf_cache.buffer_waiters.count; +} + +static void +rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd) +{ + if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0) + rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM); +} + +static void +rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd) +{ + switch (bd->state) + { + case RTEMS_BDBUF_STATE_FREE: + break; + case RTEMS_BDBUF_STATE_CACHED: + rtems_bdbuf_remove_from_tree (bd); + break; + default: + rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10); + } + + rtems_chain_extract (&bd->link); +} + +static void +rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd) +{ + rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE); + rtems_chain_prepend (&bdbuf_cache.lru, &bd->link); +} + +static void +rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd) +{ + rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY); +} + +static void +rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd) +{ + rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED); + rtems_chain_append (&bdbuf_cache.lru, &bd->link); +} + +static void +rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd) +{ + rtems_bdbuf_make_empty (bd); + + if (bd->waiters == 0) + { + rtems_bdbuf_remove_from_tree (bd); + rtems_bdbuf_make_free_and_add_to_lru_list (bd); + } +} + +static void 
+rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd) +{ + if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dev) + { + rtems_bdbuf_unlock_cache (); + + /* + * Wait for the sync lock. + */ + rtems_bdbuf_lock_sync (); + + rtems_bdbuf_unlock_sync (); + rtems_bdbuf_lock_cache (); + } + + /* + * Only the first modified release sets the timer and any further user + * accesses do not change the timer value which should move down. This + * assumes the user's hold of the buffer is much less than the time on the + * modified list. Resetting the timer on each access which could result in a + * buffer never getting to 0 and never being forced onto disk. This raises a + * difficult question. Is a snapshot of a block that is changing better than + * nothing being written? We have tended to think we should hold changes for + * only a specific period of time even if still changing and get onto disk + * and letting the file system try and recover this position if it can. + */ + if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED + || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY) + bd->hold_timer = bdbuf_config.swap_block_hold; + + rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED); + rtems_chain_append (&bdbuf_cache.modified, &bd->link); + + if (bd->waiters) + rtems_bdbuf_wake (&bdbuf_cache.access_waiters); + else if (rtems_bdbuf_has_buffer_waiters ()) + rtems_bdbuf_wake_swapper (); +} + +static void +rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd) +{ + rtems_bdbuf_group_release (bd); + rtems_bdbuf_make_cached_and_add_to_lru_list (bd); + + if (bd->waiters) + rtems_bdbuf_wake (&bdbuf_cache.access_waiters); + else + rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters); +} + +/** + * Compute the number of BDs per group for a given buffer size. + * + * @param size The buffer size. It can be any size and we scale up. 
+ */ +static size_t +rtems_bdbuf_bds_per_group (size_t size) +{ + size_t bufs_per_size; + size_t bds_per_size; + + if (size > bdbuf_config.buffer_max) + return 0; + + bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1; + + for (bds_per_size = 1; + bds_per_size < bufs_per_size; + bds_per_size <<= 1) + ; + + return bdbuf_cache.max_bds_per_group / bds_per_size; +} + +static void +rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd) +{ + rtems_bdbuf_group_release (bd); + rtems_bdbuf_discard_buffer (bd); + + if (bd->waiters) + rtems_bdbuf_wake (&bdbuf_cache.access_waiters); + else + rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters); +} + +/** + * Reallocate a group. The BDs currently allocated in the group are removed + * from the ALV tree and any lists then the new BD's are prepended to the ready + * list of the cache. + * + * @param group The group to reallocate. + * @param new_bds_per_group The new count of BDs per group. + * @return A buffer of this group. + */ +static rtems_bdbuf_buffer * +rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group) +{ + rtems_bdbuf_buffer* bd; + size_t b; + size_t bufs_per_bd; + + if (rtems_bdbuf_tracer) + printf ("bdbuf:realloc: %tu: %zd -> %zd\n", + group - bdbuf_cache.groups, group->bds_per_group, + new_bds_per_group); + + bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group; + + for (b = 0, bd = group->bdbuf; + b < group->bds_per_group; + b++, bd += bufs_per_bd) + rtems_bdbuf_remove_from_tree_and_lru_list (bd); + + group->bds_per_group = new_bds_per_group; + bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group; + + for (b = 1, bd = group->bdbuf + bufs_per_bd; + b < group->bds_per_group; + b++, bd += bufs_per_bd) + rtems_bdbuf_make_free_and_add_to_lru_list (bd); + + if (b > 1) + rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters); + + return group->bdbuf; +} + +static void +rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd, + dev_t dev, + rtems_blkdev_bnum block) +{ + 
bd->dev = dev; + bd->block = block; + bd->avl.left = NULL; + bd->avl.right = NULL; + bd->waiters = 0; + + if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE); + + rtems_bdbuf_make_empty (bd); +} + +static rtems_bdbuf_buffer * +rtems_bdbuf_get_buffer_from_lru_list (dev_t dev, + rtems_blkdev_bnum block, + size_t bds_per_group) +{ + rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru); + + while (!rtems_chain_is_tail (&bdbuf_cache.lru, node)) + { + rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node; + rtems_bdbuf_buffer *empty_bd = NULL; + + if (rtems_bdbuf_tracer) + printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n", + bd - bdbuf_cache.bds, + bd->group - bdbuf_cache.groups, bd->group->users, + bd->group->bds_per_group, bds_per_group); + + /* + * If nobody waits for this BD, we may recycle it. + */ + if (bd->waiters == 0) + { + if (bd->group->bds_per_group == bds_per_group) + { + rtems_bdbuf_remove_from_tree_and_lru_list (bd); + + empty_bd = bd; + } + else if (bd->group->users == 0) + empty_bd = rtems_bdbuf_group_realloc (bd->group, bds_per_group); + } + + if (empty_bd != NULL) + { + rtems_bdbuf_setup_empty_buffer (empty_bd, dev, block); + + return empty_bd; + } + + node = rtems_chain_next (node); + } + + return NULL; +} + +/** + * Initialise the cache. + * + * @return rtems_status_code The initialisation status. + */ +rtems_status_code +rtems_bdbuf_init (void) +{ + rtems_bdbuf_group* group; + rtems_bdbuf_buffer* bd; + uint8_t* buffer; + size_t b; + size_t cache_aligment; + rtems_status_code sc; + rtems_mode prev_mode; + + if (rtems_bdbuf_tracer) + printf ("bdbuf:init\n"); + + if (rtems_interrupt_is_in_progress()) + return RTEMS_CALLED_FROM_ISR; + + /* + * Check the configuration table values. 
+ */ + if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0) + return RTEMS_INVALID_NUMBER; + + /* + * We use a special variable to manage the initialisation incase we have + * completing threads doing this. You may get errors if the another thread + * makes a call and we have not finished initialisation. + */ + prev_mode = rtems_bdbuf_disable_preemption (); + if (bdbuf_cache.initialised) + { + rtems_bdbuf_restore_preemption (prev_mode); + return RTEMS_RESOURCE_IN_USE; + } + + memset(&bdbuf_cache, 0, sizeof(bdbuf_cache)); + bdbuf_cache.initialised = true; + rtems_bdbuf_restore_preemption (prev_mode); + + /* + * For unspecified cache alignments we use the CPU alignment. + */ + cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */ + if (cache_aligment <= 0) + cache_aligment = CPU_ALIGNMENT; + + bdbuf_cache.sync_device = BDBUF_INVALID_DEV; + + rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers); + rtems_chain_initialize_empty (&bdbuf_cache.lru); + rtems_chain_initialize_empty (&bdbuf_cache.modified); + rtems_chain_initialize_empty (&bdbuf_cache.sync); + + /* + * Create the locks for the cache. 
+ */ + sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'), + 1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0, + &bdbuf_cache.lock); + if (sc != RTEMS_SUCCESSFUL) + goto error; + + rtems_bdbuf_lock_cache (); + + sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'), + 1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0, + &bdbuf_cache.sync_lock); + if (sc != RTEMS_SUCCESSFUL) + goto error; + + sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'), + 0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0, + &bdbuf_cache.access_waiters.sema); + if (sc != RTEMS_SUCCESSFUL) + goto error; + + sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'), + 0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0, + &bdbuf_cache.transfer_waiters.sema); + if (sc != RTEMS_SUCCESSFUL) + goto error; + + sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'), + 0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0, + &bdbuf_cache.buffer_waiters.sema); + if (sc != RTEMS_SUCCESSFUL) + goto error; + + /* + * Compute the various number of elements in the cache. + */ + bdbuf_cache.buffer_min_count = + bdbuf_config.size / bdbuf_config.buffer_min; + bdbuf_cache.max_bds_per_group = + bdbuf_config.buffer_max / bdbuf_config.buffer_min; + bdbuf_cache.group_count = + bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group; + + /* + * Allocate the memory for the buffer descriptors. + */ + bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer), + bdbuf_cache.buffer_min_count); + if (!bdbuf_cache.bds) + goto error; + + /* + * Allocate the memory for the buffer descriptors. + */ + bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group), + bdbuf_cache.group_count); + if (!bdbuf_cache.groups) + goto error; + + /* + * Allocate memory for buffer memory. The buffer memory will be cache + * aligned. It is possible to free the memory allocated by rtems_memalign() + * with free(). Return 0 if allocated. 
+ * + * The memory allocate allows a + */ + if (rtems_memalign ((void **) &bdbuf_cache.buffers, + cache_aligment, + bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0) + goto error; + + /* + * The cache is empty after opening so we need to add all the buffers to it + * and initialise the groups. + */ + for (b = 0, group = bdbuf_cache.groups, + bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers; + b < bdbuf_cache.buffer_min_count; + b++, bd++, buffer += bdbuf_config.buffer_min) + { + bd->dev = BDBUF_INVALID_DEV; + bd->group = group; + bd->buffer = buffer; + + rtems_chain_append (&bdbuf_cache.lru, &bd->link); + + if ((b % bdbuf_cache.max_bds_per_group) == + (bdbuf_cache.max_bds_per_group - 1)) + group++; + } + + for (b = 0, + group = bdbuf_cache.groups, + bd = bdbuf_cache.bds; + b < bdbuf_cache.group_count; + b++, + group++, + bd += bdbuf_cache.max_bds_per_group) + { + group->bds_per_group = bdbuf_cache.max_bds_per_group; + group->bdbuf = bd; + } + + /* + * Create and start swapout task. This task will create and manage the worker + * threads. + */ + bdbuf_cache.swapout_enabled = true; + + sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'), + bdbuf_config.swapout_priority ? 
+ bdbuf_config.swapout_priority : + RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT, + SWAPOUT_TASK_STACK_SIZE, + RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR, + RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT, + &bdbuf_cache.swapout); + if (sc != RTEMS_SUCCESSFUL) + goto error; + + sc = rtems_task_start (bdbuf_cache.swapout, + rtems_bdbuf_swapout_task, + (rtems_task_argument) &bdbuf_cache); + if (sc != RTEMS_SUCCESSFUL) + goto error; + + rtems_bdbuf_unlock_cache (); + + return RTEMS_SUCCESSFUL; + +error: + + if (bdbuf_cache.swapout != 0) + rtems_task_delete (bdbuf_cache.swapout); + + free (bdbuf_cache.buffers); + free (bdbuf_cache.groups); + free (bdbuf_cache.bds); + + rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema); + rtems_semaphore_delete (bdbuf_cache.access_waiters.sema); + rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema); + rtems_semaphore_delete (bdbuf_cache.sync_lock); + + if (bdbuf_cache.lock != 0) + { + rtems_bdbuf_unlock_cache (); + rtems_semaphore_delete (bdbuf_cache.lock); + } + + bdbuf_cache.initialised = false; + + return RTEMS_UNSATISFIED; +} + +static void +rtems_bdbuf_wait_for_event (rtems_event_set event) +{ + rtems_status_code sc = RTEMS_SUCCESSFUL; + rtems_event_set out = 0; + + sc = rtems_event_receive (event, + RTEMS_EVENT_ALL | RTEMS_WAIT, + RTEMS_NO_TIMEOUT, + &out); + + if (sc != RTEMS_SUCCESSFUL || out != event) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT); +} + +static void +rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd) +{ + while (true) + { + switch (bd->state) + { + case RTEMS_BDBUF_STATE_MODIFIED: + rtems_bdbuf_group_release (bd); + /* Fall through */ + case RTEMS_BDBUF_STATE_CACHED: + rtems_chain_extract (&bd->link); + /* Fall through */ + case RTEMS_BDBUF_STATE_EMPTY: + return; + case RTEMS_BDBUF_STATE_ACCESS_CACHED: + case RTEMS_BDBUF_STATE_ACCESS_EMPTY: + case RTEMS_BDBUF_STATE_ACCESS_MODIFIED: + case RTEMS_BDBUF_STATE_ACCESS_PURGED: + rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters); + break; 
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* The buffer is owned by the sync/transfer path; wait on the
         * transfer waiters and re-check the state afterwards. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
    }
  }
}

/**
 * Queue a modified buffer for writing and wake the swapout task.
 *
 * The buffer is removed from its current list, appended to the sync list
 * and its state set to SYNC so the swapout task will write it to disk.
 *
 * NOTE(review): the visible callers invoke this with the cache locked —
 * confirm for any new call site.
 *
 * @param bd The modified buffer to queue for sync.
 */
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  rtems_chain_extract (&bd->link);
  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}

/**
 * @brief Waits until the buffer is ready for recycling.
 *
 * A MODIFIED buffer is first pushed to the sync list so its data is not
 * lost; FREE buffers and CACHED/EMPTY buffers without waiters may be
 * recycled immediately.
 *
 * @retval @c true Buffer is valid and may be recycled.
 * @retval @c false Buffer is invalid and has to be searched again.
 */
static bool
rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_FREE:
        return true;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Get the data onto disk first, then loop until the sync ends. */
        rtems_bdbuf_request_sync_for_modified_buffer (bd);
        break;
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
        if (bd->waiters == 0)
          return true;
        else
        {
          /*
           * It is essential that we wait here without a special wait count and
           * without the group in use. Otherwise we could trigger a wait ping
           * pong with another recycle waiter. The state of the buffer is
           * arbitrary afterwards.
           */
          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
          return false;
        }
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
    }
  }
}

/**
 * Block until the buffer has left the SYNC/TRANSFER/TRANSFER_PURGED
 * states, i.e. it is no longer part of a sync or transfer.
 *
 * @param bd The buffer to wait on.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
    }
  }
}

/**
 * Wait for a buffer to become available. If modified buffers exist the
 * swapout task is woken first so it can turn them into free buffers.
 */
static void
rtems_bdbuf_wait_for_buffer (void)
{
  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
    rtems_bdbuf_wake_swapper ();

  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
}

/**
 * Hand an accessed buffer to the swapout task for writing and wait until
 * the write has completed.
 *
 * @param bd The buffer to sync after the caller finished accessing it.
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
+ */ + if (bd->waiters == 0 + && (bd->state == RTEMS_BDBUF_STATE_CACHED + || bd->state == RTEMS_BDBUF_STATE_EMPTY)) + { + if (bd->state == RTEMS_BDBUF_STATE_EMPTY) + { + rtems_bdbuf_remove_from_tree (bd); + rtems_bdbuf_make_free_and_add_to_lru_list (bd); + } + rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters); + } +} + +static rtems_bdbuf_buffer * +rtems_bdbuf_get_buffer_for_read_ahead (dev_t dev, + rtems_blkdev_bnum block, + size_t bds_per_group) +{ + rtems_bdbuf_buffer *bd = NULL; + + bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block); + + if (bd == NULL) + { + bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group); + + if (bd != NULL) + rtems_bdbuf_group_obtain (bd); + } + else + /* + * The buffer is in the cache. So it is already available or in use, and + * thus no need for a read ahead. + */ + bd = NULL; + + return bd; +} + +static rtems_bdbuf_buffer * +rtems_bdbuf_get_buffer_for_access (dev_t dev, + rtems_blkdev_bnum block, + size_t bds_per_group) +{ + rtems_bdbuf_buffer *bd = NULL; + + do + { + bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block); + + if (bd != NULL) + { + if (bd->group->bds_per_group != bds_per_group) + { + if (rtems_bdbuf_wait_for_recycle (bd)) + { + rtems_bdbuf_remove_from_tree_and_lru_list (bd); + rtems_bdbuf_make_free_and_add_to_lru_list (bd); + rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters); + } + bd = NULL; + } + } + else + { + bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group); + + if (bd == NULL) + rtems_bdbuf_wait_for_buffer (); + } + } + while (bd == NULL); + + rtems_bdbuf_wait_for_access (bd); + rtems_bdbuf_group_obtain (bd); + + return bd; +} + +static rtems_status_code +rtems_bdbuf_obtain_disk (dev_t dev, + rtems_blkdev_bnum block, + rtems_disk_device **dd_ptr, + rtems_blkdev_bnum *media_block_ptr, + size_t *bds_per_group_ptr) +{ + rtems_disk_device *dd = NULL; + + if (!bdbuf_cache.initialised) + return RTEMS_NOT_CONFIGURED; + + /* + * Do not hold the cache lock when obtaining the 
disk table. + */ + dd = rtems_disk_obtain (dev); + if (dd == NULL) + return RTEMS_INVALID_ID; + + *dd_ptr = dd; + + if (media_block_ptr != NULL) + { + /* + * Compute the media block number. Drivers work with media block number not + * the block number a BD may have as this depends on the block size set by + * the user. + */ + rtems_blkdev_bnum mb = rtems_bdbuf_media_block (block, + dd->block_size, + dd->media_block_size); + if (mb >= dd->size) + { + rtems_disk_release(dd); + return RTEMS_INVALID_NUMBER; + } + + *media_block_ptr = mb + dd->start; + } + + if (bds_per_group_ptr != NULL) + { + size_t bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size); + + if (bds_per_group == 0) + { + rtems_disk_release (dd); + return RTEMS_INVALID_NUMBER; + } + + *bds_per_group_ptr = bds_per_group; + } + + return RTEMS_SUCCESSFUL; +} + +static void +rtems_bdbuf_release_disk (rtems_disk_device *dd) +{ + rtems_status_code sc = RTEMS_SUCCESSFUL; + + sc = rtems_disk_release (dd); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL); +} + +rtems_status_code +rtems_bdbuf_get (dev_t dev, + rtems_blkdev_bnum block, + rtems_bdbuf_buffer **bd_ptr) +{ + rtems_status_code sc = RTEMS_SUCCESSFUL; + rtems_disk_device *dd = NULL; + rtems_bdbuf_buffer *bd = NULL; + rtems_blkdev_bnum media_block = 0; + size_t bds_per_group = 0; + + sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group); + if (sc != RTEMS_SUCCESSFUL) + return sc; + + rtems_bdbuf_lock_cache (); + + /* + * Print the block index relative to the physical disk. 
+ */ + if (rtems_bdbuf_tracer) + printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n", + media_block, block, (unsigned) dev); + + bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group); + + switch (bd->state) + { + case RTEMS_BDBUF_STATE_CACHED: + rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED); + break; + case RTEMS_BDBUF_STATE_EMPTY: + rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY); + break; + case RTEMS_BDBUF_STATE_MODIFIED: + /* + * To get a modified buffer could be considered a bug in the caller + * because you should not be getting an already modified buffer but user + * may have modified a byte in a block then decided to seek the start and + * write the whole block and the file system will have no record of this + * so just gets the block to fill. + */ + rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED); + break; + default: + rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2); + break; + } + + if (rtems_bdbuf_tracer) + { + rtems_bdbuf_show_users ("get", bd); + rtems_bdbuf_show_usage (); + } + + rtems_bdbuf_unlock_cache (); + + rtems_bdbuf_release_disk (dd); + + *bd_ptr = bd; + + return RTEMS_SUCCESSFUL; +} + +/** + * Call back handler called by the low level driver when the transfer has + * completed. This function may be invoked from interrupt handler. + * + * @param arg Arbitrary argument specified in block device request + * structure (in this case - pointer to the appropriate + * block device request structure). 
 * @param status I/O completion status
 */
static void
rtems_bdbuf_transfer_done (void* arg, rtems_status_code status)
{
  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;

  req->status = status;

  /* Wake the task that issued the request; it blocks in
   * rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC). */
  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
}

/**
 * Build a read request for the requested media block plus up to
 * bdbuf_config.max_read_ahead_blocks read-ahead blocks, clamped to the end
 * of the disk. The first buffer is obtained for access; read-ahead buffers
 * are added only while suitable free buffers are available, and the
 * read-ahead loop stops early for buffers already cached or modified.
 *
 * @param dd The disk device (already obtained by the caller).
 * @param media_block The first media block to read.
 * @param bds_per_group The number of BDs per group for this block size.
 * @param req The request to fill in (array sized by the caller).
 * @param bd_ptr Receives the buffer of the first (requested) block.
 */
static void
rtems_bdbuf_create_read_request (const rtems_disk_device *dd,
                                 rtems_blkdev_bnum media_block,
                                 size_t bds_per_group,
                                 rtems_blkdev_request *req,
                                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum media_block_end = dd->start + dd->size;
  rtems_blkdev_bnum media_block_count = dd->block_size / dd->media_block_size;
  dev_t dev = dd->dev;
  uint32_t block_size = dd->block_size;
  uint32_t transfer_index = 1;
  uint32_t transfer_count = bdbuf_config.max_read_ahead_blocks + 1;

  /* Do not read past the end of the disk. */
  if (media_block_end - media_block < transfer_count)
    transfer_count = media_block_end - media_block;

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->req_done = rtems_bdbuf_transfer_done;
  req->done_arg = req; /* The callback receives the request itself. */
  req->io_task = rtems_task_self ();
  req->status = RTEMS_RESOURCE_IN_USE;
  req->bufnum = 0;

  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);

  *bd_ptr = bd;

  req->bufs [0].user = bd;
  req->bufs [0].block = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
    case RTEMS_BDBUF_STATE_MODIFIED:
      /* Data already valid: leave bufnum at 0 so no I/O is issued. */
      return;
    case RTEMS_BDBUF_STATE_EMPTY:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_1);
      break;
  }

  while (transfer_index < transfer_count)
  {
    media_block += media_block_count;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dev, media_block,
                                                bds_per_group);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user = bd;
    req->bufs [transfer_index].block = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read-ahead", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;
}

/**
 * Pass the request to the driver and wait for completion, then release the
 * buffers: buffers still in TRANSFER state after a successful request
 * become CACHED, all others are discarded. The caller's cache lock state is
 * preserved (unlocked across the driver call, re-taken for the buffer
 * processing).
 *
 * @param dd The disk device.
 * @param req The read or write request to execute.
 * @param cache_locked True if the caller holds the cache lock.
 *
 * @return RTEMS_SUCCESSFUL or RTEMS_UNSATISFIED as reported by the driver,
 *         otherwise RTEMS_IO_ERROR.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (const rtems_disk_device *dd,
                                      rtems_blkdev_request *req,
                                      bool cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  int result = 0;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  if (result == 0)
  {
    /* The driver signals completion via rtems_bdbuf_transfer_done. */
    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
    sc = req->status;
  }
  else
    sc = RTEMS_IO_ERROR;

  rtems_bdbuf_lock_cache ();

  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}

rtems_status_code
rtems_bdbuf_read (dev_t dev,
                  rtems_blkdev_bnum block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  rtems_disk_device *dd = NULL;
  rtems_blkdev_request *req = NULL;

rtems_bdbuf_buffer *bd = NULL; + rtems_blkdev_bnum media_block = 0; + size_t bds_per_group = 0; + + sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group); + if (sc != RTEMS_SUCCESSFUL) + return sc; + + /* + * TODO: This type of request structure is wrong and should be removed. + */ +#define bdbuf_alloc(size) __builtin_alloca (size) + + req = bdbuf_alloc (sizeof (rtems_blkdev_request) + + sizeof (rtems_blkdev_sg_buffer) * + (bdbuf_config.max_read_ahead_blocks + 1)); + + if (rtems_bdbuf_tracer) + printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n", + media_block + dd->start, block, (unsigned) dev); + + rtems_bdbuf_lock_cache (); + rtems_bdbuf_create_read_request (dd, media_block, bds_per_group, req, &bd); + + if (req->bufnum > 0) + { + sc = rtems_bdbuf_execute_transfer_request (dd, req, true); + if (sc == RTEMS_SUCCESSFUL) + { + rtems_chain_extract (&bd->link); + rtems_bdbuf_group_obtain (bd); + } + } + + if (sc == RTEMS_SUCCESSFUL) + { + switch (bd->state) + { + case RTEMS_BDBUF_STATE_CACHED: + rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED); + break; + case RTEMS_BDBUF_STATE_MODIFIED: + rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED); + break; + default: + rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4); + break; + } + + if (rtems_bdbuf_tracer) + { + rtems_bdbuf_show_users ("read", bd); + rtems_bdbuf_show_usage (); + } + + *bd_ptr = bd; + } + else + *bd_ptr = NULL; + + rtems_bdbuf_unlock_cache (); + rtems_bdbuf_release_disk (dd); + + return sc; +} + +static rtems_status_code +rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind) +{ + if (!bdbuf_cache.initialised) + return RTEMS_NOT_CONFIGURED; + if (bd == NULL) + return RTEMS_INVALID_ADDRESS; + if (rtems_bdbuf_tracer) + { + printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block); + rtems_bdbuf_show_users (kind, bd); + } + rtems_bdbuf_lock_cache(); + + return RTEMS_SUCCESSFUL; +} + +rtems_status_code +rtems_bdbuf_release 
(rtems_bdbuf_buffer *bd) +{ + rtems_status_code sc = RTEMS_SUCCESSFUL; + + sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release"); + if (sc != RTEMS_SUCCESSFUL) + return sc; + + switch (bd->state) + { + case RTEMS_BDBUF_STATE_ACCESS_CACHED: + rtems_bdbuf_add_to_lru_list_after_access (bd); + break; + case RTEMS_BDBUF_STATE_ACCESS_EMPTY: + case RTEMS_BDBUF_STATE_ACCESS_PURGED: + rtems_bdbuf_discard_buffer_after_access (bd); + break; + case RTEMS_BDBUF_STATE_ACCESS_MODIFIED: + rtems_bdbuf_add_to_modified_list_after_access (bd); + break; + default: + rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0); + break; + } + + if (rtems_bdbuf_tracer) + rtems_bdbuf_show_usage (); + + rtems_bdbuf_unlock_cache (); + + return RTEMS_SUCCESSFUL; +} + +rtems_status_code +rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd) +{ + rtems_status_code sc = RTEMS_SUCCESSFUL; + + sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified"); + if (sc != RTEMS_SUCCESSFUL) + return sc; + + switch (bd->state) + { + case RTEMS_BDBUF_STATE_ACCESS_CACHED: + case RTEMS_BDBUF_STATE_ACCESS_EMPTY: + case RTEMS_BDBUF_STATE_ACCESS_MODIFIED: + rtems_bdbuf_add_to_modified_list_after_access (bd); + break; + case RTEMS_BDBUF_STATE_ACCESS_PURGED: + rtems_bdbuf_discard_buffer_after_access (bd); + break; + default: + rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6); + break; + } + + if (rtems_bdbuf_tracer) + rtems_bdbuf_show_usage (); + + rtems_bdbuf_unlock_cache (); + + return RTEMS_SUCCESSFUL; +} + +rtems_status_code +rtems_bdbuf_sync (rtems_bdbuf_buffer *bd) +{ + rtems_status_code sc = RTEMS_SUCCESSFUL; + + sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync"); + if (sc != RTEMS_SUCCESSFUL) + return sc; + + switch (bd->state) + { + case RTEMS_BDBUF_STATE_ACCESS_CACHED: + case RTEMS_BDBUF_STATE_ACCESS_EMPTY: + case RTEMS_BDBUF_STATE_ACCESS_MODIFIED: + rtems_bdbuf_sync_after_access (bd); + break; + case RTEMS_BDBUF_STATE_ACCESS_PURGED: + 
rtems_bdbuf_discard_buffer_after_access (bd); + break; + default: + rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5); + break; + } + + if (rtems_bdbuf_tracer) + rtems_bdbuf_show_usage (); + + rtems_bdbuf_unlock_cache (); + + return RTEMS_SUCCESSFUL; +} + +rtems_status_code +rtems_bdbuf_syncdev (dev_t dev) +{ + rtems_status_code sc = RTEMS_SUCCESSFUL; + rtems_disk_device *dd = NULL; + + if (rtems_bdbuf_tracer) + printf ("bdbuf:syncdev: %08x\n", (unsigned) dev); + + sc = rtems_bdbuf_obtain_disk (dev, 0, &dd, NULL, NULL); + if (sc != RTEMS_SUCCESSFUL) + return sc; + + /* + * Take the sync lock before locking the cache. Once we have the sync lock we + * can lock the cache. If another thread has the sync lock it will cause this + * thread to block until it owns the sync lock then it can own the cache. The + * sync lock can only be obtained with the cache unlocked. + */ + rtems_bdbuf_lock_sync (); + rtems_bdbuf_lock_cache (); + + /* + * Set the cache to have a sync active for a specific device and let the swap + * out task know the id of the requester to wake when done. + * + * The swap out task will negate the sync active flag when no more buffers + * for the device are held on the "modified for sync" queues. + */ + bdbuf_cache.sync_active = true; + bdbuf_cache.sync_requester = rtems_task_self (); + bdbuf_cache.sync_device = dev; + + rtems_bdbuf_wake_swapper (); + rtems_bdbuf_unlock_cache (); + rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC); + rtems_bdbuf_unlock_sync (); + rtems_bdbuf_release_disk (dd); + + return RTEMS_SUCCESSFUL; +} + +static int +rtems_bdbuf_null_disk_ioctl (rtems_disk_device *dd, uint32_t req, void *arg) +{ + return -1; +} + +/** + * Swapout transfer to the driver. The driver will break this I/O into groups + * of consecutive write requests is multiple consecutive buffers are required + * by the driver. The cache is not locked. + * + * @param transfer The transfer transaction. 
+ */ +static void +rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer) +{ + rtems_chain_node *node; + static rtems_disk_device null_disk = { + .phys_dev = &null_disk, + .capabilities = 0, + .ioctl = rtems_bdbuf_null_disk_ioctl + }; + + if (rtems_bdbuf_tracer) + printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dev); + + /* + * If there are buffers to transfer to the media transfer them. + */ + if (!rtems_chain_is_empty (&transfer->bds)) + { + /* + * The last block number used when the driver only supports + * continuous blocks in a single request. + */ + uint32_t last_block = 0; + + /* + * Number of buffers per bd. This is used to detect the next + * block. + */ + uint32_t bufs_per_bd = 0; + + /* + * Obtain the disk device. The cache's mutex has been released to avoid a + * dead lock. + */ + rtems_disk_device *dd = rtems_disk_obtain (transfer->dev); + + if (dd == NULL) + dd = &null_disk; + + bufs_per_bd = dd->block_size / bdbuf_config.buffer_min; + + /* + * Take as many buffers as configured and pass to the driver. Note, the + * API to the drivers has an array of buffers and if a chain was passed + * we could have just passed the list. If the driver API is updated it + * should be possible to make this change with little effect in this + * code. The array that is passed is broken in design and should be + * removed. Merging members of a struct into the first member is + * trouble waiting to happen. + */ + transfer->write_req->status = RTEMS_RESOURCE_IN_USE; + transfer->write_req->bufnum = 0; + + while ((node = rtems_chain_get(&transfer->bds)) != NULL) + { + rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node; + bool write = false; + + /* + * If the device only accepts sequential buffers and this is not the + * first buffer (the first is always sequential, and the buffer is not + * sequential then put the buffer back on the transfer chain and write + * the committed buffers. 
+ */ + + if (rtems_bdbuf_tracer) + printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n", + bd->block, transfer->write_req->bufnum, + dd->phys_dev->capabilities & + RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULIT" : "SCAT"); + + if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) && + transfer->write_req->bufnum && + (bd->block != (last_block + bufs_per_bd))) + { + rtems_chain_prepend (&transfer->bds, &bd->link); + write = true; + } + else + { + rtems_blkdev_sg_buffer* buf; + buf = &transfer->write_req->bufs[transfer->write_req->bufnum]; + transfer->write_req->bufnum++; + buf->user = bd; + buf->block = bd->block; + buf->length = dd->block_size; + buf->buffer = bd->buffer; + last_block = bd->block; + } + + /* + * Perform the transfer if there are no more buffers, or the transfer + * size has reached the configured max. value. + */ + + if (rtems_chain_is_empty (&transfer->bds) || + (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks)) + write = true; + + if (write) + { + rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false); + + transfer->write_req->status = RTEMS_RESOURCE_IN_USE; + transfer->write_req->bufnum = 0; + } + } + + if (dd != &null_disk) + { + /* + * If sync'ing and the deivce is capability of handling a sync IO control + * call perform the call. + */ + if (transfer->syncing && + (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC)) + { + /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL); + /* How should the error be handled ? */ + } + + rtems_disk_release (dd); + } + } +} + +/** + * Process the modified list of buffers. There is a sync or modified list that + * needs to be handled so we have a common function to do the work. + * + * @param dev The device to handle. If BDBUF_INVALID_DEV no device is selected + * so select the device of the first buffer to be written to disk. + * @param chain The modified chain to process. 
+ * @param transfer The chain to append buffers to be written too. + * @param sync_active If true this is a sync operation so expire all timers. + * @param update_timers If true update the timers. + * @param timer_delta It update_timers is true update the timers by this + * amount. + */ +static void +rtems_bdbuf_swapout_modified_processing (dev_t* dev, + rtems_chain_control* chain, + rtems_chain_control* transfer, + bool sync_active, + bool update_timers, + uint32_t timer_delta) +{ + if (!rtems_chain_is_empty (chain)) + { + rtems_chain_node* node = rtems_chain_head (chain); + bool sync_all; + + node = node->next; + + /* + * A sync active with no valid dev means sync all. + */ + if (sync_active && (*dev == BDBUF_INVALID_DEV)) + sync_all = true; + else + sync_all = false; + + while (!rtems_chain_is_tail (chain, node)) + { + rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node; + + /* + * Check if the buffer's hold timer has reached 0. If a sync is active + * or someone waits for a buffer written force all the timers to 0. + * + * @note Lots of sync requests will skew this timer. It should be based + * on TOD to be accurate. Does it matter ? + */ + if (sync_all || (sync_active && (*dev == bd->dev)) + || rtems_bdbuf_has_buffer_waiters ()) + bd->hold_timer = 0; + + if (bd->hold_timer) + { + if (update_timers) + { + if (bd->hold_timer > timer_delta) + bd->hold_timer -= timer_delta; + else + bd->hold_timer = 0; + } + + if (bd->hold_timer) + { + node = node->next; + continue; + } + } + + /* + * This assumes we can set dev_t to BDBUF_INVALID_DEV which is just an + * assumption. Cannot use the transfer list being empty the sync dev + * calls sets the dev to use. + */ + if (*dev == BDBUF_INVALID_DEV) + *dev = bd->dev; + + if (bd->dev == *dev) + { + rtems_chain_node* next_node = node->next; + rtems_chain_node* tnode = rtems_chain_tail (transfer); + + /* + * The blocks on the transfer list are sorted in block order. 
This + * means multi-block transfers for drivers that require consecutive + * blocks perform better with sorted blocks and for real disks it may + * help lower head movement. + */ + + rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER); + + rtems_chain_extract (node); + + tnode = tnode->previous; + + while (node && !rtems_chain_is_head (transfer, tnode)) + { + rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode; + + if (bd->block > tbd->block) + { + rtems_chain_insert (tnode, node); + node = NULL; + } + else + tnode = tnode->previous; + } + + if (node) + rtems_chain_prepend (transfer, node); + + node = next_node; + } + else + { + node = node->next; + } + } + } +} + +/** + * Process the cache's modified buffers. Check the sync list first then the + * modified list extracting the buffers suitable to be written to disk. We have + * a device at a time. The task level loop will repeat this operation while + * there are buffers to be written. If the transfer fails place the buffers + * back on the modified list and try again later. The cache is unlocked while + * the buffers are being written to disk. + * + * @param timer_delta It update_timers is true update the timers by this + * amount. + * @param update_timers If true update the timers. + * @param transfer The transfer transaction data. + * + * @retval true Buffers where written to disk so scan again. + * @retval false No buffers where written to disk. + */ +static bool +rtems_bdbuf_swapout_processing (unsigned long timer_delta, + bool update_timers, + rtems_bdbuf_swapout_transfer* transfer) +{ + rtems_bdbuf_swapout_worker* worker; + bool transfered_buffers = false; + + rtems_bdbuf_lock_cache (); + + /* + * If a sync is active do not use a worker because the current code does not + * cleaning up after. We need to know the buffers have been written when + * syncing to release sync lock and currently worker threads do not return to + * here. 
We do not know the worker is the last in a sequence of sync writes + * until after we have it running so we do not know to tell it to release the + * lock. The simplest solution is to get the main swap out task perform all + * sync operations. + */ + if (bdbuf_cache.sync_active) + worker = NULL; + else + { + worker = (rtems_bdbuf_swapout_worker*) + rtems_chain_get (&bdbuf_cache.swapout_workers); + if (worker) + transfer = &worker->transfer; + } + + rtems_chain_initialize_empty (&transfer->bds); + transfer->dev = BDBUF_INVALID_DEV; + transfer->syncing = bdbuf_cache.sync_active; + + /* + * When the sync is for a device limit the sync to that device. If the sync + * is for a buffer handle process the devices in the order on the sync + * list. This means the dev is BDBUF_INVALID_DEV. + */ + if (bdbuf_cache.sync_active) + transfer->dev = bdbuf_cache.sync_device; + + /* + * If we have any buffers in the sync queue move them to the modified + * list. The first sync buffer will select the device we use. + */ + rtems_bdbuf_swapout_modified_processing (&transfer->dev, + &bdbuf_cache.sync, + &transfer->bds, + true, false, + timer_delta); + + /* + * Process the cache's modified list. + */ + rtems_bdbuf_swapout_modified_processing (&transfer->dev, + &bdbuf_cache.modified, + &transfer->bds, + bdbuf_cache.sync_active, + update_timers, + timer_delta); + + /* + * We have all the buffers that have been modified for this device so the + * cache can be unlocked because the state of each buffer has been set to + * TRANSFER. + */ + rtems_bdbuf_unlock_cache (); + + /* + * If there are buffers to transfer to the media transfer them. 
+ */ + if (!rtems_chain_is_empty (&transfer->bds)) + { + if (worker) + { + rtems_status_code sc = rtems_event_send (worker->id, + RTEMS_BDBUF_SWAPOUT_SYNC); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE); + } + else + { + rtems_bdbuf_swapout_write (transfer); + } + + transfered_buffers = true; + } + + if (bdbuf_cache.sync_active && !transfered_buffers) + { + rtems_id sync_requester; + rtems_bdbuf_lock_cache (); + sync_requester = bdbuf_cache.sync_requester; + bdbuf_cache.sync_active = false; + bdbuf_cache.sync_requester = 0; + rtems_bdbuf_unlock_cache (); + if (sync_requester) + rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC); + } + + return transfered_buffers; +} + +/** + * Allocate the write request and initialise it for good measure. + * + * @return rtems_blkdev_request* The write reference memory. + */ +static rtems_blkdev_request* +rtems_bdbuf_swapout_writereq_alloc (void) +{ + /* + * @note chrisj The rtems_blkdev_request and the array at the end is a hack. + * I am disappointment at finding code like this in RTEMS. The request should + * have been a rtems_chain_control. Simple, fast and less storage as the node + * is already part of the buffer structure. + */ + rtems_blkdev_request* write_req = + malloc (sizeof (rtems_blkdev_request) + + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer))); + + if (!write_req) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM); + + write_req->req = RTEMS_BLKDEV_REQ_WRITE; + write_req->req_done = rtems_bdbuf_transfer_done; + write_req->done_arg = write_req; + write_req->io_task = rtems_task_self (); + + return write_req; +} + +/** + * The swapout worker thread body. + * + * @param arg A pointer to the worker thread's private data. + * @return rtems_task Not used. 
+ */ +static rtems_task +rtems_bdbuf_swapout_worker_task (rtems_task_argument arg) +{ + rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg; + + while (worker->enabled) + { + rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC); + + rtems_bdbuf_swapout_write (&worker->transfer); + + rtems_bdbuf_lock_cache (); + + rtems_chain_initialize_empty (&worker->transfer.bds); + worker->transfer.dev = BDBUF_INVALID_DEV; + + rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link); + + rtems_bdbuf_unlock_cache (); + } + + free (worker->transfer.write_req); + free (worker); + + rtems_task_delete (RTEMS_SELF); +} + +/** + * Open the swapout worker threads. + */ +static void +rtems_bdbuf_swapout_workers_open (void) +{ + rtems_status_code sc; + size_t w; + + rtems_bdbuf_lock_cache (); + + for (w = 0; w < bdbuf_config.swapout_workers; w++) + { + rtems_bdbuf_swapout_worker* worker; + + worker = malloc (sizeof (rtems_bdbuf_swapout_worker)); + if (!worker) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM); + + rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link); + worker->enabled = true; + worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc (); + + rtems_chain_initialize_empty (&worker->transfer.bds); + worker->transfer.dev = BDBUF_INVALID_DEV; + + sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w), + (bdbuf_config.swapout_priority ? 
+ bdbuf_config.swapout_priority : + RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT), + SWAPOUT_TASK_STACK_SIZE, + RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR, + RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT, + &worker->id); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE); + + sc = rtems_task_start (worker->id, + rtems_bdbuf_swapout_worker_task, + (rtems_task_argument) worker); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START); + } + + rtems_bdbuf_unlock_cache (); +} + +/** + * Close the swapout worker threads. + */ +static void +rtems_bdbuf_swapout_workers_close (void) +{ + rtems_chain_node* node; + + rtems_bdbuf_lock_cache (); + + node = rtems_chain_first (&bdbuf_cache.swapout_workers); + while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node)) + { + rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node; + worker->enabled = false; + rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC); + node = rtems_chain_next (node); + } + + rtems_bdbuf_unlock_cache (); +} + +/** + * Body of task which takes care on flushing modified buffers to the disk. + * + * @param arg A pointer to the global cache data. Use the global variable and + * not this. + * @return rtems_task Not used. + */ +static rtems_task +rtems_bdbuf_swapout_task (rtems_task_argument arg) +{ + rtems_bdbuf_swapout_transfer transfer; + uint32_t period_in_ticks; + const uint32_t period_in_msecs = bdbuf_config.swapout_period;; + uint32_t timer_delta; + + transfer.write_req = rtems_bdbuf_swapout_writereq_alloc (); + rtems_chain_initialize_empty (&transfer.bds); + transfer.dev = BDBUF_INVALID_DEV; + transfer.syncing = false; + + /* + * Localise the period. + */ + period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000); + + /* + * This is temporary. Needs to be changed to use the real time clock. + */ + timer_delta = period_in_msecs; + + /* + * Create the worker threads. 
+ */ + rtems_bdbuf_swapout_workers_open (); + + while (bdbuf_cache.swapout_enabled) + { + rtems_event_set out; + rtems_status_code sc; + + /* + * Only update the timers once in the processing cycle. + */ + bool update_timers = true; + + /* + * If we write buffers to any disk perform a check again. We only write a + * single device at a time and the cache may have more than one device's + * buffers modified waiting to be written. + */ + bool transfered_buffers; + + do + { + transfered_buffers = false; + + /* + * Extact all the buffers we find for a specific device. The device is + * the first one we find on a modified list. Process the sync queue of + * buffers first. + */ + if (rtems_bdbuf_swapout_processing (timer_delta, + update_timers, + &transfer)) + { + transfered_buffers = true; + } + + /* + * Only update the timers once. + */ + update_timers = false; + } + while (transfered_buffers); + + sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC, + RTEMS_EVENT_ALL | RTEMS_WAIT, + period_in_ticks, + &out); + + if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT)) + rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE); + } + + rtems_bdbuf_swapout_workers_close (); + + free (transfer.write_req); + + rtems_task_delete (RTEMS_SELF); +} + +static void +rtems_bdbuf_purge_list (rtems_chain_control *purge_list) +{ + bool wake_buffer_waiters = false; + rtems_chain_node *node = NULL; + + while ((node = rtems_chain_get (purge_list)) != NULL) + { + rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node; + + if (bd->waiters == 0) + wake_buffer_waiters = true; + + rtems_bdbuf_discard_buffer (bd); + } + + if (wake_buffer_waiters) + rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters); +} + +typedef bool (*rtems_bdbuf_purge_compare)(dev_t a, dev_t b); + +static void +rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list, + rtems_bdbuf_purge_compare compare, + dev_t dev) +{ + rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT]; + rtems_bdbuf_buffer **prev = stack; + 
rtems_bdbuf_buffer *cur = bdbuf_cache.tree; + + *prev = NULL; + + while (cur != NULL) + { + if ((*compare) (cur->dev, dev)) + { + switch (cur->state) + { + case RTEMS_BDBUF_STATE_FREE: + case RTEMS_BDBUF_STATE_EMPTY: + case RTEMS_BDBUF_STATE_ACCESS_PURGED: + case RTEMS_BDBUF_STATE_TRANSFER_PURGED: + break; + case RTEMS_BDBUF_STATE_SYNC: + rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters); + /* Fall through */ + case RTEMS_BDBUF_STATE_MODIFIED: + rtems_bdbuf_group_release (cur); + /* Fall through */ + case RTEMS_BDBUF_STATE_CACHED: + rtems_chain_extract (&cur->link); + rtems_chain_append (purge_list, &cur->link); + break; + case RTEMS_BDBUF_STATE_TRANSFER: + rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED); + break; + case RTEMS_BDBUF_STATE_ACCESS_CACHED: + case RTEMS_BDBUF_STATE_ACCESS_EMPTY: + case RTEMS_BDBUF_STATE_ACCESS_MODIFIED: + rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED); + break; + default: + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_STATE_11); + } + } + + if (cur->avl.left != NULL) + { + /* Left */ + ++prev; + *prev = cur; + cur = cur->avl.left; + } + else if (cur->avl.right != NULL) + { + /* Right */ + ++prev; + *prev = cur; + cur = cur->avl.right; + } + else + { + while (*prev != NULL && cur == (*prev)->avl.right) + { + /* Up */ + cur = *prev; + --prev; + } + if (*prev != NULL) + /* Right */ + cur = (*prev)->avl.right; + else + /* Finished */ + cur = NULL; + } + } +} + +static void +rtems_bdbuf_purge (rtems_bdbuf_purge_compare compare, dev_t dev) +{ + rtems_chain_control purge_list; + + rtems_chain_initialize_empty (&purge_list); + rtems_bdbuf_lock_cache (); + rtems_bdbuf_gather_for_purge (&purge_list, compare, dev); + rtems_bdbuf_purge_list (&purge_list); + rtems_bdbuf_unlock_cache (); +} + +static bool +rtems_bdbuf_purge_compare_dev (dev_t a, dev_t b) +{ + return a == b; +} + +void +rtems_bdbuf_purge_dev (dev_t dev) +{ + rtems_bdbuf_purge (rtems_bdbuf_purge_compare_dev, dev); +} + +static bool 
+rtems_bdbuf_purge_compare_major (dev_t a, dev_t b) +{ + return rtems_filesystem_dev_major_t (a) == rtems_filesystem_dev_major_t (b); +} + +void +rtems_bdbuf_purge_major (rtems_device_major_number major) +{ + dev_t dev = rtems_filesystem_make_dev_t (major, 0); + + rtems_bdbuf_purge (rtems_bdbuf_purge_compare_major, dev); +} |