diff options
author | Chris Johns <chrisj@rtems.org> | 2009-08-05 00:00:54 +0000 |
---|---|---|
committer | Chris Johns <chrisj@rtems.org> | 2009-08-05 00:00:54 +0000 |
commit | 0d15414ed6c144f7f7e4ce63476b3eb9b94acceb (patch) | |
tree | d403db6ba0eae020e8613bb81a0427ca1ab85867 /cpukit/libblock/src/bdbuf.c | |
parent | 2009-08-04 Joel Sherrill <joel.sherrill@OARcorp.com> (diff) | |
download | rtems-0d15414ed6c144f7f7e4ce63476b3eb9b94acceb.tar.bz2 |
2009-08-05 Chris Johns <chrisj@rtems.org>
* libmisc/dummy/dummy-networking.c: New.
* libmisc/dummy/dummy.c, libmisc/Makefile.am: Move
the networking configuration into a separate file so
configuration variations do not cause conflicts.
* score/inline/rtems/score/object.inl,
score/include/rtems/score/object.h: Remove warnings.
* score/inline/rtems/score/object.inl: Add _Chain_First,
_Chain_Last, _Chain_Next, and _Chain_Previous.
* sapi/inline/rtems/chain.inl: Add rtems_chain_first,
rtems_chain_last, rtems_chain_next, and rtems_chain_previous.
* libblock/include/rtems/diskdevs.h: Remove the bdbuf pool id and
block_size_log2. Add media_block_size.
* libblock/src/diskdevs.c: Remove size restrictions on block
size. Add media block size initialisation. Remove comment to clean
up the bdbuf cache.
* libblock/src/blkdev.c: Remove references to
block_size_log2. Allow any block size.
* libblock/include/rtems/bdbuf.h, libblock/src/bdbuf.c: Remove all
references to pools and make the cache handle demand driven
variable buffer size allocation. Added worker threads to support the
swapout task.
* sapi/include/confdefs.h: Updated the bdbuf configuration.
Diffstat (limited to 'cpukit/libblock/src/bdbuf.c')
-rw-r--r-- | cpukit/libblock/src/bdbuf.c | 1833 |
1 files changed, 1110 insertions, 723 deletions
diff --git a/cpukit/libblock/src/bdbuf.c b/cpukit/libblock/src/bdbuf.c index 2bfa70efb1..3a686f5757 100644 --- a/cpukit/libblock/src/bdbuf.c +++ b/cpukit/libblock/src/bdbuf.c @@ -15,7 +15,7 @@ * Victor V. Vengerov <vvv@oktet.ru> * Alexander Kukuta <kam@oktet.ru> * - * Copyright (C) 2008 Chris Johns <chrisj@rtems.org> + * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org> * Rewritten to remove score mutex access. Fixes many performance * issues. * @@ -44,15 +44,89 @@ #include "rtems/bdbuf.h" +/* + * Simpler label for this file. + */ +#define bdbuf_config rtems_bdbuf_configuration + +/** + * A swapout transfer transaction data. This data is passed to a worked thread + * to handle the write phase of the transfer. + */ +typedef struct rtems_bdbuf_swapout_transfer +{ + rtems_chain_control bds; /**< The transfer list of BDs. */ + dev_t dev; /**< The device the transfer is for. */ + rtems_blkdev_request* write_req; /**< The write request array. */ +} rtems_bdbuf_swapout_transfer; + /** - * The BD buffer context. + * Swapout worker thread. These are available to take processing from the + * main swapout thread and handle the I/O operation. */ -typedef struct rtems_bdbuf_context { - rtems_bdbuf_pool* pool; /*< Table of buffer pools */ - int npools; /*< Number of entries in pool table */ - rtems_id swapout; /*< Swapout task ID */ - bool swapout_enabled; -} rtems_bdbuf_context; +typedef struct rtems_bdbuf_swapout_worker +{ + rtems_chain_node link; /**< The threads sit on a chain when + * idle. */ + rtems_id id; /**< The id of the task so we can wake + * it. */ + volatile bool enabled; /**< The worked is enabled. */ + rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this + * thread. */ +} rtems_bdbuf_swapout_worker; + +/** + * The BD buffer cache. + */ +typedef struct rtems_bdbuf_cache +{ + rtems_id swapout; /**< Swapout task ID */ + volatile bool swapout_enabled; /**< Swapout is only running if + * enabled. Set to false to kill the + * swap out task. 
It deletes itself. */ + rtems_chain_control swapout_workers; /**< The work threads for the swapout + * task. */ + + rtems_bdbuf_buffer* bds; /**< Pointer to table of buffer + * descriptors. */ + void* buffers; /**< The buffer's memory. */ + size_t buffer_min_count; /**< Number of minimum size buffers + * that fit the buffer memory. */ + size_t max_bds_per_group; /**< The number of BDs of minimum + * buffer size that fit in a group. */ + uint32_t flags; /**< Configuration flags. */ + + rtems_id lock; /**< The cache lock. It locks all + * cache data, BD and lists. */ + rtems_id sync_lock; /**< Sync calls block writes. */ + volatile bool sync_active; /**< True if a sync is active. */ + volatile rtems_id sync_requester; /**< The sync requester. */ + volatile dev_t sync_device; /**< The device to sync and -1 not a + * device sync. */ + + rtems_bdbuf_buffer* tree; /**< Buffer descriptor lookup AVL tree + * root. There is only one. */ + rtems_chain_control ready; /**< Free buffers list, read-ahead, or + * resized group buffers. */ + rtems_chain_control lru; /**< Least recently used list */ + rtems_chain_control modified; /**< Modified buffers list */ + rtems_chain_control sync; /**< Buffers to sync list */ + + rtems_id access; /**< Obtain if waiting for a buffer in + * the ACCESS state. */ + volatile uint32_t access_waiters; /**< Count of access blockers. */ + rtems_id transfer; /**< Obtain if waiting for a buffer in + * the TRANSFER state. */ + volatile uint32_t transfer_waiters; /**< Count of transfer blockers. */ + rtems_id waiting; /**< Obtain if waiting for a buffer + * and the none are available. */ + volatile uint32_t wait_waiters; /**< Count of waiting blockers. */ + + size_t group_count; /**< The number of groups. */ + rtems_bdbuf_group* groups; /**< The groups. */ + + bool initialised; /**< Initialised state. 
*/ +} rtems_bdbuf_cache; /** * Fatal errors @@ -60,18 +134,22 @@ typedef struct rtems_bdbuf_context { #define RTEMS_BLKDEV_FATAL_ERROR(n) \ (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF)) -#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY RTEMS_BLKDEV_FATAL_ERROR(1) -#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT RTEMS_BLKDEV_FATAL_ERROR(2) -#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK RTEMS_BLKDEV_FATAL_ERROR(3) -#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK RTEMS_BLKDEV_FATAL_ERROR(4) -#define RTEMS_BLKDEV_FATAL_BDBUF_POOL_LOCK RTEMS_BLKDEV_FATAL_ERROR(5) -#define RTEMS_BLKDEV_FATAL_BDBUF_POOL_UNLOCK RTEMS_BLKDEV_FATAL_ERROR(6) -#define RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAIT RTEMS_BLKDEV_FATAL_ERROR(7) -#define RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAKE RTEMS_BLKDEV_FATAL_ERROR(8) -#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE RTEMS_BLKDEV_FATAL_ERROR(9) -#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM RTEMS_BLKDEV_FATAL_ERROR(10) -#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE RTEMS_BLKDEV_FATAL_ERROR(11) -#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS RTEMS_BLKDEV_FATAL_ERROR(12) +#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY RTEMS_BLKDEV_FATAL_ERROR(1) +#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT RTEMS_BLKDEV_FATAL_ERROR(2) +#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK RTEMS_BLKDEV_FATAL_ERROR(3) +#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK RTEMS_BLKDEV_FATAL_ERROR(4) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK RTEMS_BLKDEV_FATAL_ERROR(5) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK RTEMS_BLKDEV_FATAL_ERROR(6) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_1 RTEMS_BLKDEV_FATAL_ERROR(7) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2 RTEMS_BLKDEV_FATAL_ERROR(8) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_3 RTEMS_BLKDEV_FATAL_ERROR(9) +#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE RTEMS_BLKDEV_FATAL_ERROR(10) +#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE RTEMS_BLKDEV_FATAL_ERROR(11) +#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM RTEMS_BLKDEV_FATAL_ERROR(12) +#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE 
RTEMS_BLKDEV_FATAL_ERROR(13) +#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START RTEMS_BLKDEV_FATAL_ERROR(14) +#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE RTEMS_BLKDEV_FATAL_ERROR(15) +#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS RTEMS_BLKDEV_FATAL_ERROR(16) /** * The events used in this code. These should be system events rather than @@ -91,7 +169,7 @@ typedef struct rtems_bdbuf_context { * * @warning Priority inheritance is on. */ -#define RTEMS_BDBUF_POOL_LOCK_ATTRIBS \ +#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \ (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \ RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL) @@ -103,7 +181,7 @@ typedef struct rtems_bdbuf_context { * as the holder and a blocking task will raise the priority of the * IDLE task which can cause unsual side effects. */ -#define RTEMS_BDBUF_POOL_WAITER_ATTRIBS \ +#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \ (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \ RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL) @@ -113,9 +191,9 @@ typedef struct rtems_bdbuf_context { static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg); /** - * The context of buffering layer. + * The Buffer Descriptor cache. */ -static rtems_bdbuf_context rtems_bdbuf_ctx; +static rtems_bdbuf_cache bdbuf_cache; /** * Print a message to the bdbuf trace output and flush it. @@ -636,70 +714,70 @@ rtems_bdbuf_avl_remove(rtems_bdbuf_buffer** root, } /** - * Get the pool for the device. + * Lock the mutex. A single task can nest calls. * - * @param pid Physical disk device. + * @param lock The mutex to lock. + * @param fatal_error_code The error code if the call fails. 
*/ -static rtems_bdbuf_pool* -rtems_bdbuf_get_pool (const rtems_bdpool_id pid) +static void +rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code) { - return &rtems_bdbuf_ctx.pool[pid]; + rtems_status_code sc = rtems_semaphore_obtain (lock, + RTEMS_WAIT, + RTEMS_NO_TIMEOUT); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (fatal_error_code); } /** - * Lock the pool. A single task can nest calls. + * Unlock the mutex. * - * @param pool The pool to lock. + * @param lock The mutex to unlock. + * @param fatal_error_code The error code if the call fails. */ static void -rtems_bdbuf_lock_pool (rtems_bdbuf_pool* pool) +rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code) { - rtems_status_code sc = rtems_semaphore_obtain (pool->lock, - RTEMS_WAIT, - RTEMS_NO_TIMEOUT); + rtems_status_code sc = rtems_semaphore_release (lock); if (sc != RTEMS_SUCCESSFUL) - rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_LOCK); + rtems_fatal_error_occurred (fatal_error_code); } /** - * Unlock the pool. - * - * @param pool The pool to unlock. + * Lock the cache. A single task can nest calls. */ static void -rtems_bdbuf_unlock_pool (rtems_bdbuf_pool* pool) +rtems_bdbuf_lock_cache (void) { - rtems_status_code sc = rtems_semaphore_release (pool->lock); - if (sc != RTEMS_SUCCESSFUL) - rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_UNLOCK); + rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK); } /** - * Lock the pool's sync. A single task can nest calls. - * - * @param pool The pool's sync to lock. + * Unlock the cache. */ static void -rtems_bdbuf_lock_sync (rtems_bdbuf_pool* pool) +rtems_bdbuf_unlock_cache (void) { - rtems_status_code sc = rtems_semaphore_obtain (pool->sync_lock, - RTEMS_WAIT, - RTEMS_NO_TIMEOUT); - if (sc != RTEMS_SUCCESSFUL) - rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK); + rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK); } /** - * Unlock the pool's sync. 
- * - * @param pool The pool's sync to unlock. + * Lock the cache's sync. A single task can nest calls. */ static void -rtems_bdbuf_unlock_sync (rtems_bdbuf_pool* pool) +rtems_bdbuf_lock_sync (void) { - rtems_status_code sc = rtems_semaphore_release (pool->sync_lock); - if (sc != RTEMS_SUCCESSFUL) - rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK); + rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK); +} + +/** + * Unlock the cache's sync lock. Any blocked writers are woken. + */ +static void +rtems_bdbuf_unlock_sync (void) +{ + rtems_bdbuf_unlock (bdbuf_cache.sync_lock, + RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK); } /** @@ -708,22 +786,20 @@ rtems_bdbuf_unlock_sync (rtems_bdbuf_pool* pool) * be woken and this would require storgage and we do not know the number of * tasks that could be waiting. * - * While we have the pool locked we can try and claim the semaphore and - * therefore know when we release the lock to the pool we will block until the + * While we have the cache locked we can try and claim the semaphore and + * therefore know when we release the lock to the cache we will block until the * semaphore is released. This may even happen before we get to block. * * A counter is used to save the release call when no one is waiting. * - * The function assumes the pool is locked on entry and it will be locked on + * The function assumes the cache is locked on entry and it will be locked on * exit. * - * @param pool The pool to wait for a buffer to return. * @param sema The semaphore to block on and wait. * @param waiters The wait counter for this semaphore. */ static void -rtems_bdbuf_wait (rtems_bdbuf_pool* pool, rtems_id* sema, - volatile uint32_t* waiters) +rtems_bdbuf_wait (rtems_id* sema, volatile uint32_t* waiters) { rtems_status_code sc; rtems_mode prev_mode; @@ -734,38 +810,36 @@ rtems_bdbuf_wait (rtems_bdbuf_pool* pool, rtems_id* sema, *waiters += 1; /* - * Disable preemption then unlock the pool and block. 
- * There is no POSIX condition variable in the core API so - * this is a work around. + * Disable preemption then unlock the cache and block. There is no POSIX + * condition variable in the core API so this is a work around. * - * The issue is a task could preempt after the pool is unlocked - * because it is blocking or just hits that window, and before - * this task has blocked on the semaphore. If the preempting task - * flushes the queue this task will not see the flush and may - * block for ever or until another transaction flushes this + * The issue is a task could preempt after the cache is unlocked because it is + * blocking or just hits that window, and before this task has blocked on the + * semaphore. If the preempting task flushes the queue this task will not see + * the flush and may block for ever or until another transaction flushes this * semaphore. */ sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode); if (sc != RTEMS_SUCCESSFUL) - rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAIT); + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_1); /* - * Unlock the pool, wait, and lock the pool when we return. + * Unlock the cache, wait, and lock the cache when we return. 
*/ - rtems_bdbuf_unlock_pool (pool); + rtems_bdbuf_unlock_cache (); sc = rtems_semaphore_obtain (*sema, RTEMS_WAIT, RTEMS_NO_TIMEOUT); if (sc != RTEMS_UNSATISFIED) - rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAIT); + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2); - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_lock_cache (); sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode); if (sc != RTEMS_SUCCESSFUL) - rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAIT); + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_3); *waiters -= 1; } @@ -787,16 +861,16 @@ rtems_bdbuf_wake (rtems_id sema, volatile uint32_t* waiters) sc = rtems_semaphore_flush (sema); if (sc != RTEMS_SUCCESSFUL) - rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAKE); + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE); } } /** * Add a buffer descriptor to the modified list. This modified list is treated - * a litte differently to the other lists. To access it you must have the pool + * a litte differently to the other lists. To access it you must have the cache * locked and this is assumed to be the case on entry to this call. * - * If the pool has a device being sync'ed and the bd is for that device the + * If the cache has a device being sync'ed and the bd is for that device the * call must block and wait until the sync is over before adding the bd to the * modified list. Once a sync happens for a device no bd's can be added the * modified list. The disk image is forced to be snapshot at that moment in @@ -806,30 +880,28 @@ rtems_bdbuf_wake (rtems_id sema, volatile uint32_t* waiters) * hold the sync lock. The sync lock is used to block writes while a sync is * active. * - * @param pool The pool the bd belongs to. - * @param bd The bd to queue to the pool's modified list. + * @param bd The bd to queue to the cache's modified list. 
*/ static void -rtems_bdbuf_append_modified (rtems_bdbuf_pool* pool, rtems_bdbuf_buffer* bd) +rtems_bdbuf_append_modified (rtems_bdbuf_buffer* bd) { /* - * If the pool has a device being sync'ed check if this bd is for that - * device. If it is unlock the pool and block on the sync lock. once we have - * the sync lock reelase it. - * - * If the + * If the cache has a device being sync'ed check if this bd is for that + * device. If it is unlock the cache and block on the sync lock. Once we have + * the sync lock release it. */ - if (pool->sync_active && (pool->sync_device == bd->dev)) + if (bdbuf_cache.sync_active && (bdbuf_cache.sync_device == bd->dev)) { - rtems_bdbuf_unlock_pool (pool); - rtems_bdbuf_lock_sync (pool); - rtems_bdbuf_unlock_sync (pool); - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_unlock_cache (); + /* Wait for the sync lock */ + rtems_bdbuf_lock_sync (); + rtems_bdbuf_unlock_sync (); + rtems_bdbuf_lock_cache (); } bd->state = RTEMS_BDBUF_STATE_MODIFIED; - rtems_chain_append (&pool->modified, &bd->link); + rtems_chain_append (&bdbuf_cache.modified, &bd->link); } /** @@ -838,299 +910,423 @@ rtems_bdbuf_append_modified (rtems_bdbuf_pool* pool, rtems_bdbuf_buffer* bd) static void rtems_bdbuf_wake_swapper (void) { - rtems_status_code sc = rtems_event_send (rtems_bdbuf_ctx.swapout, + rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout, RTEMS_BDBUF_SWAPOUT_SYNC); if (sc != RTEMS_SUCCESSFUL) rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE); } /** - * Initialize single buffer pool. + * Compute the number of BDs per group for a given buffer size. * - * @param config Buffer pool configuration - * @param pid Pool number + * @param size The buffer size. It can be any size and we scale up. 
+ */ +static size_t +rtems_bdbuf_bds_per_group (size_t size) +{ + size_t bufs_per_size; + size_t bds_per_size; + + if (size > rtems_bdbuf_configuration.buffer_max) + return 0; + + bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1; + + for (bds_per_size = 1; + bds_per_size < bufs_per_size; + bds_per_size <<= 1) + ; + + return bdbuf_cache.max_bds_per_group / bds_per_size; +} + +/** + * Reallocate a group. The BDs currently allocated in the group are removed + * from the ALV tree and any lists then the new BD's are prepended to the ready + * list of the cache. * - * @return RTEMS_SUCCESSFUL, if buffer pool initialized successfully, or error - * code if error occured. + * @param group The group to reallocate. + * @param new_bds_per_group The new count of BDs per group. */ -static rtems_status_code -rtems_bdbuf_initialize_pool (rtems_bdbuf_pool_config* config, - rtems_bdpool_id pid) +static void +rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group) { - int rv = 0; - unsigned char* buffer = config->mem_area; - rtems_bdbuf_pool* pool; rtems_bdbuf_buffer* bd; - rtems_status_code sc; - uint32_t b; - int cache_aligment = 32 /* FIXME rtems_cache_get_data_line_size() */; + int b; + size_t bufs_per_bd; - /* For unspecified cache alignments we use the CPU alignment */ - if (cache_aligment <= 0) + bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group; + + for (b = 0, bd = group->bdbuf; + b < group->bds_per_group; + b++, bd += bufs_per_bd) { - cache_aligment = CPU_ALIGNMENT; + if ((bd->state == RTEMS_BDBUF_STATE_CACHED) || + (bd->state == RTEMS_BDBUF_STATE_MODIFIED) || + (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD)) + { + if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY); + rtems_chain_extract (&bd->link); + } } - - pool = rtems_bdbuf_get_pool (pid); - pool->blksize = config->size; - pool->nblks = config->num; - pool->flags = 0; - pool->sync_active = false; - 
pool->sync_device = -1; - pool->sync_requester = 0; - pool->tree = NULL; - pool->buffers = NULL; - - rtems_chain_initialize_empty (&pool->ready); - rtems_chain_initialize_empty (&pool->lru); - rtems_chain_initialize_empty (&pool->modified); - rtems_chain_initialize_empty (&pool->sync); - - pool->access = 0; - pool->access_waiters = 0; - pool->transfer = 0; - pool->transfer_waiters = 0; - pool->waiting = 0; - pool->wait_waiters = 0; + group->bds_per_group = new_bds_per_group; + bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group; + for (b = 0, bd = group->bdbuf; + b < group->bds_per_group; + b++, bd += bufs_per_bd) + rtems_chain_prepend (&bdbuf_cache.ready, &bd->link); +} + +/** + * Get the next BD from the list. This call assumes the cache is locked. + * + * @param bds_per_group The number of BDs per block we are need. + * @param list The list to find the BD on. + * @return The next BD if found or NULL is none are available. + */ +static rtems_bdbuf_buffer* +rtems_bdbuf_get_next_bd (size_t bds_per_group, + rtems_chain_control* list) +{ + rtems_chain_node* node = rtems_chain_first (list); + while (!rtems_chain_is_tail (list, node)) + { + rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node; + + /* + * If this bd is already part of a group that supports the same number of + * BDs per group return it. If the bd is part of another group check the + * number of users and if 0 we can take this group and resize it. + */ + if (bd->group->bds_per_group == bds_per_group) + { + rtems_chain_extract (node); + bd->group->users++; + return bd; + } + + if (bd->group->users == 0) + { + /* + * We use the group to locate the start of the BDs for this group. + */ + rtems_bdbuf_group_realloc (bd->group, bds_per_group); + bd = (rtems_bdbuf_buffer*) rtems_chain_get (&bdbuf_cache.ready); + return bd; + } + + node = rtems_chain_next (node); + } + + return NULL; +} + +/** + * Initialise the cache. + * + * @return rtems_status_code The initialisation status. 
+ */ +rtems_status_code +rtems_bdbuf_init (void) +{ + rtems_bdbuf_group* group; + rtems_bdbuf_buffer* bd; + uint8_t* buffer; + int b; + int cache_aligment; + rtems_status_code sc; + +#if RTEMS_BDBUF_TRACE + rtems_bdbuf_printf ("init\n"); +#endif + /* - * Allocate memory for buffer descriptors + * Check the configuration table values. */ - pool->bds = calloc (config->num, sizeof (rtems_bdbuf_buffer)); + if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0) + return RTEMS_INVALID_NUMBER; - if (!pool->bds) - return RTEMS_NO_MEMORY; + /* + * We use a special variable to manage the initialisation incase we have + * completing threads doing this. You may get errors if the another thread + * makes a call and we have not finished initialisation. + */ + if (bdbuf_cache.initialised) + return RTEMS_RESOURCE_IN_USE; + bdbuf_cache.initialised = true; + /* - * Allocate memory for buffers if required. The pool memory will be cache - * aligned. It is possible to free the memory allocated by rtems_memalign() - * with free(). + * For unspecified cache alignments we use the CPU alignment. 
*/ - if (buffer == NULL) - { - rv = rtems_memalign ((void **) &buffer, - cache_aligment, - config->num * config->size); - if (rv != 0) - { - free (pool->bds); - return RTEMS_NO_MEMORY; - } - pool->buffers = buffer; - } + cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */ + if (cache_aligment <= 0) + cache_aligment = CPU_ALIGNMENT; - for (b = 0, bd = pool->bds; - b < pool->nblks; - b++, bd++, buffer += pool->blksize) - { - bd->dev = -1; - bd->block = 0; - bd->buffer = buffer; - bd->avl.left = NULL; - bd->avl.right = NULL; - bd->state = RTEMS_BDBUF_STATE_EMPTY; - bd->pool = pid; - bd->error = 0; - bd->waiters = 0; - bd->hold_timer = 0; - - rtems_chain_append (&pool->ready, &bd->link); - } + bdbuf_cache.sync_active = false; + bdbuf_cache.sync_device = -1; + bdbuf_cache.sync_requester = 0; + bdbuf_cache.tree = NULL; - sc = rtems_semaphore_create (rtems_build_name ('B', 'P', '0' + pid, 'L'), - 1, RTEMS_BDBUF_POOL_LOCK_ATTRIBS, 0, - &pool->lock); + rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers); + rtems_chain_initialize_empty (&bdbuf_cache.ready); + rtems_chain_initialize_empty (&bdbuf_cache.lru); + rtems_chain_initialize_empty (&bdbuf_cache.modified); + rtems_chain_initialize_empty (&bdbuf_cache.sync); + + bdbuf_cache.access = 0; + bdbuf_cache.access_waiters = 0; + bdbuf_cache.transfer = 0; + bdbuf_cache.transfer_waiters = 0; + bdbuf_cache.waiting = 0; + bdbuf_cache.wait_waiters = 0; + + /* + * Create the locks for the cache. 
+ */ + sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'), + 1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0, + &bdbuf_cache.lock); if (sc != RTEMS_SUCCESSFUL) { - free (pool->buffers); - free (pool->bds); + bdbuf_cache.initialised = false; return sc; } - sc = rtems_semaphore_create (rtems_build_name ('B', 'P', '0' + pid, 'S'), - 1, RTEMS_BDBUF_POOL_LOCK_ATTRIBS, 0, - &pool->sync_lock); + rtems_bdbuf_lock_cache (); + + sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'), + 1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0, + &bdbuf_cache.sync_lock); if (sc != RTEMS_SUCCESSFUL) { - rtems_semaphore_delete (pool->lock); - free (pool->buffers); - free (pool->bds); + rtems_bdbuf_unlock_cache (); + rtems_semaphore_delete (bdbuf_cache.lock); + bdbuf_cache.initialised = false; return sc; } - sc = rtems_semaphore_create (rtems_build_name ('B', 'P', '0' + pid, 'a'), - 0, RTEMS_BDBUF_POOL_WAITER_ATTRIBS, 0, - &pool->access); + sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'), + 0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0, + &bdbuf_cache.access); if (sc != RTEMS_SUCCESSFUL) { - rtems_semaphore_delete (pool->sync_lock); - rtems_semaphore_delete (pool->lock); - free (pool->buffers); - free (pool->bds); + rtems_semaphore_delete (bdbuf_cache.sync_lock); + rtems_bdbuf_unlock_cache (); + rtems_semaphore_delete (bdbuf_cache.lock); + bdbuf_cache.initialised = false; return sc; } - sc = rtems_semaphore_create (rtems_build_name ('B', 'P', '0' + pid, 't'), - 0, RTEMS_BDBUF_POOL_WAITER_ATTRIBS, 0, - &pool->transfer); + sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'), + 0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0, + &bdbuf_cache.transfer); if (sc != RTEMS_SUCCESSFUL) { - rtems_semaphore_delete (pool->access); - rtems_semaphore_delete (pool->sync_lock); - rtems_semaphore_delete (pool->lock); - free (pool->buffers); - free (pool->bds); + rtems_semaphore_delete (bdbuf_cache.access); + rtems_semaphore_delete (bdbuf_cache.sync_lock); + rtems_bdbuf_unlock_cache 
(); + rtems_semaphore_delete (bdbuf_cache.lock); + bdbuf_cache.initialised = false; return sc; } - sc = rtems_semaphore_create (rtems_build_name ('B', 'P', '0' + pid, 'w'), - 0, RTEMS_BDBUF_POOL_WAITER_ATTRIBS, 0, - &pool->waiting); + sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'w'), + 0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0, + &bdbuf_cache.waiting); if (sc != RTEMS_SUCCESSFUL) { - rtems_semaphore_delete (pool->transfer); - rtems_semaphore_delete (pool->access); - rtems_semaphore_delete (pool->sync_lock); - rtems_semaphore_delete (pool->lock); - free (pool->buffers); - free (pool->bds); + rtems_semaphore_delete (bdbuf_cache.transfer); + rtems_semaphore_delete (bdbuf_cache.access); + rtems_semaphore_delete (bdbuf_cache.sync_lock); + rtems_bdbuf_unlock_cache (); + rtems_semaphore_delete (bdbuf_cache.lock); + bdbuf_cache.initialised = false; return sc; } - - return RTEMS_SUCCESSFUL; -} - -/** - * Free resources allocated for buffer pool with specified number. - * - * @param pid Buffer pool number - * - * @retval RTEMS_SUCCESSFUL - */ -static rtems_status_code -rtems_bdbuf_release_pool (rtems_bdpool_id pid) -{ - rtems_bdbuf_pool* pool = rtems_bdbuf_get_pool (pid); - - rtems_bdbuf_lock_pool (pool); - - rtems_semaphore_delete (pool->waiting); - rtems_semaphore_delete (pool->transfer); - rtems_semaphore_delete (pool->access); - rtems_semaphore_delete (pool->lock); - free (pool->buffers); - free (pool->bds); - - return RTEMS_SUCCESSFUL; -} - -rtems_status_code -rtems_bdbuf_init (void) -{ - rtems_bdpool_id p; - rtems_status_code sc; - -#if RTEMS_BDBUF_TRACE - rtems_bdbuf_printf ("init\n"); -#endif - - if (rtems_bdbuf_pool_configuration_size <= 0) - return RTEMS_INVALID_SIZE; - - if (rtems_bdbuf_ctx.npools) - return RTEMS_RESOURCE_IN_USE; + /* + * Allocate the memory for the buffer descriptors. 
+ */ + bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer), + bdbuf_config.size / bdbuf_config.buffer_min); + if (!bdbuf_cache.bds) + { + rtems_semaphore_delete (bdbuf_cache.transfer); + rtems_semaphore_delete (bdbuf_cache.access); + rtems_semaphore_delete (bdbuf_cache.sync_lock); + rtems_bdbuf_unlock_cache (); + rtems_semaphore_delete (bdbuf_cache.lock); + bdbuf_cache.initialised = false; + return RTEMS_NO_MEMORY; + } - rtems_bdbuf_ctx.npools = rtems_bdbuf_pool_configuration_size; + /* + * Compute the various number of elements in the cache. + */ + bdbuf_cache.buffer_min_count = + bdbuf_config.size / bdbuf_config.buffer_min; + bdbuf_cache.max_bds_per_group = + bdbuf_config.buffer_max / bdbuf_config.buffer_min; + bdbuf_cache.group_count = + bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group; /* - * Allocate memory for buffer pool descriptors + * Allocate the memory for the buffer descriptors. */ - rtems_bdbuf_ctx.pool = calloc (rtems_bdbuf_pool_configuration_size, - sizeof (rtems_bdbuf_pool)); + bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group), + bdbuf_cache.group_count); + if (!bdbuf_cache.groups) + { + free (bdbuf_cache.bds); + rtems_semaphore_delete (bdbuf_cache.transfer); + rtems_semaphore_delete (bdbuf_cache.access); + rtems_semaphore_delete (bdbuf_cache.sync_lock); + rtems_bdbuf_unlock_cache (); + rtems_semaphore_delete (bdbuf_cache.lock); + bdbuf_cache.initialised = false; + return RTEMS_NO_MEMORY; + } - if (rtems_bdbuf_ctx.pool == NULL) + /* + * Allocate memory for buffer memory. The buffer memory will be cache + * aligned. It is possible to free the memory allocated by rtems_memalign() + * with free(). Return 0 if allocated. 
+ */ + if (rtems_memalign ((void **) &bdbuf_cache.buffers, + cache_aligment, + bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0) + { + free (bdbuf_cache.groups); + free (bdbuf_cache.bds); + rtems_semaphore_delete (bdbuf_cache.transfer); + rtems_semaphore_delete (bdbuf_cache.access); + rtems_semaphore_delete (bdbuf_cache.sync_lock); + rtems_bdbuf_unlock_cache (); + rtems_semaphore_delete (bdbuf_cache.lock); + bdbuf_cache.initialised = false; return RTEMS_NO_MEMORY; + } /* - * Initialize buffer pools and roll out if something failed, + * The cache is empty after opening so we need to add all the buffers to it + * and initialise the groups. */ - for (p = 0; p < rtems_bdbuf_ctx.npools; p++) + for (b = 0, group = bdbuf_cache.groups, + bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers; + b < bdbuf_cache.buffer_min_count; + b++, bd++, buffer += bdbuf_config.buffer_min) { - sc = rtems_bdbuf_initialize_pool (&rtems_bdbuf_pool_configuration[p], p); - if (sc != RTEMS_SUCCESSFUL) - { - rtems_bdpool_id j; - for (j = 0; j < p - 1; j++) - rtems_bdbuf_release_pool (j); - return sc; - } + bd->dev = -1; + bd->group = group; + bd->buffer = buffer; + bd->avl.left = NULL; + bd->avl.right = NULL; + bd->state = RTEMS_BDBUF_STATE_EMPTY; + bd->error = 0; + bd->waiters = 0; + bd->hold_timer = 0; + + rtems_chain_append (&bdbuf_cache.ready, &bd->link); + + if ((b % bdbuf_cache.max_bds_per_group) == + (bdbuf_cache.max_bds_per_group - 1)) + group++; } + for (b = 0, + group = bdbuf_cache.groups, + bd = bdbuf_cache.bds; + b < bdbuf_cache.group_count; + b++, + group++, + bd += bdbuf_cache.max_bds_per_group) + { + group->bds_per_group = bdbuf_cache.max_bds_per_group; + group->users = 0; + group->bdbuf = bd; + } + /* - * Create and start swapout task + * Create and start swapout task. This task will create and manage the worker + * threads. 
*/ - - rtems_bdbuf_ctx.swapout_enabled = true; + bdbuf_cache.swapout_enabled = true; sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'), - (rtems_bdbuf_configuration.swapout_priority ? - rtems_bdbuf_configuration.swapout_priority : + (bdbuf_config.swapout_priority ? + bdbuf_config.swapout_priority : RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT), SWAPOUT_TASK_STACK_SIZE, RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR, RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT, - &rtems_bdbuf_ctx.swapout); + &bdbuf_cache.swapout); if (sc != RTEMS_SUCCESSFUL) { - for (p = 0; p < rtems_bdbuf_ctx.npools; p++) - rtems_bdbuf_release_pool (p); - free (rtems_bdbuf_ctx.pool); + free (bdbuf_cache.buffers); + free (bdbuf_cache.groups); + free (bdbuf_cache.bds); + rtems_semaphore_delete (bdbuf_cache.transfer); + rtems_semaphore_delete (bdbuf_cache.access); + rtems_semaphore_delete (bdbuf_cache.sync_lock); + rtems_bdbuf_unlock_cache (); + rtems_semaphore_delete (bdbuf_cache.lock); + bdbuf_cache.initialised = false; return sc; } - sc = rtems_task_start (rtems_bdbuf_ctx.swapout, + sc = rtems_task_start (bdbuf_cache.swapout, rtems_bdbuf_swapout_task, - (rtems_task_argument) &rtems_bdbuf_ctx); + (rtems_task_argument) &bdbuf_cache); if (sc != RTEMS_SUCCESSFUL) { - rtems_task_delete (rtems_bdbuf_ctx.swapout); - for (p = 0; p < rtems_bdbuf_ctx.npools; p++) - rtems_bdbuf_release_pool (p); - free (rtems_bdbuf_ctx.pool); + rtems_task_delete (bdbuf_cache.swapout); + free (bdbuf_cache.buffers); + free (bdbuf_cache.groups); + free (bdbuf_cache.bds); + rtems_semaphore_delete (bdbuf_cache.transfer); + rtems_semaphore_delete (bdbuf_cache.access); + rtems_semaphore_delete (bdbuf_cache.sync_lock); + rtems_bdbuf_unlock_cache (); + rtems_semaphore_delete (bdbuf_cache.lock); + bdbuf_cache.initialised = false; return sc; } + rtems_bdbuf_unlock_cache (); + return RTEMS_SUCCESSFUL; } /** * Get a buffer for this device and block. This function returns a buffer once * placed into the AVL tree. 
If no buffer is available and it is not a read - * ahead request and no buffers are waiting to the written to disk wait until - * one is available. If buffers are waiting to be written to disk and non are - * available expire the hold timer and wake the swap out task. If the buffer is - * for a read ahead transfer return NULL if there is not buffer or it is in the - * cache. + * ahead request and no buffers are waiting to the written to disk wait until a + * buffer is available. If buffers are waiting to be written to disk and none + * are available expire the hold timer's of the queued buffers and wake the + * swap out task. If the buffer is for a read ahead transfer return NULL if + * there are no buffers available or the buffer is already in the cache. * - * The AVL tree of buffers for the pool is searched and if not located check - * obtain a buffer and insert it into the AVL tree. Buffers are first obtained - * from the ready list until all empty/ready buffers are used. Once all buffers - * are in use buffers are taken from the LRU list with the least recently used - * buffer taken first. A buffer taken from the LRU list is removed from the AVL - * tree. The ready list or LRU list buffer is initialised to this device and - * block. If no buffers are available due to the ready and LRU lists being - * empty a check is made of the modified list. Buffers may be queued waiting - * for the hold timer to expire. These buffers should be written to disk and - * returned to the LRU list where they can be used rather than this call - * blocking. If buffers are on the modified list the max. write block size of - * buffers have their hold timer expired and the swap out task woken. The - * caller then blocks on the waiting semaphore and counter. When buffers return - * from the upper layers (access) or lower driver (transfer) the blocked caller - * task is woken and this procedure is repeated. 
The repeat handles a case of a - * another thread pre-empting getting a buffer first and adding it to the AVL - * tree. + * The AVL tree of buffers for the cache is searched and if not found obtain a + * buffer and insert it into the AVL tree. Buffers are first obtained from the + * ready list until all empty/ready buffers are used. Once all buffers are in + * use the LRU list is searched for a buffer of the same group size or a group + * that has no active buffers in use. A buffer taken from the LRU list is + * removed from the AVL tree and assigned the new block number. The ready or + * LRU list buffer is initialised to this device and block. If no buffers are + * available due to the ready and LRU lists being empty a check is made of the + * modified list. Buffers may be queued waiting for the hold timer to + * expire. These buffers should be written to disk and returned to the LRU list + * where they can be used. If buffers are on the modified list the max. write + * block size of buffers have their hold timer's expired and the swap out task + * woken. The caller then blocks on the waiting semaphore and counter. When + * buffers return from the upper layers (access) or lower driver (transfer) the + * blocked caller task is woken and this procedure is repeated. The repeat + * handles a case of a another thread pre-empting getting a buffer first and + * adding it to the AVL tree. * * A buffer located in the AVL tree means it is already in the cache and maybe * in use somewhere. The buffer can be either: @@ -1142,44 +1338,52 @@ rtems_bdbuf_init (void) * If cached we assign the new state, extract it from any list it maybe part of * and return to the user. * - * This function assumes the pool the buffer is being taken from is locked and - * it will make sure the pool is locked when it returns. The pool will be + * This function assumes the cache the buffer is being taken from is locked and + * it will make sure the cache is locked when it returns. 
The cache will be * unlocked if the call could block. * - * @param pdd The physical disk device - * @param pool The pool reference - * @param block Absolute media block number - * @param read_ahead The get is for a read ahead buffer + * Variable sized buffer is handled by groups. A group is the size of the + * maximum buffer that can be allocated. The group can size in multiples of the + * minimum buffer size where the mulitples are 1,2,4,8, etc. If the buffer is + * found in the AVL tree the number of BDs in the group is check and if + * different the buffer size for the block has changed. The buffer needs to be + * invalidated. * - * @return RTEMS status code ( if operation completed successfully or error + * @param dd The disk device. Has the configured block size. + * @param bds_per_group The number of BDs in a group for this block. + * @param block Absolute media block number for the device + * @param read_ahead The get is for a read ahead buffer if true + * @return RTEMS status code (if operation completed successfully or error * code if error is occured) */ static rtems_bdbuf_buffer* -rtems_bdbuf_get_buffer (rtems_disk_device* pdd, - rtems_bdbuf_pool* pool, +rtems_bdbuf_get_buffer (rtems_disk_device* dd, + size_t bds_per_group, rtems_blkdev_bnum block, bool read_ahead) { - dev_t device = pdd->dev; + dev_t device = dd->dev; rtems_bdbuf_buffer* bd; bool available; - + /* * Loop until we get a buffer. Under load we could find no buffers are * available requiring this task to wait until some become available before - * proceeding. There is no timeout. If the call is to block and the buffer is - * for a read ahead buffer return NULL. + * proceeding. There is no timeout. If this call is to block and the buffer + * is for a read ahead buffer return NULL. The read ahead is nice but not + * that important. 
* * The search procedure is repeated as another thread could have pre-empted * us while we waited for a buffer, obtained an empty buffer and loaded the - * AVL tree with the one we are after. + * AVL tree with the one we are after. In this case we move down and wait for + * the buffer to return to the cache. */ do { /* * Search for buffer descriptor for this dev/block key. */ - bd = rtems_bdbuf_avl_search (&pool->tree, device, block); + bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, device, block); /* * No buffer in the cache for this block. We need to obtain a buffer and @@ -1194,19 +1398,22 @@ rtems_bdbuf_get_buffer (rtems_disk_device* pdd, if (!bd) { /* - * Assign new buffer descriptor from the empty list if one is present. If - * the empty queue is empty get the oldest buffer from LRU list. If the + * Assign new buffer descriptor from the ready list if one is present. If + * the ready queue is empty get the oldest buffer from LRU list. If the * LRU list is empty there are no available buffers check the modified * list. */ - if (rtems_chain_is_empty (&pool->ready)) + bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.ready); + + if (!bd) { /* - * No unsed or read-ahead buffers. + * No unused or read-ahead buffers. * * If this is a read ahead buffer just return. No need to place further * pressure on the cache by reading something that may be needed when - * we have data in the cache that was needed and may still be. + * we have data in the cache that was needed and may still be in the + * future. */ if (read_ahead) return NULL; @@ -1214,14 +1421,14 @@ rtems_bdbuf_get_buffer (rtems_disk_device* pdd, /* * Check the LRU list. */ - bd = (rtems_bdbuf_buffer *) rtems_chain_get (&pool->lru); + bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.lru); if (bd) { /* * Remove the buffer from the AVL tree. 
*/ - if (rtems_bdbuf_avl_remove (&pool->tree, bd) != 0) + if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0) rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY); } else @@ -1229,43 +1436,46 @@ rtems_bdbuf_get_buffer (rtems_disk_device* pdd, /* * If there are buffers on the modified list expire the hold timer * and wake the swap out task then wait else just go and wait. + * + * The check for an empty list is made so the swapper is only woken + * when if timers are changed. */ - if (!rtems_chain_is_empty (&pool->modified)) + if (!rtems_chain_is_empty (&bdbuf_cache.modified)) { - rtems_chain_node* node = rtems_chain_head (&pool->modified); + rtems_chain_node* node = rtems_chain_first (&bdbuf_cache.modified); uint32_t write_blocks = 0; - node = node->next; - while ((write_blocks < rtems_bdbuf_configuration.max_write_blocks) && - !rtems_chain_is_tail (&pool->modified, node)) + while ((write_blocks < bdbuf_config.max_write_blocks) && + !rtems_chain_is_tail (&bdbuf_cache.modified, node)) { rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node; bd->hold_timer = 0; write_blocks++; - node = node->next; + node = rtems_chain_next (node); } rtems_bdbuf_wake_swapper (); } /* - * Wait for a buffer to be returned to the pool. The buffer will be + * Wait for a buffer to be returned to the cache. The buffer will be * placed on the LRU list. */ - rtems_bdbuf_wait (pool, &pool->waiting, &pool->wait_waiters); + rtems_bdbuf_wait (&bdbuf_cache.waiting, &bdbuf_cache.wait_waiters); } } else { - bd = (rtems_bdbuf_buffer *) rtems_chain_get (&(pool->ready)); - + /* + * We have a new buffer for this block. 
+ */ if ((bd->state != RTEMS_BDBUF_STATE_EMPTY) && (bd->state != RTEMS_BDBUF_STATE_READ_AHEAD)) rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY); if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD) { - if (rtems_bdbuf_avl_remove (&pool->tree, bd) != 0) + if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0) rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY); } } @@ -1280,12 +1490,26 @@ rtems_bdbuf_get_buffer (rtems_disk_device* pdd, bd->error = 0; bd->waiters = 0; - if (rtems_bdbuf_avl_insert (&pool->tree, bd) != 0) + if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0) rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY); return bd; } } + else + { + /* + * We have the buffer for the block from the cache. Check if the buffer + * in the cache is the same size and the requested size we are after. + */ + if (bd->group->bds_per_group != bds_per_group) + { + bd->state = RTEMS_BDBUF_STATE_EMPTY; + rtems_chain_extract (&bd->link); + rtems_chain_prepend (&bdbuf_cache.ready, &bd->link); + bd = NULL; + } + } } while (!bd); @@ -1314,14 +1538,16 @@ rtems_bdbuf_get_buffer (rtems_disk_device* pdd, case RTEMS_BDBUF_STATE_ACCESS: case RTEMS_BDBUF_STATE_ACCESS_MODIFIED: bd->waiters++; - rtems_bdbuf_wait (pool, &pool->access, &pool->access_waiters); + rtems_bdbuf_wait (&bdbuf_cache.access, + &bdbuf_cache.access_waiters); bd->waiters--; break; case RTEMS_BDBUF_STATE_SYNC: case RTEMS_BDBUF_STATE_TRANSFER: bd->waiters++; - rtems_bdbuf_wait (pool, &pool->transfer, &pool->transfer_waiters); + rtems_bdbuf_wait (&bdbuf_cache.transfer, + &bdbuf_cache.transfer_waiters); bd->waiters--; break; @@ -1344,44 +1570,52 @@ rtems_bdbuf_get (dev_t device, rtems_bdbuf_buffer** bdp) { rtems_disk_device* dd; - rtems_bdbuf_pool* pool; rtems_bdbuf_buffer* bd; + size_t bds_per_group; + + if (!bdbuf_cache.initialised) + return RTEMS_NOT_CONFIGURED; /* - * Do not hold the pool lock when obtaining the disk table. 
+ * Do not hold the cache lock when obtaining the disk table. */ dd = rtems_disk_obtain (device); - if (dd == NULL) + if (!dd) return RTEMS_INVALID_ID; if (block >= dd->size) { rtems_disk_release (dd); - return RTEMS_INVALID_NUMBER; + return RTEMS_INVALID_ADDRESS; } - pool = rtems_bdbuf_get_pool (dd->phys_dev->pool); + bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size); + if (!bds_per_group) + { + rtems_disk_release (dd); + return RTEMS_INVALID_NUMBER; + } - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_lock_cache (); #if RTEMS_BDBUF_TRACE /* Print the block index relative to the physical disk */ rtems_bdbuf_printf ("get: %d (dev = %08x)\n", block + dd->start, device); #endif - bd = rtems_bdbuf_get_buffer (dd->phys_dev, pool, block + dd->start, false); + bd = rtems_bdbuf_get_buffer (dd, bds_per_group, block + dd->start, false); if (bd->state == RTEMS_BDBUF_STATE_MODIFIED) bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED; else bd->state = RTEMS_BDBUF_STATE_ACCESS; - - rtems_bdbuf_unlock_pool (pool); + + rtems_bdbuf_unlock_cache (); rtems_disk_release(dd); *bdp = bd; - + return RTEMS_SUCCESSFUL; } @@ -1412,11 +1646,14 @@ rtems_bdbuf_read (dev_t device, rtems_bdbuf_buffer** bdp) { rtems_disk_device* dd; - rtems_bdbuf_pool* pool; rtems_bdbuf_buffer* bd = NULL; uint32_t read_ahead_count; rtems_blkdev_request* req; + size_t bds_per_group; + if (!bdbuf_cache.initialised) + return RTEMS_NOT_CONFIGURED; + /* * @todo This type of request structure is wrong and should be removed. */ @@ -1427,10 +1664,10 @@ rtems_bdbuf_read (dev_t device, rtems_bdbuf_configuration.max_read_ahead_blocks)); /* - * Do not hold the pool lock when obtaining the disk table. + * Do not hold the cache lock when obtaining the disk table. 
*/ dd = rtems_disk_obtain (device); - if (dd == NULL) + if (!dd) return RTEMS_INVALID_ID; if (block >= dd->size) { @@ -1438,6 +1675,13 @@ rtems_bdbuf_read (dev_t device, return RTEMS_INVALID_NUMBER; } + bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size); + if (!bds_per_group) + { + rtems_disk_release (dd); + return RTEMS_INVALID_NUMBER; + } + #if RTEMS_BDBUF_TRACE /* Print the block index relative to the physical disk */ rtems_bdbuf_printf ("read: %d (dev = %08x)\n", block + dd->start, device); @@ -1457,9 +1701,7 @@ rtems_bdbuf_read (dev_t device, else read_ahead_count = dd->size - block; - pool = rtems_bdbuf_get_pool (dd->phys_dev->pool); - - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_lock_cache (); while (req->bufnum < read_ahead_count) { @@ -1471,7 +1713,7 @@ rtems_bdbuf_read (dev_t device, * We need to clean up any buffers allocated and not passed back to the * caller. */ - bd = rtems_bdbuf_get_buffer (dd->phys_dev, pool, + bd = rtems_bdbuf_get_buffer (dd, bds_per_group, block + dd->start + req->bufnum, req->bufnum == 0 ? false : true); @@ -1514,14 +1756,14 @@ rtems_bdbuf_read (dev_t device, if (req->bufnum) { /* - * Unlock the pool. We have the buffer for the block and it will be in the + * Unlock the cache. We have the buffer for the block and it will be in the * access or transfer state. We may also have a number of read ahead blocks * if we need to transfer data. At this point any other threads can gain - * access to the pool and if they are after any of the buffers we have they - * will block and be woken when the buffer is returned to the pool. + * access to the cache and if they are after any of the buffers we have + * they will block and be woken when the buffer is returned to the cache. * * If a transfer is needed the I/O operation will occur with pre-emption - * enabled and the pool unlocked. This is a change to the previous version + * enabled and the cache unlocked. This is a change to the previous version * of the bdbuf code. 
*/ rtems_event_set out; @@ -1535,7 +1777,7 @@ rtems_bdbuf_read (dev_t device, RTEMS_EVENT_ALL | RTEMS_NO_WAIT, 0, &out); - rtems_bdbuf_unlock_pool (pool); + rtems_bdbuf_unlock_cache (); req->req = RTEMS_BLKDEV_REQ_READ; req->req_done = rtems_bdbuf_read_done; @@ -1567,7 +1809,7 @@ rtems_bdbuf_read (dev_t device, rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE); } - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_lock_cache (); for (b = 1; b < req->bufnum; b++) { @@ -1588,7 +1830,7 @@ rtems_bdbuf_read (dev_t device, else bd->state = RTEMS_BDBUF_STATE_ACCESS; - rtems_bdbuf_unlock_pool (pool); + rtems_bdbuf_unlock_cache (); rtems_disk_release (dd); *bdp = bd; @@ -1599,14 +1841,13 @@ rtems_bdbuf_read (dev_t device, rtems_status_code rtems_bdbuf_release (rtems_bdbuf_buffer* bd) { - rtems_bdbuf_pool* pool; + if (!bdbuf_cache.initialised) + return RTEMS_NOT_CONFIGURED; if (bd == NULL) return RTEMS_INVALID_ADDRESS; - pool = rtems_bdbuf_get_pool (bd->pool); - - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_lock_cache (); #if RTEMS_BDBUF_TRACE rtems_bdbuf_printf ("release: %d\n", bd->block); @@ -1614,7 +1855,7 @@ rtems_bdbuf_release (rtems_bdbuf_buffer* bd) if (bd->state == RTEMS_BDBUF_STATE_ACCESS_MODIFIED) { - rtems_bdbuf_append_modified (pool, bd); + rtems_bdbuf_append_modified (bd); } else { @@ -1624,12 +1865,17 @@ rtems_bdbuf_release (rtems_bdbuf_buffer* bd) * furthermost from the read buffer will be used. */ if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD) - rtems_chain_prepend (&pool->ready, &bd->link); + rtems_chain_prepend (&bdbuf_cache.ready, &bd->link); else { bd->state = RTEMS_BDBUF_STATE_CACHED; - rtems_chain_append (&pool->lru, &bd->link); + rtems_chain_append (&bdbuf_cache.lru, &bd->link); } + + /* + * One less user for the group of bds. + */ + bd->group->users--; } /* @@ -1637,22 +1883,22 @@ rtems_bdbuf_release (rtems_bdbuf_buffer* bd) * waiters if this is the first buffer to placed back onto the queue. 
*/ if (bd->waiters) - rtems_bdbuf_wake (pool->access, &pool->access_waiters); + rtems_bdbuf_wake (bdbuf_cache.access, &bdbuf_cache.access_waiters); else { if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD) { - if (rtems_chain_has_only_one_node (&pool->ready)) - rtems_bdbuf_wake (pool->waiting, &pool->wait_waiters); + if (rtems_chain_has_only_one_node (&bdbuf_cache.ready)) + rtems_bdbuf_wake (bdbuf_cache.waiting, &bdbuf_cache.wait_waiters); } else { - if (rtems_chain_has_only_one_node (&pool->lru)) - rtems_bdbuf_wake (pool->waiting, &pool->wait_waiters); + if (rtems_chain_has_only_one_node (&bdbuf_cache.lru)) + rtems_bdbuf_wake (bdbuf_cache.waiting, &bdbuf_cache.wait_waiters); } } - rtems_bdbuf_unlock_pool (pool); + rtems_bdbuf_unlock_cache (); return RTEMS_SUCCESSFUL; } @@ -1660,14 +1906,13 @@ rtems_bdbuf_release (rtems_bdbuf_buffer* bd) rtems_status_code rtems_bdbuf_release_modified (rtems_bdbuf_buffer* bd) { - rtems_bdbuf_pool* pool; + if (!bdbuf_cache.initialised) + return RTEMS_NOT_CONFIGURED; - if (bd == NULL) + if (!bd) return RTEMS_INVALID_ADDRESS; - pool = rtems_bdbuf_get_pool (bd->pool); - - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_lock_cache (); #if RTEMS_BDBUF_TRACE rtems_bdbuf_printf ("release modified: %d\n", bd->block); @@ -1675,12 +1920,12 @@ rtems_bdbuf_release_modified (rtems_bdbuf_buffer* bd) bd->hold_timer = rtems_bdbuf_configuration.swap_block_hold; - rtems_bdbuf_append_modified (pool, bd); + rtems_bdbuf_append_modified (bd); if (bd->waiters) - rtems_bdbuf_wake (pool->access, &pool->access_waiters); + rtems_bdbuf_wake (bdbuf_cache.access, &bdbuf_cache.access_waiters); - rtems_bdbuf_unlock_pool (pool); + rtems_bdbuf_unlock_cache (); return RTEMS_SUCCESSFUL; } @@ -1688,23 +1933,23 @@ rtems_bdbuf_release_modified (rtems_bdbuf_buffer* bd) rtems_status_code rtems_bdbuf_sync (rtems_bdbuf_buffer* bd) { - rtems_bdbuf_pool* pool; - bool available; + bool available; #if RTEMS_BDBUF_TRACE rtems_bdbuf_printf ("sync: %d\n", bd->block); #endif - if (bd == 
NULL) - return RTEMS_INVALID_ADDRESS; + if (!bdbuf_cache.initialised) + return RTEMS_NOT_CONFIGURED; - pool = rtems_bdbuf_get_pool (bd->pool); + if (!bd) + return RTEMS_INVALID_ADDRESS; - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_lock_cache (); bd->state = RTEMS_BDBUF_STATE_SYNC; - rtems_chain_append (&pool->sync, &bd->link); + rtems_chain_append (&bdbuf_cache.sync, &bd->link); rtems_bdbuf_wake_swapper (); @@ -1724,7 +1969,7 @@ rtems_bdbuf_sync (rtems_bdbuf_buffer* bd) case RTEMS_BDBUF_STATE_SYNC: case RTEMS_BDBUF_STATE_TRANSFER: bd->waiters++; - rtems_bdbuf_wait (pool, &pool->transfer, &pool->transfer_waiters); + rtems_bdbuf_wait (&bdbuf_cache.transfer, &bdbuf_cache.transfer_waiters); bd->waiters--; break; @@ -1733,7 +1978,7 @@ rtems_bdbuf_sync (rtems_bdbuf_buffer* bd) } } - rtems_bdbuf_unlock_pool (pool); + rtems_bdbuf_unlock_cache (); return RTEMS_SUCCESSFUL; } @@ -1742,7 +1987,6 @@ rtems_status_code rtems_bdbuf_syncdev (dev_t dev) { rtems_disk_device* dd; - rtems_bdbuf_pool* pool; rtems_status_code sc; rtems_event_set out; @@ -1750,38 +1994,39 @@ rtems_bdbuf_syncdev (dev_t dev) rtems_bdbuf_printf ("syncdev: %08x\n", dev); #endif + if (!bdbuf_cache.initialised) + return RTEMS_NOT_CONFIGURED; + /* - * Do not hold the pool lock when obtaining the disk table. + * Do not hold the cache lock when obtaining the disk table. */ dd = rtems_disk_obtain (dev); - if (dd == NULL) + if (!dd) return RTEMS_INVALID_ID; - pool = rtems_bdbuf_get_pool (dd->pool); - /* - * Take the sync lock before locking the pool. Once we have the sync lock - * we can lock the pool. If another thread has the sync lock it will cause - * this thread to block until it owns the sync lock then it can own the - * pool. The sync lock can only be obtained with the pool unlocked. + * Take the sync lock before locking the cache. Once we have the sync lock we + * can lock the cache. 
If another thread has the sync lock it will cause this + * thread to block until it owns the sync lock then it can own the cache. The + * sync lock can only be obtained with the cache unlocked. */ - rtems_bdbuf_lock_sync (pool); - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_lock_sync (); + rtems_bdbuf_lock_cache (); /* - * Set the pool to have a sync active for a specific device and let the swap + * Set the cache to have a sync active for a specific device and let the swap * out task know the id of the requester to wake when done. * * The swap out task will negate the sync active flag when no more buffers - * for the device are held on the modified for sync queues. + * for the device are held on the "modified for sync" queues. */ - pool->sync_active = true; - pool->sync_requester = rtems_task_self (); - pool->sync_device = dev; + bdbuf_cache.sync_active = true; + bdbuf_cache.sync_requester = rtems_task_self (); + bdbuf_cache.sync_device = dev; rtems_bdbuf_wake_swapper (); - rtems_bdbuf_unlock_pool (pool); + rtems_bdbuf_unlock_cache (); sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC, RTEMS_EVENT_ALL | RTEMS_WAIT, @@ -1790,9 +2035,9 @@ rtems_bdbuf_syncdev (dev_t dev) if (sc != RTEMS_SUCCESSFUL) rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE); - rtems_bdbuf_unlock_sync (pool); + rtems_bdbuf_unlock_sync (); - return rtems_disk_release(dd); + return rtems_disk_release (dd); } /** @@ -1817,200 +2062,32 @@ rtems_bdbuf_write_done(void *arg, rtems_status_code status, int error) } /** - * Process the modified list of buffers. There us a sync or modified list that - * needs to be handled. + * Swapout transfer to the driver. The driver will break this I/O into groups + * of consecutive write requests is multiple consecutive buffers are required + * by the driver. * - * @param pid The pool id to process modified buffers on. - * @param dev The device to handle. If -1 no device is selected so select the - * device of the first buffer to be written to disk. 
- * @param chain The modified chain to process. - * @param transfer The chain to append buffers to be written too. - * @param sync_active If true this is a sync operation so expire all timers. - * @param update_timers If true update the timers. - * @param timer_delta It update_timers is true update the timers by this - * amount. + * @param transfer The transfer transaction. */ static void -rtems_bdbuf_swapout_modified_processing (rtems_bdpool_id pid, - dev_t* dev, - rtems_chain_control* chain, - rtems_chain_control* transfer, - bool sync_active, - bool update_timers, - uint32_t timer_delta) -{ - if (!rtems_chain_is_empty (chain)) - { - rtems_chain_node* node = rtems_chain_head (chain); - node = node->next; - - while (!rtems_chain_is_tail (chain, node)) - { - rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node; - - if (bd->pool == pid) - { - /* - * Check if the buffer's hold timer has reached 0. If a sync is active - * force all the timers to 0. - * - * @note Lots of sync requests will skew this timer. It should be based - * on TOD to be accurate. Does it matter ? - */ - if (sync_active) - bd->hold_timer = 0; - - if (bd->hold_timer) - { - if (update_timers) - { - if (bd->hold_timer > timer_delta) - bd->hold_timer -= timer_delta; - else - bd->hold_timer = 0; - } - - if (bd->hold_timer) - { - node = node->next; - continue; - } - } - - /* - * This assumes we can set dev_t to -1 which is just an - * assumption. Cannot use the transfer list being empty the sync dev - * calls sets the dev to use. - */ - if (*dev == (dev_t)-1) - *dev = bd->dev; - - if (bd->dev == *dev) - { - rtems_chain_node* next_node = node->next; - rtems_chain_node* tnode = rtems_chain_tail (transfer); - - /* - * The blocks on the transfer list are sorted in block order. This - * means multi-block transfers for drivers that require consecutive - * blocks perform better with sorted blocks and for real disks it may - * help lower head movement. 
- */ - - bd->state = RTEMS_BDBUF_STATE_TRANSFER; - - rtems_chain_extract (node); - - tnode = tnode->previous; - - while (node && !rtems_chain_is_head (transfer, tnode)) - { - rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode; - - if (bd->block > tbd->block) - { - rtems_chain_insert (tnode, node); - node = NULL; - } - else - tnode = tnode->previous; - } - - if (node) - rtems_chain_prepend (transfer, node); - - node = next_node; - } - else - { - node = node->next; - } - } - } - } -} - -/** - * Process a pool's modified buffers. Check the sync list first then the - * modified list extracting the buffers suitable to be written to disk. We have - * a device at a time. The task level loop will repeat this operation while - * there are buffers to be written. If the transfer fails place the buffers - * back on the modified list and try again later. The pool is unlocked while - * the buffers are being written to disk. - * - * @param pid The pool id to process modified buffers on. - * @param timer_delta It update_timers is true update the timers by this - * amount. - * @param update_timers If true update the timers. - * @param write_req The write request structure. There is only one. - * - * @retval true Buffers where written to disk so scan again. - * @retval false No buffers where written to disk. - */ -static bool -rtems_bdbuf_swapout_pool_processing (rtems_bdpool_id pid, - unsigned long timer_delta, - bool update_timers, - rtems_blkdev_request* write_req) +rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer) { - rtems_bdbuf_pool* pool = rtems_bdbuf_get_pool (pid); - rtems_chain_control transfer; - dev_t dev = -1; rtems_disk_device* dd; - bool transfered_buffers = true; - - rtems_chain_initialize_empty (&transfer); - - rtems_bdbuf_lock_pool (pool); - - /* - * When the sync is for a device limit the sync to that device. If the sync - * is for a buffer handle process the devices in the order on the sync - * list. This means the dev is -1. 
- */ - if (pool->sync_active) - dev = pool->sync_device; - - /* - * If we have any buffers in the sync queue move them to the modified - * list. The first sync buffer will select the device we use. - */ - rtems_bdbuf_swapout_modified_processing (pid, &dev, - &pool->sync, &transfer, - true, false, - timer_delta); - - /* - * Process the pool's modified list. - */ - rtems_bdbuf_swapout_modified_processing (pid, &dev, - &pool->modified, &transfer, - pool->sync_active, - update_timers, - timer_delta); - - /* - * We have all the buffers that have been modified for this device so the - * pool can be unlocked because the state of each buffer has been set to - * TRANSFER. - */ - rtems_bdbuf_unlock_pool (pool); + +#if RTEMS_BDBUF_TRACE + rtems_bdbuf_printf ("swapout transfer: %08x\n", transfer->dev); +#endif /* * If there are buffers to transfer to the media transfer them. */ - if (rtems_chain_is_empty (&transfer)) - transfered_buffers = false; - else + if (!rtems_chain_is_empty (&transfer->bds)) { /* - * Obtain the disk device. The pool's mutex has been released to avoid a + * Obtain the disk device. The cache's mutex has been released to avoid a * dead lock. */ - dd = rtems_disk_obtain (dev); - if (dd == NULL) - transfered_buffers = false; - else + dd = rtems_disk_obtain (transfer->dev); + if (dd) { /* * The last block number used when the driver only supports @@ -2027,14 +2104,14 @@ rtems_bdbuf_swapout_pool_processing (rtems_bdpool_id pid, * removed. Merging members of a struct into the first member is * trouble waiting to happen. 
*/ - write_req->status = RTEMS_RESOURCE_IN_USE; - write_req->error = 0; - write_req->bufnum = 0; + transfer->write_req->status = RTEMS_RESOURCE_IN_USE; + transfer->write_req->error = 0; + transfer->write_req->bufnum = 0; - while (!rtems_chain_is_empty (&transfer)) + while (!rtems_chain_is_empty (&transfer->bds)) { rtems_bdbuf_buffer* bd = - (rtems_bdbuf_buffer*) rtems_chain_get (&transfer); + (rtems_bdbuf_buffer*) rtems_chain_get (&transfer->bds); bool write = false; @@ -2045,21 +2122,30 @@ rtems_bdbuf_swapout_pool_processing (rtems_bdpool_id pid, * the committed buffers. */ - if ((dd->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) && - write_req->bufnum && +#if RTEMS_BDBUF_TRACE + rtems_bdbuf_printf ("swapout write: bd:%d, bufnum:%d mode:%s\n", + bd->block, transfer->write_req->bufnum, + dd->phys_dev->capabilities & + RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULIT" : "SCAT"); +#endif + + if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) && + transfer->write_req->bufnum && (bd->block != (last_block + 1))) { - rtems_chain_prepend (&transfer, &bd->link); + rtems_chain_prepend (&transfer->bds, &bd->link); write = true; } else { - write_req->bufs[write_req->bufnum].user = bd; - write_req->bufs[write_req->bufnum].block = bd->block; - write_req->bufs[write_req->bufnum].length = dd->block_size; - write_req->bufs[write_req->bufnum].buffer = bd->buffer; - write_req->bufnum++; - last_block = bd->block; + rtems_blkdev_sg_buffer* buf; + buf = &transfer->write_req->bufs[transfer->write_req->bufnum]; + transfer->write_req->bufnum++; + buf->user = bd; + buf->block = bd->block; + buf->length = dd->block_size; + buf->buffer = bd->buffer; + last_block = bd->block; } /* @@ -2067,8 +2153,8 @@ rtems_bdbuf_swapout_pool_processing (rtems_bdpool_id pid, * size has reached the configured max. value. 
*/ - if (rtems_chain_is_empty (&transfer) || - (write_req->bufnum >= rtems_bdbuf_configuration.max_write_blocks)) + if (rtems_chain_is_empty (&transfer->bds) || + (transfer->write_req->bufnum >= rtems_bdbuf_configuration.max_write_blocks)) write = true; if (write) @@ -2076,30 +2162,34 @@ rtems_bdbuf_swapout_pool_processing (rtems_bdpool_id pid, int result; uint32_t b; +#if RTEMS_BDBUF_TRACE + rtems_bdbuf_printf ("swapout write: writing bufnum:%d\n", + transfer->write_req->bufnum); +#endif /* - * Perform the transfer. No pool locks, no preemption, only the disk + * Perform the transfer. No cache locks, no preemption, only the disk * device is being held. */ - result = dd->ioctl (dd->phys_dev->dev, - RTEMS_BLKIO_REQUEST, write_req); + result = dd->phys_dev->ioctl (dd->phys_dev->dev, + RTEMS_BLKIO_REQUEST, transfer->write_req); if (result < 0) { - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_lock_cache (); - for (b = 0; b < write_req->bufnum; b++) + for (b = 0; b < transfer->write_req->bufnum; b++) { - bd = write_req->bufs[b].user; + bd = transfer->write_req->bufs[b].user; bd->state = RTEMS_BDBUF_STATE_MODIFIED; bd->error = errno; /* - * Place back on the pools modified queue and try again. + * Place back on the cache's modified queue and try again. * * @warning Not sure this is the best option but I do not know * what else can be done. 
*/ - rtems_chain_append (&pool->modified, &bd->link); + rtems_chain_append (&bdbuf_cache.modified, &bd->link); } } else @@ -2114,72 +2204,290 @@ rtems_bdbuf_swapout_pool_processing (rtems_bdpool_id pid, if (sc != RTEMS_SUCCESSFUL) rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE); - rtems_bdbuf_lock_pool (pool); + rtems_bdbuf_lock_cache (); - for (b = 0; b < write_req->bufnum; b++) + for (b = 0; b < transfer->write_req->bufnum; b++) { - bd = write_req->bufs[b].user; + bd = transfer->write_req->bufs[b].user; bd->state = RTEMS_BDBUF_STATE_CACHED; bd->error = 0; - - rtems_chain_append (&pool->lru, &bd->link); + bd->group->users--; + + rtems_chain_append (&bdbuf_cache.lru, &bd->link); if (bd->waiters) - rtems_bdbuf_wake (pool->transfer, &pool->transfer_waiters); + rtems_bdbuf_wake (bdbuf_cache.transfer, &bdbuf_cache.transfer_waiters); else { - if (rtems_chain_has_only_one_node (&pool->lru)) - rtems_bdbuf_wake (pool->waiting, &pool->wait_waiters); + if (rtems_chain_has_only_one_node (&bdbuf_cache.lru)) + rtems_bdbuf_wake (bdbuf_cache.waiting, &bdbuf_cache.wait_waiters); } } } + + rtems_bdbuf_unlock_cache (); - rtems_bdbuf_unlock_pool (pool); - - write_req->status = RTEMS_RESOURCE_IN_USE; - write_req->error = 0; - write_req->bufnum = 0; + transfer->write_req->status = RTEMS_RESOURCE_IN_USE; + transfer->write_req->error = 0; + transfer->write_req->bufnum = 0; } } rtems_disk_release (dd); } + else + { + /* + * We have buffers but no device. Put the BDs back onto the + * ready queue and exit. + */ + /* @todo fixme */ + } + } +} + +/** + * Process the modified list of buffers. There is a sync or modified list that + * needs to be handled so we have a common function to do the work. + * + * @param dev The device to handle. If -1 no device is selected so select the + * device of the first buffer to be written to disk. + * @param chain The modified chain to process. + * @param transfer The chain to append buffers to be written too. 
+ * @param sync_active If true this is a sync operation so expire all timers. + * @param update_timers If true update the timers. + * @param timer_delta It update_timers is true update the timers by this + * amount. + */ +static void +rtems_bdbuf_swapout_modified_processing (dev_t* dev, + rtems_chain_control* chain, + rtems_chain_control* transfer, + bool sync_active, + bool update_timers, + uint32_t timer_delta) +{ + if (!rtems_chain_is_empty (chain)) + { + rtems_chain_node* node = rtems_chain_head (chain); + node = node->next; + + while (!rtems_chain_is_tail (chain, node)) + { + rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node; + + /* + * Check if the buffer's hold timer has reached 0. If a sync is active + * force all the timers to 0. + * + * @note Lots of sync requests will skew this timer. It should be based + * on TOD to be accurate. Does it matter ? + */ + if (sync_active) + bd->hold_timer = 0; + + if (bd->hold_timer) + { + if (update_timers) + { + if (bd->hold_timer > timer_delta) + bd->hold_timer -= timer_delta; + else + bd->hold_timer = 0; + } + + if (bd->hold_timer) + { + node = node->next; + continue; + } + } + + /* + * This assumes we can set dev_t to -1 which is just an + * assumption. Cannot use the transfer list being empty the sync dev + * calls sets the dev to use. + */ + if (*dev == (dev_t)-1) + *dev = bd->dev; + + if (bd->dev == *dev) + { + rtems_chain_node* next_node = node->next; + rtems_chain_node* tnode = rtems_chain_tail (transfer); + + /* + * The blocks on the transfer list are sorted in block order. This + * means multi-block transfers for drivers that require consecutive + * blocks perform better with sorted blocks and for real disks it may + * help lower head movement. 
+ */ + + bd->state = RTEMS_BDBUF_STATE_TRANSFER; + + rtems_chain_extract (node); + + tnode = tnode->previous; + + while (node && !rtems_chain_is_head (transfer, tnode)) + { + rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode; + + if (bd->block > tbd->block) + { + rtems_chain_insert (tnode, node); + node = NULL; + } + else + tnode = tnode->previous; + } + + if (node) + rtems_chain_prepend (transfer, node); + + node = next_node; + } + else + { + node = node->next; + } + } + } +} + +/** + * Process the cache's modified buffers. Check the sync list first then the + * modified list extracting the buffers suitable to be written to disk. We have + * a device at a time. The task level loop will repeat this operation while + * there are buffers to be written. If the transfer fails place the buffers + * back on the modified list and try again later. The cache is unlocked while + * the buffers are being written to disk. + * + * @param timer_delta It update_timers is true update the timers by this + * amount. + * @param update_timers If true update the timers. + * @param transfer The transfer transaction data. + * + * @retval true Buffers where written to disk so scan again. + * @retval false No buffers where written to disk. + */ +static bool +rtems_bdbuf_swapout_processing (unsigned long timer_delta, + bool update_timers, + rtems_bdbuf_swapout_transfer* transfer) +{ + rtems_bdbuf_swapout_worker* worker; + bool transfered_buffers = false; + + rtems_bdbuf_lock_cache (); + + /* + * If a sync is active do not use a worker because the current code does not + * cleaning up after. We need to know the buffers have been written when + * syncing to release sync lock and currently worker threads do not return to + * here. We do not know the worker is the last in a sequence of sync writes + * until after we have it running so we do not know to tell it to release the + * lock. The simplest solution is to get the main swap out task perform all + * sync operations. 
+ */ + if (bdbuf_cache.sync_active) + worker = NULL; + else + { + worker = (rtems_bdbuf_swapout_worker*) + rtems_chain_get (&bdbuf_cache.swapout_workers); + if (worker) + transfer = &worker->transfer; } + + rtems_chain_initialize_empty (&transfer->bds); + transfer->dev = -1; + + /* + * When the sync is for a device limit the sync to that device. If the sync + * is for a buffer handle process the devices in the order on the sync + * list. This means the dev is -1. + */ + if (bdbuf_cache.sync_active) + transfer->dev = bdbuf_cache.sync_device; + + /* + * If we have any buffers in the sync queue move them to the modified + * list. The first sync buffer will select the device we use. + */ + rtems_bdbuf_swapout_modified_processing (&transfer->dev, + &bdbuf_cache.sync, + &transfer->bds, + true, false, + timer_delta); - if (pool->sync_active && ! transfered_buffers) + /* + * Process the cache's modified list. + */ + rtems_bdbuf_swapout_modified_processing (&transfer->dev, + &bdbuf_cache.modified, + &transfer->bds, + bdbuf_cache.sync_active, + update_timers, + timer_delta); + + /* + * We have all the buffers that have been modified for this device so the + * cache can be unlocked because the state of each buffer has been set to + * TRANSFER. + */ + rtems_bdbuf_unlock_cache (); + + /* + * If there are buffers to transfer to the media transfer them. 
+ */ + if (!rtems_chain_is_empty (&transfer->bds)) + { + if (worker) + { + rtems_status_code sc = rtems_event_send (worker->id, + RTEMS_BDBUF_SWAPOUT_SYNC); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE); + } + else + { + rtems_bdbuf_swapout_write (transfer); + } + + transfered_buffers = true; + } + + if (bdbuf_cache.sync_active && !transfered_buffers) { - rtems_id sync_requester = pool->sync_requester; - pool->sync_active = false; - pool->sync_requester = 0; + rtems_id sync_requester; + rtems_bdbuf_lock_cache (); + sync_requester = bdbuf_cache.sync_requester; + bdbuf_cache.sync_active = false; + bdbuf_cache.sync_requester = 0; + rtems_bdbuf_unlock_cache (); if (sync_requester) rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC); } - return transfered_buffers; + return transfered_buffers; } /** - * Body of task which takes care on flushing modified buffers to the disk. + * Allocate the write request and initialise it for good measure. * - * @param arg The task argument which is the context. + * @return rtems_blkdev_request* The write reference memory. */ -static rtems_task -rtems_bdbuf_swapout_task (rtems_task_argument arg) +static rtems_blkdev_request* +rtems_bdbuf_swapout_writereq_alloc (void) { - rtems_bdbuf_context* context = (rtems_bdbuf_context*) arg; - rtems_blkdev_request* write_req; - uint32_t period_in_ticks; - const uint32_t period_in_msecs = rtems_bdbuf_configuration.swapout_period; - uint32_t timer_delta; - rtems_status_code sc; - /* * @note chrisj The rtems_blkdev_request and the array at the end is a hack. * I am disappointment at finding code like this in RTEMS. The request should * have been a rtems_chain_control. Simple, fast and less storage as the node * is already part of the buffer structure. 
*/ - write_req = + rtems_blkdev_request* write_req = malloc (sizeof (rtems_blkdev_request) + (rtems_bdbuf_configuration.max_write_blocks * sizeof (rtems_blkdev_sg_buffer))); @@ -2192,6 +2500,142 @@ rtems_bdbuf_swapout_task (rtems_task_argument arg) write_req->done_arg = write_req; write_req->io_task = rtems_task_self (); + return write_req; +} + +/** + * The swapout worker thread body. + * + * @param arg A pointer to the worker thread's private data. + * @return rtems_task Not used. + */ +static rtems_task +rtems_bdbuf_swapout_worker_task (rtems_task_argument arg) +{ + rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg; + + while (worker->enabled) + { + rtems_event_set out; + rtems_status_code sc; + + sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC, + RTEMS_EVENT_ALL | RTEMS_WAIT, + RTEMS_NO_TIMEOUT, + &out); + + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE); + + rtems_bdbuf_swapout_write (&worker->transfer); + + rtems_bdbuf_lock_cache (); + + rtems_chain_initialize_empty (&worker->transfer.bds); + worker->transfer.dev = -1; + + rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link); + + rtems_bdbuf_unlock_cache (); + } + + free (worker->transfer.write_req); + free (worker); + + rtems_task_delete (RTEMS_SELF); +} + +/** + * Open the swapout worker threads. 
+ */ +static void +rtems_bdbuf_swapout_workers_open (void) +{ + rtems_status_code sc; + int w; + + rtems_bdbuf_lock_cache (); + + for (w = 0; w < rtems_bdbuf_configuration.swapout_workers; w++) + { + rtems_bdbuf_swapout_worker* worker; + + worker = malloc (sizeof (rtems_bdbuf_swapout_worker)); + if (!worker) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM); + + rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link); + worker->enabled = true; + worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc (); + + rtems_chain_initialize_empty (&worker->transfer.bds); + worker->transfer.dev = -1; + + sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w), + (rtems_bdbuf_configuration.swapout_priority ? + rtems_bdbuf_configuration.swapout_priority : + RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT), + SWAPOUT_TASK_STACK_SIZE, + RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR, + RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT, + &worker->id); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE); + + sc = rtems_task_start (worker->id, + rtems_bdbuf_swapout_worker_task, + (rtems_task_argument) worker); + if (sc != RTEMS_SUCCESSFUL) + rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START); + } + + rtems_bdbuf_unlock_cache (); +} + +/** + * Close the swapout worker threads. + */ +static void +rtems_bdbuf_swapout_workers_close (void) +{ + rtems_chain_node* node; + + rtems_bdbuf_lock_cache (); + + node = rtems_chain_first (&bdbuf_cache.swapout_workers); + while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node)) + { + rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node; + worker->enabled = false; + rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC); + node = rtems_chain_next (node); + } + + rtems_bdbuf_unlock_cache (); +} + +/** + * Body of task which takes care on flushing modified buffers to the disk. + * + * @param arg A pointer to the global cache data. 
Use the global variable and + * not this. + * @return rtems_task Not used. + */ +static rtems_task +rtems_bdbuf_swapout_task (rtems_task_argument arg) +{ + rtems_bdbuf_swapout_transfer transfer; + uint32_t period_in_ticks; + const uint32_t period_in_msecs = bdbuf_config.swapout_period;; + uint32_t timer_delta; + + transfer.write_req = rtems_bdbuf_swapout_writereq_alloc (); + rtems_chain_initialize_empty (&transfer.bds); + transfer.dev = -1; + + /* + * Localise the period. + */ period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000); /* @@ -2199,9 +2643,15 @@ rtems_bdbuf_swapout_task (rtems_task_argument arg) */ timer_delta = period_in_msecs; - while (context->swapout_enabled) + /* + * Create the worker threads. + */ + rtems_bdbuf_swapout_workers_open (); + + while (bdbuf_cache.swapout_enabled) { - rtems_event_set out; + rtems_event_set out; + rtems_status_code sc; /* * Only update the timers once in the processing cycle. @@ -2210,33 +2660,27 @@ rtems_bdbuf_swapout_task (rtems_task_argument arg) /* * If we write buffers to any disk perform a check again. We only write a - * single device at a time and a pool may have more than one devices + * single device at a time and the cache may have more than one device's * buffers modified waiting to be written. */ bool transfered_buffers; do { - rtems_bdpool_id pid; - transfered_buffers = false; /* - * Loop over each pool extacting all the buffers we find for a specific - * device. The device is the first one we find on a modified list of a - * pool. Process the sync queue of buffers first. + * Extact all the buffers we find for a specific device. The device is + * the first one we find on a modified list. Process the sync queue of + * buffers first. 
*/ - for (pid = 0; pid < context->npools; pid++) + if (rtems_bdbuf_swapout_processing (timer_delta, + update_timers, + &transfer)) { - if (rtems_bdbuf_swapout_pool_processing (pid, - timer_delta, - update_timers, - write_req)) - { - transfered_buffers = true; - } + transfered_buffers = true; } - + /* * Only update the timers once. */ @@ -2253,67 +2697,10 @@ rtems_bdbuf_swapout_task (rtems_task_argument arg) rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE); } - free (write_req); + rtems_bdbuf_swapout_workers_close (); + + free (transfer.write_req); rtems_task_delete (RTEMS_SELF); } -rtems_status_code -rtems_bdbuf_find_pool (uint32_t block_size, rtems_bdpool_id *pool) -{ - rtems_bdbuf_pool* p; - rtems_bdpool_id i; - rtems_bdpool_id curid = -1; - bool found = false; - uint32_t cursize = UINT_MAX; - int j; - - for (j = block_size; (j != 0) && ((j & 1) == 0); j >>= 1); - if (j != 1) - return RTEMS_INVALID_SIZE; - - for (i = 0; i < rtems_bdbuf_ctx.npools; i++) - { - p = rtems_bdbuf_get_pool (i); - if ((p->blksize >= block_size) && - (p->blksize < cursize)) - { - curid = i; - cursize = p->blksize; - found = true; - } - } - - if (found) - { - if (pool != NULL) - *pool = curid; - return RTEMS_SUCCESSFUL; - } - else - { - return RTEMS_NOT_DEFINED; - } -} - -rtems_status_code rtems_bdbuf_get_pool_info( - rtems_bdpool_id pool, - uint32_t *block_size, - uint32_t *blocks -) -{ - if (pool >= rtems_bdbuf_ctx.npools) - return RTEMS_INVALID_NUMBER; - - if (block_size != NULL) - { - *block_size = rtems_bdbuf_ctx.pool[pool].blksize; - } - - if (blocks != NULL) - { - *blocks = rtems_bdbuf_ctx.pool[pool].nblks; - } - - return RTEMS_SUCCESSFUL; -} |