/* Copyright (C) 2005,2006 David Decotigny

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA.
*/

/* NB: the original #include lines lost their <...> arguments during
   extraction; the header names below are reconstructed from the
   identifiers used in this file. */
#include <sos/types.h>
#include <sos/errno.h>
#include <sos/assert.h>
#include <sos/list.h>
#include <sos/hash.h>
#include <sos/ksynch.h>
#include <sos/kmem_slab.h>
#include <sos/kmalloc.h>

#include "blkcache.h"

/**
 * @note In the implementation below, we choose to transfer the blocks
 * from list to list /before/ doing the disk access (if needed). The
 * aim is to have the entries in the right list before any other
 * thread can lock them. Actually, these "other threads" can lock an
 * entry only during the disk accesses (the kernel is
 * non-preemptible). So, putting the entries in the correct list
 * /before/ doing the actual disk read/write access lowers the
 * probability that a thread discovers, once it finally acquires the
 * lock, that the entry it was waiting for has been transferred to
 * another list.
 *
 * @note What is extremely important, however, is that the
 * sos_hash_insert/remove operations be carried out BEFORE the block
 * read/write operations. Otherwise it would become possible to
 * insert an entry that is already present in the hash table (which
 * halts the system), etc.
 */

struct sos_block_cache_entry
{
  /** The key of the hash map */
  sos_luoffset_t block_index;

  /** Kernel address where the data for the block are stored */
  sos_vaddr_t    block_contents;

  /** Is the block available (free), in sync with the disk, or dirty? */
  enum { ENTRY_FREE, ENTRY_SYNC, ENTRY_DIRTY } state;

  /** Lock protecting the entry */
  struct sos_kmutex lock;

  /** Linkage structure to keep the cache block in the hash map */
  struct sos_hash_linkage hlink;

  /** Links to insert the block into the free/sync/dirty lists */
  struct sos_block_cache_entry *prev, *next;
};

/** The cache entries of all the block caches are allocated from a
    single SLAB pool */
static struct sos_kslab_cache * cache_of_bkcache_entries;

struct sos_block_cache
{
  /** SLAB pool for the block contents */
  struct sos_kslab_cache * slab_cache;

  /** Dictionary block offset -> block cache entry */
  struct sos_hash_table  * lookup_table;

  /** Hardware read/write operations */
  struct sos_blockdev_operations * operations;
  void * blkdev_instance_custom_data;

  /* Lists to look into in order to free a node. An LRU policy is
     enforced */
  struct sos_block_cache_entry * free_list;  /**< Entries still available */
  struct sos_block_cache_entry * sync_list;  /**< Blocks in sync with the
                                                  disk (LRU at the end) */
  struct sos_block_cache_entry * dirty_list; /**< Dirty blocks (LRU at the
                                                  end) */
};
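/*
 * Illustrative sketch (not part of the original file): a minimal
 * RAM-backed block device showing the operations contract implied by
 * the call sites below. The member names read_block/write_block and
 * their (custom data, buffer address, block index) signature are
 * taken from the calls in this file; the full declaration of struct
 * sos_blockdev_operations lives in blkcache.h, and the "ramdev"
 * structure and both helpers are purely hypothetical.
 */
#if 0
struct ramdev
{
  sos_vaddr_t base;        /* where the device "storage" lives */
  sos_size_t  block_size;
};

static sos_ret_t ramdev_read_block(void * custom_data,
                                   sos_vaddr_t dest_buffer,
                                   sos_luoffset_t block_index)
{
  struct ramdev * dev = (struct ramdev*)custom_data;
  memcpy((void*)dest_buffer,
         (void*)(dev->base + block_index * dev->block_size),
         dev->block_size);
  return SOS_OK;
}

static sos_ret_t ramdev_write_block(void * custom_data,
                                    sos_vaddr_t src_buffer,
                                    sos_luoffset_t block_index)
{
  struct ramdev * dev = (struct ramdev*)custom_data;
  memcpy((void*)(dev->base + block_index * dev->block_size),
         (void*)src_buffer,
         dev->block_size);
  return SOS_OK;
}

static struct sos_blockdev_operations ramdev_ops =
  {
    .read_block  = ramdev_read_block,
    .write_block = ramdev_write_block
  };
#endif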
sos_ret_t sos_blkcache_subsystem_setup()
{
  cache_of_bkcache_entries
    = sos_kmem_cache_create("blkcache_entry",
                            sizeof(struct sos_block_cache_entry),
                            1, 0,
                            SOS_KSLAB_CREATE_MAP | SOS_KSLAB_CREATE_ZERO);
  if (NULL == cache_of_bkcache_entries)
    return -SOS_ENOMEM;

  return SOS_OK;
}

struct sos_block_cache *
sos_blkcache_new_cache(void * blockdev_instance_custom_data,
                       sos_size_t block_size,
                       sos_count_t cache_size_in_blocks,
                       struct sos_blockdev_operations * blockdev_ops)
{
  sos_count_t idx;
  struct sos_block_cache * blkcache;

  SOS_ASSERT_FATAL(block_size > 0);
  SOS_ASSERT_FATAL(cache_size_in_blocks > 0);

  blkcache = (struct sos_block_cache*)
    sos_kmalloc(sizeof(struct sos_block_cache), 0);
  if (NULL == blkcache)
    return NULL;

  blkcache->blkdev_instance_custom_data = blockdev_instance_custom_data;
  blkcache->operations                  = blockdev_ops;

  /* Allocate the hash table */
  blkcache->lookup_table = sos_hash_create("blkcache",
                                           struct sos_block_cache_entry,
                                           sos_hash_ui64,
                                           sos_hash_key_eq_ui64,
                                           17, block_index, hlink);
  if (NULL == blkcache->lookup_table)
    {
      sos_kfree((sos_vaddr_t)blkcache);
      return NULL;
    }

  /* Allocate the slab cache for the block contents */
  blkcache->slab_cache = sos_kmem_cache_create("blkcache",
                                               block_size,
                                               4, 0,
                                               SOS_KSLAB_CREATE_MAP
                                               | SOS_KSLAB_CREATE_ZERO);
  if (NULL == blkcache->slab_cache)
    {
      sos_hash_dispose(blkcache->lookup_table);
      sos_kfree((sos_vaddr_t)blkcache);
      return NULL;
    }

  /* Allocate the cache blocks in this slab cache, and mark them as
     "free" */
  for (idx = 0 ; idx < cache_size_in_blocks ; idx ++)
    {
      struct sos_block_cache_entry * bkcache_entry;
      sos_vaddr_t bkcache_contents;

      bkcache_entry = (struct sos_block_cache_entry*)
        sos_kmem_cache_alloc(cache_of_bkcache_entries, 0);
      if (NULL == bkcache_entry)
        {
          sos_blkcache_delete_cache(blkcache);
          return NULL;
        }

      bkcache_contents = sos_kmem_cache_alloc(blkcache->slab_cache, 0);
      if (NULL == (void*)bkcache_contents)
        {
          sos_kmem_cache_free((sos_vaddr_t)bkcache_entry);
          sos_blkcache_delete_cache(blkcache);
          return NULL;
        }

      /* Initialize the cache entry */
      bkcache_entry->block_contents = bkcache_contents;
      bkcache_entry->state          = ENTRY_FREE;
      SOS_ASSERT_FATAL(SOS_OK
                       == sos_kmutex_init(& bkcache_entry->lock,
                                          "bkcache_lock",
                                          SOS_KWQ_ORDER_FIFO));

      /* Now add it to the free list */
      list_add_head(blkcache->free_list, bkcache_entry);
    }

  return blkcache;
}

/** Helper function used to collapse the sync/free lists */
static sos_ret_t blkcache_collapse_list(struct sos_block_cache_entry ** l)
{
  while (! list_is_empty(*l))
    {
      struct sos_block_cache_entry * entry = list_pop_head(*l);
      if (NULL == entry)
        break;

      /* The delete_cache function should be called only when the
         block device is not currently in use */
      SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_dispose(& entry->lock));

      if (NULL != (void*)entry->block_contents)
        sos_kmem_cache_free(entry->block_contents);
      sos_kmem_cache_free((sos_vaddr_t)entry);
    }

  return SOS_OK;
}

sos_ret_t sos_blkcache_delete_cache(struct sos_block_cache * bc)
{
  sos_blkcache_flush(bc);

  /* The dirty list is expected to be empty now */
  sos_hash_dispose(bc->lookup_table);
  blkcache_collapse_list(& bc->sync_list);
  blkcache_collapse_list(& bc->free_list);
  sos_kfree((sos_vaddr_t)bc);

  return SOS_OK;
}
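/*
 * Illustrative sketch (not part of the original file): bringing a
 * cache up for one device instance. It relies on the hypothetical
 * "ramdev"/"ramdev_ops" sketched near the top of this file; the
 * cache capacity (64 blocks) is an arbitrary example value.
 */
#if 0
static struct sos_block_cache * example_setup_cache(struct ramdev * dev)
{
  /* sos_blkcache_subsystem_setup() must have been called once at
     boot time before any cache is created */
  return sos_blkcache_new_cache(dev,
                                dev->block_size /* e.g. 512 */,
                                64 /* blocks kept in memory */,
                                & ramdev_ops);
  /* ... later, once the device is idle:
     sos_blkcache_delete_cache(bc); */
}
#endif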
/** Helper function used to transfer a dirty block to the sync/free
    lists.
    @note the entry is expected to be locked */
static sos_ret_t blkcache_flush_entry(struct sos_block_cache * bc,
                                      struct sos_block_cache_entry * entry,
                                      sos_bool_t mark_as_free)
{
  sos_ret_t retval = SOS_OK;

  SOS_ASSERT_FATAL(TRUE == sos_kmutex_owned_by_me(& entry->lock));
  SOS_ASSERT_FATAL(entry->state == ENTRY_DIRTY);

  /* We prepare to transfer it to the sync/free list */
  if (mark_as_free)
    {
      list_delete(bc->dirty_list, entry);
      entry->state = ENTRY_FREE;
      sos_hash_remove(bc->lookup_table, entry);
      list_add_head(bc->free_list, entry);
    }
  else
    {
      list_delete(bc->dirty_list, entry);
      entry->state = ENTRY_SYNC;
      list_add_head(bc->sync_list, entry);
    }

  retval = bc->operations->write_block(bc->blkdev_instance_custom_data,
                                       entry->block_contents,
                                       entry->block_index);
  if (SOS_OK != retval)
    {
      /*
       * Putting the entry back on the dirty list could cause
       * retrieve_block to loop forever (always trying to transfer
       * this block to disk...)! A solution would be to register
       * this block in a dictionary of failing blocks and to force
       * retrieve_block to ignore requests on these blocks.
       */
      SOS_FATAL_ERROR("Not implemented yet: we don't support failed disk access (see comments in source file)");
    }

  return retval;
}
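/*
 * The retrieval function below repeatedly applies a single
 * lock-then-revalidate pattern: pick a candidate entry, take its
 * mutex (which may block), then re-check that the candidate is still
 * what we picked, because another thread may have stolen or recycled
 * it while we were blocked. In outline:
 *
 *   loop:
 *     candidate = hash hit, or head of the free list, or LRU tail
 *                 of the sync/dirty lists
 *     lock(candidate)                        -- may block
 *     if candidate unchanged, and the wanted block was not brought
 *        into the cache by someone else meanwhile:
 *       use it (the lock is kept until release_block)
 *     else:
 *       unlock(candidate) and retry from scratch
 */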
struct sos_block_cache_entry *
sos_blkcache_retrieve_block(struct sos_block_cache * bc,
                            sos_luoffset_t block_index,
                            sos_blkcache_access_type_t access_type,
                            sos_vaddr_t * /*out*/ block_contents)
{
  struct sos_block_cache_entry * entry = NULL;

  *block_contents = (sos_vaddr_t)NULL;

  /* Get and lock the entry */
  while (TRUE)
    {
      entry = sos_hash_lookup(bc->lookup_table, & block_index);
      if (NULL != entry)
        {
          /* The block was already in the cache */
          sos_kmutex_lock(& entry->lock, NULL);

          /* We have the mutex. However, this entry may now be
             dedicated to another block: it was stolen by another
             thread while we were blocked */
          if ((ENTRY_FREE != entry->state)
              && (entry->block_index == block_index))
            {
              /* Great! This entry is still the one we expect */

              /* Write-only access: force it to be dirty NOW! */
              if ( (access_type == SOS_BLKCACHE_WRITE_ONLY)
                   && (ENTRY_SYNC == entry->state) )
                {
                  list_delete(bc->sync_list, entry);
                  entry->state = ENTRY_DIRTY;
                  list_add_head(bc->dirty_list, entry);
                }

              *block_contents = entry->block_contents;
              return entry;
            }

          /* Bad luck: someone stole the entry and used it for
             another block in the meantime */
          sos_kmutex_unlock(& entry->lock);

          /* Go back and look it up again */
          continue;
        }

      /* The block is not yet in the cache: we need to allocate a new
         entry */

      /* The simplest case: there is a slot in the free list (take
         the most recently freed) */
      if (! list_is_empty(bc->free_list))
        {
          entry = list_get_head(bc->free_list);
          sos_kmutex_lock(& entry->lock, NULL);

          /* By the time we return from the lock, the entry could
             have been stolen */
          if ((ENTRY_FREE != entry->state)
              || (NULL != sos_hash_lookup(bc->lookup_table,
                                          & block_index)))
            {
              sos_kmutex_unlock(& entry->lock);
              continue;
            }

          /* Ok, we got a REALLY free entry! */
          break;
        }

      /* A little more complicated: there is a non-dirty entry in
         sync with the disk; transfer it to the free list. Take the
         least recently used */
      if (! list_is_empty(bc->sync_list))
        {
          entry = list_get_tail(bc->sync_list);
          sos_kmutex_lock(& entry->lock, NULL);

          /* While we were blocked, the block left the sync list!
             We'd better redo a complete iteration to choose another
             block, just in case the block we are looking for was
             brought into the cache by someone else */
          if ((ENTRY_SYNC != entry->state)
              || (NULL != sos_hash_lookup(bc->lookup_table,
                                          & block_index)))
            {
              sos_kmutex_unlock(& entry->lock);
              continue;
            }

          list_delete(bc->sync_list, entry);
          sos_hash_remove(bc->lookup_table, entry);
          entry->state = ENTRY_FREE;
          list_add_head(bc->free_list, entry);

          /* Now we have a free entry for us! */
          break;
        }

      /*
       * Bad luck: we have to flush a dirty entry back to disk
       */

      SOS_ASSERT_FATAL(! list_is_empty(bc->dirty_list));

      entry = list_get_tail(bc->dirty_list);
      sos_kmutex_lock(& entry->lock, NULL);

      /* While we were blocked, the block may have been transferred
         to the sync list! We'd better redo a complete iteration to
         choose another block, if we want to keep this
         recently-accessed block inside the cache */
      if ((ENTRY_DIRTY == entry->state)
          && (NULL == sos_hash_lookup(bc->lookup_table, & block_index)))
        {
          /* The entry is still dirty: transfer it to the free list */
          if (SOS_OK == blkcache_flush_entry(bc, entry, TRUE))
            /* We can use it now! */
            break;
        }

      sos_kmutex_unlock(& entry->lock);

      /* We can now go for another iteration, because we blocked:
         maybe the block was brought into the cache by someone
         else */
    }

  /* Here we hold a locked entry that needs to be initialized */

  /* Remove it from the free list */
  list_delete(bc->free_list, entry);

  /* Prepare it to be inserted in the dictionary */
  entry->block_index = block_index;

  /* Announce that we are preparing to add the block into the cache */
  if (SOS_OK != sos_hash_insert(bc->lookup_table, entry))
    SOS_FATAL_ERROR("Unexpected hash collision");

  /* No need to read anything for a write-only block: simply mark it
     dirty */
  if (access_type == SOS_BLKCACHE_WRITE_ONLY)
    {
      entry->state = ENTRY_DIRTY;
      list_add_head(bc->dirty_list, entry);
    }
  /* Otherwise retrieve the block from disk NOW! */
  else
    {
      entry->state = ENTRY_SYNC;
      list_add_head(bc->sync_list, entry);

      if (SOS_OK != bc->operations->read_block(bc->blkdev_instance_custom_data,
                                               entry->block_contents,
                                               entry->block_index))
        {
          /* In case of failure, remove the entry from the hash table
             and return NULL */
          list_delete(bc->sync_list, entry);
          sos_hash_remove(bc->lookup_table, entry);

          entry->state = ENTRY_FREE;
          list_add_head(bc->free_list, entry);
          sos_kmutex_unlock(& entry->lock);
          return NULL;
        }
    }

  *block_contents = entry->block_contents;
  return entry;
}

sos_ret_t sos_blkcache_release_block(struct sos_block_cache * bc,
                                     struct sos_block_cache_entry * entry,
                                     sos_bool_t is_dirty,
                                     sos_bool_t force_flush)
{
  /* Enforce the LRU policy: the entry becomes the most recently used
     of its list */
  if (ENTRY_SYNC == entry->state)
    {
      list_delete(bc->sync_list, entry);

      /* If needed, transfer the block to the dirty list */
      if (is_dirty)
        entry->state = ENTRY_DIRTY;
    }
  else
    list_delete(bc->dirty_list, entry);

  if (ENTRY_SYNC == entry->state)
    list_add_head(bc->sync_list, entry);
  else
    list_add_head(bc->dirty_list, entry);

  if ( (ENTRY_DIRTY == entry->state) && force_flush)
    blkcache_flush_entry(bc, entry, FALSE);

  sos_kmutex_unlock(& entry->lock);
  return SOS_OK;
}

sos_ret_t sos_blkcache_flush(struct sos_block_cache * bc)
{
  struct sos_block_cache_entry * entry;

  while (NULL != (entry = list_get_head(bc->dirty_list)) )
    {
      sos_ret_t retval = SOS_OK;

      sos_kmutex_lock(& entry->lock, NULL);
      if (ENTRY_DIRTY == entry->state)
        retval = blkcache_flush_entry(bc, entry, FALSE);
      sos_kmutex_unlock(& entry->lock);

      if (SOS_OK != retval)
        return retval;
    }

  return SOS_OK;
}
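/*
 * Illustrative usage sketch (not part of the original file):
 * overwriting one block through the cache and forcing it to disk at
 * once. "bc" is a cache created as in the sketch following
 * sos_blkcache_delete_cache(); the function itself is hypothetical.
 */
#if 0
static sos_ret_t example_overwrite_block(struct sos_block_cache * bc,
                                         sos_luoffset_t idx,
                                         sos_size_t block_size)
{
  sos_vaddr_t contents;
  struct sos_block_cache_entry * entry;

  /* Write-only access: the block is not read from disk, its cache
     entry is simply locked and marked dirty */
  entry = sos_blkcache_retrieve_block(bc, idx, SOS_BLKCACHE_WRITE_ONLY,
                                      & contents);
  SOS_ASSERT_FATAL(NULL != entry); /* WRITE_ONLY never reads the disk,
                                      so it cannot fail above */

  memset((void*)contents, 0x42, block_size); /* the new contents */

  /* Hand the block back: still dirty, and force an immediate
     write-back to disk */
  return sos_blkcache_release_block(bc, entry,
                                    TRUE /* is_dirty */,
                                    TRUE /* force_flush */);
}
#endif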