/* Copyright (C) 2005,2006 David Decotigny

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
   USA.
*/

#include <sos/ksynch.h>
#include <sos/hash.h>
#include <sos/kmem_slab.h>
#include <sos/kmalloc.h>
#include <sos/assert.h>
#include <sos/list.h>

#include "blkcache.h"


/**
 * @note In the implementation below, we choose to transfer the blocks
 * from list to list /before/ doing the disk access (if needed). The
 * aim is to have the entries in the right list before any other
 * thread can lock them. Actually, these "other threads" can lock the
 * entry only during the disk accesses (the kernel is non
 * preemptible). So, putting the entries in the correct list /before/
 * doing the actual disk read/write access lowers the probability
 * for a thread to discover that, in the end, the entry it was waiting
 * for has been transferred to another list.
 *
 * @note What is extremely important, however, is that the
 * sos_hash_insert/remove operations be carried out BEFORE the
 * block read/write operations. Otherwise it may be possible to
 * insert an entry that is already inserted (which causes the system
 * to halt), etc.
 */


struct sos_block_cache_entry
{
  /** The key of the hash map */
  sos_luoffset_t block_index;

  /** Kernel address where the data for the block are stored */
  sos_vaddr_t block_contents;

  /** Is the block available, in sync with disk, or dirty ? */
  enum { ENTRY_FREE, ENTRY_SYNC, ENTRY_DIRTY } state;

  /** Lock to protect the entry */
  struct sos_kmutex lock;

  /** Linkage structure to keep the cache block in the hash map */
  struct sos_hash_linkage hlink;

  /** Links to insert the block into the free/sync/dirty lists */
  struct sos_block_cache_entry *prev, *next;
};


/** The entries of all the block caches are allocated from a single SLAB pool */
static struct sos_kslab_cache * cache_of_bkcache_entries;


struct sos_block_cache
{
  /** SLAB pool for the block contents */
  struct sos_kslab_cache * slab_cache;

  /** Dictionary offset -> block cache entry */
  struct sos_hash_table * lookup_table;

  /** Hardware read/write operations */
  struct sos_blockdev_operations * operations;
  void * blkdev_instance_custom_data;

  /* Lists to look into in order to free a node. LRU is enforced */
  struct sos_block_cache_entry * free_list;  /**< List of entries
                                                  still available */
  struct sos_block_cache_entry * sync_list;  /**< Blocks in sync with
                                                  disk (LRU at end) */
  struct sos_block_cache_entry * dirty_list; /**< Dirty blocks (LRU
                                                  last) */
};


sos_ret_t sos_blkcache_subsystem_setup()
{
  cache_of_bkcache_entries
    = sos_kmem_cache_create("blkcache_entry",
                            sizeof(struct sos_block_cache_entry),
                            1, 0,
                            SOS_KSLAB_CREATE_MAP | SOS_KSLAB_CREATE_ZERO);
  if (NULL == cache_of_bkcache_entries)
    return -SOS_ENOMEM;
  return SOS_OK;
}


struct sos_block_cache *
sos_blkcache_new_cache(void * blockdev_instance_custom_data,
                       sos_size_t block_size,
                       sos_count_t cache_size_in_blocks,
                       struct sos_blockdev_operations * blockdev_ops)
{
  sos_count_t idx;
  struct sos_block_cache * blkcache;

  SOS_ASSERT_FATAL(block_size > 0);
  SOS_ASSERT_FATAL(cache_size_in_blocks > 0);

  blkcache
    = (struct sos_block_cache*) sos_kmalloc(sizeof(struct sos_block_cache), 0);
  if (NULL == blkcache)
    return NULL;

  blkcache->blkdev_instance_custom_data = blockdev_instance_custom_data;
  blkcache->operations                  = blockdev_ops;

  /* Allocate the hash table */
  blkcache->lookup_table = sos_hash_create("blkcache",
                                           struct sos_block_cache_entry,
                                           sos_hash_ui64,
                                           sos_hash_key_eq_ui64,
                                           17, block_index, hlink);
  if (NULL == blkcache->lookup_table)
    {
      sos_kfree((sos_vaddr_t)blkcache);
      return NULL;
    }

  /* Allocate the slab cache for the blocks */
  blkcache->slab_cache = sos_kmem_cache_create("blkcache", block_size,
                                               4, 0,
                                               SOS_KSLAB_CREATE_MAP
                                               | SOS_KSLAB_CREATE_ZERO);
  if (NULL == blkcache->slab_cache)
    {
      sos_hash_dispose(blkcache->lookup_table);
      sos_kfree((sos_vaddr_t)blkcache);
      return NULL;
    }

  /* Allocate the cache blocks in this slab cache, and mark them as
     "free" */
  for (idx = 0 ; idx < cache_size_in_blocks ; idx ++)
    {
      struct sos_block_cache_entry * bkcache_entry;
      sos_vaddr_t bkcache_contents;

      bkcache_entry
        = (struct sos_block_cache_entry*)sos_kmem_cache_alloc(cache_of_bkcache_entries,
                                                              0);
      if (NULL == bkcache_entry)
        {
          sos_blkcache_delete_cache(blkcache);
          return NULL;
        }

      bkcache_contents = sos_kmem_cache_alloc(blkcache->slab_cache, 0);
      if (NULL == (void*)bkcache_contents)
        {
          sos_kmem_cache_free((sos_vaddr_t)bkcache_entry);
          sos_blkcache_delete_cache(blkcache);
          return NULL;
        }

      /* Initialize the cache entry */
      bkcache_entry->block_contents = bkcache_contents;
      bkcache_entry->state = ENTRY_FREE;
      SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_init(& bkcache_entry->lock,
                                                 "bkcache_lock",
                                                 SOS_KWQ_ORDER_FIFO));

      /* Now add it in the free list */
      list_add_head(blkcache->free_list, bkcache_entry);
    }

  return blkcache;
}


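/*
 * A minimal usage sketch (kept inside #if 0, not compiled): how a
 * block device driver might set up a cache with
 * sos_blkcache_new_cache(). "sample_disk", the two callbacks and the
 * 512/64 sizing are hypothetical illustration names/values; the
 * read_block/write_block member names and their argument order are
 * inferred from the calls made further down in this file.
 */
#if 0
struct sample_disk { int dev_id; };   /* hypothetical per-device state */

/* Callback signatures inferred from the bc->operations->read_block /
   write_block calls below: (custom data, block buffer, block index) */
static sos_ret_t sample_disk_read_block(void * custom_data,
                                        sos_vaddr_t dest,
                                        sos_luoffset_t block_index);
static sos_ret_t sample_disk_write_block(void * custom_data,
                                         sos_vaddr_t src,
                                         sos_luoffset_t block_index);

static struct sos_blockdev_operations sample_disk_ops = {
  .read_block  = sample_disk_read_block,
  .write_block = sample_disk_write_block,
};

static struct sos_block_cache * sample_disk_setup_cache(struct sample_disk * d)
{
  /* 512-byte blocks, 64 of them kept in the cache (arbitrary values) */
  return sos_blkcache_new_cache(d, 512, 64, & sample_disk_ops);
}
#endif

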
/** Helper function used to collapse the sync/free lists */
static sos_ret_t blkcache_collapse_list(struct sos_block_cache_entry ** l)
{
  while (! list_is_empty(*l))
    {
      struct sos_block_cache_entry * entry = list_pop_head(*l);
      if (NULL == entry)
        break;

      /* The delete_cache function should be called only when the
         block device is not currently in use */
      SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_dispose(& entry->lock));

      if (NULL != (void*)entry->block_contents)
        sos_kmem_cache_free(entry->block_contents);

      sos_kmem_cache_free((sos_vaddr_t)entry);
    }

  return SOS_OK;
}


sos_ret_t
sos_blkcache_delete_cache(struct sos_block_cache * bc)
{
  sos_blkcache_flush(bc); /* After this, the dirty list is expected to be empty */

  sos_hash_dispose(bc->lookup_table);
  blkcache_collapse_list(& bc->sync_list);
  blkcache_collapse_list(& bc->free_list);

  sos_kfree((sos_vaddr_t)bc);

  return SOS_OK;
}


/** Helper function used to transfer a dirty block to the sync (or
    free) list. @note the entry is expected to be locked */
static sos_ret_t blkcache_flush_entry(struct sos_block_cache * bc,
                                      struct sos_block_cache_entry * entry,
                                      sos_bool_t mark_as_free)
{
  sos_ret_t retval = SOS_OK;

  SOS_ASSERT_FATAL(TRUE == sos_kmutex_owned_by_me(& entry->lock));
  SOS_ASSERT_FATAL(entry->state == ENTRY_DIRTY);

  /* We prepare to transfer it to the sync/free list */
  if (mark_as_free)
    {
      list_delete(bc->dirty_list, entry);
      entry->state = ENTRY_FREE;
      sos_hash_remove(bc->lookup_table, entry);
      list_add_head(bc->free_list, entry);
    }
  else
    {
      list_delete(bc->dirty_list, entry);
      entry->state = ENTRY_SYNC;
      list_add_head(bc->sync_list, entry);
    }

  retval = bc->operations->write_block(bc->blkdev_instance_custom_data,
                                       entry->block_contents,
                                       entry->block_index);
  if (SOS_OK != retval)
    {
      /*
       * Putting the entry back on the dirty list may cause
       * retrieve_block to loop infinitely (always trying to transfer
       * this block to disk...) !... A solution would be to register
       * this block in a dictionary of failing blocks and to force
       * retrieve_block to ignore requests on these blocks
       */
      SOS_FATAL_ERROR("Not implemented yet: we don't support failed disk access (see comments in source file)");
    }

  return retval;
}


struct sos_block_cache_entry *
sos_blkcache_retrieve_block(struct sos_block_cache * bc,
                            sos_luoffset_t block_index,
                            sos_blkcache_access_type_t access_type,
                            sos_vaddr_t */*out*/ block_contents)
{
  struct sos_block_cache_entry * entry = NULL;
  *block_contents = (sos_vaddr_t)NULL;

  /* Get and lock the entry */
  while (TRUE)
    {
      entry = sos_hash_lookup(bc->lookup_table,
                              & block_index);
      if (NULL != entry)
        {
          /* The block was already in the cache */

          sos_kmutex_lock(& entry->lock, NULL);

          /* We have the mutex. However, this entry can now be
             dedicated to another block: it was stolen by another
             thread during the blocking time */
          if ((ENTRY_FREE != entry->state)
              && (entry->block_index == block_index))
            {
              /* Great ! This entry is still the one we expect ! */

              /* Write-only access: force it to be dirty NOW ! */
              if ( (access_type == SOS_BLKCACHE_WRITE_ONLY)
                   && (ENTRY_SYNC == entry->state) )
                {
                  list_delete(bc->sync_list, entry);
                  entry->state = ENTRY_DIRTY;
                  list_add_head(bc->dirty_list, entry);
                }

              *block_contents = entry->block_contents;
              return entry;
            }

          /* Bad luck: someone stole the entry and used it in the
             meantime */
          sos_kmutex_unlock(& entry->lock);
          /* Go back and look it up again */
          continue;
        }

      /* Ok, the block is not yet in the cache: we need to allocate a
         new one */

      /* The simplest case: there is a free slot in the free list
         (take the most recently freed) */
      if (! list_is_empty(bc->free_list))
        {
          entry = list_get_head(bc->free_list);
          sos_kmutex_lock(& entry->lock, NULL);

          /* By the time we return from the lock, the entry could have
             been stolen */
          if ((ENTRY_FREE != entry->state)
              || (NULL != sos_hash_lookup(bc->lookup_table, & block_index)))
            {
              sos_kmutex_unlock(& entry->lock);
              continue;
            }

          /* Ok, we got a REALLY free entry ! */
          break;
        }

      /* A little more complicated: there is a non-dirty entry in
         sync with disk, transfer it to the free list. Take the least
         recently used */
      if (! list_is_empty(bc->sync_list))
        {
          entry = list_get_tail(bc->sync_list);
          sos_kmutex_lock(& entry->lock, NULL);

          /* During the blocking time, the block may have left the
             sync list ! We'd better redo a complete iteration to
             choose another block, just in case the block we are
             looking for was transferred into the cache by someone
             else */
          if ((ENTRY_SYNC != entry->state)
              || (NULL != sos_hash_lookup(bc->lookup_table, & block_index)))
            {
              sos_kmutex_unlock(& entry->lock);
              continue;
            }

          list_delete(bc->sync_list, entry);
          sos_hash_remove(bc->lookup_table, entry);
          entry->state = ENTRY_FREE;
          list_add_head(bc->free_list, entry);

          /* Now we have a free entry for us ! */
          break;
        }

      /*
       * Bad luck: we have to flush a dirty entry back to disk
       */
      SOS_ASSERT_FATAL(! list_is_empty(bc->dirty_list));
      entry = list_get_tail(bc->dirty_list);

      sos_kmutex_lock(& entry->lock, NULL);

      /* During the blocking time, the block may have been transferred
         to the sync list ! We'd better redo a complete iteration to
         choose another block, if we want to keep this
         recently-accessed block inside the cache */
      if ((ENTRY_DIRTY == entry->state)
          && (NULL == sos_hash_lookup(bc->lookup_table, & block_index)))
        {
          /* The entry is still dirty: flush it and transfer it to the
             free list */

          if (SOS_OK == blkcache_flush_entry(bc, entry, TRUE))
            /* We can use it now ! */
            break;
        }

      sos_kmutex_unlock(& entry->lock);

      /* We can now go for another iteration because we blocked: maybe
         the block was retrieved into the cache by someone else */
    }

  /* Here we have a locked block that needs to be initialised */

  /* Remove it from the free list */
  list_delete(bc->free_list, entry);

  /* Prepare it to be inserted in the dictionary */
  entry->block_index = block_index;

  /* Announce that we are preparing to add the block into the cache */
  if (SOS_OK != sos_hash_insert(bc->lookup_table, entry))
    SOS_FATAL_ERROR("Unexpected hash collision");

  /* No need to do anything for a write-only block, simply mark it
     dirty */
  if (access_type == SOS_BLKCACHE_WRITE_ONLY)
    {
      entry->state = ENTRY_DIRTY;
      list_add_head(bc->dirty_list, entry);
    }

  /* Otherwise retrieve the block from disk NOW ! */
  else
    {
      entry->state = ENTRY_SYNC;
      list_add_head(bc->sync_list, entry);

      if (SOS_OK
          != bc->operations->read_block(bc->blkdev_instance_custom_data,
                                        entry->block_contents,
                                        entry->block_index))
        {
          /* In case of failure, remove it from the hash and return NULL */
          list_delete(bc->sync_list, entry);
          sos_hash_remove(bc->lookup_table, entry);
          entry->state = ENTRY_FREE;
          list_add_head(bc->free_list, entry);
          sos_kmutex_unlock(& entry->lock);
          return NULL;
        }
    }

  *block_contents = entry->block_contents;
  return entry;
}


sos_ret_t
sos_blkcache_release_block(struct sos_block_cache * bc,
                           struct sos_block_cache_entry * entry,
                           sos_bool_t is_dirty,
                           sos_bool_t force_flush)
{
  /* Enforce the least recently used policy */
  if (ENTRY_SYNC == entry->state)
    {
      list_delete(bc->sync_list, entry);
      /* If needed, transfer the block to the dirty list */
      if (is_dirty)
        entry->state = ENTRY_DIRTY;
    }
  else
    list_delete(bc->dirty_list, entry);

  if (ENTRY_SYNC == entry->state)
    list_add_head(bc->sync_list, entry);
  else
    list_add_head(bc->dirty_list, entry);

  if ( (ENTRY_DIRTY == entry->state) && force_flush)
    blkcache_flush_entry(bc, entry, FALSE);

  sos_kmutex_unlock(& entry->lock);
  return SOS_OK;
}


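/*
 * A minimal sketch (kept inside #if 0, not compiled) of the intended
 * retrieve/release pairing: overwrite one whole block.
 * SOS_BLKCACHE_WRITE_ONLY is the access type handled above;
 * "sample_overwrite_block", its parameters, the error code and the
 * use of memcpy() from the kernel libc are hypothetical illustration
 * choices. Note that sos_blkcache_retrieve_block() returns the entry
 * LOCKED, and that only sos_blkcache_release_block() drops that lock.
 */
#if 0
static sos_ret_t sample_overwrite_block(struct sos_block_cache * bc,
                                        sos_luoffset_t block_index,
                                        const void * buf,
                                        sos_size_t block_size)
{
  sos_vaddr_t contents;
  struct sos_block_cache_entry * entry
    = sos_blkcache_retrieve_block(bc, block_index,
                                  SOS_BLKCACHE_WRITE_ONLY,
                                  & contents);
  if (NULL == entry)
    return -SOS_ENOMEM;   /* hypothetical error code choice */

  /* The whole block is overwritten, so no prior read from disk was needed */
  memcpy((void*)contents, buf, block_size);

  /* Mark the block dirty; do not force an immediate write to disk */
  return sos_blkcache_release_block(bc, entry,
                                    /* is_dirty */    TRUE,
                                    /* force_flush */ FALSE);
}
#endif

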
sos_ret_t
sos_blkcache_flush(struct sos_block_cache * bc)
{
  struct sos_block_cache_entry * entry;
  while (NULL != (entry = list_get_head(bc->dirty_list)) )
    {
      sos_ret_t retval = SOS_OK;

      sos_kmutex_lock(& entry->lock, NULL);
      if (ENTRY_DIRTY == entry->state)
        retval = blkcache_flush_entry(bc, entry, FALSE);
      sos_kmutex_unlock(& entry->lock);

      if (SOS_OK != retval)
        return retval;
    }

  return SOS_OK;
}
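

/*
 * A minimal shutdown sketch (kept inside #if 0, not compiled): flush
 * every dirty block, then delete the cache. At this point the cache
 * must no longer be in use by any thread (see the comment in
 * blkcache_collapse_list above); "sample_disk_shutdown_cache" is a
 * hypothetical wrapper name.
 */
#if 0
static sos_ret_t sample_disk_shutdown_cache(struct sos_block_cache * bc)
{
  sos_ret_t retval = sos_blkcache_flush(bc);
  if (SOS_OK != retval)
    return retval;

  /* sos_blkcache_delete_cache() flushes once more and then releases
     every entry and the cache structure itself */
  return sos_blkcache_delete_cache(bc);
}
#endif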