sos-code-article10/sos/blkcache.c

/* Copyright (C) 2005,2006 David Decotigny
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include <sos/ksynch.h>
#include <sos/hash.h>
#include <sos/kmem_slab.h>
#include <sos/kmalloc.h>
#include <sos/assert.h>
#include <sos/list.h>
#include "blkcache.h"
/**
 * @note In the implementation below, we choose to transfer the blocks
 * from list to list /before/ doing the disk access (if needed). The
 * aim is to have the entries in the right list before any other
 * thread can lock them. Actually, these "other threads" can lock an
 * entry only during the disk accesses (the kernel is
 * non-preemptible). So, putting the entries in the correct list
 * /before/ doing the actual disk read/write access lowers the
 * probability that a waiting thread discovers, once it finally gets
 * the lock, that the entry it was waiting for has been transferred to
 * another list.
 *
 * @note What is extremely important, however, is that the
 * sos_hash_insert/remove operations be carried out BEFORE the
 * block read/write operations. Otherwise it would be possible to
 * insert an entry that is already in the hash table (which causes the
 * system to halt), etc.
 */
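/**
 * @note A minimal usage sketch of this API, as seen from a
 * hypothetical caller (the device instance, device operations, block
 * size, cache size and block index below are illustrative assumptions,
 * not defined in this file):
 * @code
 *   sos_vaddr_t contents;
 *   struct sos_block_cache_entry * ent;
 *   struct sos_block_cache * bc
 *     = sos_blkcache_new_cache(my_dev_instance, 512, 64, & my_dev_ops);
 *
 *   // Overwrite block 42: WRITE_ONLY means no prior read from disk
 *   ent = sos_blkcache_retrieve_block(bc, 42, SOS_BLKCACHE_WRITE_ONLY,
 *                                     & contents);
 *   if (NULL != ent)
 *     {
 *       ((char*)contents)[0] = 'x';  // ... fill in the block data here
 *       sos_blkcache_release_block(bc, ent,
 *                                  TRUE,    // we made it dirty
 *                                  FALSE);  // no immediate write-back
 *     }
 *
 *   sos_blkcache_flush(bc);        // write every dirty block to disk
 *   sos_blkcache_delete_cache(bc); // flushes again, then frees everything
 * @endcode
 */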
struct sos_block_cache_entry
{
/** The key of the hash map */
sos_luoffset_t block_index;
/** Kernel address where the data for the block are stored */
sos_vaddr_t block_contents;
/** Is the block free, in sync with the disk, or dirty? */
enum { ENTRY_FREE, ENTRY_SYNC, ENTRY_DIRTY } state;
/** Lock to protect the entry */
struct sos_kmutex lock;
/** Linkage structure to keep the cache block in the hash map */
struct sos_hash_linkage hlink;
/** Links to insert the block into the free/sync/dirty lists */
struct sos_block_cache_entry *prev, *next;
};
/** The cache entries of all the block caches are stored in a single SLAB pool */
static struct sos_kslab_cache * cache_of_bkcache_entries;
struct sos_block_cache
{
/** SLAB pool for the block contents */
struct sos_kslab_cache * slab_cache;
/** Dictionary offset -> block cache entry */
struct sos_hash_table * lookup_table;
/** Hardware read/write operations */
struct sos_blockdev_operations * operations;
void * blkdev_instance_custom_data;
/* Lists scanned when an entry must be reclaimed. LRU order is enforced */
struct sos_block_cache_entry * free_list; /**< Entries still available */
struct sos_block_cache_entry * sync_list; /**< Blocks in sync with the disk (LRU last) */
struct sos_block_cache_entry * dirty_list; /**< Dirty blocks (LRU last) */
};
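/**
 * Initialize the block cache subsystem: create the SLAB pool shared by
 * the cache entries of every block cache. Must be called once, before
 * any call to sos_blkcache_new_cache()
 */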
sos_ret_t sos_blkcache_subsystem_setup()
{
cache_of_bkcache_entries
= sos_kmem_cache_create("blkcache_entry",
sizeof(struct sos_block_cache_entry),
1, 0,
SOS_KSLAB_CREATE_MAP | SOS_KSLAB_CREATE_ZERO);
if (NULL == cache_of_bkcache_entries)
return -SOS_ENOMEM;
return SOS_OK;
}
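/**
 * Create a new block cache of cache_size_in_blocks blocks of
 * block_size bytes each, for the block device described by
 * blockdev_ops and blockdev_instance_custom_data. All the cache blocks
 * are pre-allocated and start on the free list. Returns NULL on
 * allocation failure
 */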
struct sos_block_cache *
sos_blkcache_new_cache(void * blockdev_instance_custom_data,
sos_size_t block_size,
sos_count_t cache_size_in_blocks,
struct sos_blockdev_operations * blockdev_ops)
{
sos_count_t idx;
struct sos_block_cache * blkcache;
SOS_ASSERT_FATAL(block_size > 0);
SOS_ASSERT_FATAL(cache_size_in_blocks > 0);
blkcache
= (struct sos_block_cache*) sos_kmalloc(sizeof(struct sos_block_cache), 0);
if (NULL == blkcache)
return NULL;
blkcache->blkdev_instance_custom_data = blockdev_instance_custom_data;
blkcache->operations = blockdev_ops;
/* Allocate the hash table */
blkcache->lookup_table = sos_hash_create("blkcache",
struct sos_block_cache_entry,
sos_hash_ui64,
sos_hash_key_eq_ui64,
17, block_index, hlink);
if (NULL == blkcache->lookup_table)
{
sos_kfree((sos_vaddr_t)blkcache);
return NULL;
}
/* Allocate the slab cache for the blocks */
blkcache->slab_cache = sos_kmem_cache_create("blkcache", block_size,
4, 0,
SOS_KSLAB_CREATE_MAP
| SOS_KSLAB_CREATE_ZERO);
if (NULL == blkcache->slab_cache)
{
sos_hash_dispose(blkcache->lookup_table);
sos_kfree((sos_vaddr_t)blkcache);
return NULL;
}
/* Allocate the cache blocks in this slab cache, and mark them as
"free" */
for (idx = 0 ; idx < cache_size_in_blocks ; idx ++)
{
struct sos_block_cache_entry * bkcache_entry;
sos_vaddr_t bkcache_contents;
bkcache_entry
= (struct sos_block_cache_entry*)sos_kmem_cache_alloc(cache_of_bkcache_entries,
0);
if (NULL == bkcache_entry)
{
sos_blkcache_delete_cache(blkcache);
return NULL;
}
bkcache_contents = sos_kmem_cache_alloc(blkcache->slab_cache, 0);
if (NULL == (void*)bkcache_contents)
{
sos_kmem_cache_free((sos_vaddr_t)bkcache_entry);
sos_blkcache_delete_cache(blkcache);
return NULL;
}
/* Initialize the cache entry */
bkcache_entry->block_contents = bkcache_contents;
bkcache_entry->state = ENTRY_FREE;
SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_init(& bkcache_entry->lock,
"bkcache_lock",
SOS_KWQ_ORDER_FIFO));
/* Now add it to the free list */
list_add_head(blkcache->free_list, bkcache_entry);
}
return blkcache;
}
/** Helper function used to free every entry of the sync/free lists
when a cache is deleted */
static sos_ret_t blkcache_collapse_list(struct sos_block_cache_entry ** l)
{
while (! list_is_empty(*l))
{
struct sos_block_cache_entry * entry = list_pop_head(*l);
if (NULL == entry)
break;
/* The delete_cache function should be called only when the
block device is not currently in use */
SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_dispose(& entry->lock));
if (NULL != (void*)entry->block_contents)
sos_kmem_cache_free(entry->block_contents);
sos_kmem_cache_free((sos_vaddr_t)entry);
}
return SOS_OK;
}
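/**
 * Flush the cache and release every resource it holds. The block
 * device is expected not to be in use anymore
 */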
sos_ret_t
sos_blkcache_delete_cache(struct sos_block_cache * bc)
{
sos_blkcache_flush(bc); /* After this, the dirty list is expected to be empty */
sos_hash_dispose(bc->lookup_table);
blkcache_collapse_list(& bc->sync_list);
blkcache_collapse_list(& bc->free_list);
sos_kfree((sos_vaddr_t)bc);
return SOS_OK;
}
/** Helper function used to write a dirty block back to disk and to
transfer it to the sync_list (or to the free_list when mark_as_free
is TRUE). @note The entry is expected to be locked */
static sos_ret_t blkcache_flush_entry(struct sos_block_cache * bc,
struct sos_block_cache_entry * entry,
sos_bool_t mark_as_free)
{
sos_ret_t retval = SOS_OK;
SOS_ASSERT_FATAL(TRUE == sos_kmutex_owned_by_me(& entry->lock));
SOS_ASSERT_FATAL(entry->state == ENTRY_DIRTY);
/* We prepare to transfer it to the sync/free list */
if (mark_as_free)
{
list_delete(bc->dirty_list, entry);
entry->state = ENTRY_FREE;
sos_hash_remove(bc->lookup_table, entry);
list_add_head(bc->free_list, entry);
}
else
{
list_delete(bc->dirty_list, entry);
entry->state = ENTRY_SYNC;
list_add_head(bc->sync_list, entry);
}
retval = bc->operations->write_block(bc->blkdev_instance_custom_data,
entry->block_contents,
entry->block_index);
if (SOS_OK != retval)
{
/*
* Putting the entry back on the dirty list might cause
* retrieve_block to loop forever (always trying to transfer
* this block to disk). A solution would be to register
* this block in a dictionary of failing blocks and to force
* retrieve_block to ignore requests on these blocks
*/
SOS_FATAL_ERROR("Not implemented yet: we don't support failed disk access (see comments in source file)");
}
return retval;
}
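/**
 * Return the cache entry for the given block, locked. Unless
 * access_type is SOS_BLKCACHE_WRITE_ONLY, the block is read from disk
 * when it is not yet in the cache. The kernel address of the block
 * data is returned in block_contents. May block while waiting for an
 * entry or for the disk, and returns NULL if the disk read fails. The
 * entry must be given back with sos_blkcache_release_block()
 */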
struct sos_block_cache_entry *
sos_blkcache_retrieve_block(struct sos_block_cache * bc,
sos_luoffset_t block_index,
sos_blkcache_access_type_t access_type,
sos_vaddr_t */*out*/ block_contents)
{
struct sos_block_cache_entry * entry = NULL;
*block_contents = (sos_vaddr_t)NULL;
/* Get and lock the entry */
while (TRUE)
{
entry = sos_hash_lookup(bc->lookup_table,
& block_index);
if (NULL != entry)
{
/* The block was already in the cache */
sos_kmutex_lock(& entry->lock, NULL);
/* We have the mutex. However, this entry may now be
dedicated to another block: it could have been stolen by another
thread while we were blocked */
if ((ENTRY_FREE != entry->state)
&& (entry->block_index == block_index))
{
/* Great! This entry is still the one we expect! */
/* Write-only access: force it to be dirty NOW! */
if ( (access_type == SOS_BLKCACHE_WRITE_ONLY)
&& (ENTRY_SYNC == entry->state) )
{
list_delete(bc->sync_list, entry);
entry->state = ENTRY_DIRTY;
list_add_head(bc->dirty_list, entry);
}
*block_contents = entry->block_contents;
return entry;
}
/* Bad luck: someone stole the entry and used it in the
meantime */
sos_kmutex_unlock(& entry->lock);
/* Go back and look it up again */
continue;
}
/* Ok, the block is not yet in the cache: we need to allocate a new
one */
/* The simplest case: there is a free slot in the free list (take
the most recently freed one) */
if (! list_is_empty(bc->free_list))
{
entry = list_get_head(bc->free_list);
sos_kmutex_lock(& entry->lock, NULL);
/* By the time we return from the lock, the entry could have
been stolen */
if ((ENTRY_FREE != entry->state)
|| (NULL != sos_hash_lookup(bc->lookup_table, & block_index)))
{
sos_kmutex_unlock(& entry->lock);
continue;
}
/* Ok, we got a REALLY free entry! */
break;
}
/* A little more complicated: there is a clean entry in
sync with the disk; transfer it to the free list. Take the least
recently used one */
if (! list_is_empty(bc->sync_list))
{
entry = list_get_tail(bc->sync_list);
sos_kmutex_lock(& entry->lock, NULL);
/* While we were blocked, the block may have left the sync
list! We'd better redo a complete iteration to choose
another block, just in case the block we are looking for
was brought into the cache by someone else */
if ((ENTRY_SYNC != entry->state)
|| (NULL != sos_hash_lookup(bc->lookup_table, & block_index)))
{
sos_kmutex_unlock(& entry->lock);
continue;
}
list_delete(bc->sync_list, entry);
sos_hash_remove(bc->lookup_table, entry);
entry->state = ENTRY_FREE;
list_add_head(bc->free_list, entry);
/* Now we have a free entry for us! */
break;
}
/*
* Bad luck: we have to flush a dirty entry back to disk
*/
SOS_ASSERT_FATAL(! list_is_empty(bc->dirty_list));
entry = list_get_tail(bc->dirty_list);
sos_kmutex_lock(& entry->lock, NULL);
/* While we were blocked, the block may have been transferred
to the sync list! We'd better redo a complete iteration to
choose another block, if we want to keep this recently-accessed
block inside the cache */
if ((ENTRY_DIRTY == entry->state)
&& (NULL == sos_hash_lookup(bc->lookup_table, & block_index)))
{
/* entry still dirty: transfer it to the free list */
if (SOS_OK == blkcache_flush_entry(bc, entry, TRUE))
/* We can use it now ! */
break;
}
sos_kmutex_unlock(& entry->lock);
/* We blocked, so we go for another iteration: maybe the block
was brought into the cache by someone else in the meantime */
}
/* Here we have a locked block that needs to be initialised */
/* Remove it from the free list */
list_delete(bc->free_list, entry);
/* Prepare it to be inserted in the dictionary */
entry->block_index = block_index;
/* Announce that we are preparing to add the block into the cache */
if (SOS_OK != sos_hash_insert(bc->lookup_table, entry))
SOS_FATAL_ERROR("Unexpected hash collision");
/* No need to do anything for a write-only block, simply mark it
dirty */
if (access_type == SOS_BLKCACHE_WRITE_ONLY)
{
entry->state = ENTRY_DIRTY;
list_add_head(bc->dirty_list, entry);
}
/* Otherwise retrieve the block from disk NOW! */
else
{
entry->state = ENTRY_SYNC;
list_add_head(bc->sync_list, entry);
if (SOS_OK
!= bc->operations->read_block(bc->blkdev_instance_custom_data,
entry->block_contents,
entry->block_index))
{
/* In case of failure, remove it from hash and return NULL */
list_delete(bc->sync_list, entry);
sos_hash_remove(bc->lookup_table, entry);
entry->state = ENTRY_FREE;
list_add_head(bc->free_list, entry);
sos_kmutex_unlock(& entry->lock);
return NULL;
}
}
*block_contents = entry->block_contents;
return entry;
}
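/**
 * Unlock an entry returned by sos_blkcache_retrieve_block(). is_dirty
 * tells whether the caller modified the block contents; force_flush
 * requests an immediate write-back to disk. The entry is moved to the
 * head of its list to enforce the LRU policy
 */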
sos_ret_t
sos_blkcache_release_block(struct sos_block_cache * bc,
struct sos_block_cache_entry * entry,
sos_bool_t is_dirty,
sos_bool_t force_flush)
{
/* Enforce the least recently used policy */
if (ENTRY_SYNC == entry->state)
{
list_delete(bc->sync_list, entry);
/* If needed, transfer the block to the dirty list */
if (is_dirty)
entry->state = ENTRY_DIRTY;
}
else
list_delete(bc->dirty_list, entry);
if (ENTRY_SYNC == entry->state)
list_add_head(bc->sync_list, entry);
else
list_add_head(bc->dirty_list, entry);
if ( (ENTRY_DIRTY == entry->state) && force_flush)
blkcache_flush_entry(bc, entry, FALSE);
sos_kmutex_unlock(& entry->lock);
return SOS_OK;
}
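/**
 * Write every dirty block back to disk, transferring the entries to
 * the sync list. Returns the first error reported by the block device,
 * if any
 */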
sos_ret_t
sos_blkcache_flush(struct sos_block_cache * bc)
{
struct sos_block_cache_entry * entry;
while (NULL != (entry = list_get_head(bc->dirty_list)) )
{
sos_ret_t retval = SOS_OK;
sos_kmutex_lock(& entry->lock, NULL);
if (ENTRY_DIRTY == entry->state)
retval = blkcache_flush_entry(bc, entry, FALSE);
sos_kmutex_unlock(& entry->lock);
if (SOS_OK != retval)
return retval;
}
return SOS_OK;
}