/* Copyright (C) 2005,2006 David Decotigny

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
   USA.
*/

#include <sos/ksynch.h>
#include <sos/kmem_slab.h>
#include <sos/hash.h>
#include <sos/physmem.h>   /* For SOS_PAGE_MASK */
#include <sos/list.h>
#include <sos/assert.h>
#include <sos/uaccess.h>
#include <sos/kmalloc.h>

#include "fs_pagecache.h"

#define SOS_OFFSET64_PAGE_ALIGN_INF(offs64) \
  ( ((sos_luoffset_t)(offs64)) & (~ ((sos_luoffset_t)(SOS_PAGE_MASK))) )

#define SOS_OFFSET64_IS_PAGE_ALIGNED(offs64) \
  ( ( ((sos_luoffset_t)(offs64)) & (((sos_luoffset_t)(SOS_PAGE_MASK))) ) == 0 )

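/*
 * Worked example (a sketch assuming the usual x86 configuration where
 * SOS_PAGE_SIZE is 4096 and SOS_PAGE_MASK is 0xfff):
 *   SOS_OFFSET64_PAGE_ALIGN_INF(0x1a37) == 0x1000   (round down to page start)
 *   SOS_OFFSET64_IS_PAGE_ALIGNED(0x2000) == TRUE
 *   SOS_OFFSET64_IS_PAGE_ALIGNED(0x2a37) == FALSE
 */
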
/**
 * Definition of an object holding a reference to a shared mapping of
 * a file/device-mapped cache page.
 *
 * @note This structure is huge. We could shrink it considerably by
 * removing the "name" field from the lock structure (32 bytes).
 */
struct sos_fs_pagecache_entry
{
  /** Offset of the cached page in the file or device */
  sos_luoffset_t file_offset;

  /** Address of the cached page for this offset */
  sos_vaddr_t kernel_vaddr;

  struct sos_kmutex lock;
  sos_count_t ref_cnt;

  sos_bool_t initial_fill_aborted; /**< True when the page could not
                                        be correctly filled */

  /**
   * When 0: the page is clean wrt read/write syscalls, ie the disk
   * contents reflect the contents of the page since the last
   * read/write operation. However, the disk may NOT be in sync wrt
   * mmap() operations: if mmap() operations occurred in the meantime,
   * the disk may NOT be up to date, and the pagecache entry may not
   * even be considered dirty. This is because we do not trace each of
   * the read/write MMU operations of every process (this would
   * require catching all writes even on read/write mapped pages) and
   * we don't have a reverse mapping available to set the page
   * read-only in every mapping once it has been synced to disk (to
   * effectively trace the dirty state relative to mmap operations).
   *
   * When > 0: at least one process changed the contents of the page
   * through read/write syscalls since the last sync operation.
   *
   * @note A boolean is enough for 99% of the code. But we need a real
   * counter for the sos_fs_pagecache_sync operation to make sure we
   * don't iterate twice over the same page.
   */
  sos_lcount_t rw_dirty_order;
#define ENTRY_IS_RW_DIRTY(e) ((e)->rw_dirty_order > 0)

  /** Linkage structure to keep the cache entry in the hash map */
  struct sos_hash_linkage hlink;

  /** Links to insert the entry into the rw_sync/rw_dirty lists */
  struct sos_fs_pagecache_entry *prev, *next;
};

struct sos_fs_pagecache
{
  /** The operation used to synchronize the mapped pages with the
      backing store */
  sos_fs_pagecache_sync_function_t sync_fct;
  void * sync_fct_custom_data;

  /** The dictionary offset -> pagecache_entry */
  struct sos_hash_table * lookup_table;

  /* Lists to look into in order to free a node */
  struct sos_fs_pagecache_entry * rw_sync_list;  /**< Pages in sync
                                                      with disk wrt
                                                      read/write API
                                                      (LRU at end) */
  struct sos_fs_pagecache_entry * rw_dirty_list; /**< Dirty pages wrt
                                                      read/write API
                                                      (LRU last) */

  /** The "timestamp" high watermark used to iterate over the dirty
      pages in the sync function */
  sos_lcount_t top_rw_dirty_order;
};

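/*
 * Invariant maintained by the code below: every entry reachable through
 * lookup_table is (once fully initialized) linked in exactly one of
 * rw_sync_list or rw_dirty_list; entries move between the two lists in
 * pagecache_set_rw_dirty() and pagecache_sync_page(), and leave both
 * lists only when the last reference is dropped.
 */
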
/** The slab cache for pagecache */
static struct sos_kslab_cache * cache_of_pagecache;

/** The slab cache for pagecache entries */
static struct sos_kslab_cache * cache_of_pagecache_entry;

sos_ret_t sos_fs_pagecache_subsystem_setup()
{
  /* Allocate the slab caches for the page caches and page cache
     entries */

  cache_of_pagecache =
    sos_kmem_cache_create("pagecache", sizeof(struct sos_fs_pagecache),
                          2, 0, SOS_KSLAB_CREATE_MAP | SOS_KSLAB_CREATE_ZERO);
  if (NULL == cache_of_pagecache)
    return -SOS_ENOMEM;

  cache_of_pagecache_entry =
    sos_kmem_cache_create("pagecache_entry",
                          sizeof(struct sos_fs_pagecache_entry),
                          2, 0, SOS_KSLAB_CREATE_MAP | SOS_KSLAB_CREATE_ZERO);
  if (NULL == cache_of_pagecache_entry)
    {
      sos_kmem_cache_destroy(cache_of_pagecache);
      return -SOS_ENOMEM;
    }

  return SOS_OK;
}

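/*
 * Note: the setup routine above must run once before any call to
 * sos_fs_pagecache_new_cache(), since the constructors below rely on the
 * two slab caches it creates (presumably it is invoked from the
 * filesystem subsystem initialization at kernel startup).
 */
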
struct sos_fs_pagecache *
sos_fs_pagecache_new_cache(sos_fs_pagecache_sync_function_t sync_fct,
                           void * sync_fct_custom_data)
{
  struct sos_fs_pagecache * pagecache
    = (struct sos_fs_pagecache*) sos_kmem_cache_alloc(cache_of_pagecache,
                                                      0);
  if (NULL == pagecache)
    return NULL;

  pagecache->lookup_table = sos_hash_create("pagecache",
                                            struct sos_fs_pagecache_entry,
                                            sos_hash_ui64,
                                            sos_hash_key_eq_ui64,
                                            127, file_offset, hlink);
  if (NULL == pagecache->lookup_table)
    {
      sos_kmem_cache_free((sos_vaddr_t) pagecache);
      return NULL;
    }

  pagecache->sync_fct = sync_fct;
  pagecache->sync_fct_custom_data = sync_fct_custom_data;
  /* Arbitrary non-zero seed for the dirty "timestamp" counter: only the
     relative order of the values matters */
  pagecache->top_rw_dirty_order = 0x24;

  return pagecache;
}

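/*
 * Illustrative sketch (not part of this file, all "mydisk" names are
 * hypothetical): a block-device driver would typically provide its own
 * sync callback, matching the way pc->sync_fct is invoked below
 * (offset, kernel address of the page, custom data), and create one
 * pagecache per device:
 *
 *   static sos_ret_t mydisk_sync_page(sos_luoffset_t offset,
 *                                     sos_vaddr_t page_vaddr,
 *                                     void * custom_data)
 *   {
 *     struct mydisk * disk = (struct mydisk*) custom_data;
 *     return mydisk_write(disk, offset, page_vaddr, SOS_PAGE_SIZE);
 *   }
 *
 *   struct sos_fs_pagecache * pc
 *     = sos_fs_pagecache_new_cache(mydisk_sync_page, disk);
 *
 * The authoritative typedef for the callback lives in fs_pagecache.h.
 */
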
sos_ret_t
sos_fs_pagecache_delete_cache(struct sos_fs_pagecache * pc)
{
  /* The cache is EXPECTED to be empty ! */

  if (!list_is_empty(pc->rw_dirty_list))
    SOS_FATAL_ERROR("Non empty dirty list");
  if (!list_is_empty(pc->rw_sync_list))
    SOS_FATAL_ERROR("Non empty sync list");

  sos_hash_dispose(pc->lookup_table);
  return sos_kmem_cache_free((sos_vaddr_t)pc);
}

/** Helper function to flush a page to disk. Expects the entry to be
    locked */
static sos_ret_t pagecache_sync_page(struct sos_fs_pagecache * pc,
                                     struct sos_fs_pagecache_entry * entry)
{
  sos_ret_t retval;

  if (! ENTRY_IS_RW_DIRTY(entry))
    return SOS_OK;

  /* Now do the real transfer to backing store */
  retval = pc->sync_fct(entry->file_offset, entry->kernel_vaddr,
                        pc->sync_fct_custom_data);
  if (SOS_OK != retval)
    return retval;

  /* Transfer page to the sync list */
  list_delete(pc->rw_dirty_list, entry);
  entry->rw_dirty_order = 0;
  list_add_head(pc->rw_sync_list, entry);

  return SOS_OK;
}

/** Helper function to correctly lock an entry */
static sos_ret_t pagecache_use(struct sos_fs_pagecache * pc,
                               struct sos_fs_pagecache_entry * entry)
{
  entry->ref_cnt ++;
  return sos_kmutex_lock(& entry->lock, NULL);
}

/**
 * Helper function to transfer a page to the dirty r/w list
 */
static sos_ret_t pagecache_set_rw_dirty(struct sos_fs_pagecache * pc,
                                        struct sos_fs_pagecache_entry * entry)
{
  if (ENTRY_IS_RW_DIRTY(entry))
    return SOS_OK; /* Nothing to do */

  list_delete(pc->rw_sync_list, entry);
  entry->rw_dirty_order = ++ pc->top_rw_dirty_order;
  list_add_head(pc->rw_dirty_list, entry);

  return SOS_OK;
}

/** Helper function to correctly unlock an entry, flushing it to disk
    if needed */
static sos_ret_t pagecache_release(struct sos_fs_pagecache * pc,
                                   struct sos_fs_pagecache_entry * entry)
{
  if (entry->ref_cnt > 1)
    {
      entry->ref_cnt --;
      sos_kmutex_unlock(& entry->lock);
      return SOS_OK;
    }

  /*
   * The cached page is now referenced ONLY by US, we can try to
   * remove it from the cache
   */

  /* Flush any change to disk, at least if we are sure that its
     content is legal, ie that the page_in callback succeeded in
     filling it */
  if (! entry->initial_fill_aborted)
    pagecache_sync_page(pc, entry);

  /* Ok, now WE are not interested in this entry anymore */
  entry->ref_cnt --;

  /* During blocking time, another thread could have asked for the
     entry. In this case, stop here */
  if (entry->ref_cnt > 0)
    {
      sos_kmutex_unlock(& entry->lock);
      return SOS_OK;
    }

  /* Remove it from the lists */
  sos_hash_remove(pc->lookup_table, entry);
  if (ENTRY_IS_RW_DIRTY(entry))
    list_delete(pc->rw_dirty_list, entry);
  else
    list_delete(pc->rw_sync_list, entry);

  /* We can safely erase it now ! */
  sos_kmutex_unlock(& entry->lock);
  SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_dispose(& entry->lock)); /* No threads are waiting */
  sos_kfree(entry->kernel_vaddr);
  sos_kmem_cache_free((sos_vaddr_t)entry);

  return SOS_OK;
}

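/*
 * Usage note for the two helpers above: pagecache_use() and
 * pagecache_release() work in pairs. The caller must hold both a
 * reference and the entry lock when calling pagecache_release(); the
 * release drops both, and frees the entry altogether when the last
 * reference goes away.
 */
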
/**
 * Helper function to look up an entry in the cache and lock it. If
 * the entry does not exist (yet), return NULL.
 */
static struct sos_fs_pagecache_entry *
pagecache_lookup_and_lock(struct sos_fs_pagecache * pc,
                          sos_luoffset_t offset)
{
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  struct sos_fs_pagecache_entry * entry = NULL;

  while (TRUE)
    {
      entry
        = (struct sos_fs_pagecache_entry*) sos_hash_lookup(pc->lookup_table,
                                                           & pgoffs);
      if (! entry)
        break;

      /* Lock it now */
      SOS_ASSERT_FATAL(SOS_OK == pagecache_use(pc, entry));

      /*
       * Entry is now locked
       */

      /* Make sure it contains legal contents: if we were blocked while
         the page_in operation was reading it from disk, an error could
         have occurred. In this case, we must consider that this entry
         is not yet inserted in the cache */
      if (entry->initial_fill_aborted)
        {
          pagecache_release(pc, entry);
          continue;
        }

      /* Ok, we have the entry and it is correctly initialized ! */
      break;
    }

  return entry;
}

sos_ret_t
sos_fs_pagecache_read(struct sos_fs_pagecache * pc,
                      sos_luoffset_t offset,
                      sos_genaddr_t dest_buf,
                      sos_size_t * /* in/out */ len)
{
  sos_ret_t retval;
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  sos_luoffset_t endpos = offset + *len;
  struct sos_fs_pagecache_entry * entry;

  entry = pagecache_lookup_and_lock(pc, pgoffs);
  if (NULL == entry)
    return -SOS_ENOENT;

  /* Great ! Found the entry in the cache ! */

  /* Read only up to the end of the page */
  if (endpos - pgoffs > SOS_PAGE_SIZE)
    endpos = pgoffs + SOS_PAGE_SIZE;

  /* Copy page contents to destination buffer */
  retval = sos_memcpy_generic_to(dest_buf,
                                 entry->kernel_vaddr + (offset - pgoffs),
                                 endpos - offset);
  pagecache_release(pc, entry);

  if (retval < 0)
    {
      *len = 0;
      return retval;
    }

  *len = retval;
  if ((sos_luoffset_t)retval != endpos - offset)
    return -SOS_EFAULT;

  return SOS_OK;
}

sos_ret_t
sos_fs_pagecache_write(struct sos_fs_pagecache * pc,
                       sos_luoffset_t offset,
                       sos_genaddr_t src_buf,
                       sos_size_t * /* in/out */ len,
                       sos_bool_t synchronous_write)
{
  sos_ret_t retval;
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  sos_luoffset_t endpos = offset + *len;
  struct sos_fs_pagecache_entry * entry;

  entry = pagecache_lookup_and_lock(pc, pgoffs);
  if (NULL == entry)
    return -SOS_ENOENT;

  /* Great ! Found the entry in the cache ! */

  /* Write only up to the end of the page */
  if (endpos - pgoffs > SOS_PAGE_SIZE)
    endpos = pgoffs + SOS_PAGE_SIZE;

  /* Copy the source buffer contents into the page */
  retval = sos_memcpy_generic_from(entry->kernel_vaddr + (offset - pgoffs),
                                   src_buf,
                                   endpos - offset);
  /* Transfer the entry to the dirty list if needed */
  if (retval >= 0)
    pagecache_set_rw_dirty(pc, entry);

  if (retval < 0)
    {
      *len = 0;
      pagecache_release(pc, entry);
      return retval;
    }

  *len = retval;
  if ((sos_luoffset_t)retval != endpos - offset)
    retval = -SOS_EFAULT;
  else
    retval = SOS_OK;

  /* Flush to disk if needed */
  if (synchronous_write)
    {
      sos_ret_t ret = pagecache_sync_page(pc, entry);
      if (SOS_OK == retval)
        retval = ret;
    }

  pagecache_release(pc, entry);
  return retval;
}

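/*
 * Length convention shared by sos_fs_pagecache_read() and
 * sos_fs_pagecache_write() above: *len is an in/out parameter. On input
 * it holds the number of bytes requested; on output it holds the number
 * of bytes actually transferred, which is truncated at the end of the
 * cached page when the request crosses a page boundary. Caller sketch
 * (hypothetical names), advancing the file position by the
 * possibly-shortened length and falling back to the backing store on
 * -SOS_ENOENT (page not cached):
 *
 *   sos_size_t len = count;
 *   sos_ret_t  ret = sos_fs_pagecache_read(pc, file_pos, user_buf, &len);
 *   if (SOS_OK == ret)
 *     file_pos += len;
 */
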
sos_ret_t sos_fs_pagecache_set_dirty(struct sos_fs_pagecache * pc,
                                     sos_luoffset_t offset,
                                     sos_bool_t sync_backing_store)
{
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  struct sos_fs_pagecache_entry * entry;

  entry = pagecache_lookup_and_lock(pc, pgoffs);
  if (NULL == entry)
    return -SOS_ENOENT;

  /* Great ! Found the entry in the cache ! */
  pagecache_set_rw_dirty(pc, entry);

  /* Synchronize to backing store if needed */
  if (sync_backing_store)
    pagecache_sync_page(pc, entry);

  pagecache_release(pc, entry);
  return SOS_OK;
}

struct sos_fs_pagecache_entry *
sos_fs_pagecache_ref_page(struct sos_fs_pagecache * pc,
                          sos_luoffset_t offset,
                          sos_vaddr_t * /* out */ kernel_vaddr,
                          sos_bool_t * /* out */ newly_allocated)
{
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  struct sos_fs_pagecache_entry * entry;

  /* The offset is expected to be page-aligned */
  if (pgoffs != offset)
    return NULL;

  entry = pagecache_lookup_and_lock(pc, pgoffs);
  if (NULL != entry)
    {
      /* Found it ! No need to go further */
      *newly_allocated = FALSE;
      *kernel_vaddr = entry->kernel_vaddr;
      return entry;
    }

  /*
   * Need to allocate a new kernel page
   */

  entry = (struct sos_fs_pagecache_entry*)
    sos_kmem_cache_alloc(cache_of_pagecache_entry, 0);
  if (NULL == entry)
    return NULL;

  if (SOS_OK != sos_kmutex_init(& entry->lock, "pagecache_entry",
                                SOS_KWQ_ORDER_FIFO))
    {
      sos_kmem_cache_free((sos_vaddr_t)entry);
      return NULL;
    }

  /* Initial state of the page corresponds to an erroneous
     initialization */
  entry->file_offset = pgoffs;
  entry->initial_fill_aborted = TRUE;
  entry->ref_cnt = 1;

  /* Allocate the page */
  entry->kernel_vaddr = sos_kmalloc(SOS_PAGE_SIZE, 0);
  if (((sos_vaddr_t)NULL) == entry->kernel_vaddr)
    {
      sos_kmutex_dispose(& entry->lock);
      sos_kmem_cache_free((sos_vaddr_t)entry);
      return NULL;
    }

  /* Own the mutex */
  SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_lock(& entry->lock, NULL));

  /* Try to insert it into the hash table. Might fail if the page was
     already inserted, which could be possible because the allocation
     routines might have blocked */
  if (SOS_OK != sos_hash_insert(pc->lookup_table, entry))
    {
      /* An entry was inserted during the allocations, undo the new entry */
      sos_kmutex_unlock(& entry->lock);
      sos_kmutex_dispose(& entry->lock);
      sos_kfree(entry->kernel_vaddr);
      sos_kmem_cache_free((sos_vaddr_t)entry);

      /* Get the real entry */
      entry = pagecache_lookup_and_lock(pc, offset);
      SOS_ASSERT_FATAL(NULL != entry);
      *kernel_vaddr = entry->kernel_vaddr;
      *newly_allocated = FALSE;
      return entry;
    }

  /* Now register the entry in the sync list */
  entry->rw_dirty_order = 0;
  list_add_head(pc->rw_sync_list, entry);

  *newly_allocated = TRUE;
  *kernel_vaddr = entry->kernel_vaddr;
  return entry;
}

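/*
 * Expected calling sequence for the page mapping API above (a sketch,
 * presumably followed by the file-mapping page-in code):
 *   1. sos_fs_pagecache_ref_page(pc, pgoffs, &vaddr, &is_new) returns a
 *      referenced AND locked entry;
 *   2. when is_new is TRUE, the caller fills the page at vaddr from the
 *      backing store;
 *   3. the caller then calls sos_fs_pagecache_unlock_page() with the
 *      outcome of that fill (FALSE when the fill succeeded or nothing
 *      had to be read, TRUE to discard the entry);
 *   4. much later, sos_fs_pagecache_unref_page(pc, pgoffs) drops the
 *      reference when the mapping goes away.
 */
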
sos_ret_t
sos_fs_pagecache_unlock_page(struct sos_fs_pagecache * pc,
                             struct sos_fs_pagecache_entry * entry,
                             sos_bool_t initial_fill_aborted)
{
  entry->initial_fill_aborted = initial_fill_aborted;

  if (initial_fill_aborted)
    return pagecache_release(pc, entry);

  return sos_kmutex_unlock(& entry->lock);
}

sos_ret_t
sos_fs_pagecache_unref_page(struct sos_fs_pagecache * pc,
                            sos_luoffset_t offset)
{
  sos_luoffset_t pgoffs = SOS_OFFSET64_PAGE_ALIGN_INF(offset);
  struct sos_fs_pagecache_entry * entry;

  /* The offset is expected to be page-aligned */
  if (pgoffs != offset)
    return -SOS_EINVAL;

  entry
    = (struct sos_fs_pagecache_entry*) sos_hash_lookup(pc->lookup_table,
                                                       & pgoffs);
  SOS_ASSERT_FATAL(NULL != entry);
  SOS_ASSERT_FATAL(SOS_OK == sos_kmutex_lock(& entry->lock, NULL));
  return pagecache_release(pc, entry);
}

sos_ret_t
sos_fs_pagecache_sync(struct sos_fs_pagecache * pc)
{
  sos_ret_t retval = SOS_OK;
  int dummy = 0;
  sos_lcount_t rw_dirty_order = 0;

  /** High watermark telling "don't take the pages added afterwards
      into account" */
  sos_lcount_t top_rw_dirty_order = pc->top_rw_dirty_order;

  if (list_is_empty(pc->rw_dirty_list))
    return SOS_OK;

  /* This scan will be exhaustive and resilient to addition/removal of
     entries: newly dirtied entries are inserted at the head of the list
     (see pagecache_set_rw_dirty) while the scan proceeds backward
     (tail -> head), and the rw_dirty_order watermark keeps entries
     dirtied after the scan started out of this pass */
  while (TRUE)
    {
      struct sos_fs_pagecache_entry * entry = NULL;
      int ndirty;

      /* As long as we don't block, we can safely access the
         prev/next fields of the page descriptor */
      list_foreach_backward(pc->rw_dirty_list, entry, ndirty)
        {
          sos_ret_t ret = SOS_OK;
          struct sos_fs_pagecache_entry * prev_entry = NULL;

          /* Reached the initial high watermark ? Don't take the
             additional pages into account */
          if (entry->rw_dirty_order > top_rw_dirty_order)
            break;

          /* Already handled during this sync ? Skip it */
          if (entry->rw_dirty_order <= rw_dirty_order)
            continue;

          rw_dirty_order = entry->rw_dirty_order;
          prev_entry = entry->prev;

          SOS_ASSERT_FATAL(SOS_OK == pagecache_use(pc, entry));
          if (! entry->initial_fill_aborted)
            ret = pagecache_sync_page(pc, entry);
          if (SOS_OK != ret)
            retval = ret;
          pagecache_release(pc, entry);

          /* We must NOT continue this inner scan because the prev/next
             page cache entries might have been removed or added (sync
             pages, by definition) ! */
          if (prev_entry != entry->prev)
            goto lookup_next_ent;
        }

      /* Reached the end of the list */
      break;

    lookup_next_ent:
      /* Start the inner scan over */
      dummy ++;
    }

  return retval;
}