/* Copyright (C) 2005,2006 David Decotigny

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
   USA.
*/
#ifndef _SOS_FSPAGECACHE_H_
#define _SOS_FSPAGECACHE_H_


/**
 * @file fs_pagecache.h
 *
 * Simple page cache interface. Used to automate the synchronization
 * between the read/write operations and mmap. A "FS page cache" is
 * simply a set of pages mapping a file in memory. A file may not be
 * entirely mapped into memory: its pages are mapped only if any user
 * thread invoked an mmap and page-faulted inside the mapped region to
 * map these pages into memory. Contrary to some other caches in SOS
 * (eg the block cache), this one is not limited in size. As many
 * pages as needed will be allocated for it, as permitted by the
 * available RAM. With a pageout mechanism, this cache will be
 * shrunk when needed: some of its pages will be transferred back to
 * disk and unmapped.
 *
 * A page cache is used both to cache memory mapped files of an FS,
 * and memory mapped block devices. Hence:
 *  - there is one SINGLE page cache for each block device (proper to
 *    each disk, to each partition)
 *  - there is one SINGLE page cache for each file of a file system
 *
 * For block devices, the page cache automatically synchronizes the
 * pages with the block cache as long as these mapped pages are
 * accessed through the read/write API. However, <b>NO <i>automatic
 * and accurate</i> synchronization</b> between the in-memory modified
 * pages (accessed through the MMU) and the block cache is provided
 * because we have no way to collect the accurate list of pages
 * modified through MMU write accesses (this would require either to
 * catch all the MMU write operations [too inefficient !], or to have
 * a reverse-mapping system in order to look at the dirty bit of all
 * the mappings). Hence, to enforce blkcache/pagecache
 * synchronization, the msync/munmap API must be used manually. Thus,
 * the page cache is accurately synchronized with the block cache:
 *  - automatically: with the read/write/sync operations
 *  - manually: with the msync and munmap (and of course: exit) operations
 *
 * Nevertheless, from the viewpoint of "blkdev.c", the
 * blockdev_read/write operations are always in sync with the MMU
 * because the pagecache is accessed prior to the blkcache: any
 * divergence between the pagecache and the blkcache is hence
 * <i>hidden</i>. But keep in mind that if you want the disk to
 * accurately reflect the contents of the mapped pages, you have to
 * eventually call msync, munmap, or to destroy the address space (ie
 * exit the process).
 *
 * A side effect: if you map /dev/hda and /dev/hda1, both mappings
 * will be inconsistent and may also be inconsistent with read/write
 * accesses. This is because the partitions have their own page cache
 * while they share the block cache with the disk device. A solution
 * would be to share the page cache between the disk device and all
 * its partitions. But, due to the fact that partitions are not
 * necessarily page-aligned in the disk, this would impose some pages
 * to not correspond to a page-aligned offset inside a partition,
 * requiring either to have an odd semantic of the mmap syscall (the
 * allowed device "offset" would depend on the disk partitioning) if
 * we want to share the mapped pages between the cache and userspace,
 * or to allocate other pages for the required userspace mappings and
 * keep them in sync with the page cache pages. Both solutions seem
 * ugly to me, and not worth implementing since the page cache is
 * aimed at being generic enough to be used for file mappings: files
 * don't have sub-files (as do disk devices that have partitions). So
 * solving the problem is non pertinent for files. And who will ever
 * need /dev/hda mappings to be consistent with those of /dev/hda1 ?...
 */

#include <sos/errno.h>
#include <sos/uaccess.h>


/** Opaque structure holding a page cache */
struct sos_fs_pagecache;

/** Opaque structure holding a page of the cache */
struct sos_fs_pagecache_entry;


/** Initialize the page cache subsystem (slab caches, etc.) */
sos_ret_t sos_fs_pagecache_subsystem_setup(void);


/**
 * Function called to flush the dirty pages to backing store
 *
 * @param offset      Offset of the page inside the backing store
 * @param dirty_page  Kernel virtual address of the dirty page to flush
 * @param custom_data The opaque pointer registered at cache creation
 */
typedef sos_ret_t
(*sos_fs_pagecache_sync_function_t)(sos_luoffset_t offset,
				    sos_vaddr_t dirty_page,
				    void * custom_data);


/**
 * Create a new pagecache.
 *
 * @param sync_fct the function used to flush the dirty pages to
 * backing store. May be NULL
 * @param sync_fct_custom_data opaque pointer passed back to sync_fct
 *
 * @return NULL on error
 */
struct sos_fs_pagecache *
sos_fs_pagecache_new_cache(sos_fs_pagecache_sync_function_t sync_fct,
			   void * sync_fct_custom_data);


/**
 * Delete the page cache.
 *
 * The page cache is expected to be already flushed to backing store
 */
sos_ret_t
sos_fs_pagecache_delete_cache(struct sos_fs_pagecache * pc);


/**
 * Read from the given offset from the cache, if present.
 *
 * @param len in: bytes requested; out: bytes actually copied
 *
 * @return ENOENT when no page for the given offset is mapped, return
 * EFAULT when the contents could not be completely copied to
 * destination buffer
 */
sos_ret_t
sos_fs_pagecache_read(struct sos_fs_pagecache * pc,
		      sos_luoffset_t offset,
		      sos_genaddr_t dest_buf,
		      sos_size_t * /* in/out */len);


/**
 * Write at the given offset from the cache, if present
 *
 * @param len in: bytes to write; out: bytes actually copied
 * @param synchronous_write When TRUE, flush the page to backing store
 * before returning
 *
 * @return ENOENT when no page for the given offset is mapped, return
 * EFAULT when the contents could not be completely copied from
 * source buffer
 */
sos_ret_t
sos_fs_pagecache_write(struct sos_fs_pagecache * pc,
		       sos_luoffset_t offset,
		       sos_genaddr_t src_buf,
		       sos_size_t * /* in/out */len,
		       sos_bool_t synchronous_write);


/**
 * Function reserved to blkdev.c and FS code: used by the msync
 * callback to mark a pagecache page dirty
 *
 * @param sync_backing_store When TRUE, then the page must be flushed
 * to backing store.
 */
sos_ret_t sos_fs_pagecache_set_dirty(struct sos_fs_pagecache * pc,
				     sos_luoffset_t offset,
				     sos_bool_t sync_backing_store);


/**
 * Prepare a page to be mapped: get a NEW reference to the page
 * (kernel address) of the page to be mapped, which is also locked in
 * order to be used. If the page is not yet present in the cache,
 * allocate it and prepare it to be filled
 *
 * @param offset MUST be page-aligned
 * @param newly_allocated TRUE when the page was not already mapped by
 * someone: the contents of the page is then IRRELEVANT
 *
 * @return NULL on error
 *
 * @note The page is also LOCKED, use unlock to unlock it before
 * unreferencing it
 */
struct sos_fs_pagecache_entry *
sos_fs_pagecache_ref_page(struct sos_fs_pagecache * pc,
			  sos_luoffset_t offset,
			  sos_vaddr_t * /* out */ kernel_vaddr,
			  sos_bool_t * /* out */ newly_allocated);


/** Called by the blkdev.c and FS page_in callback to unlock the entry
    after it has been initialized. */
sos_ret_t
sos_fs_pagecache_unlock_page(struct sos_fs_pagecache * pc,
			     struct sos_fs_pagecache_entry * entry,
			     sos_bool_t initial_fill_aborted);


/**
 * Called when the page is unmapped from a user process space
 *
 * @param offset MUST be page-aligned
 *
 * @note the page is expected to be present in the cache
 * @note the entry is expected NOT to be locked !
 */
sos_ret_t
sos_fs_pagecache_unref_page(struct sos_fs_pagecache * pc,
			    sos_luoffset_t offset);


/** Call the sync function on each dirty page */
sos_ret_t
sos_fs_pagecache_sync(struct sos_fs_pagecache * pc);

#endif /* _SOS_FSPAGECACHE_H_ */