sos-code-article10/sos/umem_vmm.h

617 lines
20 KiB
C

/* Copyright (C) 2005,2006 David Decotigny
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#ifndef _SOS_UMEM_VMM_H_
#define _SOS_UMEM_VMM_H_
/**
* @file umem_vmm.h
*
* Management of the address space of a process in SOS. The so-called
* "address space" of a process consists of the description of the
* virtual addresses that are valid in the user space of a process (in
* SOS: addresses 1G-4G). The kernel-space of a process is managed by
* the "kmem" subsystem, and is kept identical across all the
* processes in the system.
*
* The umem_vmm subsystem handles the following features:
* - demand-mapping of resources (files: mmap): mapping in physical RAM
* will be delayed as much as possible, until the process really
* needs to access the mapped addresses
* - mprotect/mremap support
* - private and shared mappings
* - Copy-On-Write (COW) of the private mappings upon fork() to favour
* shared physical memory as much as possible
* - "heap" management (brk/sbrk)
*
* Swap is NOT supported (yet), which means that the following is NOT
* supported:
* - locked/reserved I/O pages (everything is locked in RAM)
* - "safe" demand-mapping of anonymous pages, ie conservative VMM
* allocation (alloc of anonymous pages on the swap)
* Other unsupported features:
* - dynamically-resizable regions (Linux's GROWUP/GROWDOWN vma): the
* user stack is expected to have a suitable virtual size from the
* beginning, or sos_umem_vmm_resize() must be used explicitly to
* resize it
* - no provision of "stack size" accounting, since there are
* multiple stacks (ie user threads) in a process: which stack to
* consider ???
*
* The address space is divided into "virtual regions" (aka "VR") that
* describe a single mapping, aka a segment of contiguous pages in
* user-space virtual memory. Each such virtual region "maps" a
* "resource" and is characterised by:
* - its base address and length in user-space
* - the allowed accesses, aka "protection" (read-only or read/write)
* - the resource it maps in virtual memory
*
* A so-called resource is typically:
* - a file
* - a device
* - an area initially full of zeros (the VRs mapping such an area are
* called "anonymous mappings")
*
* The implementation is very close to that of Linux and Kos. This is
* a "simple" implementation, not the most elegant one, such as those
* based on "shadow objects" hierarchies as found in BSD 4.4 and Mach,
* or that of Solaris (based on the "anon" lists). Actually, this
* implementation does not use "shadow-objects"/anon list when a COW
* page of a shared mapping is made anonymous. This won't hurt the
* implementation of the basic demand-mapping mechanism; on the
* contrary, it will make things simpler. But this will largely impact
* the implementation of the swap-in/swap-out strategies, as these
* would require a non trivial intrication of low-level and higher
* level algorithms.
*/
/**
 * Definition of an "address space" in SOS. This is an opaque
 * structure defined in umem_vmm.c. Its main role is to list virtual
 * regions. It mainly consists of:
 * - a reference to the process owning it
 * - maximum allowed protection (ie can it be mapped read-only or
 *   read/write ?)
 * - the list of VRs mapping resources
 * - a mm_context that reflects the configuration of the MMU
 * - the location of the heap for this process
 * - statistics
 */
struct sos_umem_vmm_as;
/**
 * Definition of a "virtual region" (VR). Linux would call them "vma"
 * (Virtual Memory Area), and Solaris: "segments". This is an opaque
 * type here (only forward-declared). It mainly consists of:
 * - the start/end addresses of the mapping
 * - a pointer to the resource that it maps
 * - the type of mapping (shared/private)
 * - the actual protection flags (@see SOS_VM_MAP_PROT_* flags in
 *   hwcore/paging.h)
 * - a set of callbacks (@see sos_umem_vmm_vr_ops below) automatically
 *   called by the umem_vmm subsystem each time the VR is modified
 */
struct sos_umem_vmm_vr;
/** VR flag (bit 0): the region can be shared between a process and
    its children */
#define SOS_VR_MAP_SHARED (1 << 0)
#include <sos/types.h>
#include <sos/process.h>
/**
 * Table of callbacks attached to a virtual region. The umem_vmm
 * subsystem invokes them automatically.
 *
 * Calling sequences:
 * - duplicate_as() (aka fork()):
 *     vr->ops->ref()
 *     add vr to lists
 * - delete_as() (aka exit()):
 *     vr->ops->unmap()
 *     remove vr from lists
 *     vr->ops->unref()
 * - mmap():
 *   -> left + new + right VRs can be merged:
 *        remove right_vr from list
 *        right_vr->ops->unref()
 *   -> left + new VRs can be merged:
 *        nothing
 *   -> new + right VRs can be merged:
 *        nothing
 *   -> isolated:
 *        add new_vr to lists
 *        new_vr->map()  (NOTE(review): no map() member exists in this
 *                        table; presumably the mmap() callback of the
 *                        mapped resource - confirm in umem_vmm.c)
 *        new_vr->ops->ref()
 * - munmap():
 *   -> VR totally unmapped:
 *        vr->ops->unmap()
 *        remove vr from lists
 *        vr->ops->unref()
 *   -> VR unmapped in the middle (split into 2):
 *        add (new) right VR into the lists
 *        vr->ops->unmap(middle_unmapped_area)
 *        right_vr->ops->ref()
 *   -> VR unmapped on its left:
 *        vr->ops->unmap(left_unmapped_area)
 *   -> VR unmapped on its right:
 *        vr->ops->unmap(right_unmapped_area)
 * - chprot():
 *   -> VR totally chprot:
 *        nothing
 *   -> VR chprot in the middle (split into 3):
 *        add (new) middle+right VRs into the lists
 *        middle_vr->ops->ref()
 *        right_vr->ops->ref()
 *   -> VR chprot on its left (split into 2):
 *        add (new) right VR into the lists
 *        right_vr->ops->ref()
 *   -> VR chprot on its right (split into 2):
 *        add (new) right VR into the lists
 *        right_vr->ops->ref()
 * - resize():
 *   -> if moving the VR: map/unmap
 *   -> otherwise: nothing
 */
struct sos_umem_vmm_vr_ops
{
  /**
   * Invoked once the virtual region has been inserted into its
   * address space.
   * @note Optional
   */
  void (*ref)(struct sos_umem_vmm_vr *vr);

  /**
   * Invoked when the virtual region is removed from its address
   * space.
   * @note Optional
   */
  void (*unref)(struct sos_umem_vmm_vr *vr);

  /**
   * Invoked when all or part of a VR is unmapped; uaddr/size
   * presumably delimit the unmapped interval (see the calling
   * sequences above).
   * @note Optional
   */
  void (*unmap)(struct sos_umem_vmm_vr *vr,
                sos_uaddr_t uaddr,
                sos_size_t size);

  /**
   * Invoked by the page fault handler in order to map data at the
   * given virtual address. The Linux kernel names this callback
   * "nopage".
   *
   * @note MANDATORY
   */
  sos_ret_t (*page_in)(struct sos_umem_vmm_vr *vr,
                       sos_uaddr_t uaddr,
                       sos_bool_t write_access);

  /**
   * Invoked to synchronize the contents of the given page with its
   * backing store. The method is in charge of resetting the dirty
   * flag of the page (@see sos_paging_set_dirty), and MUST handle
   * the appropriate locking internally.
   *
   * @note Optional
   */
  sos_ret_t (*sync_page)(struct sos_umem_vmm_vr *vr,
                         sos_uaddr_t page_uaddr,
                         sos_ui32_t flags);

/** Flags for msync. SOS_MSYNC_ASYNC expands to 0, ie it is the
    absence of the SOS_MSYNC_SYNC bit */
#define SOS_MSYNC_SYNC (1 << 0)
#define SOS_MSYNC_ASYNC (0 << 0)
};
/**
 * Description of a mapped resource, typically a file or a device. In
 * both cases each VR maps only a part of the resource: that part is
 * given by the offset_in_resource field and the size field of the VR.
 */
struct sos_umem_vmm_mapped_resource
{
  /** Maximum access rights authorized for the VRs mapping this
      resource. NOTE(review): the original comment names these
      SOS_VR_PROT_*; presumably the SOS_VM_MAP_PROT_* flags of
      hwcore/paging.h are meant - confirm */
  sos_ui32_t allowed_access_rights;

  /** Flags associated with the resource. Currently only
      SOS_MAPPED_RESOURCE_ANONYMOUS is supported */
  sos_ui32_t flags;

  /** List of the VRs mapping this resource */
  struct sos_umem_vmm_vr *list_vr;

  /**
   * MANDATORY callback, called when a new VR mapping the resource is
   * created. The callback is allowed to change the following
   * properties of the VR:
   * - its set of operations, through sos_umem_vmm_set_ops_of_vr()
   */
  sos_ret_t (*mmap)(struct sos_umem_vmm_vr *);

  /**
   * Custom data, free for the user to define: the umem_vmm subsystem
   * never looks at it nor changes it.
   */
  void *custom_data;
};
/** Indicate that this resource is not backed by any physical
    storage. This means that the "offset_in_resource" field of the
    VRs will be computed by sos_umem_vmm_map() */
#define SOS_MAPPED_RESOURCE_ANONYMOUS (1 << 0)
/**
 * Physical address of THE page (full of 0s) used for anonymous
 * mappings. Anybody may map it, provided it is ALWAYS mapped in
 * read-only mode
 */
extern sos_paddr_t sos_zero_physpage;
/**
 * Kernel-space virtual address at which the "ZERO" page is mapped
 */
extern sos_vaddr_t sos_zero_kernelpage;
/**
 * Set up the umem_vmm subsystem.
 */
sos_ret_t
sos_umem_vmm_subsystem_setup(void);
/**
 * Return the address space that is currently in effect. It may be
 * the "normal" address space of the current thread, but not
 * necessarily: it might be the address space of another process.
 * This happens when a kernel thread of process P wants to access the
 * address space of another process P2.
 *
 * @return The current effective address space; might be NULL (no
 * access to user space needed).
 */
struct sos_umem_vmm_as *
sos_umem_vmm_get_current_as(void);
/**
 * Switch the current effective address space, possibly
 * reconfiguring the MMU. The owning process's reference count of the
 * given AS is increased, and that of the previous AS is decreased
 * (when they differ).
 *
 * @param as The address space to switch to; may be NULL (no need to
 * access user space)
 */
sos_ret_t
sos_umem_vmm_set_current_as(struct sos_umem_vmm_as *as);
/**
 * Build a new address space containing no mapping.
 *
 * @param owner The process that will own the new address space
 *
 * @note There is no need to call
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
struct sos_umem_vmm_as *sos_umem_vmm_create_empty_as(struct sos_process *owner);
/**
 * Build a new address space as a copy of the model_as address space.
 * Every translation that belongs to a private mapping is marked
 * 'read-only', which activates the "copy-on-write" semantics.
 *
 * @param model_as The address space to copy
 * @param for_owner The process that will hold the new address space
 *
 * @note This function automatically calls
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
struct sos_umem_vmm_as *sos_umem_vmm_duplicate_as(struct sos_umem_vmm_as *model_as,
                                                  struct sos_process *for_owner);
/**
 * Remove all the mappings present in the address space; called at
 * process deletion time. Besides deleting all the VR data
 * structures, this function also calls the unmap()/unref() callbacks
 * of these VRs. The physical pages mapped inside the address space
 * are however NOT unmapped at this stage: they are unmapped all in
 * one go once the underlying mm_context becomes unused.
 *
 * @note There is no need to call
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t sos_umem_vmm_delete_as(struct sos_umem_vmm_as *as);
/*
* Accessor functions for the address space
*/
/**
 * Get the process owning the given address space.
 *
 * @note The returned pointer is NOT a new reference
 */
struct sos_process *sos_umem_vmm_get_process(struct sos_umem_vmm_as *as);
/**
 * Get the MMU configuration associated with the given address space.
 *
 * @note The returned pointer is NOT a new reference
 */
struct sos_mm_context *sos_umem_vmm_get_mm_context(struct sos_umem_vmm_as *as);
/**
 * Get the VR covering the given virtual address inside the given
 * address space.
 */
struct sos_umem_vmm_vr *sos_umem_vmm_get_vr_at_address(struct sos_umem_vmm_as *as,
                                                       sos_uaddr_t uaddr);
/*
* Accessor functions for the virtual regions
*/
/**
 * Get the address space that owns the given VR.
 */
struct sos_umem_vmm_as *sos_umem_vmm_get_as_of_vr(struct sos_umem_vmm_vr *vr);
/**
 * Get the set of callbacks attached to the given VR.
 */
struct sos_umem_vmm_vr_ops *sos_umem_vmm_get_ops_of_vr(struct sos_umem_vmm_vr *vr);
/**
 * Get the current protection of the given VR.
 */
sos_ui32_t
sos_umem_vmm_get_prot_of_vr(struct sos_umem_vmm_vr *vr);
/**
 * Get the flags of the given VR. The SOS_VR_MAP_SHARED bit is the
 * one callers will usually be interested in.
 */
sos_ui32_t
sos_umem_vmm_get_flags_of_vr(struct sos_umem_vmm_vr *vr);
/**
 * Get the resource that the given VR maps.
 */
struct sos_umem_vmm_mapped_resource *sos_umem_vmm_get_mapped_resource_of_vr(struct sos_umem_vmm_vr *vr);
/**
 * Get the user address at which the given mapping starts.
 */
sos_uaddr_t
sos_umem_vmm_get_start_of_vr(struct sos_umem_vmm_vr *vr);
/**
 * Get the size (in user space) of the given mapping.
 */
sos_size_t
sos_umem_vmm_get_size_of_vr(struct sos_umem_vmm_vr *vr);
/**
 * Get the offset, inside the mapped resource, at which the mapping
 * starts.
 */
sos_luoffset_t sos_umem_vmm_get_offset_in_resource(struct sos_umem_vmm_vr *vr);
/*
* Restricted accessor functions. May only be called from inside the
* mmap() callback of the resource being mapped
* (@see sos_umem_vmm_mapped_resource::mmap())
*/
/**
 * Attach a set of callbacks to a VR. The umem_vmm subsystem does not
 * call this function directly: it MUST always be called by the
 * mmap() callback of the resource being mapped (@see
 * sos_umem_vmm_mapped_resource::mmap()). That mmap() method is
 * itself called automatically by sos_umem_vmm_map(), at VR creation
 * time.
 *
 * @note The VR MUST NOT already have a set of operations (fatal error)
 */
sos_ret_t
sos_umem_vmm_set_ops_of_vr(struct sos_umem_vmm_vr *vr,
                           struct sos_umem_vmm_vr_ops *ops);
/*
* mmap API
*/
/** sos_umem_vmm_map() flag: the address given as parameter to
    sos_umem_vmm_map() is not only a hint, it is where the VR is
    expected to be mapped.

    The constant is unsigned on purpose: with a 32-bit int,
    (1 << 31) shifts into the sign bit, which is undefined behaviour
    in C and produces a negative value that sign-extends when widened.
    (1U << 31) is the well-defined bit 31 of a sos_ui32_t flag word. */
#define SOS_VR_MAP_FIXED (1U << 31)
/**
 * Insert into the given address space a new VR mapping the given
 * resource. The semantics follow those of the UNIX mmap() call
 * (SOS_VR_MAP_FIXED included). The real mapping in physical memory
 * is delayed as much as possible (demand paging), and physical pages
 * are shared among processes as much as possible (COW).
 *
 * @param as The address space receiving the new VR
 *
 * @param uaddr Pointer to the requested address: *uaddr must be
 * page-aligned, and can be NULL. On success, the address of the
 * mapping is stored in *uaddr
 *
 * @param size The size of the mapping in user space
 *
 * @param access_rights The allowed accesses to the mapped resource
 * (@see SOS_VM_MAP_PROT_* flags in hwcore/paging.h)
 *
 * @param flags mainly: is it a shared mapping (SOS_VR_MAP_SHARED) or
 * not ?
 *
 * @param resource MUST be NON NULL, and its mmap() method must also
 * be NON NULL
 *
 * @param offset_in_resource where, inside the resource, the mapping
 * starts
 *
 * @return SOS_OK on success (address of the mapping stored in uaddr)
 *
 * @note There is no need to call
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t sos_umem_vmm_map(struct sos_umem_vmm_as *as,
                           sos_uaddr_t *uaddr,
                           sos_size_t size,
                           sos_ui32_t access_rights,
                           sos_ui32_t flags,
                           struct sos_umem_vmm_mapped_resource *resource,
                           sos_luoffset_t offset_in_resource);
/**
 * Remove the mappings of the given address interval. Zero, one or
 * several VRs might end up partially or completely unmapped. The
 * semantics are those of unix munmap().
 *
 * @note This function automatically calls
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t sos_umem_vmm_unmap(struct sos_umem_vmm_as *as,
                             sos_uaddr_t uaddr,
                             sos_size_t size);
/**
 * Write the given pages back to their backing store: the sync_page
 * method is called for each of the dirty pages. The MMU is expected
 * to be configured for the given AS !
 *
 * @param flags presumably a SOS_MSYNC_* value - TODO confirm against
 * umem_vmm.c
 *
 * @note MAKE SURE YOU CALL
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t sos_umem_vmm_sync(struct sos_umem_vmm_as *as,
                            sos_uaddr_t uaddr,
                            sos_size_t size,
                            sos_ui32_t flags);
/**
 * Modify the access rights of the given address interval. Zero, one
 * or several VRs might be concerned; a VR that is only partially
 * concerned by the change in protection gets split (into 2 or 3 VRs,
 * see the chprot() calling sequences documented with struct
 * sos_umem_vmm_vr_ops). The semantics are those of unix mprotect().
 *
 * @param new_access_rights @see SOS_VM_MAP_PROT_* flags in
 * hwcore/paging.h
 *
 * @note MAKE SURE YOU CALL
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t sos_umem_vmm_chprot(struct sos_umem_vmm_as *as,
                              sos_uaddr_t uaddr,
                              sos_size_t size,
                              sos_ui32_t new_access_rights);
/**
 * Flag (bit 30) for sos_umem_vmm_resize(): the VR being resized may
 * be moved elsewhere if there is not enough room to resize it
 * in-place
 */
#define SOS_VR_REMAP_MAYMOVE (1 << 30)
/**
 * Find the region covering the old_uaddr/old_size interval and
 * resize it so that it matches the *new_uaddr/new_size requirements.
 * This is a variant of Unix's mremap() which also allows resizing
 * the VR by its low addresses (mremap only allows resizing a VR by
 * its top address).
 *
 * @param old_uaddr Low address of the interval covered by the VR to
 * resize
 *
 * @param old_size Size of the interval covered by the VR to resize
 *
 * @param new_uaddr In/out; MUST BE page-aligned ! On entry: the new
 * start address of the VR, which allows changing its low address. On
 * return: the actual start address of the VR (it might differ when
 * the SOS_VR_REMAP_MAYMOVE flag is set)
 *
 * @param new_size The size requested for the VR. Might be smaller or
 * larger than the original VR size
 *
 * @param flags Essentially: 0 or SOS_VR_REMAP_MAYMOVE
 *
 * @note MAKE SURE YOU CALL
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t sos_umem_vmm_resize(struct sos_umem_vmm_as *as,
                              sos_uaddr_t old_uaddr,
                              sos_size_t old_size,
                              sos_uaddr_t *new_uaddr /* in/out */,
                              sos_size_t new_size,
                              sos_ui32_t flags);
/*
* Heap management API (ie libc's malloc support)
*/
/**
 * Move the top address of the heap.
 *
 * @param new_top_uaddr The requested top address of the heap. When
 * NULL, nothing is changed
 *
 * @return The top address of the heap, after the update (if any)
 */
sos_uaddr_t sos_umem_vmm_brk(struct sos_umem_vmm_as *as,
                             sos_uaddr_t new_top_uaddr);
/*
* Reserved functions
*/
/**
 * Entry point used by the main page fault handler when no physical
 * page is mapped for the given address of the current address space.
 * It is only called when:
 * - the access (read / write) is allowed on this VR
 * - no physical page is mapped yet
 * The function first calls sos_paging_try_resolve_COW(), which
 * resolves the COW if a COW access pattern is detected. If that is
 * unsuccessful, it calls the sos_umem_vmm_vr_ops::page_in() method of
 * the VR.
 *
 * @param uaddr The address whose access caused the fault.
 *
 * @param write_access Was it a write access ?
 *
 * @param user_access Was it a user access ? Or a kernel access (by
 * uaccess.h functions) ?
 *
 * @return SOS_OK when the fault could be solved, ie a page could be
 * mapped for the given address. -SOS_EFAULT otherwise, meaning that
 * the faulting thread should be terminated or signalled (SIGSEGV)
 *
 * @note The current mm_context MUST be that of the current thread
 * (the one which caused the exception) !
 */
sos_ret_t
sos_umem_vmm_try_resolve_page_fault(sos_uaddr_t uaddr,
                                    sos_bool_t write_access,
                                    sos_bool_t user_access);
/**
 * Set up the initial heap, once the program code/data is mapped.
 * Called by the ELF32 program loader.
 */
sos_ret_t sos_umem_vmm_init_heap(struct sos_umem_vmm_as *as,
                                 sos_uaddr_t heap_start);
#endif /* _SOS_UMEM_VMM_H_ */