/* Copyright (C) 2005,2006 David Decotigny

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
   USA.
*/
#ifndef _SOS_UMEM_VMM_H_
#define _SOS_UMEM_VMM_H_

/**
 * @file umem_vmm.h
 *
 * Management of the address space of a process in SOS. The so-called
 * "address space" of a process consists of the description of the
 * virtual addresses that are valid in the user space of a process (in
 * SOS: addresses 1G-4G). The kernel-space of a process is managed by
 * the "kmem" subsystem, and is kept identical across all the
 * processes in the system.
 *
 * The umem_vmm subsystem handles the following features:
 *  - demand-mapping of resources (files: mmap): mapping in physical RAM
 *    will be delayed as much as possible, until the process really
 *    needs to access the mapped addresses
 *  - mprotect/mremap support
 *  - private and shared mappings
 *  - Copy-On-Write (COW) of the private mappings upon fork() to favour
 *    shared physical memory as much as possible
 *  - "heap" management (brk/sbrk)
 *
 * Swap is NOT supported (yet), which means that the following is NOT
 * supported:
 *  - locked/reserved I/O pages (everything is locked in RAM)
 *  - "safe" demand-mapping of anonymous pages, i.e. conservative VMM
 *    allocation (alloc of anonymous pages on the swap)
 * Other unsupported features:
 *  - dynamically-resizable regions (Linux's GROWUP/GROWDOWN vma): the
 *    user stack is expected to have a suitable virtual size from the
 *    beginning, or sos_umem_vmm_resize() must be used explicitly to
 *    resize it
 *  - no provision of "stack size" accounting, since there are
 *    multiple stacks (i.e. user threads) in a process: which stack to
 *    consider ???
 *
 * The address space is divided into "virtual regions" (aka "VR") that
 * describe a single mapping, aka a segment of contiguous pages in
 * user-space virtual memory. Each such virtual region "maps" a
 * "resource" and is characterised by:
 *   - its base address and length in user-space
 *   - the allowed accesses, aka "protection" (read-only or read/write)
 *   - the resource it maps in virtual memory
 *
 * A so-called resource is typically:
 *   - a file
 *   - a device
 *   - an area initially full of zeros (the VRs mapping this are called
 *     "anonymous mappings")
 *
 * The implementation is very close to that of Linux and Kos. This is
 * a "simple" implementation, not the most elegant one, such as those
 * based on "shadow objects" hierarchies as found in BSD 4.4 and Mach,
 * or that of Solaris (based on the "anon" lists). Actually, this
 * implementation does not use "shadow-objects"/anon list when a COW
 * page of a shared mapping is made anonymous. This won't hurt the
 * implementation of the basic demand-mapping mechanism; on the
 * contrary, it will make things simpler. But this will largely impact
 * the implementation of the swap-in/swap-out strategies, as these
 * would require a non-trivial intertwining of low-level and higher
 * level algorithms.
 */


/**
 * Definition of an "address space" in SOS. This is an opaque
 * structure defined in umem_vmm.c. Its main role is to list virtual
 * regions. It mainly consists of:
 *  - a reference to the process owning it
 *  - maximum allowed protection (i.e. can it be mapped read-only or
 *    read/write ?)
 *  - the list of VRs mapping resources
 *  - a mm_context that reflects the configuration of the MMU
 *  - the location of the heap for this process
 *  - statistics
 */
struct sos_umem_vmm_as;


/**
 * Definition of a "virtual region". Linux would call them "vma"
 * (Virtual Memory Area), and Solaris: "segments". It mainly consists
 * of:
 *  - the start/end addresses of the mapping
 *  - a pointer to the resource that it maps
 *  - the type of mapping (shared/private)
 *  - the actual protection flags (@see SOS_VM_MAP_PROT_* flags in
 *    hwcore/paging.h)
 *  - a set of callbacks (@see sos_umem_vmm_vr_ops below) automatically
 *    called by the umem_vmm subsystem each time the VR is modified
 */
struct sos_umem_vmm_vr;


/** VR flag: region can be shared between a process and its children */
#define SOS_VR_MAP_SHARED (1 << 0)


/* NOTE(review): the operands of the two #include directives below are
   missing in this copy of the file (they look like they were stripped
   as angle-bracket markup). Presumably they name the headers providing
   the sos_* scalar types and struct sos_process used further down --
   TODO restore the original header names. */
#include
#include


/**
 * The callbacks applicable on a virtual region. Automatically called
 * by the umem_vmm subsystem.
*
 * Calling sequences:
 * - duplicate_as() (aka fork()):
 *    vr->ops->ref()
 *    add vr to lists
 * - delete_as() (aka exit()):
 *    vr->ops->unmap()
 *    remove vr from lists
 *    vr->ops->unref()
 * - mmap():
 *    -> left + new + right VRs can fusion:
 *         remove right_vr from list
 *         right_vr->ops->unref()
 *    -> left + new VRs can fusion:
 *         nothing
 *    -> new + right VRs can fusion:
 *         nothing
 *    -> isolated:
 *         add new_vr to lists
 *         new_vr->map()
 *         new_vr->ops->ref()
 * - munmap():
 *    -> VR totally unmapped:
 *         vr->ops->unmap()
 *         remove vr from lists
 *         vr->ops->unref()
 *    -> VR unmapped in the middle (split into 2):
 *         add (new) right VR into the lists
 *         vr->unmap(middle_unmapped_area)
 *         right_vr->ops->ref()
 *    -> VR unmapped on its left:
 *         vr->ops->unmap(left_unmapped_area)
 *    -> VR unmapped on its right:
 *         vr->ops->unmap(right_unmapped_area)
 * - chprot():
 *    -> VR totally chprot:
 *         nothing
 *    -> VR chprot in the middle (split into 3):
 *         add (new) middle+right VRs into the lists
 *         middle_vr->ops->ref()
 *         right_vr->ops->ref()
 *    -> VR chprot on its left (split into 2):
 *         add (new) right VR into the lists
 *         right_vr->ops->ref()
 *    -> VR chprot on its right (split into 2):
 *         add (new) right VR into the lists
 *         right_vr->ops->ref()
 * - resize():
 *    -> if moving the VR: map/unmap
 *    -> otherwise: nothing
 */
struct sos_umem_vmm_vr_ops
{
  /**
   * Called after the virtual region has been inserted
   * inside its address space.
   * @note Optional
   */
  void (*ref)(struct sos_umem_vmm_vr * vr);

  /**
   * Called when the virtual region is removed from its
   * address space
   * @note Optional
   */
  void (*unref)(struct sos_umem_vmm_vr * vr);

  /**
   * Called when part or all of a VR is unmapped
   *
   * @param uaddr Start of the unmapped interval (user address)
   * @param size  Size of the unmapped interval
   * @note Optional
   */
  void (*unmap)(struct sos_umem_vmm_vr * vr,
                sos_uaddr_t uaddr, sos_size_t size);

  /**
   * Called by the page fault handler to map data at the given virtual
   * address. In the Linux kernel, this callback is named "nopage".
   *
   * @note MANDATORY
   */
  sos_ret_t (*page_in)(struct sos_umem_vmm_vr * vr,
                       sos_uaddr_t uaddr,
                       sos_bool_t write_access);

  /**
   * Called to synchronize the contents of the given page with its
   * backing store. This method is responsible for resetting the dirty
   * flag of the page (@see sos_paging_set_dirty). Appropriate locking
   * MUST be handled internally by this method.
   *
   * @param flags SOS_MSYNC_* flags (see below)
   *
   * @note Optional
   */
  sos_ret_t (*sync_page)(struct sos_umem_vmm_vr * vr,
                         sos_uaddr_t page_uaddr,
                         sos_ui32_t flags);
  /** Flags for msync. SOS_MSYNC_ASYNC evaluates to 0, i.e. it is the
      absence of the SOS_MSYNC_SYNC bit rather than a bit of its own */
#define SOS_MSYNC_SYNC  (1 << 0)
#define SOS_MSYNC_ASYNC (0 << 0)
};


/**
 * The definition of a mapped resource. Typically, a mapped resource
 * is a file or a device: in both cases, only part of the resource is
 * mapped by each VR, this part is given by the offset_in_resource
 * field of the VR, and the size field of the VR.
 */
struct sos_umem_vmm_mapped_resource
{
  /** Represents the maximum authorized SOS_VR_PROT_* for the VRs
      mapping it */
  sos_ui32_t allowed_access_rights;

  /** Some flags associated with the resource. Currently only
      SOS_MAPPED_RESOURCE_ANONYMOUS is supported */
  sos_ui32_t flags;

  /** List of VRs mapping this resource */
  struct sos_umem_vmm_vr * list_vr;

  /**
   * MANDATORY Callback function called when a new VR is created,
   * which maps the resource. This callback is allowed to change the
   * following fields of the VR:
   *   - sos_umem_vmm_set_ops_of_vr()
   */
  sos_ret_t (*mmap)(struct sos_umem_vmm_vr *);

  /**
   * Custom data that the user is free to define: the umem_vmm
   * subsystem won't ever look at it or change it.
   */
  void *custom_data;
};


/** Indicates that this resource is not backed by any physical
    storage. This means that the "offset_in_resource" field of the
    VRs will be computed by sos_umem_vmm_map() */
#define SOS_MAPPED_RESOURCE_ANONYMOUS (1 << 0)


/**
 * Physical address of THE page (full of 0s) used for anonymous
 * mappings.
Anybody can map it provided it is ALWAYS in read-only
 * mode
 */
extern sos_paddr_t sos_zero_physpage;


/**
 * "ZERO" page address mapped in kernel space
 */
extern sos_vaddr_t sos_zero_kernelpage;


/**
 * Setup the umem_vmm subsystem.
 */
sos_ret_t sos_umem_vmm_subsystem_setup(void);


/**
 * Get the current effective address space. This may be the "normal"
 * address space of the current thread, but not necessarily: this
 * might be the address space of just another process ! This may
 * happen if a kernel thread of process P wants to access the address
 * space of another process P2. This might also be NULL (no access to
 * user space needed).
 */
struct sos_umem_vmm_as * sos_umem_vmm_get_current_as(void);


/**
 * Change the current effective address space, possibly
 * reconfiguring the MMU. This will increase the owning process's
 * reference count of the given AS, and decrease that of the previous
 * AS (when different).
 *
 * @param as may be NULL (no need to access user space)
 */
sos_ret_t sos_umem_vmm_set_current_as(struct sos_umem_vmm_as * as);


/**
 * Create a new, empty, address space
 *
 * @param owner The process that will own the new address space
 *
 * @note no need to call
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
struct sos_umem_vmm_as *
sos_umem_vmm_create_empty_as(struct sos_process *owner);


/**
 * Create a new address space, copy of the model_as address
 * space. All the translations belonging to private mappings are
 * marked 'read-only' to activate the "copy-on-write" semantics.
 *
 * @param model_as The address space to copy
 * @param for_owner The process that will hold the new address space
 *
 * @note automatically calls
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
struct sos_umem_vmm_as *
sos_umem_vmm_duplicate_as(struct sos_umem_vmm_as * model_as,
                          struct sos_process *for_owner);


/**
 * Called at process deletion time, to remove all mappings present in
 * the address space. This function not only deletes all the VR data
 * structures, it also calls the unmap()/unref() callbacks of these
 * VRs. However, the physical pages mapped inside the address space
 * won't be unmapped at this stage: they will be unmapped all in one
 * go when the underlying mm_context becomes unused.
 *
 * @note no need to call
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t
sos_umem_vmm_delete_as(struct sos_umem_vmm_as * as);


/*
 * Accessor functions for the address space
 */

/** Retrieve the pointer (NOT a new reference !) to the process owning
    the given address space. */
struct sos_process *
sos_umem_vmm_get_process(struct sos_umem_vmm_as * as);

/** Retrieve the pointer (NOT a new reference !) to the MMU
    configuration for the given address space */
struct sos_mm_context *
sos_umem_vmm_get_mm_context(struct sos_umem_vmm_as * as);

/** Retrieve a pointer to the VR that covers the given virtual address
    in the given address space */
struct sos_umem_vmm_vr *
sos_umem_vmm_get_vr_at_address(struct sos_umem_vmm_as * as,
                               sos_uaddr_t uaddr);


/*
 * Accessor functions for the virtual regions
 */

/** Retrieve the address space owning the given VR */
struct sos_umem_vmm_as *
sos_umem_vmm_get_as_of_vr(struct sos_umem_vmm_vr * vr);

/** Retrieve the set of callbacks of the given VR */
struct sos_umem_vmm_vr_ops *
sos_umem_vmm_get_ops_of_vr(struct sos_umem_vmm_vr * vr);

/** Retrieve the current protection of the given VR */
sos_ui32_t sos_umem_vmm_get_prot_of_vr(struct sos_umem_vmm_vr * vr);

/** Retrieve the flags of the given VR.
One will especially be interested in the SOS_VR_MAP_SHARED bit */ sos_ui32_t sos_umem_vmm_get_flags_of_vr(struct sos_umem_vmm_vr * vr); /** Retrieve the resource mapped by the VR */ struct sos_umem_vmm_mapped_resource * sos_umem_vmm_get_mapped_resource_of_vr(struct sos_umem_vmm_vr * vr); /** Retrieve the start user address for the given mapping */ sos_uaddr_t sos_umem_vmm_get_start_of_vr(struct sos_umem_vmm_vr * vr); /** Retrieve the size (in user space) of the given mapping */ sos_size_t sos_umem_vmm_get_size_of_vr(struct sos_umem_vmm_vr * vr); /** Retrieve the offset in the resource of the mapping */ sos_luoffset_t sos_umem_vmm_get_offset_in_resource(struct sos_umem_vmm_vr * vr); /* * Restricted accessor functions. May only be called from inside the * map() callback of a VR */ /** * Function that is not called directly by the umem_subsystem: It MUST * always be called by the mmap() callback of the resource being * mapped (@see sos_umem_vmm_mapped_resource::mmap()). The mmap() * method is called at VR creation time, automatically by * sos_umem_vmm_map(). * * @note The VR MUST NOT already have a set of operations (fatal error) */ sos_ret_t sos_umem_vmm_set_ops_of_vr(struct sos_umem_vmm_vr * vr, struct sos_umem_vmm_vr_ops * ops); /* * mmap API */ /** sos_umem_vmm_map() flag: the address given as parameter to sos_umem_vmm_map() is not only a hint, it is where the VR is expected to be mapped */ #define SOS_VR_MAP_FIXED (1 << 31) /** * Add a new VR in the given address space, that maps the given * resource. Its semantics follows that of the UNIX mmap() call * (including SOS_VR_MAP_FIXED). Real mapping in physical memory will * be delayed as much as possible (demand paging) and the physical * pages will be shared among processes as much as possible (COW). * * @param *uaddr must be page-aligned, and can be NULL. 
It stores the
 * address of the mapping, when successful
 *
 * @param size The size of the mapping in user space
 *
 * @param access_rights The allowed accesses to the mapped resource
 * (@see SOS_VM_MAP_PROT_* flags in hwcore/paging.h)
 *
 * @param flags mainly: is it shared mapping (SOS_VR_MAP_SHARED) or not ?
 *
 * @param resource MUST be NON NULL, and its mmap() method must also
 * be NON NULL
 *
 * @param offset_in_resource where inside the resource does the
 * mapping start
 *
 * @return SOS_OK on success (address of the mapping stored in uaddr)
 *
 * @note no need to call
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t
sos_umem_vmm_map(struct sos_umem_vmm_as * as,
                 sos_uaddr_t *uaddr, sos_size_t size,
                 sos_ui32_t access_rights,
                 sos_ui32_t flags,
                 struct sos_umem_vmm_mapped_resource * resource,
                 sos_luoffset_t offset_in_resource);


/**
 * Unmap the given address interval. This might imply the partial or
 * complete unmapping of 0, 1 or more VRs. Same semantics as unix
 * munmap()
 *
 * @note automatically calls
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t
sos_umem_vmm_unmap(struct sos_umem_vmm_as * as,
                   sos_uaddr_t uaddr, sos_size_t size);


/**
 * Flush the given pages to backing store. Call the sync_page method
 * for each of the dirty pages. The MMU is expected to be configured
 * for the given AS !
 *
 * @param flags SOS_MSYNC_* flags, handed to the sync_page method --
 * NOTE(review): presumably passed through unchanged; confirm in
 * umem_vmm.c
 *
 * @note MAKE SURE YOU CALL
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t
sos_umem_vmm_sync(struct sos_umem_vmm_as * as,
                  sos_uaddr_t uaddr, sos_size_t size,
                  sos_ui32_t flags);


/**
 * Change the access rights of the given address interval. This might
 * concern 0, 1 or more VRs, and result in the splitting in 1 or 2 VRs
 * if they are partially concerned by the change in protection. Same
 * semantics as unix mprotect()
 *
 * @param new_access_rights @see SOS_VM_MAP_PROT_* flags in hwcore/paging.h
 *
 * @note MAKE SURE YOU CALL
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t
sos_umem_vmm_chprot(struct sos_umem_vmm_as * as,
                    sos_uaddr_t uaddr, sos_size_t size,
                    sos_ui32_t new_access_rights);



/**
 * Flag for sos_umem_vmm_resize() to indicate that the VR being
 * resized can be moved elsewhere if there is not enough room to
 * resize it in-place
 */
#define SOS_VR_REMAP_MAYMOVE (1 << 30)

/**
 * Lookup the region covering the old_uaddr/old_size interval, and
 * resize it to match the *new_uaddr/new_size requirements. This is a
 * variant of Unix's mremap() that allows resizing the VR by its
 * low-addresses (mremap only allows resizing a VR by its
 * top-address).
 *
 * @param old_uaddr Low address of the interval covered by the VR to resize
 *
 * @param old_size Size of the interval covered by the VR to resize
 *
 * @param new_uaddr MUST BE page-aligned ! Initially: the new start
 * address of the VR, allowing to change the low-address. Once the
 * function returns: the actual start address of the VR (which might
 * be different, due to SOS_VR_REMAP_MAYMOVE flag, when set)
 *
 * @param new_size The size requested for the VR. Might be
 * smaller/larger than the original VR size
 *
 * @param flags Essentially: 0 or SOS_VR_REMAP_MAYMOVE
 *
 * @note MAKE SURE YOU CALL
 * sos_thread_prepare_user_space_access()/sos_thread_end_user_space_access()
 */
sos_ret_t
sos_umem_vmm_resize(struct sos_umem_vmm_as * as,
                    sos_uaddr_t old_uaddr, sos_size_t old_size,
                    sos_uaddr_t /* in/out */*new_uaddr, sos_size_t new_size,
                    sos_ui32_t flags);


/*
 * Heap management API (i.e. libc's malloc support)
 */

/**
 * Change the top address of the heap.
 *
 * @param new_top_uaddr When NULL don't change anything.
Otherwise:
 * change the top address of the heap
 *
 * @return The top address of the heap after having been updated (if
 * ever)
 */
sos_uaddr_t
sos_umem_vmm_brk(struct sos_umem_vmm_as * as,
                 sos_uaddr_t new_top_uaddr);


/*
 * Reserved functions
 */

/**
 * Called by the main page fault handler when a physical page is not
 * mapped for the given address of the current address space. This
 * function is called only if:
 *  - The access (read / write) is allowed on this VR
 *  - no physical page is mapped yet
 * This function first calls the sos_paging_try_resolve_COW() to
 * resolve the COW if a COW access pattern is detected, and, if
 * unsuccessful, the sos_umem_vmm_vr_ops::page_in() method of the VR.
 *
 * @param uaddr The address that was accessed, causing the fault.
 *
 * @param write_access Was it write access ?
 *
 * @param user_access Was it a user access ? Or a kernel access (by
 * uaccess.h functions) ?
 *
 * @return SOS_OK when the fault could be solved, i.e. a page could be
 * mapped for the given address. -SOS_EFAULT otherwise, meaning the
 * faulting thread should be terminated or signalled (SIGSEGV)
 *
 * @note: The current mm_context MUST be that of the current thread
 * (which caused the exception) !
 */
sos_ret_t sos_umem_vmm_try_resolve_page_fault(sos_uaddr_t uaddr,
                                              sos_bool_t write_access,
                                              sos_bool_t user_access);



/**
 * Initialize the initial heap once the program code/data is mapped.
 * Called by the ELF32 program loader.
 *
 * @param as         The address space in which the heap is set up
 * @param heap_start User address at which the heap starts
 */
sos_ret_t
sos_umem_vmm_init_heap(struct sos_umem_vmm_as * as,
                       sos_uaddr_t heap_start);

#endif /* _SOS_UMEM_VMM_H_ */