/* Copyright (C) 2004  David Decotigny

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA.
*/

/* The original include targets were lost in extraction; these are the
   headers the symbols used below require (sos_physmem_*, memset/memcpy,
   SOS_ASSERT_FATAL, sos_kmem_vmm_*) */
#include <sos/physmem.h>
#include <sos/kmem_vmm.h>
#include <sos/klibc.h>
#include <sos/assert.h>

#include "mm_context.h"

#include "paging.h"

/*
 * Important NOTICE concerning the use of the reference & occupation
 * counters of the physical pages by the "paging" subsystem:
 *   - All the kernel PTs are SHARED. This means that as soon as one
 *     kernel PT belongs to one mm_context, it belongs to ALL the
 *     mm_contexts. We don't update the real reference count of the PT
 *     in this respect, because that would require updating the
 *     reference counts of ALL the kernel PTs as soon as a new
 *     mm_context is created, or as soon as an mm_context is
 *     suppressed. This way, the reference count stays constant,
 *     independently of the actual number of PDs really sharing them.
 *   - We do NOT maintain the occupation count of the PDs. This would
 *     add a little overhead that is useless.
 *   - We DO maintain the occupation count of ALL the PTs: it
 *     represents the number of PTEs allocated in the PT.
 */


/** The structure of a page directory entry. See Intel vol 3 section 3.6.4 */
struct x86_pde
{
  sos_ui32_t present        :1;  /* 1=PT mapped */
  sos_ui32_t write          :1;  /* 0=read-only, 1=read/write */
  sos_ui32_t user           :1;  /* 0=supervisor, 1=user */
  sos_ui32_t write_through  :1;  /* 0=write-back, 1=write-through */
  sos_ui32_t cache_disabled :1;  /* 1=cache disabled */
  sos_ui32_t accessed       :1;  /* 1=read/write access since last clear */
  sos_ui32_t zero           :1;  /* Intel reserved */
  sos_ui32_t page_size      :1;  /* 0=4kB, 1=4MB or 2MB (depending on PAE) */
  sos_ui32_t global_page    :1;  /* Ignored (Intel reserved) */
  sos_ui32_t custom         :3;  /* Do what you want with them */
  sos_ui32_t pt_paddr       :20;
} __attribute__ ((packed));

/** Intermediate type to speed up PDE copy */
typedef union {
  struct x86_pde pde;
  sos_ui32_t     ui32;
} x86_pde_val_t;


/** The structure of a page table entry. See Intel vol 3 section 3.6.4 */
struct x86_pte
{
  sos_ui32_t present        :1;  /* 1=page mapped */
  sos_ui32_t write          :1;  /* 0=read-only, 1=read/write */
  sos_ui32_t user           :1;  /* 0=supervisor, 1=user */
  sos_ui32_t write_through  :1;  /* 0=write-back, 1=write-through */
  sos_ui32_t cache_disabled :1;  /* 1=cache disabled */
  sos_ui32_t accessed       :1;  /* 1=read/write access since last clear */
  sos_ui32_t dirty          :1;  /* 1=write access since last clear */
  sos_ui32_t zero           :1;  /* Intel reserved */
  sos_ui32_t global_page    :1;  /* 1=No TLB invalidation upon cr3 switch
                                    (when PGE set in cr4) */
  sos_ui32_t custom         :3;  /* Do what you want with them */
  sos_ui32_t paddr          :20;
} __attribute__ ((packed));

/** Intermediate type to speed up PTE copy */
typedef union {
  struct x86_pte pte;
  sos_ui32_t     ui32;
} x86_pte_val_t;
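
/*
 * Added sanity sketch (not in the original file): the unions above and
 * the ">> 12" packing used throughout this file assume that each
 * bitfield structure occupies exactly one 32-bit word. The classic
 * negative-array-size trick below makes that assumption explicit at
 * compile time; the typedef names are ours and are otherwise unused.
 */
typedef char x86_pde_must_be_32bit[(sizeof(struct x86_pde) == sizeof(sos_ui32_t)) ? 1 : -1];
typedef char x86_pte_must_be_32bit[(sizeof(struct x86_pte) == sizeof(sos_ui32_t)) ? 1 : -1];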

/** Structure of the x86 CR3 register: the Page Directory Base
    Register. See Intel x86 doc Vol 3 section 2.5 */
struct x86_pdbr
{
  sos_ui32_t zero1          :3;  /* Intel reserved */
  sos_ui32_t write_through  :1;  /* 0=write-back, 1=write-through */
  sos_ui32_t cache_disabled :1;  /* 1=cache disabled */
  sos_ui32_t zero2          :7;  /* Intel reserved */
  sos_ui32_t pd_paddr       :20;
} __attribute__ ((packed));


/**
 * Helper macro to control the MMU: invalidate the TLB entry for the
 * page located at the given virtual address. See Intel x86 vol 3
 * section 3.7.
 */
#define invlpg(vaddr) \
  do { \
    __asm__ __volatile__("invlpg %0"::"m"(*((unsigned *)(vaddr)))); \
  } while(0)

/**
 * Helper macro to control the MMU: invalidate the whole TLB. See
 * Intel x86 vol 3 section 3.7.
 */
#define flush_tlb() \
  do { \
    unsigned long tmpreg; \
    asm volatile("movl %%cr3,%0\n\tmovl %0,%%cr3" :"=r" \
                 (tmpreg) : :"memory"); \
  } while (0)

/**
 * Helper macro to compute the index in the PD for the given virtual
 * address
 */
#define virt_to_pd_index(vaddr) \
  (((unsigned)(vaddr)) >> 22)

/**
 * Helper macro to compute the index in the PT for the given virtual
 * address
 */
#define virt_to_pt_index(vaddr) \
  ( (((unsigned)(vaddr)) >> 12) & 0x3ff )

/**
 * Helper macro to compute the offset in the page for the given virtual
 * address
 */
#define virt_to_page_offset(vaddr) \
  (((unsigned)(vaddr)) & SOS_PAGE_MASK)
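
/*
 * Worked example (added comment): how the macros above split a 32-bit
 * linear address, e.g. for vaddr = 0x403ff123:
 *   virt_to_pd_index(vaddr)    = 0x403ff123 >> 22           = 0x100
 *   virt_to_pt_index(vaddr)    = (0x403ff123 >> 12) & 0x3ff = 0x3ff
 *   virt_to_page_offset(vaddr) = 0x403ff123 & SOS_PAGE_MASK = 0x123
 * i.e. PDE #0x100 selects the PT, PTE #0x3ff selects the 4kB frame,
 * and 0x123 is the byte offset inside that frame (assuming
 * SOS_PAGE_MASK is the usual 0xfff page-offset mask).
 */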

/**
 * Helper function to map a page in the pd.\ Suppose that the RAM
 * is identity mapped to resolve PT actual (CPU) address from the PD
 * entry
 */
static sos_ret_t paging_setup_map_helper(struct x86_pde * pd,
                                         sos_paddr_t ppage,
                                         sos_vaddr_t vaddr)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Make sure the page table was mapped */
  struct x86_pte * pt;
  if (pd[index_in_pd].present)
    {
      pt = (struct x86_pte*) (pd[index_in_pd].pt_paddr << 12);

      /* This test will always be TRUE here, since the setup routine
         scans the kernel pages in a strictly increasing order: at
         each step, the map will result in the allocation of a new PT
         entry. For the sake of clarity, we keep the test here. */
      if (pt[index_in_pt].present)
        SOS_ASSERT_FATAL(FALSE); /* indicate a fatal error */
    }
  else
    {
      /* No : allocate a new one */
      pt = (struct x86_pte*) sos_physmem_ref_physpage_new(FALSE);
      if (! pt)
        return -SOS_ENOMEM;

      memset((void*)pt, 0x0, SOS_PAGE_SIZE);

      pd[index_in_pd].present  = TRUE;
      pd[index_in_pd].write    = 1; /* It would be too complicated to
                                       determine whether it corresponds
                                       to a real R/W area of the kernel
                                       code/data or read-only */
      pd[index_in_pd].pt_paddr = ((sos_paddr_t)pt) >> 12;
    }

  /* Map the page in the page table */
  pt[index_in_pt].present = 1;
  pt[index_in_pt].write   = 1; /* It would be too complicated to
                                  determine whether it corresponds to a
                                  real R/W area of the kernel code/data
                                  or R/O only */
  pt[index_in_pt].user    = 0;
  pt[index_in_pt].paddr   = ppage >> 12;

  /* Increase the PT's occupation count because we allocated a new PTE
     inside it */
  sos_physmem_inc_physpage_occupation((sos_paddr_t)pt);

  return SOS_OK;
}


sos_ret_t sos_paging_subsystem_setup(sos_paddr_t identity_mapping_base,
                                     sos_paddr_t identity_mapping_top)
{
  /* The PDBR we will setup below */
  struct x86_pdbr cr3;

  /* Get the PD for the kernel */
  struct x86_pde * pd
    = (struct x86_pde*) sos_physmem_ref_physpage_new(FALSE);

  /* The iterator for scanning the kernel area */
  sos_paddr_t paddr;

  /* Reset the PD. For the moment, there is still an identity mapping
     of the whole RAM, so that the paddr are also vaddr */
  memset((void*)pd, 0x0, SOS_PAGE_SIZE);

  /* Identity-map the identity_mapping_* area */
  for (paddr = identity_mapping_base ;
       paddr < identity_mapping_top ;
       paddr += SOS_PAGE_SIZE)
    {
      if (paging_setup_map_helper(pd, paddr, paddr))
        return -SOS_ENOMEM;
    }

  /* Identity-map the PC-specific BIOS/Video area */
  for (paddr = BIOS_N_VIDEO_START ;
       paddr < BIOS_N_VIDEO_END ;
       paddr += SOS_PAGE_SIZE)
    {
      if (paging_setup_map_helper(pd, paddr, paddr))
        return -SOS_ENOMEM;
    }

  /* Ok, kernel is now identity mapped in the PD. We still have to set
     up the mirroring */
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].present = TRUE;
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].write   = 1;
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].user    = 0;
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].pt_paddr
    = ((sos_paddr_t)pd) >> 12;

  /* We now just have to configure the MMU to use our PD. See Intel
     x86 doc vol 3, section 3.6.3 */
  memset(& cr3, 0x0, sizeof(struct x86_pdbr)); /* Reset the PDBR */
  cr3.pd_paddr = ((sos_paddr_t)pd) >> 12;

  /* Actual loading of the PDBR in the MMU: setup cr3 + bits 31 [Paging
     Enabled] and 16 [Write Protect] of cr0, see Intel x86 doc vol 3,
     sections 2.5, 3.6.1 and 4.11.3 + note table 4-2 */
  asm volatile ("movl %0,%%cr3\n\t"
                "movl %%cr0,%%eax\n\t"
                "orl $0x80010000, %%eax\n\t" /* bit 31 | bit 16 */
                "movl %%eax,%%cr0\n\t"
                "jmp 1f\n\t"
                "1:\n\t"
                "movl $2f, %%eax\n\t"
                "jmp *%%eax\n\t"
                "2:\n\t"
                ::"r"(cr3):"memory","eax");

  /*
   * Here, the only memory available is:
   *  - The BIOS+video area
   *  - the identity_mapping_base .. identity_mapping_top area
   *  - the PD mirroring area (4M)
   * All accesses to other virtual addresses will generate a #PF
   */

  return SOS_OK;
}
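
/*
 * Note on the "mirroring" trick relied upon by all the functions below
 * (added comment): the PDE at index
 * virt_to_pd_index(SOS_PAGING_MIRROR_VADDR) points back to the PD
 * itself. In the 4MB window starting at SOS_PAGING_MIRROR_VADDR, the
 * MMU therefore resolves
 *   SOS_PAGING_MIRROR_VADDR + SOS_PAGE_SIZE * N
 * to the physical page of the PT installed in PDE #N, and the PD
 * itself appears in that window at index
 * virt_to_pd_index(SOS_PAGING_MIRROR_VADDR), since its mirror PDE acts
 * as its own "PT". This is why the "pd" and "pt" pointers below are
 * plain pointer arithmetic and need no extra mapping.
 */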

/* Suppose that the current address space is configured with the
 * mirroring enabled to access the PD and PT. */
sos_ret_t sos_paging_map(sos_paddr_t ppage_paddr,
                         sos_vaddr_t vpage_vaddr,
                         sos_bool_t  is_user_page,
                         sos_ui32_t  flags)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vpage_vaddr);
  unsigned index_in_pt = virt_to_pt_index(vpage_vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(ppage_paddr));
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(vpage_vaddr));

  /* EXEC permission ignored on x86 */
  flags &= ~SOS_VM_MAP_PROT_EXEC;

  /* The mapping of anywhere in the PD mirroring is FORBIDDEN ;) */
  if ((vpage_vaddr >= SOS_PAGING_MIRROR_VADDR)
      && (vpage_vaddr < SOS_PAGING_MIRROR_VADDR + SOS_PAGING_MIRROR_SIZE))
    return -SOS_EINVAL;

  /* Map a page for the PT if necessary */
  if (! pd[index_in_pd].present)
    {
      x86_pde_val_t u;

      /* No : allocate a new one */
      sos_paddr_t pt_ppage
        = sos_physmem_ref_physpage_new(! (flags & SOS_VM_MAP_ATOMIC));
      if (! pt_ppage)
        {
          return -SOS_ENOMEM;
        }

      /* Prepare the value of the PDE */
      u.pde = (struct x86_pde){
        .present  = TRUE,
        .write    = 1,
        .pt_paddr = ((sos_paddr_t)pt_ppage) >> 12
      };

      /* Is it a PDE concerning the kernel space */
      if (vpage_vaddr < SOS_PAGING_MIRROR_VADDR)
        {
          /* Yes: So we need to update the PDE of ALL the mm_contexts
             in the system */

          /* First of all: this is a kernel PT */
          u.pde.user = 0;

          /* Now synchronize all the PD */
          SOS_ASSERT_FATAL(SOS_OK ==
                           sos_mm_context_synch_kernel_PDE(index_in_pd,
                                                           u.ui32));
        }
      else /* We should have written "else if (vpage_vaddr >=
              SOS_PAGING_BASE_USER_ADDRESS)" but this is not needed
              because the beginning of the function detects and rejects
              mapping requests inside the mirroring */
        {
          /* No: The request concerns the user space. So only the
             current MMU context is concerned */

          /* First of all: this is a user PT */
          u.pde.user = 1;

          /* Now update the current PD */
          pd[index_in_pd] = u.pde;
        }

      /*
       * The PT is now mapped in the PD mirroring
       */

      /* Invalidate TLB for the page we just added */
      invlpg(pt);

      /* Reset this new PT */
      memset((void*)pt, 0x0, SOS_PAGE_SIZE);
    }

  /* If we allocate a new entry in the PT, increase its occupation
     count. */
  if (! pt[index_in_pt].present)
    sos_physmem_inc_physpage_occupation(pd[index_in_pd].pt_paddr << 12);

  /* Otherwise, that means that a physical page is implicitly
     unmapped */
  else
    sos_physmem_unref_physpage(pt[index_in_pt].paddr << 12);

  /* Map the page in the page table */
  pt[index_in_pt].present = TRUE;
  pt[index_in_pt].write   = (flags & SOS_VM_MAP_PROT_WRITE)?1:0;
  pt[index_in_pt].user    = (is_user_page)?1:0;
  pt[index_in_pt].paddr   = ppage_paddr >> 12;
  sos_physmem_ref_physpage_at(ppage_paddr);

  /*
   * The page is now mapped in the current address space
   */

  /* Invalidate TLB for the page we just added */
  invlpg(vpage_vaddr);

  return SOS_OK;
}
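
/*
 * Usage sketch (added comment, not part of the original API docs):
 * mapping one freshly allocated physical page at a user address
 * typically looks like
 *
 *   sos_paddr_t pp = sos_physmem_ref_physpage_new(TRUE);
 *   if (! pp)
 *     return -SOS_ENOMEM;
 *   retval = sos_paging_map(pp, uaddr, TRUE,
 *                           SOS_VM_MAP_PROT_READ | SOS_VM_MAP_PROT_WRITE);
 *   sos_physmem_unref_physpage(pp);
 *
 * The final unref is safe because sos_paging_map() takes its own
 * reference on ppage_paddr (the sos_physmem_ref_physpage_at() call
 * above): if the mapping succeeded, the mapping's reference keeps the
 * page alive; if it failed, the page is returned to the allocator.
 */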

sos_ret_t sos_paging_unmap(sos_vaddr_t vpage_vaddr)
{
  sos_ret_t pt_dec_occupation_retval;

  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vpage_vaddr);
  unsigned index_in_pt = virt_to_pt_index(vpage_vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(vpage_vaddr));

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EINVAL;
  if (! pt[index_in_pt].present)
    return -SOS_EINVAL;

  /* The unmapping of anywhere in the PD mirroring is FORBIDDEN ;) */
  if ((vpage_vaddr >= SOS_PAGING_MIRROR_VADDR)
      && (vpage_vaddr < SOS_PAGING_MIRROR_VADDR + SOS_PAGING_MIRROR_SIZE))
    return -SOS_EINVAL;

  /* Reclaim the physical page */
  sos_physmem_unref_physpage(pt[index_in_pt].paddr << 12);

  /* Unmap the page in the page table */
  memset(pt + index_in_pt, 0x0, sizeof(struct x86_pte));

  /* Invalidate TLB for the page we just unmapped */
  invlpg(vpage_vaddr);

  /* Reclaim this entry in the PT, which may free the PT */
  pt_dec_occupation_retval
    = sos_physmem_dec_physpage_occupation(pd[index_in_pd].pt_paddr << 12);
  SOS_ASSERT_FATAL(pt_dec_occupation_retval >= 0);
  if (pt_dec_occupation_retval > 0)
    /* If the PT is now completely unused... */
    {
      x86_pde_val_t u;

      /*
       * The PT is not referenced by this PD anymore
       */
      sos_physmem_unref_physpage(pd[index_in_pd].pt_paddr << 12);

      /*
       * Reset the PDE
       */

      /* Mark the PDE as unavailable */
      u.ui32 = 0;

      /* Is it a PDE concerning the kernel space */
      if (vpage_vaddr < SOS_PAGING_MIRROR_VADDR)
        {
          /* Now synchronize all the PD */
          SOS_ASSERT_FATAL(SOS_OK ==
                           sos_mm_context_synch_kernel_PDE(index_in_pd,
                                                           u.ui32));
        }
      else /* We should have written "else if (vpage_vaddr >=
              SOS_PAGING_BASE_USER_ADDRESS)" but this is not needed
              because the beginning of the function detects and rejects
              unmapping requests inside the mirroring */
        {
          /* No: The request concerns the user space. So only the
             current MMU context is concerned */
          pd[index_in_pd] = u.pde;
        }

      /* Update the TLB */
      invlpg(pt);
    }

  return SOS_OK;
}


sos_ret_t sos_paging_unmap_interval(sos_vaddr_t vaddr,
                                    sos_size_t  size)
{
  sos_ret_t retval = 0;

  if (! SOS_IS_PAGE_ALIGNED(vaddr))
    return -SOS_EINVAL;
  if (! SOS_IS_PAGE_ALIGNED(size))
    return -SOS_EINVAL;

  for ( ;
        size >= SOS_PAGE_SIZE ;
        vaddr += SOS_PAGE_SIZE, size -= SOS_PAGE_SIZE)
    if (SOS_OK == sos_paging_unmap(vaddr))
      retval += SOS_PAGE_SIZE;

  return retval;
}


sos_ui32_t sos_paging_get_prot(sos_vaddr_t vaddr)
{
  sos_ui32_t retval;

  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return SOS_VM_MAP_PROT_NONE;
  if (! pt[index_in_pt].present)
    return SOS_VM_MAP_PROT_NONE;

  /* Default access right of an available page is "read" on x86 */
  retval = SOS_VM_MAP_PROT_READ;
  if (pd[index_in_pd].write && pt[index_in_pt].write)
    retval |= SOS_VM_MAP_PROT_WRITE;

  return retval;
}


sos_ret_t sos_paging_set_prot(sos_vaddr_t vaddr,
                              sos_ui32_t  new_prot)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* EXEC permission ignored on x86 */
  new_prot &= ~SOS_VM_MAP_PROT_EXEC;

  /* Check flags */
  if (new_prot & ~(SOS_VM_MAP_PROT_READ | SOS_VM_MAP_PROT_WRITE))
    return -SOS_EINVAL;
  if (! (new_prot & SOS_VM_MAP_PROT_READ))
    /* x86 READ flag always set by default */
    return -SOS_ENOSUP;

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EINVAL;
  if (! pt[index_in_pt].present)
    return -SOS_EINVAL;

  /* Update access rights */
  pt[index_in_pt].write = ((new_prot & SOS_VM_MAP_PROT_WRITE) != 0);
  invlpg(vaddr);

  return SOS_OK;
}
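
/*
 * Added note: sos_paging_set_prot() only updates the PTE write bit.
 * This is sufficient because every PDE created in this file
 * (paging_setup_map_helper() and sos_paging_map()) is installed with
 * write=1, so the effective protection reported by
 * sos_paging_get_prot() is governed by the PTE alone.
 */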

sos_ret_t sos_paging_set_prot_of_interval(sos_vaddr_t vaddr,
                                          sos_size_t  size,
                                          sos_ui32_t  new_prot)
{
  if (! SOS_IS_PAGE_ALIGNED(vaddr))
    return -SOS_EINVAL;
  if (! SOS_IS_PAGE_ALIGNED(size))
    return -SOS_EINVAL;

  for ( ;
        size >= SOS_PAGE_SIZE ;
        vaddr += SOS_PAGE_SIZE, size -= SOS_PAGE_SIZE)
    sos_paging_set_prot(vaddr, new_prot);

  return SOS_OK;
}


sos_bool_t sos_paging_is_dirty(sos_vaddr_t vaddr)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return FALSE;
  if (! pt[index_in_pt].present)
    return FALSE;

  return (pt[index_in_pt].dirty != 0);
}


sos_ret_t sos_paging_set_dirty(sos_vaddr_t vaddr, sos_bool_t is_dirty)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EFAULT;
  if (! pt[index_in_pt].present)
    return -SOS_EFAULT;

  pt[index_in_pt].dirty = is_dirty;
  return SOS_OK;
}


sos_paddr_t sos_paging_get_paddr(sos_vaddr_t vaddr)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd    = virt_to_pd_index(vaddr);
  unsigned index_in_pt    = virt_to_pt_index(vaddr);
  unsigned offset_in_page = virt_to_page_offset(vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return (sos_paddr_t)NULL;
  if (! pt[index_in_pt].present)
    return (sos_paddr_t)NULL;

  return (pt[index_in_pt].paddr << 12) + offset_in_page;
}


/* *************************************************
 * Functions restricted to mm_context module
 */


sos_paddr_t sos_paging_get_current_PD_paddr(void)
{
  struct x86_pdbr pdbr;
  asm volatile("movl %%cr3, %0\n": "=r"(pdbr));
  return (pdbr.pd_paddr << 12);
}


sos_ret_t sos_paging_set_current_PD_paddr(sos_paddr_t paddr_PD)
{
  struct x86_pdbr pdbr;

  SOS_ASSERT_FATAL(paddr_PD != 0);
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(paddr_PD));

  /* Setup the value of the PDBR */
  memset(& pdbr, 0x0, sizeof(struct x86_pdbr)); /* Reset the PDBR */
  pdbr.pd_paddr = (paddr_PD >> 12);

  /* Configure the MMU according to the PDBR */
  asm volatile ("movl %0,%%cr3\n" ::"r"(pdbr));

  return SOS_OK;
}
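
/*
 * Added note: reloading cr3 above also flushes all non-global TLB
 * entries (see Intel x86 vol 3, TLB invalidation), so no explicit
 * flush_tlb()/invlpg() is needed when switching to another page
 * directory.
 */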

sos_ret_t sos_paging_dispose(sos_vaddr_t vaddr_PD)
{
  x86_pde_val_t *pd = (x86_pde_val_t*) vaddr_PD;
  x86_pte_val_t *pt;
  int            index_in_pd;

  /* Allocate 1 page in kernel space to map the PTs in order to
     unreference the physical pages they reference */
  pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
  if (! pt)
    return -SOS_ENOMEM;

  /* (Nothing to do in kernel space) */

  /* Reset all the PTs in user space */
  for (index_in_pd = (SOS_PAGING_BASE_USER_ADDRESS >> 22) ;
       index_in_pd < 1024 ; /* 1 PDE = 1 PT = 1024 Pages = 4MB */
       index_in_pd ++)
    {
      sos_paddr_t paddr_pt = (pd[index_in_pd].pde.pt_paddr << 12);
      int index_in_pt;

      /* Nothing to do if there is no PT */
      if (! pd[index_in_pd].pde.present)
        {
          pd[index_in_pd].ui32 = 0;
          continue;
        }

      /* Map this PT inside kernel */
      SOS_ASSERT_FATAL(SOS_OK
                       == sos_paging_map(paddr_pt,
                                         (sos_vaddr_t)pt, FALSE,
                                         SOS_VM_MAP_PROT_READ
                                         | SOS_VM_MAP_PROT_WRITE));

      /* Reset all the mappings in this PT */
      for (index_in_pt = 0 ; index_in_pt < 1024 ; index_in_pt ++)
        {
          /* Ignore unmapped PTE */
          if (! pt[index_in_pt].pte.present)
            {
              pt[index_in_pt].ui32 = 0;
              continue;
            }

          /* Unreference the associated page */
          sos_physmem_unref_physpage(pt[index_in_pt].pte.paddr << 12);

          /* Decrease occupation count of the PT */
          sos_physmem_dec_physpage_occupation(paddr_pt);

          /* Reset PTE */
          pt[index_in_pt].ui32 = 0;
        }

      /* Unmap PT */
      SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)pt));

      /* Reset PDE */
      pd[index_in_pd].ui32 = 0;

      /* Unreference PT */
      sos_physmem_unref_physpage(paddr_pt);
    }

  /* Unallocate kernel space used for the temporary PT */
  SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)pt));

  return SOS_OK;
}


sos_ret_t sos_paging_copy_kernel_space(sos_vaddr_t dest_vaddr_PD,
                                       sos_vaddr_t src_vaddr_PD)
{
  x86_pde_val_t *src_pd        = (x86_pde_val_t*) src_vaddr_PD;
  x86_pde_val_t *dest_pd       = (x86_pde_val_t*) dest_vaddr_PD;
  sos_paddr_t    dest_paddr_PD = sos_paging_get_paddr(dest_vaddr_PD);
  x86_pde_val_t  mirror_pde;
  int            index_in_pd;

  /* Fill destination PD with zeros */
  memset((void*)dest_vaddr_PD, 0x0, SOS_PAGE_SIZE);

  /* Synchronize it with the master Kernel MMU context. Stop just
     before the mirroring ! */
  for (index_in_pd = 0 ;
       index_in_pd < (SOS_PAGING_MIRROR_VADDR >> 22) ; /* 1 PDE = 1 PT
                                                          = 1024 Pages
                                                          = 4MB */
       index_in_pd ++)
    {
      /* Copy the master's configuration */
      dest_pd[index_in_pd].ui32 = src_pd[index_in_pd].ui32;

      /* We DON'T mark the underlying PT and pages as referenced
         because all the PD are equivalent in the kernel space: as soon
         as a page is mapped in the kernel, it is mapped by X address
         spaces, and as soon as it is unmapped by 1 address space, it
         is unmapped in all the others. So that for X address spaces,
         the reference counter will be either 0 or X, and not something
         else: using the reference counter correctly won't be of any
         use and would consume some time in updating it. */
    }

  /* Setup the mirroring for the new address space */
  mirror_pde.ui32 = 0;
  mirror_pde.pde.present  = TRUE;
  mirror_pde.pde.write    = 1;
  mirror_pde.pde.user     = 0; /* This is a KERNEL PDE */
  mirror_pde.pde.pt_paddr = (dest_paddr_PD >> 12);
  dest_pd[SOS_PAGING_MIRROR_VADDR >> 22].ui32 = mirror_pde.ui32;

  return SOS_OK;
}
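
/*
 * Added note: the mirror PDE set up just above is deliberately NOT
 * copied from the source PD: each address space's mirror entry must
 * point to that address space's own PD, otherwise the "pd"/"pt"
 * pointer arithmetic used throughout this file would manipulate the
 * wrong page directory after a context switch.
 */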

sos_ret_t sos_paging_copy_user_space(sos_vaddr_t dest_vaddr_PD,
                                     sos_vaddr_t src_vaddr_PD)
{
  x86_pde_val_t *src_pd  = (x86_pde_val_t*) src_vaddr_PD;
  x86_pde_val_t *dest_pd = (x86_pde_val_t*) dest_vaddr_PD;
  x86_pte_val_t *tmp_src_pt, *tmp_dest_pt;
  int            index_in_pd;

  /* Allocate 2 pages in kernel space to map the PTs in order to
     perform the copy of the PTs from source to destination */
  tmp_src_pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
  if (! tmp_src_pt)
    return -SOS_ENOMEM;

  tmp_dest_pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
  if (! tmp_dest_pt)
    {
      /* Release the first temporary range allocated above */
      sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt);
      return -SOS_ENOMEM;
    }

  /* Copy each used PT from source to destination */
  for (index_in_pd = (SOS_PAGING_BASE_USER_ADDRESS >> 22) ;
       index_in_pd < 1024 ; /* 1 PDE = 1 PT = 1024 Pages = 4MB */
       index_in_pd ++)
    {
      sos_paddr_t paddr_dest_pt;
      int         index_in_pt;

      /* We first literally copy the source PDE in the destination
         PDE. However, please bear in mind that, in the end, both won't
         reference the same physical PT: the destination PDE will be
         updated (below) to match the address of its own new PT */
      dest_pd[index_in_pd].ui32 = src_pd[index_in_pd].ui32;

      /* Ignore unused PTs */
      if (! src_pd[index_in_pd].pde.present)
        continue;

      /* Allocate the destination PT */
      paddr_dest_pt = sos_physmem_ref_physpage_new(TRUE);
      if (NULL == (void*)paddr_dest_pt)
        {
          sos_paging_dispose((sos_vaddr_t)dest_vaddr_PD);

          /* Unallocate temporary kernel space used for the copy */
          sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt);
          sos_kmem_vmm_free((sos_vaddr_t)tmp_dest_pt);
          return -SOS_ENOMEM;
        }

      /* Map source and destination PT */
      SOS_ASSERT_FATAL(SOS_OK
                       == sos_paging_map(src_pd[index_in_pd].pde.pt_paddr << 12,
                                         (sos_vaddr_t)tmp_src_pt,
                                         FALSE,
                                         SOS_VM_MAP_PROT_READ));
      SOS_ASSERT_FATAL(SOS_OK
                       == sos_paging_map(paddr_dest_pt,
                                         (sos_vaddr_t)tmp_dest_pt,
                                         FALSE,
                                         SOS_VM_MAP_PROT_READ
                                         | SOS_VM_MAP_PROT_WRITE));

      /* Copy the contents of the source to the destination PT,
         updating the reference counts of the pages */
      for (index_in_pt = 0 ; index_in_pt < 1024 ; index_in_pt ++)
        {
          /* Copy the source PTE */
          tmp_dest_pt[index_in_pt].ui32 = tmp_src_pt[index_in_pt].ui32;

          /* Ignore non-present pages */
          if (! tmp_dest_pt[index_in_pt].pte.present)
            continue;

          /* Reset the dirty/accessed flags */
          tmp_dest_pt[index_in_pt].pte.accessed = 0;
          tmp_dest_pt[index_in_pt].pte.dirty    = 0;

          /* Increase the reference count of the destination page */
          sos_physmem_ref_physpage_at(tmp_src_pt[index_in_pt].pte.paddr << 12);

          /* Increase occupation count of the PT */
          sos_physmem_inc_physpage_occupation(paddr_dest_pt);
        }

      /* Unmap the temporary PTs */
      SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)tmp_src_pt));
      SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)tmp_dest_pt));

      /* Update the destination PDE */
      dest_pd[index_in_pd].pde.pt_paddr = (paddr_dest_pt >> 12);

      /* Reset the dirty/accessed flags */
      dest_pd[index_in_pd].pde.accessed = 0;
    }

  /* Unallocate temporary kernel space used for the copy */
  SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt));
  SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)tmp_dest_pt));

  return SOS_OK;
}


sos_ret_t sos_paging_prepare_COW(sos_uaddr_t base_address,
                                 sos_size_t  length)
{
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(base_address));
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(length));
  SOS_ASSERT_FATAL(SOS_PAGING_IS_USER_AREA(base_address, length));

  /* Mark all the pages read-only, when already mapped in physical
     memory */
  for ( ;
        length > 0 ;
        length -= SOS_PAGE_SIZE, base_address += SOS_PAGE_SIZE)
    {
      sos_paging_set_prot(base_address, SOS_VM_MAP_PROT_READ);
    }

  return SOS_OK;
}
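
/*
 * Added note (hedged): sos_paging_prepare_COW() only downgrades the
 * PTEs to read-only; the actual copy is deferred to
 * sos_paging_try_resolve_COW() below, which the page-fault handling
 * code (outside this file) is expected to call when a write access
 * faults inside a COW area.
 */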

sos_ret_t sos_paging_try_resolve_COW(sos_uaddr_t uaddr)
{
  sos_ret_t refcnt;

  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(uaddr);
  unsigned index_in_pt = virt_to_pt_index(uaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EFAULT;
  if (! pt[index_in_pt].present)
    return -SOS_EFAULT;

  /* Read-only PT not supported by kernel ! */
  if (! pd[index_in_pd].write)
    return -SOS_EFAULT;

  /* Cannot understand a COW request if the page is already
     read/write */
  SOS_ASSERT_FATAL(! pt[index_in_pt].write);

  /* We do a private copy of the page only if the current mapped page
     is shared by more than 1 process */
  refcnt = sos_physmem_get_physpage_refcount(pt[index_in_pt].paddr << 12);
  SOS_ASSERT_FATAL(refcnt > 0);

  if (refcnt == 1)
    {
      /* We are the only address space to reference this page, we can
         safely turn it read/write now */
      pt[index_in_pt].write = 1;
      /* Invalidate the TLB entry of the (virtual) page whose PTE we
         just changed */
      invlpg(uaddr);
    }

  /* Otherwise we need to make a private copy of the page */
  else
    {
      sos_paddr_t new_ppage;
      sos_vaddr_t vpage_src, tmp_dest;

      /* For that, we allocate the destination page inside the kernel
         space to perform the copy. We will transfer it into its final
         user-space address later */
      tmp_dest = sos_kmem_vmm_alloc(1, SOS_KMEM_VMM_MAP);
      if (! tmp_dest)
        return -SOS_ENOMEM;

      /* copy the contents of the page */
      vpage_src = SOS_PAGE_ALIGN_INF(uaddr);
      memcpy((void*)tmp_dest, (void*)vpage_src, SOS_PAGE_SIZE);

      /* replace the original (read-only) mapping with a (read/write)
         mapping to the new page. This will automatically unreference
         the original page */
      new_ppage = sos_paging_get_paddr(tmp_dest);
      SOS_ASSERT_FATAL(new_ppage != (sos_vaddr_t)NULL);
      if (SOS_OK != sos_paging_map(new_ppage, vpage_src,
                                   TRUE,
                                   SOS_VM_MAP_PROT_READ
                                   | SOS_VM_MAP_PROT_WRITE))
        {
          sos_kmem_vmm_free(tmp_dest);
          return -SOS_ENOMEM;
        }

      /* We can now unmap the destination page from inside the kernel
         and free the kernel VM range for it */
      SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free(tmp_dest));
    }

  /* That's all, folks ! */
  return SOS_OK;
}
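
/*
 * Added note (hedged): callers of sos_paging_try_resolve_COW() can
 * interpret -SOS_EFAULT as "this was not a copy-on-write situation"
 * (i.e. a genuine protection violation to report to the faulting
 * thread), -SOS_ENOMEM as a transient out-of-memory condition, and
 * SOS_OK as "the faulting write can now be retried".
 */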