/* sos-code-article10/hwcore/paging.c */


/* Copyright (C) 2004 David Decotigny
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.
*/
#include <sos/physmem.h>
#include <sos/klibc.h>
#include <sos/assert.h>
#include "mm_context.h"
#include "paging.h"
/*
* Important NOTICE concerning the use of the reference & occupation
* counters of the physical pages by the "paging" subsystem:
* - All the kernel PTs are SHARED. This means that as soon as one
* kernel PT belongs to one mm_context, it belongs to ALL the
* mm_contexts. We don't update the real reference count of the PT
* in this respect, because it would require updating the
* reference counts of ALL the kernel PTs as soon as a new
* mm_context is created, or as soon as a mm_context is
* suppressed. This way, the reference count stays constant
* independently of the actual number of PDs really sharing them.
* - We do NOT maintain the occupation count of the PDs. This would add
* a little overhead for no benefit.
* - We DO maintain the occupation count of ALL the PTs: it represents
* the number of PTEs allocated in the PT
*/
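/*
* Illustration of the PT occupation-count invariant described above
* (example values only): a PT holding 3 present PTEs has an occupation
* count of 3. Mapping a 4th page through that PT (sos_paging_map)
* raises the count to 4, and unmapping any of them (sos_paging_unmap)
* lowers it again; when the count drops back to 0, sos_paging_unmap
* releases the PT itself and clears the corresponding PDE.
*/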
/** The structure of a page directory entry. See Intel vol 3 section
3.6.4 */
struct x86_pde
{
sos_ui32_t present :1; /* 1=PT mapped */
sos_ui32_t write :1; /* 0=read-only, 1=read/write */
sos_ui32_t user :1; /* 0=supervisor, 1=user */
sos_ui32_t write_through :1; /* 0=write-back, 1=write-through */
sos_ui32_t cache_disabled :1; /* 1=cache disabled */
sos_ui32_t accessed :1; /* 1=read/write access since last clear */
sos_ui32_t zero :1; /* Intel reserved */
sos_ui32_t page_size :1; /* 0=4kB, 1=4MB or 2MB (depending on PAE) */
sos_ui32_t global_page :1; /* Ignored (Intel reserved) */
sos_ui32_t custom :3; /* Do what you want with them */
sos_ui32_t pt_paddr :20;
} __attribute__ ((packed));
/** Intermediate type to speed up PDE copy */
typedef union {
struct x86_pde pde;
sos_ui32_t ui32;
} x86_pde_val_t;
/** The structure of a page table entry. See Intel vol 3 section
3.6.4 */
struct x86_pte
{
sos_ui32_t present :1; /* 1=PT mapped */
sos_ui32_t write :1; /* 0=read-only, 1=read/write */
sos_ui32_t user :1; /* 0=supervisor, 1=user */
sos_ui32_t write_through :1; /* 0=write-back, 1=write-through */
sos_ui32_t cache_disabled :1; /* 1=cache disabled */
sos_ui32_t accessed :1; /* 1=read/write access since last clear */
sos_ui32_t dirty :1; /* 1=write access since last clear */
sos_ui32_t zero :1; /* Intel reserved */
sos_ui32_t global_page :1; /* 1=No TLB invalidation upon cr3 switch
(when PGE is set in cr4) */
sos_ui32_t custom :3; /* Do what you want with them */
sos_ui32_t paddr :20;
} __attribute__ ((packed));
/** Intermediate type to speed up PTE copy */
typedef union {
struct x86_pte pte;
sos_ui32_t ui32;
} x86_pte_val_t;
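/*
* Worked example of the union trick (hypothetical value): the raw
* 32-bit PTE 0x0012b067 decodes through the bit-fields above as
*   present=1, write=1, user=1, accessed=1, dirty=1, paddr=0x0012b
* i.e. a dirty, user-accessible, read/write mapping of physical page
* 0x0012b000. Assigning x86_pte_val_t.ui32 copies the whole entry in
* a single 32-bit move instead of field by field.
*/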
/** Structure of the x86 CR3 register: the Page Directory Base
Register. See Intel x86 doc Vol 3 section 2.5 */
struct x86_pdbr
{
sos_ui32_t zero1 :3; /* Intel reserved */
sos_ui32_t write_through :1; /* 0=write-back, 1=write-through */
sos_ui32_t cache_disabled :1; /* 1=cache disabled */
sos_ui32_t zero2 :7; /* Intel reserved */
sos_ui32_t pd_paddr :20;
} __attribute__ ((packed));
/**
* Helper macro to control the MMU: invalidate the TLB entry for the
* page located at the given virtual address. See Intel x86 vol 3
* section 3.7.
*/
#define invlpg(vaddr) \
do { \
__asm__ __volatile__("invlpg %0"::"m"(*((unsigned *)(vaddr)))); \
} while(0)
/**
* Helper macro to control the MMU: invalidate the whole TLB. See
* Intel x86 vol 3 section 3.7.
*/
#define flush_tlb() \
do { \
unsigned long tmpreg; \
asm volatile("movl %%cr3,%0\n\tmovl %0,%%cr3" :"=r" \
(tmpreg) : :"memory"); \
} while (0)
/**
* Helper macro to compute the index in the PD for the given virtual
* address
*/
#define virt_to_pd_index(vaddr) \
(((unsigned)(vaddr)) >> 22)
/**
* Helper macro to compute the index in the PT for the given virtual
* address
*/
#define virt_to_pt_index(vaddr) \
( (((unsigned)(vaddr)) >> 12) & 0x3ff )
/**
* Helper macro to compute the offset in the page for the given virtual
* address
*/
#define virt_to_page_offset(vaddr) \
(((unsigned)(vaddr)) & SOS_PAGE_MASK)
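/*
* Worked example of the decomposition (hypothetical address, and
* assuming SOS_PAGE_MASK is 0xfff, as its use above implies):
* for vaddr = 0xb0042abc,
*   virt_to_pd_index(vaddr)    = 0xb0042abc >> 22           = 0x2c0 (PDE #704)
*   virt_to_pt_index(vaddr)    = (0xb0042abc >> 12) & 0x3ff = 0x042 (PTE #66)
*   virt_to_page_offset(vaddr) = 0xb0042abc & 0xfff         = 0xabc
* i.e. the address is translated through PDE 704, then PTE 66 of the
* corresponding PT, plus offset 0xabc inside the 4kB page.
*/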
/**
* Helper function to map a page in the pd.\ Assumes that the RAM
* is identity-mapped, so that the PT's actual (CPU) address can be
* resolved directly from the PD entry
*/
static sos_ret_t paging_setup_map_helper(struct x86_pde * pd,
sos_paddr_t ppage,
sos_vaddr_t vaddr)
{
/* Get the page directory entry and table entry index for this
address */
unsigned index_in_pd = virt_to_pd_index(vaddr);
unsigned index_in_pt = virt_to_pt_index(vaddr);
/* Make sure the page table was mapped */
struct x86_pte * pt;
if (pd[index_in_pd].present)
{
pt = (struct x86_pte*) (pd[index_in_pd].pt_paddr << 12);
/* This test will always be TRUE here, since the setup routine
scans the kernel pages in a strictly increasing order: at
each step, the map will result in the allocation of a new PT
entry. For the sake of clarity, we keep the test here. */
if (pt[index_in_pt].present)
SOS_ASSERT_FATAL(FALSE); /* indicate a fatal error */
}
else
{
/* No : allocate a new one */
pt = (struct x86_pte*) sos_physmem_ref_physpage_new(FALSE);
if (! pt)
return -SOS_ENOMEM;
memset((void*)pt, 0x0, SOS_PAGE_SIZE);
pd[index_in_pd].present = TRUE;
pd[index_in_pd].write = 1; /* It would be too complicated to
determine whether it
corresponds to a real R/W area
of the kernel code/data or
read-only */
pd[index_in_pd].pt_paddr = ((sos_paddr_t)pt) >> 12;
}
/* Map the page in the page table */
pt[index_in_pt].present = 1;
pt[index_in_pt].write = 1; /* It would be too complicated to
determine whether it corresponds to
a real R/W area of the kernel
code/data or to a read-only one */
pt[index_in_pt].user = 0;
pt[index_in_pt].paddr = ppage >> 12;
/* Increase the PT's occupation count because we allocated a new PTE
inside it */
sos_physmem_inc_physpage_occupation((sos_paddr_t)pt);
return SOS_OK;
}
sos_ret_t sos_paging_subsystem_setup(sos_paddr_t identity_mapping_base,
sos_paddr_t identity_mapping_top)
{
/* The PDBR we will setup below */
struct x86_pdbr cr3;
/* Get the PD for the kernel */
struct x86_pde * pd
= (struct x86_pde*) sos_physmem_ref_physpage_new(FALSE);
/* The iterator for scanning the kernel area */
sos_paddr_t paddr;
/* Reset the PD. For the moment, the whole RAM is still identity
mapped, so that physical addresses are also valid virtual addresses */
memset((void*)pd,
0x0,
SOS_PAGE_SIZE);
/* Identity-map the identity_mapping_* area */
for (paddr = identity_mapping_base ;
paddr < identity_mapping_top ;
paddr += SOS_PAGE_SIZE)
{
if (paging_setup_map_helper(pd, paddr, paddr))
return -SOS_ENOMEM;
}
/* Identity-map the PC-specific BIOS/Video area */
for (paddr = BIOS_N_VIDEO_START ;
paddr < BIOS_N_VIDEO_END ;
paddr += SOS_PAGE_SIZE)
{
if (paging_setup_map_helper(pd, paddr, paddr))
return -SOS_ENOMEM;
}
/* OK, the kernel is now identity-mapped in the PD. We still have to
set up the mirroring */
pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].present = TRUE;
pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].write = 1;
pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].user = 0;
pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].pt_paddr
= ((sos_paddr_t)pd)>>12;
/* We now just have to configure the MMU to use our PD. See Intel
x86 doc vol 3, section 3.6.3 */
memset(& cr3, 0x0, sizeof(struct x86_pdbr)); /* Reset the PDBR */
cr3.pd_paddr = ((sos_paddr_t)pd) >> 12;
/* Actual loading of the PDBR in the MMU: setup cr3 + bits 31[Paging
Enabled] and 16[Write Protect] of cr0, see Intel x86 doc vol 3,
sections 2.5, 3.6.1 and 4.11.3 + note table 4-2 */
asm volatile ("movl %0,%%cr3\n\t"
"movl %%cr0,%%eax\n\t"
"orl $0x80010000, %%eax\n\t" /* bit 31 | bit 16 */
"movl %%eax,%%cr0\n\t"
"jmp 1f\n\t"
"1:\n\t"
"movl $2f, %%eax\n\t"
"jmp *%%eax\n\t"
"2:\n\t" ::"r"(cr3):"memory","eax");
/*
* Here, the only memory available is:
* - The BIOS+video area
* - the identity_mapping_base .. identity_mapping_top area
* - the PD mirroring area (4M)
* All accesses to other virtual addresses will generate a #PF
*/
return SOS_OK;
}
/* Assume that the current address space is configured with the mirroring
* enabled, in order to access the PD and the PTs. */
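/*
* How the mirroring arithmetic used below works (illustration only;
* the real value of SOS_PAGING_MIRROR_VADDR is defined in paging.h --
* assume here, for the example, that it is 0x3fc00000, i.e. PD index
* 255):
*   - PT number N of the current address space is reachable at
*       SOS_PAGING_MIRROR_VADDR + N*SOS_PAGE_SIZE
*     because the mirror PDE maps the PD itself as if it were a PT.
*   - The PD appears at
*       SOS_PAGING_MIRROR_VADDR + 255*SOS_PAGE_SIZE = 0x3fcff000
*     which is exactly the "pd" pointer computed at the top of the
*     functions below.
*/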
sos_ret_t sos_paging_map(sos_paddr_t ppage_paddr,
sos_vaddr_t vpage_vaddr,
sos_bool_t is_user_page,
sos_ui32_t flags)
{
/* Get the page directory entry and table entry index for this
address */
unsigned index_in_pd = virt_to_pd_index(vpage_vaddr);
unsigned index_in_pt = virt_to_pt_index(vpage_vaddr);
/* Get the PD of the current context */
struct x86_pde *pd = (struct x86_pde*)
(SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
/* Address of the PT in the mirroring */
struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*index_in_pd);
SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(ppage_paddr));
SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(vpage_vaddr));
/* EXEC permission ignored on x86 */
flags &= ~SOS_VM_MAP_PROT_EXEC;
/* The mapping of anywhere in the PD mirroring is FORBIDDEN ;) */
if ((vpage_vaddr >= SOS_PAGING_MIRROR_VADDR)
&& (vpage_vaddr < SOS_PAGING_MIRROR_VADDR + SOS_PAGING_MIRROR_SIZE))
return -SOS_EINVAL;
/* Map a page for the PT if necessary */
if (! pd[index_in_pd].present)
{
x86_pde_val_t u;
/* No : allocate a new one */
sos_paddr_t pt_ppage
= sos_physmem_ref_physpage_new(! (flags & SOS_VM_MAP_ATOMIC));
if (! pt_ppage)
{
return -SOS_ENOMEM;
}
/* Prepare the value of the PDE */
u.pde = (struct x86_pde){
.present = TRUE,
.write = 1,
.pt_paddr = ((sos_paddr_t)pt_ppage) >> 12
};
/* Is it a PDE concerning the kernel space */
if (vpage_vaddr < SOS_PAGING_MIRROR_VADDR)
{
/* Yes: So we need to update the PDE of ALL the mm_contexts
in the system */
/* First of all: this is a kernel PT */
u.pde.user = 0;
/* Now synchronize all the PD */
SOS_ASSERT_FATAL(SOS_OK ==
sos_mm_context_synch_kernel_PDE(index_in_pd,
u.ui32));
}
else /* We should have written "else if (vpage_vaddr >=
SOS_PAGING_BASE_USER_ADDRESS)" but this is not needed
because the beginning of the function detects and
rejects mapping requests inside the mirroring */
{
/* No: The request concerns the user space. So only the
current MMU context is concerned */
/* First of all: this is a user PT */
u.pde.user = 1;
/* Now update the current PD */
pd[index_in_pd] = u.pde;
}
/*
* The PT is now mapped in the PD mirroring
*/
/* Invalidate TLB for the page we just added */
invlpg(pt);
/* Reset this new PT */
memset((void*)pt, 0x0, SOS_PAGE_SIZE);
}
/* If we allocate a new entry in the PT, increase its occupation
count. */
if (! pt[index_in_pt].present)
sos_physmem_inc_physpage_occupation(pd[index_in_pd].pt_paddr << 12);
/* Otherwise, that means that a physical page is implicitly
unmapped */
else
sos_physmem_unref_physpage(pt[index_in_pt].paddr << 12);
/* Map the page in the page table */
pt[index_in_pt].present = TRUE;
pt[index_in_pt].write = (flags & SOS_VM_MAP_PROT_WRITE)?1:0;
pt[index_in_pt].user = (is_user_page)?1:0;
pt[index_in_pt].paddr = ppage_paddr >> 12;
sos_physmem_ref_physpage_at(ppage_paddr);
/*
* The page is now mapped in the current address space
*/
/* Invalidate TLB for the page we just added */
invlpg(vpage_vaddr);
return SOS_OK;
}
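/*
* Usage sketch (hypothetical addresses): mapping physical page
* 0x0012b000 read/write at user address 0x40001000 in the current
* address space would read:
*
*   sos_ret_t st = sos_paging_map(0x0012b000, 0x40001000,
*                                 TRUE,
*                                 SOS_VM_MAP_PROT_READ
*                                 | SOS_VM_MAP_PROT_WRITE);
*
* On success, the PT covering 0x40000000..0x403fffff is allocated on
* demand if absent, its occupation count is increased, and the
* reference count of physical page 0x0012b000 is incremented. A
* request inside the PD mirroring area returns -SOS_EINVAL.
*/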
sos_ret_t sos_paging_unmap(sos_vaddr_t vpage_vaddr)
{
sos_ret_t pt_dec_occupation_retval;
/* Get the page directory entry and table entry index for this
address */
unsigned index_in_pd = virt_to_pd_index(vpage_vaddr);
unsigned index_in_pt = virt_to_pt_index(vpage_vaddr);
/* Get the PD of the current context */
struct x86_pde *pd = (struct x86_pde*)
(SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
/* Address of the PT in the mirroring */
struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*index_in_pd);
SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(vpage_vaddr));
/* No page mapped at this address ? */
if (! pd[index_in_pd].present)
return -SOS_EINVAL;
if (! pt[index_in_pt].present)
return -SOS_EINVAL;
/* The unmapping of anywhere in the PD mirroring is FORBIDDEN ;) */
if ((vpage_vaddr >= SOS_PAGING_MIRROR_VADDR)
&& (vpage_vaddr < SOS_PAGING_MIRROR_VADDR + SOS_PAGING_MIRROR_SIZE))
return -SOS_EINVAL;
/* Reclaim the physical page */
sos_physmem_unref_physpage(pt[index_in_pt].paddr << 12);
/* Unmap the page in the page table */
memset(pt + index_in_pt, 0x0, sizeof(struct x86_pte));
/* Invalidate TLB for the page we just unmapped */
invlpg(vpage_vaddr);
/* Reclaim this entry in the PT, which may free the PT */
pt_dec_occupation_retval
= sos_physmem_dec_physpage_occupation(pd[index_in_pd].pt_paddr << 12);
SOS_ASSERT_FATAL(pt_dec_occupation_retval >= 0);
if (pt_dec_occupation_retval > 0)
/* If the PT is now completely unused... */
{
x86_pde_val_t u;
/*
* The PT is not referenced by this PD anymore
*/
sos_physmem_unref_physpage(pd[index_in_pd].pt_paddr << 12);
/*
* Reset the PDE
*/
/* Mark the PDE as unavailable */
u.ui32 = 0;
/* Is it a PDE concerning the kernel space */
if (vpage_vaddr < SOS_PAGING_MIRROR_VADDR)
{
/* Now synchronize all the PD */
SOS_ASSERT_FATAL(SOS_OK ==
sos_mm_context_synch_kernel_PDE(index_in_pd,
u.ui32));
}
else /* We should have written "else if (vpage_vaddr >=
SOS_PAGING_BASE_USER_ADDRESS)" but this is not needed
because the beginning of the function detects and
rejects mapping requests inside the mirroring */
{
/* No: The request concerns the user space. So only the
current MMU context is concerned */
pd[index_in_pd] = u.pde;
}
/* Update the TLB */
invlpg(pt);
}
return SOS_OK;
}
sos_ret_t sos_paging_unmap_interval(sos_vaddr_t vaddr,
sos_size_t size)
{
sos_ret_t retval = 0;
if (! SOS_IS_PAGE_ALIGNED(vaddr))
return -SOS_EINVAL;
if (! SOS_IS_PAGE_ALIGNED(size))
return -SOS_EINVAL;
for ( ;
size >= SOS_PAGE_SIZE ;
vaddr += SOS_PAGE_SIZE, size -= SOS_PAGE_SIZE)
if (SOS_OK == sos_paging_unmap(vaddr))
retval += SOS_PAGE_SIZE;
return retval;
}
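/*
* Usage note (hypothetical values): sos_paging_unmap_interval(
* 0x40000000, 3*SOS_PAGE_SIZE) returns 2*SOS_PAGE_SIZE if only two of
* the three pages were actually mapped; callers should not assume the
* return value equals "size".
*/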
sos_ui32_t sos_paging_get_prot(sos_vaddr_t vaddr)
{
sos_ui32_t retval;
/* Get the page directory entry and table entry index for this
address */
unsigned index_in_pd = virt_to_pd_index(vaddr);
unsigned index_in_pt = virt_to_pt_index(vaddr);
/* Get the PD of the current context */
struct x86_pde *pd = (struct x86_pde*)
(SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
/* Address of the PT in the mirroring */
struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*index_in_pd);
/* No page mapped at this address ? */
if (! pd[index_in_pd].present)
return SOS_VM_MAP_PROT_NONE;
if (! pt[index_in_pt].present)
return SOS_VM_MAP_PROT_NONE;
/* Default access right of an available page is "read" on x86 */
retval = SOS_VM_MAP_PROT_READ;
if (pd[index_in_pd].write && pt[index_in_pt].write)
retval |= SOS_VM_MAP_PROT_WRITE;
return retval;
}
sos_ret_t sos_paging_set_prot(sos_vaddr_t vaddr,
sos_ui32_t new_prot)
{
/* Get the page directory entry and table entry index for this
address */
unsigned index_in_pd = virt_to_pd_index(vaddr);
unsigned index_in_pt = virt_to_pt_index(vaddr);
/* Get the PD of the current context */
struct x86_pde *pd = (struct x86_pde*)
(SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
/* Address of the PT in the mirroring */
struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*index_in_pd);
/* EXEC permission ignored on x86 */
new_prot &= ~SOS_VM_MAP_PROT_EXEC;
/* Check flags */
if (new_prot & ~(SOS_VM_MAP_PROT_READ | SOS_VM_MAP_PROT_WRITE))
return -SOS_EINVAL;
if (! (new_prot & SOS_VM_MAP_PROT_READ))
/* x86 READ flag always set by default */
return -SOS_ENOSUP;
/* No page mapped at this address ? */
if (! pd[index_in_pd].present)
return -SOS_EINVAL;
if (! pt[index_in_pt].present)
return -SOS_EINVAL;
/* Update access rights */
pt[index_in_pt].write = ((new_prot & SOS_VM_MAP_PROT_WRITE) != 0);
invlpg(vaddr);
return SOS_OK;
}
sos_ret_t sos_paging_set_prot_of_interval(sos_vaddr_t vaddr,
sos_size_t size,
sos_ui32_t new_prot)
{
if (! SOS_IS_PAGE_ALIGNED(vaddr))
return -SOS_EINVAL;
if (! SOS_IS_PAGE_ALIGNED(size))
return -SOS_EINVAL;
for ( ; size >= SOS_PAGE_SIZE ; vaddr += SOS_PAGE_SIZE, size -= SOS_PAGE_SIZE)
sos_paging_set_prot(vaddr, new_prot);
return SOS_OK;
}
sos_bool_t sos_paging_is_dirty(sos_vaddr_t vaddr)
{
/* Get the page directory entry and table entry index for this
address */
unsigned index_in_pd = virt_to_pd_index(vaddr);
unsigned index_in_pt = virt_to_pt_index(vaddr);
/* Get the PD of the current context */
struct x86_pde *pd = (struct x86_pde*)
(SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
/* Address of the PT in the mirroring */
struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*index_in_pd);
/* No page mapped at this address ? */
if (! pd[index_in_pd].present)
return FALSE;
if (! pt[index_in_pt].present)
return FALSE;
return (pt[index_in_pt].dirty != 0);
}
sos_ret_t sos_paging_set_dirty(sos_vaddr_t vaddr,
sos_bool_t is_dirty)
{
/* Get the page directory entry and table entry index for this
address */
unsigned index_in_pd = virt_to_pd_index(vaddr);
unsigned index_in_pt = virt_to_pt_index(vaddr);
/* Get the PD of the current context */
struct x86_pde *pd = (struct x86_pde*)
(SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
/* Address of the PT in the mirroring */
struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*index_in_pd);
/* No page mapped at this address ? */
if (! pd[index_in_pd].present)
return -SOS_EFAULT;
if (! pt[index_in_pt].present)
return -SOS_EFAULT;
pt[index_in_pt].dirty = is_dirty;
return SOS_OK;
}
sos_paddr_t sos_paging_get_paddr(sos_vaddr_t vaddr)
{
/* Get the page directory entry and table entry index for this
address */
unsigned index_in_pd = virt_to_pd_index(vaddr);
unsigned index_in_pt = virt_to_pt_index(vaddr);
unsigned offset_in_page = virt_to_page_offset(vaddr);
/* Get the PD of the current context */
struct x86_pde *pd = (struct x86_pde*)
(SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
/* Address of the PT in the mirroring */
struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*index_in_pd);
/* No page mapped at this address ? */
if (! pd[index_in_pd].present)
return (sos_paddr_t)NULL;
if (! pt[index_in_pt].present)
return (sos_paddr_t)NULL;
return (pt[index_in_pt].paddr << 12) + offset_in_page;
}
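/*
* Example (hypothetical mapping): if virtual page 0xb0042000 is mapped
* to physical page 0x0012b000, then sos_paging_get_paddr(0xb0042abc)
* returns 0x0012babc; an unmapped address yields (sos_paddr_t)NULL.
*/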
/* *************************************************
* Functions restricted to mm_context module
*/
sos_paddr_t sos_paging_get_current_PD_paddr(void)
{
struct x86_pdbr pdbr;
asm volatile("movl %%cr3, %0\n": "=r"(pdbr));
return (pdbr.pd_paddr << 12);
}
sos_ret_t sos_paging_set_current_PD_paddr(sos_paddr_t paddr_PD)
{
struct x86_pdbr pdbr;
SOS_ASSERT_FATAL(paddr_PD != 0);
SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(paddr_PD));
/* Setup the value of the PDBR */
memset(& pdbr, 0x0, sizeof(struct x86_pdbr)); /* Reset the PDBR */
pdbr.pd_paddr = (paddr_PD >> 12);
/* Configure the MMU according to the PDBR */
asm volatile ("movl %0,%%cr3\n" ::"r"(pdbr));
return SOS_OK;
}
sos_ret_t sos_paging_dispose(sos_vaddr_t vaddr_PD)
{
x86_pde_val_t *pd = (x86_pde_val_t*) vaddr_PD;
x86_pte_val_t *pt;
int index_in_pd;
/* Allocate 1 page in kernel space to map the PTs in order to
unreference the physical pages they reference */
pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
if (! pt)
return -SOS_ENOMEM;
/* (Nothing to do in kernel space) */
/* Reset all the PTs in user space */
for (index_in_pd = (SOS_PAGING_BASE_USER_ADDRESS >> 22) ;
index_in_pd < 1024 ; /* 1 PDE = 1 PT
= 1024 Pages
= 4MB */
index_in_pd ++)
{
sos_paddr_t paddr_pt = (pd[index_in_pd].pde.pt_paddr << 12);
int index_in_pt;
/* Nothing to do if there is no PT */
if (! pd[index_in_pd].pde.present)
{
pd[index_in_pd].ui32 = 0;
continue;
}
/* Map this PT inside kernel */
SOS_ASSERT_FATAL(SOS_OK
== sos_paging_map(paddr_pt,
(sos_vaddr_t)pt, FALSE,
SOS_VM_MAP_PROT_READ
| SOS_VM_MAP_PROT_WRITE));
/* Reset all the mappings in this PT */
for (index_in_pt = 0 ; index_in_pt < 1024 ; index_in_pt ++)
{
/* Ignore unmapped PTE */
if (! pt[index_in_pt].pte.present)
{
pt[index_in_pt].ui32 = 0;
continue;
}
/* Unreference the associated page */
sos_physmem_unref_physpage(pt[index_in_pt].pte.paddr << 12);
/* Decrease occupation count of the PT */
sos_physmem_dec_physpage_occupation(paddr_pt);
/* Reset PTE */
pt[index_in_pt].ui32 = 0;
}
/* Unmap PT */
SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)pt));
/* Reset PDE */
pd[index_in_pd].ui32 = 0;
/* Unreference PT */
sos_physmem_unref_physpage(paddr_pt);
}
/* Unallocate kernel space used for the temporary PT */
SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)pt));
return SOS_OK;
}
sos_ret_t sos_paging_copy_kernel_space(sos_vaddr_t dest_vaddr_PD,
sos_vaddr_t src_vaddr_PD)
{
x86_pde_val_t *src_pd = (x86_pde_val_t*) src_vaddr_PD;
x86_pde_val_t *dest_pd = (x86_pde_val_t*) dest_vaddr_PD;
sos_paddr_t dest_paddr_PD = sos_paging_get_paddr(dest_vaddr_PD);
x86_pde_val_t mirror_pde;
int index_in_pd;
/* Fill destination PD with zeros */
memset((void*)dest_vaddr_PD, 0x0, SOS_PAGE_SIZE);
/* Synchronize it with the master Kernel MMU context. Stop just
before the mirroring ! */
for (index_in_pd = 0 ;
index_in_pd < (SOS_PAGING_MIRROR_VADDR >> 22) ; /* 1 PDE = 1 PT
= 1024 Pages
= 4MB */
index_in_pd ++)
{
/* Copy the master's configuration */
dest_pd[index_in_pd].ui32 = src_pd[index_in_pd].ui32;
/* We DON'T mark the underlying PT and pages as referenced
because all the PDs are equivalent in the kernel space: as
soon as a page is mapped in the kernel, it is mapped by X
address spaces, and as soon as it is unmapped by 1 address
space, it is unmapped in all the others. So, for X address
spaces, the reference counter will always be either 0 or X,
never anything in between: maintaining it correctly would be
of no use and would only waste time updating it. */
}
/* Setup the mirroring for the new address space */
mirror_pde.ui32 = 0;
mirror_pde.pde.present = TRUE;
mirror_pde.pde.write = 1;
mirror_pde.pde.user = 0; /* This is a KERNEL PDE */
mirror_pde.pde.pt_paddr = (dest_paddr_PD >> 12);
dest_pd[SOS_PAGING_MIRROR_VADDR >> 22].ui32 = mirror_pde.ui32;
return SOS_OK;
}
sos_ret_t sos_paging_copy_user_space(sos_vaddr_t dest_vaddr_PD,
sos_vaddr_t src_vaddr_PD)
{
x86_pde_val_t *src_pd = (x86_pde_val_t*) src_vaddr_PD;
x86_pde_val_t *dest_pd = (x86_pde_val_t*) dest_vaddr_PD;
x86_pte_val_t *tmp_src_pt, *tmp_dest_pt;
int index_in_pd;
/* Allocate 2 pages in kernel space to map the PT in order to
perform the copy of the PTs from source to destination */
tmp_src_pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
if (! tmp_src_pt)
return -SOS_ENOMEM;
tmp_dest_pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
if (! tmp_dest_pt)
{
/* Release the first temporary page before returning */
sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt);
return -SOS_ENOMEM;
}
/* Copy each used PT from source to destination */
for (index_in_pd = (SOS_PAGING_BASE_USER_ADDRESS >> 22) ;
index_in_pd < 1024 ; /* 1 PDE = 1 PT
= 1024 Pages
= 4MB */
index_in_pd ++)
{
sos_paddr_t paddr_dest_pt;
int index_in_pt;
/* We first literally copy the source PDE into the destination
PDE. However, please bear in mind that, in the end, both
won't reference the same physical PT: the destination PDE
will be updated (below) to point to the address of its own new
PT */
dest_pd[index_in_pd].ui32 = src_pd[index_in_pd].ui32;
/* Ignore unused PTs */
if (! src_pd[index_in_pd].pde.present)
continue;
/* Allocate the destination PT */
paddr_dest_pt = sos_physmem_ref_physpage_new(TRUE);
if (NULL == (void*)paddr_dest_pt)
{
sos_paging_dispose((sos_vaddr_t)dest_vaddr_PD);
/* Unallocate temporary kernel space used for the copy */
sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt);
sos_kmem_vmm_free((sos_vaddr_t)tmp_dest_pt);
return -SOS_ENOMEM;
}
/* Map source and destination PT */
SOS_ASSERT_FATAL(SOS_OK
== sos_paging_map(src_pd[index_in_pd].pde.pt_paddr << 12,
(sos_vaddr_t)tmp_src_pt, FALSE,
SOS_VM_MAP_PROT_READ));
SOS_ASSERT_FATAL(SOS_OK
== sos_paging_map(paddr_dest_pt,
(sos_vaddr_t)tmp_dest_pt, FALSE,
SOS_VM_MAP_PROT_READ
| SOS_VM_MAP_PROT_WRITE));
/* Copy the contents of the source to the destination PT,
updating the reference counts of the pages */
for (index_in_pt = 0 ; index_in_pt < 1024 ; index_in_pt ++)
{
/* Copy the source PTE */
tmp_dest_pt[index_in_pt].ui32 = tmp_src_pt[index_in_pt].ui32;
/* Ignore non-present pages */
if (! tmp_dest_pt[index_in_pt].pte.present)
continue;
/* Reset the dirty/accessed flags */
tmp_dest_pt[index_in_pt].pte.accessed = 0;
tmp_dest_pt[index_in_pt].pte.dirty = 0;
/* The same physical page is now also mapped in the destination
address space: increase its reference count */
sos_physmem_ref_physpage_at(tmp_src_pt[index_in_pt].pte.paddr << 12);
/* Increase occupation count of the PT */
sos_physmem_inc_physpage_occupation(paddr_dest_pt);
}
/* Unmap the temporary PTs */
SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)tmp_src_pt));
SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)tmp_dest_pt));
/* Update the destination PDE */
dest_pd[index_in_pd].pde.pt_paddr = (paddr_dest_pt >> 12);
/* Reset the accessed flag (a 4kB-page PDE has no dirty flag) */
dest_pd[index_in_pd].pde.accessed = 0;
}
/* Unallocate temporary kernel space used for the copy */
SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt));
SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)tmp_dest_pt));
return SOS_OK;
}
sos_ret_t sos_paging_prepare_COW(sos_uaddr_t base_address,
sos_size_t length)
{
SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(base_address));
SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(length));
SOS_ASSERT_FATAL(SOS_PAGING_IS_USER_AREA(base_address, length));
/* Mark as read-only all the pages that are already mapped in
physical memory */
for ( ;
length > 0 ;
length -= SOS_PAGE_SIZE, base_address += SOS_PAGE_SIZE)
{
sos_paging_set_prot(base_address,
SOS_VM_MAP_PROT_READ);
}
return SOS_OK;
}
sos_ret_t sos_paging_try_resolve_COW(sos_uaddr_t uaddr)
{
sos_ret_t refcnt;
/* Get the page directory entry and table entry index for this
address */
unsigned index_in_pd = virt_to_pd_index(uaddr);
unsigned index_in_pt = virt_to_pt_index(uaddr);
/* Get the PD of the current context */
struct x86_pde *pd = (struct x86_pde*)
(SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));
/* Address of the PT in the mirroring */
struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
+ SOS_PAGE_SIZE*index_in_pd);
/* No page mapped at this address ? */
if (! pd[index_in_pd].present)
return -SOS_EFAULT;
if (! pt[index_in_pt].present)
return -SOS_EFAULT;
/* Read-only PT not supported by kernel ! */
if (! pd[index_in_pd].write)
return -SOS_EFAULT;
/* Cannot understand a COW request if the page is already
read/write */
SOS_ASSERT_FATAL(! pt[index_in_pt].write);
/* We make a private copy of the page only if the currently mapped
page is shared by more than 1 process */
refcnt = sos_physmem_get_physpage_refcount(pt[index_in_pt].paddr << 12);
SOS_ASSERT_FATAL(refcnt > 0);
if (refcnt == 1)
{
/* We are the only address space to reference this page, we can
safely turn it read/write now */
pt[index_in_pt].write = 1;
/* invlpg expects a virtual address: invalidate the TLB entry of
the user page we just turned read/write */
invlpg(SOS_PAGE_ALIGN_INF(uaddr));
}
/* Otherwise we need to make a private copy of the page */
else
{
sos_paddr_t new_ppage;
sos_vaddr_t vpage_src, tmp_dest;
/* For that, we allocate the destination page inside the kernel
space to perform the copy. We will transfer it into its
final user-space address later */
tmp_dest = sos_kmem_vmm_alloc(1, SOS_KMEM_VMM_MAP);
if (! tmp_dest)
return -SOS_ENOMEM;
/* copy the contents of the page */
vpage_src = SOS_PAGE_ALIGN_INF(uaddr);
memcpy((void*)tmp_dest, (void*)vpage_src, SOS_PAGE_SIZE);
/* replace the original (read-only) mapping with a (read/write)
mapping to the new page. This will automatically unreference
the original page */
new_ppage = sos_paging_get_paddr(tmp_dest);
SOS_ASSERT_FATAL(new_ppage != (sos_paddr_t)NULL);
if (SOS_OK != sos_paging_map(new_ppage, vpage_src,
TRUE,
SOS_VM_MAP_PROT_READ
| SOS_VM_MAP_PROT_WRITE))
{
sos_kmem_vmm_free(tmp_dest);
return -SOS_ENOMEM;
}
/* We can now unmap the destination page from inside the
kernel and free the kernel VM range for it */
SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free(tmp_dest));
}
/* That's all, folks ! */
return SOS_OK;
}