/* Copyright (C) 2004  David Decotigny

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
   USA.
*/
#include <sos/physmem.h>
#include <sos/klibc.h>
#include <sos/assert.h>
#include <sos/kmem_vmm.h> /* for sos_kmem_vmm_alloc()/sos_kmem_vmm_free()
                             used below (assumed to be the header that
                             declares them) */

#include "mm_context.h"

#include "paging.h"


/*
 * Important NOTICE concerning the use of the reference & occupation
 * counters of the physical pages by the "paging" subsystem:
 * - All the kernel PTs are SHARED. This means that as soon as one
 *   kernel PT belongs to one mm_context, it belongs to ALL the
 *   mm_contexts. We don't update the real reference count of the PTs
 *   in this respect, because it would require updating the reference
 *   counts of ALL the kernel PTs as soon as a new mm_context is
 *   created, or as soon as an mm_context is destroyed. This way, the
 *   reference count stays constant independently of the actual number
 *   of PDs really sharing them.
 * - We do NOT maintain the occupation count of the PDs. This would add
 *   a little overhead that is useless.
 * - We DO maintain the occupation count of ALL the PTs: it represents
 *   the number of PTEs allocated inside each PT.
 */

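/*
 * Illustration of the policy above, derived from the code below:
 * mapping three user pages through the same, freshly allocated user PT
 * leaves that PT with a reference count of 1 (it is referenced by
 * exactly one PD) and an occupation count of 3 (three PTEs in use).
 * Unmapping the three pages brings the occupation count back to 0, at
 * which point sos_paging_unmap() unreferences the PT and resets the
 * corresponding PDE.
 */
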
/** The structure of a page directory entry. See Intel vol 3 section
    3.6.4 */
struct x86_pde
{
  sos_ui32_t present        :1; /* 1=PT mapped */
  sos_ui32_t write          :1; /* 0=read-only, 1=read/write */
  sos_ui32_t user           :1; /* 0=supervisor, 1=user */
  sos_ui32_t write_through  :1; /* 0=write-back, 1=write-through */
  sos_ui32_t cache_disabled :1; /* 1=cache disabled */
  sos_ui32_t accessed       :1; /* 1=read/write access since last clear */
  sos_ui32_t zero           :1; /* Intel reserved */
  sos_ui32_t page_size      :1; /* 0=4kB, 1=4MB or 2MB (depending on PAE) */
  sos_ui32_t global_page    :1; /* Ignored (Intel reserved) */
  sos_ui32_t custom         :3; /* Do what you want with them */
  sos_ui32_t pt_paddr       :20;
} __attribute__ ((packed));


/** Intermediate type to speed up PDE copy */
typedef union {
  struct x86_pde pde;
  sos_ui32_t     ui32;
} x86_pde_val_t;


/** The structure of a page table entry. See Intel vol 3 section
    3.6.4 */
struct x86_pte
{
  sos_ui32_t present        :1; /* 1=page mapped */
  sos_ui32_t write          :1; /* 0=read-only, 1=read/write */
  sos_ui32_t user           :1; /* 0=supervisor, 1=user */
  sos_ui32_t write_through  :1; /* 0=write-back, 1=write-through */
  sos_ui32_t cache_disabled :1; /* 1=cache disabled */
  sos_ui32_t accessed       :1; /* 1=read/write access since last clear */
  sos_ui32_t dirty          :1; /* 1=write access since last clear */
  sos_ui32_t zero           :1; /* Intel reserved */
  sos_ui32_t global_page    :1; /* 1=No TLB invalidation upon cr3 switch
                                   (when PGE is set in cr4) */
  sos_ui32_t custom         :3; /* Do what you want with them */
  sos_ui32_t paddr          :20;
} __attribute__ ((packed));


/** Intermediate type to speed up PTE copy */
typedef union {
  struct x86_pte pte;
  sos_ui32_t     ui32;
} x86_pte_val_t;


/** Structure of the x86 CR3 register: the Page Directory Base
    Register. See Intel x86 doc Vol 3 section 2.5 */
struct x86_pdbr
{
  sos_ui32_t zero1          :3; /* Intel reserved */
  sos_ui32_t write_through  :1; /* 0=write-back, 1=write-through */
  sos_ui32_t cache_disabled :1; /* 1=cache disabled */
  sos_ui32_t zero2          :7; /* Intel reserved */
  sos_ui32_t pd_paddr       :20;
} __attribute__ ((packed));

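/*
 * Each of the three structures above is a packed 32-bit value, so that
 * 1024 PDEs (resp. PTEs) fill exactly one 4kB page. A minimal sketch of
 * a compile-time check of that invariant (illustrative only, not part
 * of the original code; the typedef names are arbitrary):
 *
 *   typedef char x86_pde_is_32bit[(sizeof(struct x86_pde) == 4) ? 1 : -1];
 *   typedef char x86_pte_is_32bit[(sizeof(struct x86_pte) == 4) ? 1 : -1];
 */
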
/**
 * Helper macro to control the MMU: invalidate the TLB entry for the
 * page located at the given virtual address. See Intel x86 vol 3
 * section 3.7.
 */
#define invlpg(vaddr) \
  do { \
    __asm__ __volatile__("invlpg %0"::"m"(*((unsigned *)(vaddr)))); \
  } while(0)


/**
 * Helper macro to control the MMU: invalidate the whole TLB. See
 * Intel x86 vol 3 section 3.7.
 */
#define flush_tlb() \
  do { \
    unsigned long tmpreg; \
    asm volatile("movl %%cr3,%0\n\tmovl %0,%%cr3" :"=r" \
                 (tmpreg) : :"memory"); \
  } while (0)


/**
 * Helper macro to compute the index in the PD for the given virtual
 * address
 */
#define virt_to_pd_index(vaddr) \
  (((unsigned)(vaddr)) >> 22)


/**
 * Helper macro to compute the index in the PT for the given virtual
 * address
 */
#define virt_to_pt_index(vaddr) \
  ( (((unsigned)(vaddr)) >> 12) & 0x3ff )


/**
 * Helper macro to compute the offset in the page for the given virtual
 * address
 */
#define virt_to_page_offset(vaddr) \
  (((unsigned)(vaddr)) & SOS_PAGE_MASK)

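/*
 * Worked example of the 10/10/12 bit decomposition performed by the
 * three macros above, for the virtual address 0x00403025 (assuming
 * SOS_PAGE_MASK == 0xfff):
 *
 *   virt_to_pd_index(0x00403025)    = 0x00403025 >> 22          = 1
 *   virt_to_pt_index(0x00403025)    = (0x00403025 >> 12) & 0x3ff = 3
 *   virt_to_page_offset(0x00403025) = 0x00403025 & 0xfff         = 0x25
 *
 * i.e. the address is translated through PDE #1 and PTE #3, and falls
 * at offset 0x25 inside that 4kB page.
 */
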
/**
 * Helper function to map a page in the pd. Suppose that the RAM
 * is identity mapped, to resolve the PT's actual (CPU) address from
 * the PD entry
 */
static sos_ret_t paging_setup_map_helper(struct x86_pde * pd,
                                         sos_paddr_t ppage,
                                         sos_vaddr_t vaddr)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Make sure the page table was mapped */
  struct x86_pte * pt;
  if (pd[index_in_pd].present)
    {
      pt = (struct x86_pte*) (pd[index_in_pd].pt_paddr << 12);

      /* This test will always be FALSE here, since the setup routine
         scans the kernel pages in a strictly increasing order: at
         each step, the map will result in the allocation of a new PT
         entry. For the sake of clarity, we keep the test here. */
      if (pt[index_in_pt].present)
        SOS_ASSERT_FATAL(FALSE); /* indicate a fatal error */
    }
  else
    {
      /* No: allocate a new one */
      pt = (struct x86_pte*) sos_physmem_ref_physpage_new(FALSE);
      if (! pt)
        return -SOS_ENOMEM;

      memset((void*)pt, 0x0, SOS_PAGE_SIZE);

      pd[index_in_pd].present  = TRUE;
      pd[index_in_pd].write    = 1; /* It would be too complicated to
                                       determine whether it
                                       corresponds to a real R/W area
                                       of the kernel code/data or to a
                                       read-only one */
      pd[index_in_pd].pt_paddr = ((sos_paddr_t)pt) >> 12;
    }


  /* Map the page in the page table */
  pt[index_in_pt].present = 1;
  pt[index_in_pt].write   = 1; /* It would be too complicated to
                                  determine whether it corresponds to
                                  a real R/W area of the kernel
                                  code/data or to a R/O one */
  pt[index_in_pt].user    = 0;
  pt[index_in_pt].paddr   = ppage >> 12;

  /* Increase the PT's occupation count because we allocated a new PTE
     inside it */
  sos_physmem_inc_physpage_occupation((sos_paddr_t)pt);

  return SOS_OK;
}


sos_ret_t sos_paging_subsystem_setup(sos_paddr_t identity_mapping_base,
                                     sos_paddr_t identity_mapping_top)
{
  /* The PDBR we will setup below */
  struct x86_pdbr cr3;

  /* Get the PD for the kernel */
  struct x86_pde * pd
    = (struct x86_pde*) sos_physmem_ref_physpage_new(FALSE);

  /* The iterator for scanning the kernel area */
  sos_paddr_t paddr;

  /* Make sure the PD allocation succeeded */
  if (! pd)
    return -SOS_ENOMEM;

  /* Reset the PD. For the moment, the whole RAM is still identity
     mapped, so that physical addresses are also virtual addresses */
  memset((void*)pd, 0x0, SOS_PAGE_SIZE);

  /* Identity-map the identity_mapping_* area */
  for (paddr = identity_mapping_base ;
       paddr < identity_mapping_top ;
       paddr += SOS_PAGE_SIZE)
    {
      if (paging_setup_map_helper(pd, paddr, paddr))
        return -SOS_ENOMEM;
    }

  /* Identity-map the PC-specific BIOS/Video area */
  for (paddr = BIOS_N_VIDEO_START ;
       paddr < BIOS_N_VIDEO_END ;
       paddr += SOS_PAGE_SIZE)
    {
      if (paging_setup_map_helper(pd, paddr, paddr))
        return -SOS_ENOMEM;
    }

  /* Ok, the kernel is now identity mapped in the PD. We still have to
     set up the mirroring */
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].present = TRUE;
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].write   = 1;
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].user    = 0;
  pd[virt_to_pd_index(SOS_PAGING_MIRROR_VADDR)].pt_paddr
    = ((sos_paddr_t)pd)>>12;

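  /*
   * Note on the "mirroring" set up just above: the PD is installed as
   * its own page table for the 4MB region starting at
   * SOS_PAGING_MIRROR_VADDR. As a consequence, once paging is enabled:
   *   - the PT installed in PD slot i becomes accessible at virtual
   *     address SOS_PAGING_MIRROR_VADDR + i * SOS_PAGE_SIZE;
   *   - the PD itself appears at SOS_PAGING_MIRROR_VADDR
   *     + virt_to_pd_index(SOS_PAGING_MIRROR_VADDR) * SOS_PAGE_SIZE,
   * which is exactly how sos_paging_map()/sos_paging_unmap() below
   * compute their pd/pt pointers.
   */
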
  /* We now just have to configure the MMU to use our PD. See Intel
     x86 doc vol 3, section 3.6.3 */
  memset(& cr3, 0x0, sizeof(struct x86_pdbr)); /* Reset the PDBR */
  cr3.pd_paddr = ((sos_paddr_t)pd) >> 12;

  /* Actual loading of the PDBR in the MMU: setup cr3 + bits 31[Paging
     Enabled] and 16[Write Protect] of cr0, see Intel x86 doc vol 3,
     sections 2.5, 3.6.1 and 4.11.3 + note table 4-2 */
  asm volatile ("movl %0,%%cr3\n\t"
                "movl %%cr0,%%eax\n\t"
                "orl $0x80010000, %%eax\n\t" /* bit 31 | bit 16 */
                "movl %%eax,%%cr0\n\t"
                "jmp 1f\n\t"
                "1:\n\t"
                "movl $2f, %%eax\n\t"
                "jmp *%%eax\n\t"
                "2:\n\t" ::"r"(cr3):"memory","eax");

  /*
   * Here, the only memory available is:
   * - The BIOS+video area
   * - the identity_mapping_base .. identity_mapping_top area
   * - the PD mirroring area (4M)
   * All accesses to other virtual addresses will generate a #PF
   */

  return SOS_OK;
}


/* Suppose that the current address space is configured with the
 * mirroring enabled to access the PD and PTs. */
sos_ret_t sos_paging_map(sos_paddr_t ppage_paddr,
                         sos_vaddr_t vpage_vaddr,
                         sos_bool_t is_user_page,
                         sos_ui32_t flags)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vpage_vaddr);
  unsigned index_in_pt = virt_to_pt_index(vpage_vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(ppage_paddr));
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(vpage_vaddr));

  /* EXEC permission ignored on x86 */
  flags &= ~SOS_VM_MAP_PROT_EXEC;

  /* Mapping anything inside the PD mirroring is FORBIDDEN ;) */
  if ((vpage_vaddr >= SOS_PAGING_MIRROR_VADDR)
      && (vpage_vaddr < SOS_PAGING_MIRROR_VADDR + SOS_PAGING_MIRROR_SIZE))
    return -SOS_EINVAL;

  /* Map a page for the PT if necessary */
  if (! pd[index_in_pd].present)
    {
      x86_pde_val_t u;

      /* No: allocate a new one */
      sos_paddr_t pt_ppage
        = sos_physmem_ref_physpage_new(! (flags & SOS_VM_MAP_ATOMIC));
      if (! pt_ppage)
        {
          return -SOS_ENOMEM;
        }

      /* Prepare the value of the PDE */
      u.pde = (struct x86_pde){
        .present  = TRUE,
        .write    = 1,
        .pt_paddr = ((sos_paddr_t)pt_ppage) >> 12
      };

      /* Is it a PDE concerning the kernel space ? */
      if (vpage_vaddr < SOS_PAGING_MIRROR_VADDR)
        {
          /* Yes: we need to update the PDE of ALL the mm_contexts
             in the system */

          /* First of all: this is a kernel PT */
          u.pde.user = 0;

          /* Now synchronize all the PDs */
          SOS_ASSERT_FATAL(SOS_OK ==
                           sos_mm_context_synch_kernel_PDE(index_in_pd,
                                                           u.ui32));
        }
      else /* We should have written "else if (vpage_vaddr >=
              SOS_PAGING_BASE_USER_ADDRESS)" but this is not needed
              because the beginning of the function detects and
              rejects mapping requests inside the mirroring */
        {
          /* No: the request concerns the user space, so only the
             current MMU context is concerned */

          /* First of all: this is a user PT */
          u.pde.user = 1;

          /* Now update the current PD */
          pd[index_in_pd] = u.pde;
        }

      /*
       * The PT is now mapped in the PD mirroring
       */

      /* Invalidate TLB for the page we just added */
      invlpg(pt);

      /* Reset this new PT */
      memset((void*)pt, 0x0, SOS_PAGE_SIZE);
    }

  /* If we allocate a new entry in the PT, increase its occupation
     count. */
  if (! pt[index_in_pt].present)
    sos_physmem_inc_physpage_occupation(pd[index_in_pd].pt_paddr << 12);

  /* Otherwise, that means that a physical page is implicitly
     unmapped */
  else
    sos_physmem_unref_physpage(pt[index_in_pt].paddr << 12);

  /* Map the page in the page table */
  pt[index_in_pt].present = TRUE;
  pt[index_in_pt].write   = (flags & SOS_VM_MAP_PROT_WRITE)?1:0;
  pt[index_in_pt].user    = (is_user_page)?1:0;
  pt[index_in_pt].paddr   = ppage_paddr >> 12;
  sos_physmem_ref_physpage_at(ppage_paddr);


  /*
   * The page is now mapped in the current address space
   */

  /* Invalidate TLB for the page we just added */
  invlpg(vpage_vaddr);

  return SOS_OK;
}

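/*
 * Typical usage sketch for sos_paging_map() (illustrative only; error
 * handling and the exact protection flags depend on the caller):
 * allocate a physical page, map it, then drop the caller's own
 * reference since the mapping took one of its own:
 *
 *   sos_paddr_t ppage = sos_physmem_ref_physpage_new(TRUE);
 *   if (ppage
 *       && (SOS_OK == sos_paging_map(ppage, some_free_vaddr, FALSE,
 *                                    SOS_VM_MAP_PROT_READ
 *                                    | SOS_VM_MAP_PROT_WRITE)))
 *     sos_physmem_unref_physpage(ppage); // page stays mapped, refcount 1
 *
 * "some_free_vaddr" is a placeholder for a page-aligned virtual
 * address owned by the caller.
 */

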
sos_ret_t sos_paging_unmap(sos_vaddr_t vpage_vaddr)
{
  sos_ret_t pt_dec_occupation_retval;

  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vpage_vaddr);
  unsigned index_in_pt = virt_to_pt_index(vpage_vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(vpage_vaddr));

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EINVAL;
  if (! pt[index_in_pt].present)
    return -SOS_EINVAL;

  /* Unmapping anything inside the PD mirroring is FORBIDDEN ;) */
  if ((vpage_vaddr >= SOS_PAGING_MIRROR_VADDR)
      && (vpage_vaddr < SOS_PAGING_MIRROR_VADDR + SOS_PAGING_MIRROR_SIZE))
    return -SOS_EINVAL;

  /* Reclaim the physical page */
  sos_physmem_unref_physpage(pt[index_in_pt].paddr << 12);

  /* Unmap the page in the page table */
  memset(pt + index_in_pt, 0x0, sizeof(struct x86_pte));

  /* Invalidate TLB for the page we just unmapped */
  invlpg(vpage_vaddr);

  /* Reclaim this entry in the PT, which may free the PT */
  pt_dec_occupation_retval
    = sos_physmem_dec_physpage_occupation(pd[index_in_pd].pt_paddr << 12);
  SOS_ASSERT_FATAL(pt_dec_occupation_retval >= 0);
  if (pt_dec_occupation_retval > 0)
    /* If the PT is now completely unused... */
    {
      x86_pde_val_t u;

      /*
       * The PT is not referenced by this PD anymore
       */
      sos_physmem_unref_physpage(pd[index_in_pd].pt_paddr << 12);

      /*
       * Reset the PDE
       */

      /* Mark the PDE as unavailable */
      u.ui32 = 0;

      /* Is it a PDE concerning the kernel space ? */
      if (vpage_vaddr < SOS_PAGING_MIRROR_VADDR)
        {
          /* Now synchronize all the PDs */
          SOS_ASSERT_FATAL(SOS_OK ==
                           sos_mm_context_synch_kernel_PDE(index_in_pd,
                                                           u.ui32));
        }
      else /* We should have written "else if (vpage_vaddr >=
              SOS_PAGING_BASE_USER_ADDRESS)" but this is not needed
              because the beginning of the function detects and
              rejects unmapping requests inside the mirroring */
        {
          /* No: the request concerns the user space, so only the
             current MMU context is concerned */
          pd[index_in_pd] = u.pde;
        }

      /* Update the TLB */
      invlpg(pt);
    }

  return SOS_OK;
}


sos_ret_t sos_paging_unmap_interval(sos_vaddr_t vaddr,
                                    sos_size_t size)
{
  sos_ret_t retval = 0;

  if (! SOS_IS_PAGE_ALIGNED(vaddr))
    return -SOS_EINVAL;
  if (! SOS_IS_PAGE_ALIGNED(size))
    return -SOS_EINVAL;

  for ( ;
        size >= SOS_PAGE_SIZE ;
        vaddr += SOS_PAGE_SIZE, size -= SOS_PAGE_SIZE)
    if (SOS_OK == sos_paging_unmap(vaddr))
      retval += SOS_PAGE_SIZE;

  return retval;
}


sos_ui32_t sos_paging_get_prot(sos_vaddr_t vaddr)
{
  sos_ui32_t retval;

  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return SOS_VM_MAP_PROT_NONE;
  if (! pt[index_in_pt].present)
    return SOS_VM_MAP_PROT_NONE;

  /* Default access right of an available page is "read" on x86 */
  retval = SOS_VM_MAP_PROT_READ;
  if (pd[index_in_pd].write && pt[index_in_pt].write)
    retval |= SOS_VM_MAP_PROT_WRITE;

  return retval;
}


sos_ret_t sos_paging_set_prot(sos_vaddr_t vaddr,
                              sos_ui32_t  new_prot)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* EXEC permission ignored on x86 */
  new_prot &= ~SOS_VM_MAP_PROT_EXEC;

  /* Check flags */
  if (new_prot & ~(SOS_VM_MAP_PROT_READ | SOS_VM_MAP_PROT_WRITE))
    return -SOS_EINVAL;
  if (! (new_prot & SOS_VM_MAP_PROT_READ))
    /* x86 READ flag always set by default */
    return -SOS_ENOSUP;

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EINVAL;
  if (! pt[index_in_pt].present)
    return -SOS_EINVAL;

  /* Update access rights */
  pt[index_in_pt].write = ((new_prot & SOS_VM_MAP_PROT_WRITE) != 0);
  invlpg(vaddr);

  return SOS_OK;
}


sos_ret_t sos_paging_set_prot_of_interval(sos_vaddr_t vaddr,
                                          sos_size_t  size,
                                          sos_ui32_t  new_prot)
{
  if (! SOS_IS_PAGE_ALIGNED(vaddr))
    return -SOS_EINVAL;
  if (! SOS_IS_PAGE_ALIGNED(size))
    return -SOS_EINVAL;

  for ( ; size >= SOS_PAGE_SIZE ; vaddr += SOS_PAGE_SIZE, size -= SOS_PAGE_SIZE)
    sos_paging_set_prot(vaddr, new_prot);

  return SOS_OK;
}


sos_bool_t sos_paging_is_dirty(sos_vaddr_t vaddr)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return FALSE;
  if (! pt[index_in_pt].present)
    return FALSE;

  return (pt[index_in_pt].dirty != 0);
}


sos_ret_t sos_paging_set_dirty(sos_vaddr_t vaddr,
                               sos_bool_t  is_dirty)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EFAULT;
  if (! pt[index_in_pt].present)
    return -SOS_EFAULT;

  pt[index_in_pt].dirty = is_dirty;
  return SOS_OK;
}


sos_paddr_t sos_paging_get_paddr(sos_vaddr_t vaddr)
{
  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(vaddr);
  unsigned index_in_pt = virt_to_pt_index(vaddr);
  unsigned offset_in_page = virt_to_page_offset(vaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return (sos_paddr_t)NULL;
  if (! pt[index_in_pt].present)
    return (sos_paddr_t)NULL;

  return (pt[index_in_pt].paddr << 12) + offset_in_page;
}


/* *************************************************
 * Functions restricted to mm_context module
 */


sos_paddr_t sos_paging_get_current_PD_paddr(void)
{
  struct x86_pdbr pdbr;
  asm volatile("movl %%cr3, %0\n": "=r"(pdbr));
  return (pdbr.pd_paddr << 12);
}


sos_ret_t sos_paging_set_current_PD_paddr(sos_paddr_t paddr_PD)
{
  struct x86_pdbr pdbr;

  SOS_ASSERT_FATAL(paddr_PD != 0);
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(paddr_PD));

  /* Setup the value of the PDBR */
  memset(& pdbr, 0x0, sizeof(struct x86_pdbr)); /* Reset the PDBR */
  pdbr.pd_paddr = (paddr_PD >> 12);

  /* Configure the MMU according to the PDBR */
  asm volatile ("movl %0,%%cr3\n" ::"r"(pdbr));

  return SOS_OK;
}


sos_ret_t sos_paging_dispose(sos_vaddr_t vaddr_PD)
{
  x86_pde_val_t *pd = (x86_pde_val_t*) vaddr_PD;
  x86_pte_val_t *pt;
  int index_in_pd;

  /* Allocate 1 page in kernel space to map the PTs in order to
     unreference the physical pages they reference */
  pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
  if (! pt)
    return -SOS_ENOMEM;

  /* (Nothing to do in kernel space) */

  /* Reset all the PTs in user space */
  for (index_in_pd = (SOS_PAGING_BASE_USER_ADDRESS >> 22) ;
       index_in_pd < 1024 ; /* 1 PDE = 1 PT
                               = 1024 pages
                               = 4MB */
       index_in_pd ++)
    {
      sos_paddr_t paddr_pt = (pd[index_in_pd].pde.pt_paddr << 12);
      int index_in_pt;

      /* Nothing to do if there is no PT */
      if (! pd[index_in_pd].pde.present)
        {
          pd[index_in_pd].ui32 = 0;
          continue;
        }

      /* Map this PT inside the kernel */
      SOS_ASSERT_FATAL(SOS_OK
                       == sos_paging_map(paddr_pt,
                                         (sos_vaddr_t)pt, FALSE,
                                         SOS_VM_MAP_PROT_READ
                                         | SOS_VM_MAP_PROT_WRITE));

      /* Reset all the mappings in this PT */
      for (index_in_pt = 0 ; index_in_pt < 1024 ; index_in_pt ++)
        {
          /* Ignore unmapped PTEs */
          if (! pt[index_in_pt].pte.present)
            {
              pt[index_in_pt].ui32 = 0;
              continue;
            }

          /* Unreference the associated page */
          sos_physmem_unref_physpage(pt[index_in_pt].pte.paddr << 12);

          /* Decrease the occupation count of the PT */
          sos_physmem_dec_physpage_occupation(paddr_pt);

          /* Reset the PTE */
          pt[index_in_pt].ui32 = 0;
        }

      /* Unmap the PT */
      SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)pt));

      /* Reset the PDE */
      pd[index_in_pd].ui32 = 0;

      /* Unreference the PT */
      sos_physmem_unref_physpage(paddr_pt);
    }

  /* Release the kernel space used for the temporary PT mapping */
  SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)pt));

  return SOS_OK;
}


sos_ret_t sos_paging_copy_kernel_space(sos_vaddr_t dest_vaddr_PD,
                                       sos_vaddr_t src_vaddr_PD)
{
  x86_pde_val_t *src_pd        = (x86_pde_val_t*) src_vaddr_PD;
  x86_pde_val_t *dest_pd       = (x86_pde_val_t*) dest_vaddr_PD;
  sos_paddr_t    dest_paddr_PD = sos_paging_get_paddr(dest_vaddr_PD);
  x86_pde_val_t  mirror_pde;
  int index_in_pd;

  /* Fill the destination PD with zeros */
  memset((void*)dest_vaddr_PD, 0x0, SOS_PAGE_SIZE);

  /* Synchronize it with the master kernel MMU context. Stop just
     before the mirroring ! */
  for (index_in_pd = 0 ;
       index_in_pd < (SOS_PAGING_MIRROR_VADDR >> 22) ; /* 1 PDE = 1 PT
                                                          = 1024 pages
                                                          = 4MB */
       index_in_pd ++)
    {
      /* Copy the master's configuration */
      dest_pd[index_in_pd].ui32 = src_pd[index_in_pd].ui32;

      /* We DON'T mark the underlying PT and pages as referenced,
         because all the PDs are equivalent in the kernel space: as
         soon as a page is mapped in the kernel, it is mapped by X
         address spaces, and as soon as it is unmapped by 1 address
         space, it is unmapped in all the others. So, for X address
         spaces, the reference counter will be either 0 or X, and
         nothing else: maintaining it precisely would not be of any
         use and would only cost time updating it. */
    }

  /* Setup the mirroring for the new address space */
  mirror_pde.ui32         = 0;
  mirror_pde.pde.present  = TRUE;
  mirror_pde.pde.write    = 1;
  mirror_pde.pde.user     = 0; /* This is a KERNEL PDE */
  mirror_pde.pde.pt_paddr = (dest_paddr_PD >> 12);
  dest_pd[SOS_PAGING_MIRROR_VADDR >> 22].ui32 = mirror_pde.ui32;

  return SOS_OK;
}


sos_ret_t sos_paging_copy_user_space(sos_vaddr_t dest_vaddr_PD,
                                     sos_vaddr_t src_vaddr_PD)
{
  x86_pde_val_t *src_pd  = (x86_pde_val_t*) src_vaddr_PD;
  x86_pde_val_t *dest_pd = (x86_pde_val_t*) dest_vaddr_PD;
  x86_pte_val_t *tmp_src_pt, *tmp_dest_pt;
  int index_in_pd;

  /* Allocate 2 pages in kernel space to map the PTs in order to
     perform the copy of the PTs from source to destination */
  tmp_src_pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
  if (! tmp_src_pt)
    return -SOS_ENOMEM;

  tmp_dest_pt = (x86_pte_val_t *)sos_kmem_vmm_alloc(1, 0);
  if (! tmp_dest_pt)
    {
      /* Release the first temporary range before bailing out */
      sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt);
      return -SOS_ENOMEM;
    }

  /* Copy each used PT from source to destination */
  for (index_in_pd = (SOS_PAGING_BASE_USER_ADDRESS >> 22) ;
       index_in_pd < 1024 ; /* 1 PDE = 1 PT
                               = 1024 pages
                               = 4MB */
       index_in_pd ++)
    {
      sos_paddr_t paddr_dest_pt;
      int index_in_pt;

      /* We first literally copy the source PDE into the destination
         PDE. However, please bear in mind that, in the end, both
         won't reference the same physical PT: the destination PDE
         will be updated (below) to match the address of its own new
         PT */
      dest_pd[index_in_pd].ui32 = src_pd[index_in_pd].ui32;

      /* Ignore unused PTs */
      if (! src_pd[index_in_pd].pde.present)
        continue;

      /* Allocate the destination PT */
      paddr_dest_pt = sos_physmem_ref_physpage_new(TRUE);
      if (NULL == (void*)paddr_dest_pt)
        {
          sos_paging_dispose((sos_vaddr_t)dest_vaddr_PD);

          /* Release the temporary kernel space used for the copy */
          sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt);
          sos_kmem_vmm_free((sos_vaddr_t)tmp_dest_pt);
          return -SOS_ENOMEM;
        }

      /* Map the source and destination PTs */
      SOS_ASSERT_FATAL(SOS_OK
                       == sos_paging_map(src_pd[index_in_pd].pde.pt_paddr << 12,
                                         (sos_vaddr_t)tmp_src_pt, FALSE,
                                         SOS_VM_MAP_PROT_READ));
      SOS_ASSERT_FATAL(SOS_OK
                       == sos_paging_map(paddr_dest_pt,
                                         (sos_vaddr_t)tmp_dest_pt, FALSE,
                                         SOS_VM_MAP_PROT_READ
                                         | SOS_VM_MAP_PROT_WRITE));

      /* Copy the contents of the source to the destination PT,
         updating the reference counts of the pages */
      for (index_in_pt = 0 ; index_in_pt < 1024 ; index_in_pt ++)
        {
          /* Copy the source PTE */
          tmp_dest_pt[index_in_pt].ui32 = tmp_src_pt[index_in_pt].ui32;

          /* Ignore non-present pages */
          if (! tmp_dest_pt[index_in_pt].pte.present)
            continue;

          /* Reset the dirty/accessed flags */
          tmp_dest_pt[index_in_pt].pte.accessed = 0;
          tmp_dest_pt[index_in_pt].pte.dirty    = 0;

          /* Increase the reference count of the destination page */
          sos_physmem_ref_physpage_at(tmp_src_pt[index_in_pt].pte.paddr << 12);

          /* Increase the occupation count of the PT */
          sos_physmem_inc_physpage_occupation(paddr_dest_pt);
        }

      /* Unmap the temporary PTs */
      SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)tmp_src_pt));
      SOS_ASSERT_FATAL(SOS_OK == sos_paging_unmap((sos_vaddr_t)tmp_dest_pt));

      /* Update the destination PDE */
      dest_pd[index_in_pd].pde.pt_paddr = (paddr_dest_pt >> 12);

      /* Reset the accessed flag (a PDE has no dirty flag) */
      dest_pd[index_in_pd].pde.accessed = 0;
    }


  /* Release the temporary kernel space used for the copy */
  SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)tmp_src_pt));
  SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free((sos_vaddr_t)tmp_dest_pt));

  return SOS_OK;
}


sos_ret_t sos_paging_prepare_COW(sos_uaddr_t base_address,
                                 sos_size_t length)
{
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(base_address));
  SOS_ASSERT_FATAL(SOS_IS_PAGE_ALIGNED(length));
  SOS_ASSERT_FATAL(SOS_PAGING_IS_USER_AREA(base_address, length));

  /* Mark all the pages read-only, when already mapped in physical
     memory */
  for ( ;
        length > 0 ;
        length -= SOS_PAGE_SIZE, base_address += SOS_PAGE_SIZE)
    {
      sos_paging_set_prot(base_address,
                          SOS_VM_MAP_PROT_READ);
    }

  return SOS_OK;
}

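/*
 * Counterpart of sos_paging_prepare_COW() above: once an area has been
 * marked read-only, a write access to it raises a page fault, and the
 * fault handler is expected to call sos_paging_try_resolve_COW() on the
 * faulting address. The function below then either re-enables write
 * access (page referenced by a single address space) or substitutes a
 * private, writable copy of the page.
 */
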
sos_ret_t sos_paging_try_resolve_COW(sos_uaddr_t uaddr)
{
  sos_ret_t refcnt;

  /* Get the page directory entry and table entry index for this
     address */
  unsigned index_in_pd = virt_to_pd_index(uaddr);
  unsigned index_in_pt = virt_to_pt_index(uaddr);

  /* Get the PD of the current context */
  struct x86_pde *pd = (struct x86_pde*)
    (SOS_PAGING_MIRROR_VADDR
     + SOS_PAGE_SIZE*virt_to_pd_index(SOS_PAGING_MIRROR_VADDR));

  /* Address of the PT in the mirroring */
  struct x86_pte * pt = (struct x86_pte*) (SOS_PAGING_MIRROR_VADDR
                                           + SOS_PAGE_SIZE*index_in_pd);

  /* No page mapped at this address ? */
  if (! pd[index_in_pd].present)
    return -SOS_EFAULT;
  if (! pt[index_in_pt].present)
    return -SOS_EFAULT;

  /* Read-only PTs are not supported by the kernel ! */
  if (! pd[index_in_pd].write)
    return -SOS_EFAULT;

  /* Cannot understand a COW request if the page is already
     read/write */
  SOS_ASSERT_FATAL(! pt[index_in_pt].write);

  /* We make a private copy of the page only if the currently mapped
     page is shared by more than 1 process */
  refcnt = sos_physmem_get_physpage_refcount(pt[index_in_pt].paddr << 12);
  SOS_ASSERT_FATAL(refcnt > 0);

  if (refcnt == 1)
    {
      /* We are the only address space to reference this page, we can
         safely turn it read/write now */
      pt[index_in_pt].write = 1;
      invlpg(SOS_PAGE_ALIGN_INF(uaddr)); /* invlpg expects the (virtual)
                                            address of the page, not its
                                            physical address */
    }

  /* Otherwise we need to make a private copy of the page */
  else
    {
      sos_paddr_t new_ppage;
      sos_vaddr_t vpage_src, tmp_dest;

      /* For that, we allocate the destination page inside the kernel
         space to perform the copy. We will transfer it to its
         final user-space address later */
      tmp_dest = sos_kmem_vmm_alloc(1, SOS_KMEM_VMM_MAP);
      if (! tmp_dest)
        return -SOS_ENOMEM;

      /* Copy the contents of the page */
      vpage_src = SOS_PAGE_ALIGN_INF(uaddr);
      memcpy((void*)tmp_dest, (void*)vpage_src, SOS_PAGE_SIZE);

      /* Replace the original (read-only) mapping with a (read/write)
         mapping to the new page. This will automatically unreference
         the original page */
      new_ppage = sos_paging_get_paddr(tmp_dest);
      SOS_ASSERT_FATAL(new_ppage != (sos_paddr_t)NULL);
      if (SOS_OK != sos_paging_map(new_ppage, vpage_src,
                                   TRUE,
                                   SOS_VM_MAP_PROT_READ
                                   | SOS_VM_MAP_PROT_WRITE))
        {
          sos_kmem_vmm_free(tmp_dest);
          return -SOS_ENOMEM;
        }

      /* We can now unmap the destination page from inside the
         kernel and free the kernel VM range for it */
      SOS_ASSERT_FATAL(SOS_OK == sos_kmem_vmm_free(tmp_dest));
    }

  /* That's all, folks ! */
  return SOS_OK;
}