matos/arch/x86/paging.c

352 lines
12 KiB
C

#include "allocArea.h"
#include "paging.h"
#include "errno.h"
#include "kernel.h"
#include "klibc.h"
#include "mem.h"
#include "mmuContext.h"
#include "stdarg.h"
// In a Vaddr, 10 first bit (MSB) are the index in the Page Directory. A Page Directory Entry
// point to a Page Table. The 10 next bits are then an index in this Page Table. A Page Table
// Entry then point to a physical address at which is added the remaining 12 bits. So they are
// 1024 entry in the PD, each of them pointing to a PT of 1024 entry. Each PTE pointing to 4K
// page. First address (up to page_desc from mem.c) are mapped such as Paddr == Vaddr. To make
// PD always accessible a (x86?) trick is used : The mirroring. A given entry N in the PD point
// to the PD (this is possible because PDE very looks like PTE in x86). So N << (10 + 12 = 4Mo)
// point to the Paddr of PD. Then, accessing N * 4Mo + I * 4Ko is accessing the PT of the Ieme
// entry in the PD (as MMU take the PD pointed by the PDE number N like a PT). More
// particularly, accessing N * 4Mo + N * 4ko is accessing the PD.
//
// PD is at Vaddr N * 4Mo and take 4ko. Each PT are allocated dynamically.
// Just make sure that N have not been used by identity mapping
#define PT_SHIFT 12
#define PTE_MASK 0x3ff // 10bits
#define PD_SHIFT 22
#define PD_MIRROR_PAGE_IDX 1023U
static unsigned long mappedPage = 0;
struct pde {
uint32_t present : 1;
uint32_t write : 1; // 0 read - 1 RW
uint32_t user : 1; // 0 supervisor - 1 user
uint32_t write_through : 1; // 0 write-back - 1 write_through
uint32_t cache_disable : 1;
uint32_t access : 1; // have been accessed
uint32_t zero : 1; // Not used
uint32_t size : 1; // 0 for 4Kb 1 for 4Mb
uint32_t ignored : 1;
uint32_t available : 3;
uint32_t pt_addr : 20;
} __attribute__((packed));
struct pte {
uint32_t present : 1;
uint32_t write : 1; // 0 read - 1 RW
uint32_t user : 1; // 0 supervisor - 1 user
uint32_t write_through : 1; // 0 write-back - 1 write_through
uint32_t cache_disable : 1;
uint32_t access : 1; // have been accessed
uint32_t dirty : 1; // if set, indicates that page has been written to. This flag is
// not updated by the CPU, and once set will not unset itself.
uint32_t zero : 1; // if PAT is supported, shall indicate the memory type. Otherwise,
// it must be 0.
uint32_t global : 1; // if set, prevents the TLB from updating the address in its cache
// if CR3 is reset. Note, that the page global enable bit in CR4
// must be set to enable this feature.
uint32_t available : 3;
uint32_t paddr : 20;
} __attribute__((packed));
struct pdbr {
uint32_t zero1 : 3; // reserved
uint32_t write_through : 1; // 0 write-back - 1 write-through
uint32_t cache_disabled : 1; // 1=cache disabled
uint32_t zero2 : 7; // reserved
uint32_t pd_paddr : 20;
} __attribute__((packed));
// invalidate the TLB entry for the page located at the given virtual address
static inline void __native_flush_tlb_single(unsigned long addr)
{
asm volatile("invlpg (%0)" ::"r"(addr) : "memory");
}
int pagingSetup(paddr_t lowerKernelAddr, paddr_t upperKernelAddr)
{
struct pdbr cr3;
// x86 got 1024 of pde for 4Byte each: 4ko !
struct pde *pd = (struct pde *)allocPhyPage(1);
memset(pd, 0, PAGE_SIZE);
memset(&cr3, 0x0, sizeof(struct pdbr));
cr3.pd_paddr = ((paddr_t)pd) >> 12;
// MMU not enabled for the moment. No need to use mirroring
// Identity mapping up to upperKernelAddr
for (paddr_t i = lowerKernelAddr; i < upperKernelAddr; i += PAGE_SIZE) {
uint pdEntry = i >> (PD_SHIFT);
uint ptEntry = (i >> PT_SHIFT) & PTE_MASK;
struct pte *pt;
if (pd[pdEntry].present) {
pt = (struct pte *)(pd[pdEntry].pt_addr << PT_SHIFT);
refPhyPage((paddr_t)pt);
} else {
pt = (struct pte *)allocPhyPage(1);
memset(pt, 0, PAGE_SIZE);
pd[pdEntry].present = 1;
pd[pdEntry].write = 1;
pd[pdEntry].pt_addr = ((paddr_t)pt >> PT_SHIFT);
}
pt[ptEntry].present = 1;
pt[ptEntry].write = 1; // TODO set Kernel code as RO
pt[ptEntry].paddr = i >> PAGE_SHIFT;
}
// Setup mirroring
pd[PAGING_MIRROR_VADDR >> PD_SHIFT].present = 1;
pd[PAGING_MIRROR_VADDR >> PD_SHIFT].write = 1;
pd[PAGING_MIRROR_VADDR >> PD_SHIFT].pt_addr = ((paddr_t)pd >> PT_SHIFT);
pd[PAGING_MIRROR_VADDR >> PD_SHIFT].user = 0;
// Loading of the PDBR in the MMU:
asm volatile("movl %0,%%cr3\n\t"
"movl %%cr0,%%eax\n\t"
"orl $0x80010000, %%eax\n\t" /* bit 31 | bit 16 */
"movl %%eax,%%cr0\n\t"
"jmp 1f\n\t"
"1:\n\t"
"movl $2f, %%eax\n\t"
"jmp *%%eax\n\t"
"2:\n\t" ::"r"(cr3)
: "memory", "eax");
return 0;
}
int pageMap(vaddr_t vaddr, paddr_t paddr, int flags)
{
uint pdEntry = vaddr >> (PD_SHIFT);
uint ptEntry = (vaddr >> PT_SHIFT) & PTE_MASK;
if ((vaddr >= PAGING_MIRROR_VADDR) && (vaddr < PAGING_MIRROR_VADDR + PAGING_MIRROR_SIZE))
return -EINVAL;
// Thank to mirroring, we can access the PD
struct pde *pd =
(struct pde *)(PAGING_MIRROR_VADDR + PAGE_SIZE * (PAGING_MIRROR_VADDR >> PD_SHIFT));
struct pte *pt = (struct pte *)((PAGING_MIRROR_VADDR) + (pdEntry * PAGE_SIZE));
if (!pd[pdEntry].present) {
paddr_t ptPhy = allocPhyPage(1);
if (ptPhy == (vaddr_t)NULL)
return -ENOMEM;
pd[pdEntry].present = 1;
pd[pdEntry].write = 1;
pd[pdEntry].pt_addr = (ptPhy >> PT_SHIFT);
if (vaddr < PAGING_BASE_USER_ADDRESS) {
pd[pdEntry].user = 0;
mmuContextSyncKernelPDE(pdEntry, &pd[pdEntry], sizeof(struct pde));
} else {
assert(flags & PAGING_MEM_USER);
pd[pdEntry].user = 1;
}
__native_flush_tlb_single((vaddr_t)pt);
memset((void *)pt, 0, PAGE_SIZE);
}
{
// Already mapped ? Remove old mapping
if (pt[ptEntry].present) {
unrefPhyPage(pt[ptEntry].paddr << PAGE_SHIFT);
} // PTE not already used ? We will use it ! So increase the PT ref count
else {
refPhyPage(pd[pdEntry].pt_addr << PAGE_SHIFT);
}
}
pt[ptEntry].user = (flags & PAGING_MEM_USER) ? 1 : 0;
pt[ptEntry].present = 1;
pt[ptEntry].write = (flags & PAGING_MEM_WRITE) ? 1 : 0;
pt[ptEntry].paddr = paddr >> PAGE_SHIFT;
refPhyPage(paddr);
__native_flush_tlb_single(vaddr);
mappedPage++;
return 0;
}
int pageUnmap(vaddr_t vaddr)
{
uint pdEntry = vaddr >> (PD_SHIFT);
uint ptEntry = (vaddr >> PT_SHIFT) & PTE_MASK;
if ((vaddr >= PAGING_MIRROR_VADDR) && (vaddr < PAGING_MIRROR_VADDR + PAGING_MIRROR_SIZE))
return -EINVAL;
// Thank to mirroring, we can access the PD
struct pde *pd =
(struct pde *)(PAGING_MIRROR_VADDR + PAGE_SIZE * (PAGING_MIRROR_VADDR >> PD_SHIFT));
struct pte *pt = (struct pte *)((PAGING_MIRROR_VADDR) + (pdEntry * PAGE_SIZE));
if (!pd[pdEntry].present)
return -EINVAL;
if (!pt[ptEntry].present)
return -EINVAL;
unrefPhyPage(pt[ptEntry].paddr << PAGE_SHIFT);
pt[ptEntry].present = 0;
// PTE not used. Decrease refcount on it. Is PT not used anymore ?
if (unrefPhyPage(pd[pdEntry].pt_addr << PT_SHIFT) == 0) {
pd[pdEntry].present = 0;
if (vaddr < PAGING_BASE_USER_ADDRESS) {
mmuContextSyncKernelPDE(pdEntry, &pd[pdEntry], sizeof(struct pde));
}
__native_flush_tlb_single((vaddr_t)pt);
}
__native_flush_tlb_single(vaddr);
mappedPage--;
return 0;
}
paddr_t pagingGetPaddr(vaddr_t vaddr)
{
/* Get the page directory entry and table entry index for this
address */
unsigned pdEntry = vaddr >> PD_SHIFT;
unsigned ptEntry = vaddr >> PT_SHIFT & PTE_MASK;
unsigned pageOffset = vaddr & PAGE_MASK;
// Thank to mirroring, we can access the PD
struct pde *pd =
(struct pde *)(PAGING_MIRROR_VADDR + PAGE_SIZE * (PAGING_MIRROR_VADDR >> PD_SHIFT));
struct pte *pt = (struct pte *)((PAGING_MIRROR_VADDR) + (pdEntry * PAGE_SIZE));
/* No page mapped at this address ? */
if (!pd[pdEntry].present)
return (paddr_t)NULL;
if (!pt[ptEntry].present)
return (paddr_t)NULL;
return (pt[ptEntry].paddr << PT_SHIFT) + pageOffset;
}
unsigned long getNbMappedPage(void)
{
return mappedPage;
}
paddr_t pagingGetCurrentPDPaddr()
{
struct pdbr pdbr;
asm volatile("movl %%cr3, %0\n" : "=r"(pdbr));
return (pdbr.pd_paddr << 12);
}
int pagingSetCurrentPDPaddr(paddr_t paddrPD)
{
struct pdbr pdbr;
assert(paddrPD != 0);
assert(IS_ALIGNED(paddrPD, PAGE_SIZE));
/* Setup the value of the PDBR */
memset(&pdbr, 0x0, sizeof(struct pdbr)); /* Reset the PDBR */
pdbr.pd_paddr = (paddrPD >> 12);
/* Configure the MMU according to the PDBR */
asm volatile("movl %0,%%cr3\n" ::"r"(pdbr));
return 0;
}
// unmap page inside this MMU context
int pagingClearUserContext(vaddr_t vaddr_PD)
{
struct pde *pd = (struct pde *)vaddr_PD;
//Tmp pt to unref page they reference
struct pte *pt = (struct pte *)areaAlloc(1, 0);
if(pt == NULL)
return -ENOMEM;
for (int pdIdx = PAGING_BASE_USER_ADDRESS >> PD_SHIFT; pdIdx < 1024; pdIdx++) {
if(!pd[pdIdx].present){
memset(&pd[pdIdx], 0, sizeof(struct pde));
continue;
}
paddr_t ptAddr = pd[pdIdx].pt_addr << PT_SHIFT;
assert(!pageMap((vaddr_t)pt, ptAddr, PAGING_MEM_WRITE | PAGING_MEM_READ));
for(int ptIdx = 0; ptIdx < 1024; ptIdx ++){
if(!pt[ptIdx].present){
memset(&pt[ptIdx], 0, sizeof(struct pte));
continue;
}
unrefPhyPage(pt[ptIdx].paddr);
memset(&pt[ptIdx], 0, sizeof(struct pte));
}
assert(!pageUnmap((vaddr_t)pt));
memset(&pd[pdIdx], 0, sizeof(struct pde));
unrefPhyPage(ptAddr);
}
areaFree((vaddr_t)pt);
return 0;
}
int pagingCopyKernelSpace(vaddr_t destVaddrPD, paddr_t destPaddrPD, vaddr_t srcVaddrPD)
{
struct pde *src_pd = (struct pde *)srcVaddrPD;
struct pde *dest_pd = (struct pde *)destVaddrPD;
struct pde mirror_pde = {0};
uint index_in_pd;
/* Fill destination PD with zeros */
memset((void *)destVaddrPD, 0x0, PAGE_SIZE);
/* Synchronize it with the master Kernel MMU context. Stop just
before the mirroring ! */
for (index_in_pd = 0; index_in_pd < (PAGING_MIRROR_VADDR >> 22); /* 1 PDE = 1 PT
= 1024 Pages
= 4MB */
index_in_pd++) {
/* Copy the master's configuration */
dest_pd[index_in_pd] = src_pd[index_in_pd];
/* We DON'T mark the underlying PT and pages as referenced
because all the PD are equivalent in the kernel space: as
soon as a page is mapped in the kernel, it is mapped by X
address spaces, and as soon as it is unmapped by 1 address
space, it is unmapped in all the others. So that for X
address spaces, the reference counter will be either 0 or X,
and not something else: using the reference counter correctly
won't be of any use and would consume some time in updating it. */
}
/* Setup the mirroring for the new address space */
mirror_pde.present = TRUE;
mirror_pde.write = 1;
mirror_pde.user = 0; /* This is a KERNEL PDE */
mirror_pde.pt_addr = (destPaddrPD >> 12);
dest_pd[PAGING_MIRROR_VADDR >> 22] = mirror_pde;
return 0;
}