#include "allocArea.h"
#include "paging.h"
#include "errno.h"
#include "kernel.h"
#include "klibc.h"
#include "mem.h"
#include "mmuContext.h"
#include "stdarg.h"

// In a vaddr, the 10 most-significant bits are an index into the Page Directory (PD). A Page
// Directory Entry (PDE) points to a Page Table (PT). The next 10 bits are an index into that
// Page Table. A Page Table Entry (PTE) points to a physical page, to which the remaining 12
// bits are added as an offset. So there are 1024 entries in the PD, each pointing to a PT of
// 1024 entries, and each PTE points to a 4KB page. The first addresses (up to page_desc from
// mem.c) are identity-mapped (paddr == vaddr). To keep the PD always accessible, an (x86?)
// trick is used: mirroring. A chosen entry N of the PD points back to the PD itself (this is
// possible because a PDE looks very much like a PTE on x86). Hence vaddr N << (10 + 12),
// i.e. N * 4MB, maps the physical PD. Accessing N * 4MB + I * 4KB accesses the PT of the
// I-th entry of the PD (the MMU treats the PD pointed to by PDE number N as if it were a
// PT). In particular, accessing N * 4MB + N * 4KB accesses the PD itself.
//
// The PD lives at vaddr N * 4MB and takes 4KB. Each PT is allocated dynamically.
// Just make sure that N have not been used by identity mapping #define PT_SHIFT 12 #define PTE_MASK 0x3ff // 10bits #define PD_SHIFT 22 #define PD_MIRROR_PAGE_IDX 1023U static unsigned long mappedPage = 0; struct pde { uint32_t present : 1; uint32_t write : 1; // 0 read - 1 RW uint32_t user : 1; // 0 supervisor - 1 user uint32_t write_through : 1; // 0 write-back - 1 write_through uint32_t cache_disable : 1; uint32_t access : 1; // have been accessed uint32_t zero : 1; // Not used uint32_t size : 1; // 0 for 4Kb 1 for 4Mb uint32_t ignored : 1; uint32_t available : 3; uint32_t pt_addr : 20; } __attribute__((packed)); struct pte { uint32_t present : 1; uint32_t write : 1; // 0 read - 1 RW uint32_t user : 1; // 0 supervisor - 1 user uint32_t write_through : 1; // 0 write-back - 1 write_through uint32_t cache_disable : 1; uint32_t access : 1; // have been accessed uint32_t dirty : 1; // if set, indicates that page has been written to. This flag is // not updated by the CPU, and once set will not unset itself. uint32_t zero : 1; // if PAT is supported, shall indicate the memory type. Otherwise, // it must be 0. uint32_t global : 1; // if set, prevents the TLB from updating the address in its cache // if CR3 is reset. Note, that the page global enable bit in CR4 // must be set to enable this feature. uint32_t available : 3; uint32_t paddr : 20; } __attribute__((packed)); struct pdbr { uint32_t zero1 : 3; // reserved uint32_t write_through : 1; // 0 write-back - 1 write-through uint32_t cache_disabled : 1; // 1=cache disabled uint32_t zero2 : 7; // reserved uint32_t pd_paddr : 20; } __attribute__((packed)); // invalidate the TLB entry for the page located at the given virtual address static inline void __native_flush_tlb_single(unsigned long addr) { asm volatile("invlpg (%0)" ::"r"(addr) : "memory"); } int pagingSetup(paddr_t lowerKernelAddr, paddr_t upperKernelAddr) { struct pdbr cr3; // x86 got 1024 of pde for 4Byte each: 4ko ! 
struct pde *pd = (struct pde *)allocPhyPage(1); memset(pd, 0, PAGE_SIZE); memset(&cr3, 0x0, sizeof(struct pdbr)); cr3.pd_paddr = ((paddr_t)pd) >> 12; // MMU not enabled for the moment. No need to use mirroring // Identity mapping up to upperKernelAddr for (paddr_t i = lowerKernelAddr; i < upperKernelAddr; i += PAGE_SIZE) { uint pdEntry = i >> (PD_SHIFT); uint ptEntry = (i >> PT_SHIFT) & PTE_MASK; struct pte *pt; if (pd[pdEntry].present) { pt = (struct pte *)(pd[pdEntry].pt_addr << PT_SHIFT); refPhyPage((paddr_t)pt); } else { pt = (struct pte *)allocPhyPage(1); memset(pt, 0, PAGE_SIZE); pd[pdEntry].present = 1; pd[pdEntry].write = 1; pd[pdEntry].pt_addr = ((paddr_t)pt >> PT_SHIFT); } pt[ptEntry].present = 1; pt[ptEntry].write = 1; // TODO set Kernel code as RO pt[ptEntry].paddr = i >> PAGE_SHIFT; } // Setup mirroring pd[PAGING_MIRROR_VADDR >> PD_SHIFT].present = 1; pd[PAGING_MIRROR_VADDR >> PD_SHIFT].write = 1; pd[PAGING_MIRROR_VADDR >> PD_SHIFT].pt_addr = ((paddr_t)pd >> PT_SHIFT); pd[PAGING_MIRROR_VADDR >> PD_SHIFT].user = 0; // Loading of the PDBR in the MMU: asm volatile("movl %0,%%cr3\n\t" "movl %%cr0,%%eax\n\t" "orl $0x80010000, %%eax\n\t" /* bit 31 | bit 16 */ "movl %%eax,%%cr0\n\t" "jmp 1f\n\t" "1:\n\t" "movl $2f, %%eax\n\t" "jmp *%%eax\n\t" "2:\n\t" ::"r"(cr3) : "memory", "eax"); return 0; } int pageMap(vaddr_t vaddr, paddr_t paddr, int flags) { uint pdEntry = vaddr >> (PD_SHIFT); uint ptEntry = (vaddr >> PT_SHIFT) & PTE_MASK; if ((vaddr >= PAGING_MIRROR_VADDR) && (vaddr < PAGING_MIRROR_VADDR + PAGING_MIRROR_SIZE)) return -EINVAL; // Thank to mirroring, we can access the PD struct pde *pd = (struct pde *)(PAGING_MIRROR_VADDR + PAGE_SIZE * (PAGING_MIRROR_VADDR >> PD_SHIFT)); struct pte *pt = (struct pte *)((PAGING_MIRROR_VADDR) + (pdEntry * PAGE_SIZE)); if (!pd[pdEntry].present) { paddr_t ptPhy = allocPhyPage(1); if (ptPhy == (vaddr_t)NULL) return -ENOMEM; pd[pdEntry].present = 1; pd[pdEntry].write = 1; pd[pdEntry].pt_addr = (ptPhy >> PT_SHIFT); if (vaddr < 
PAGING_BASE_USER_ADDRESS) { pd[pdEntry].user = 0; mmuContextSyncKernelPDE(pdEntry, &pd[pdEntry], sizeof(struct pde)); } else { assert(flags & PAGING_MEM_USER); pd[pdEntry].user = 1; } __native_flush_tlb_single((vaddr_t)pt); memset((void *)pt, 0, PAGE_SIZE); } else { // Already mapped ? Remove old mapping if (pt[ptEntry].present) { unrefPhyPage(pt[ptEntry].paddr << PAGE_SHIFT); } // PTE not already used ? We will use it ! So increase the PT ref count else { refPhyPage(pd[pdEntry].pt_addr << PAGE_SHIFT); } } pt[ptEntry].user = (flags & PAGING_MEM_USER) ? 1 : 0; pt[ptEntry].present = 1; pt[ptEntry].write = (flags & PAGING_MEM_WRITE) ? 1 : 0; pt[ptEntry].paddr = paddr >> PAGE_SHIFT; refPhyPage(paddr); __native_flush_tlb_single(vaddr); mappedPage++; return 0; } int pageUnmap(vaddr_t vaddr) { uint pdEntry = vaddr >> (PD_SHIFT); uint ptEntry = (vaddr >> PT_SHIFT) & PTE_MASK; if ((vaddr >= PAGING_MIRROR_VADDR) && (vaddr < PAGING_MIRROR_VADDR + PAGING_MIRROR_SIZE)) return -EINVAL; // Thank to mirroring, we can access the PD struct pde *pd = (struct pde *)(PAGING_MIRROR_VADDR + PAGE_SIZE * (PAGING_MIRROR_VADDR >> PD_SHIFT)); struct pte *pt = (struct pte *)((PAGING_MIRROR_VADDR) + (pdEntry * PAGE_SIZE)); if (!pd[pdEntry].present) return -EINVAL; if (!pt[ptEntry].present) return -EINVAL; unrefPhyPage(pt[ptEntry].paddr << PAGE_SHIFT); pt[ptEntry].present = 0; // PTE not used. Decrease refcount on it. Is PT not used anymore ? 
if (unrefPhyPage(pd[pdEntry].pt_addr << PT_SHIFT) == 0) { pd[pdEntry].present = 0; if (vaddr < PAGING_BASE_USER_ADDRESS) { mmuContextSyncKernelPDE(pdEntry, &pd[pdEntry], sizeof(struct pde)); } __native_flush_tlb_single((vaddr_t)pt); } __native_flush_tlb_single(vaddr); mappedPage--; return 0; } paddr_t pagingGetPaddr(vaddr_t vaddr) { /* Get the page directory entry and table entry index for this address */ unsigned pdEntry = vaddr >> PD_SHIFT; unsigned ptEntry = vaddr >> PT_SHIFT; unsigned pageOffset = vaddr & PAGE_MASK; // Thank to mirroring, we can access the PD struct pde *pd = (struct pde *)(PAGING_MIRROR_VADDR + PAGE_SIZE * (PAGING_MIRROR_VADDR >> PD_SHIFT)); struct pte *pt = (struct pte *)((PAGING_MIRROR_VADDR) + (pdEntry * PAGE_SIZE)); /* No page mapped at this address ? */ if (!pd[pdEntry].present) return (paddr_t)NULL; if (!pt[ptEntry].present) return (paddr_t)NULL; return (pt[ptEntry].paddr << PT_SHIFT) + pageOffset; } unsigned long getNbMappedPage(void) { return mappedPage; } paddr_t pagingGetCurrentPDPaddr() { struct pdbr pdbr; asm volatile("movl %%cr3, %0\n" : "=r"(pdbr)); return (pdbr.pd_paddr << 12); } int pagingSetCurrentPDPaddr(paddr_t paddrPD) { struct pdbr pdbr; assert(paddrPD != 0); assert(IS_ALIGNED(paddrPD, PAGE_SIZE)); /* Setup the value of the PDBR */ memset(&pdbr, 0x0, sizeof(struct pdbr)); /* Reset the PDBR */ pdbr.pd_paddr = (paddrPD >> 12); /* Configure the MMU according to the PDBR */ asm volatile("movl %0,%%cr3\n" ::"r"(pdbr)); return 0; } // unmap page inside this MMU context int pagingClearUserContext(vaddr_t vaddr_PD) { struct pde *pd = (struct pde *)vaddr_PD; //Tmp pt to unref page they reference struct pte *pt = (struct pte *)areaAlloc(1, 0); if(pt == NULL) return -ENOMEM; for (int pdIdx = PAGING_BASE_USER_ADDRESS >> PD_SHIFT; pdIdx < 1024; pdIdx++) { if(!pd[pdIdx].present){ memset(&pd[pdIdx], 0, sizeof(struct pde)); continue; } paddr_t ptAddr = pd[pdIdx].pt_addr << PT_SHIFT; assert(!pageMap(ptAddr, (vaddr_t)pt, PAGING_MEM_USER | 
PAGING_MEM_WRITE)); for(int ptIdx = 0; ptIdx < 1024; ptIdx ++){ if(!pt[ptIdx].present){ memset(&pt[ptIdx], 0, sizeof(struct pte)); continue; } unrefPhyPage(pt[ptIdx].paddr); memset(&pt[ptIdx], 0, sizeof(struct pte)); } assert(!pageUnmap((vaddr_t)pt)); memset(&pd[pdIdx], 0, sizeof(struct pde)); unrefPhyPage(ptAddr); } areaFree((vaddr_t)pt); return 0; } int pagingCopyKernelSpace(vaddr_t destVaddrPD, paddr_t destPaddrPD, vaddr_t srcVaddrPD) { struct pde *src_pd = (struct pde *)srcVaddrPD; struct pde *dest_pd = (struct pde *)destVaddrPD; struct pde mirror_pde; uint index_in_pd; /* Fill destination PD with zeros */ memset((void *)destVaddrPD, 0x0, PAGE_SIZE); /* Synchronize it with the master Kernel MMU context. Stop just before the mirroring ! */ for (index_in_pd = 0; index_in_pd < (PAGING_MIRROR_VADDR >> 22); /* 1 PDE = 1 PT = 1024 Pages = 4MB */ index_in_pd++) { /* Copy the master's configuration */ dest_pd[index_in_pd] = src_pd[index_in_pd]; /* We DON'T mark the underlying PT and pages as referenced because all the PD are equivalent in the kernel space: as soon as a page is mapped in the kernel, it is mapped by X address spaces, and as soon as it is unmapped by 1 address space, it is unmapped in all the others. So that for X address spaces, the reference counter will be either 0 or X, and not something else: using the reference counter correctly won't be of any use and would consume some time in updating it. */ } /* Setup the mirroring for the new address space */ mirror_pde.present = TRUE; mirror_pde.write = 1; mirror_pde.user = 0; /* This is a KERNEL PDE */ mirror_pde.pt_addr = (destPaddrPD >> 12); dest_pd[PAGING_MIRROR_VADDR >> 22] = mirror_pde; return 0; }