From 8f6f6cf471d47b04fb9bc9365ab4138e9b29dcd8 Mon Sep 17 00:00:00 2001
From: Mathieu Maret
Date: Sun, 17 Oct 2021 15:37:20 +0200
Subject: [PATCH] WIP: add TSS management

---
 arch/x86/cpu_context.c        | 198 +++++++++++++++++++++++++++++++++-
 arch/x86/cpu_context_switch.S |  20 ++++
 arch/x86/gdt.c                |  34 +++++-
 arch/x86/gdt.h                |   6 +-
 core/cpu_context.h            |  19 +++-
 core/main.c                   |   4 +-
 core/segment.h                |   3 +
 7 files changed, 274 insertions(+), 10 deletions(-)

diff --git a/arch/x86/cpu_context.c b/arch/x86/cpu_context.c
index ea5d673..c3e69d7 100644
--- a/arch/x86/cpu_context.c
+++ b/arch/x86/cpu_context.c
@@ -1,15 +1,16 @@
-/* Copyright (C) 2005 David Decotigny
+/* Copyright (C) 2021 Mathieu Maret
+   Copyright (C) 2005 David Decotigny
    Copyright (C) 2000-2004, The KOS team
 
    Initially taken from SOS
 */
 
+#include "cpu_context.h"
 #include "assert.h"
+#include "gdt.h"
 #include "klibc.h"
 #include "segment.h"
 
-#include "cpu_context.h"
-
 /**
  * Here is the definition of a CPU context for IA32 processors. This
  * is a Matos/SOS convention, not a specification given by the IA32
@@ -76,6 +77,22 @@ struct cpu_kstate {
     struct cpu_state regs;
 } __attribute__((packed));
 
+/**
+ * Structure of an interrupted user thread's context. This is almost
+ * the same as a kernel context, except that 2 additional values are
+ * pushed on the stack before the eflags/cs/eip of the interrupted
+ * context: the stack configuration of the interrupted user context.
+ *
+ * @see Section 6.4.1 of Intel x86 vol 1
+ */
+struct cpu_ustate {
+    struct cpu_state regs;
+    struct {
+        uint32_t cpl3_esp;
+        uint16_t cpl3_ss;
+    };
+} __attribute__((packed));
+
 /**
  * THE main operation of a kernel thread. This routine calls the
  * kernel thread function start_func and calls exit_func when
@@ -96,6 +113,123 @@ static void core_routine(cpu_kstate_function_arg1_t *start_func, void *start_arg
         ;
 }
 
+/*
+ * Structure of a Task State Segment on the x86 architecture.
+ *
+ * @see Intel x86 spec vol 3, figure 6-2
+ *
+ * @note Such a data structure should not cross any page boundary (see
+ * end of section 6.2.1 of Intel spec vol 3). This is the reason why
+ * we tell gcc to align it on a 128B boundary (its size is 104B, which
+ * is <= 128).
+ */
+struct x86_tss {
+
+    /**
+     * Intel provides a way for a task to switch automatically to
+     * another one (hardware task switching). In this case, the
+     * back_link field stores the source TSS of the context switch.
+     * This makes it easy to implement coroutines, task backtracking,
+     * ... In Matos/SOS we don't use the TSS for context switching, so
+     * we always ignore this field.
+     * (+0)
+     */
+    uint16_t back_link;
+
+    uint16_t reserved1;
+
+    /* CPL0 saved context. (+4) */
+    vaddr_t esp0;
+    uint16_t ss0;
+
+    uint16_t reserved2;
+
+    /* CPL1 saved context. (+12) */
+    vaddr_t esp1;
+    uint16_t ss1;
+
+    uint16_t reserved3;
+
+    /* CPL2 saved context. (+20) */
+    vaddr_t esp2;
+    uint16_t ss2;
+
+    uint16_t reserved4;
+
+    /* Interrupted context's saved registers.
+       (+28) */
+    vaddr_t cr3;
+    vaddr_t eip;
+    uint32_t eflags;
+    uint32_t eax;
+    uint32_t ecx;
+    uint32_t edx;
+    uint32_t ebx;
+    uint32_t esp;
+    uint32_t ebp;
+    uint32_t esi;
+    uint32_t edi;
+
+    /* +72 */
+    uint16_t es;
+    uint16_t reserved5;
+
+    /* +76 */
+    uint16_t cs;
+    uint16_t reserved6;
+
+    /* +80 */
+    uint16_t ss;
+    uint16_t reserved7;
+
+    /* +84 */
+    uint16_t ds;
+    uint16_t reserved8;
+
+    /* +88 */
+    uint16_t fs;
+    uint16_t reserved9;
+
+    /* +92 */
+    uint16_t gs;
+    uint16_t reserved10;
+
+    /* +96 */
+    uint16_t ldtr;
+    uint16_t reserved11;
+
+    /* +100 */
+    uint16_t debug_trap_flag : 1;
+    uint16_t reserved12 : 15;
+    uint16_t iomap_base_addr;
+
+    /* 104 */
+} __attribute__((packed, aligned(128)));
+
+static struct x86_tss kernel_tss;
+
+int cpu_context_subsystem_setup()
+{
+    /* Reset the kernel TSS */
+    memset(&kernel_tss, 0x0, sizeof(kernel_tss));
+
+    /**
+     * Now set up the kernel TSS.
+     *
+     * Considering the privilege change method we chose (cpl3 -> cpl0
+     * through a software interrupt), we don't need to initialize a
+     * full-fledged TSS. See section 6.4.1 of Intel x86 vol 1. Actually,
+     * only correct values for the kernel esp and ss are required (the
+     * "ss0" and "esp0" fields). Since esp0 will have to be updated at
+     * privilege change time, we don't have to set it up now.
+     */
+    kernel_tss.ss0 = BUILD_SEGMENT_REG_VALUE(0, FALSE, SEG_KDATA);
+
+    /* Register this TSS into the gdt */
+    gdtRegisterTSS((vaddr_t)&kernel_tss);
+
+    return 0;
+}
+
 int cpu_kstate_init(struct cpu_state **ctxt, cpu_kstate_function_arg1_t *start_func,
                     vaddr_t start_arg, vaddr_t stack_bottom, size_t stack_size,
                     cpu_kstate_function_arg1_t *exit_func, vaddr_t exit_arg)
@@ -213,6 +347,30 @@ void cpu_state_detect_kernel_stack_overflow(const struct cpu_state *ctxt, vaddr_
 /* =======================================================================
  * Public Accessor functions
  */
+int cpu_context_is_in_user_mode(const struct cpu_state *ctxt)
+{
+    /* An interrupted user thread has its CS register set to that of the
+       user code segment */
+    switch (GET_CPU_CS_REGISTER_VALUE(ctxt->cs)) {
+    case BUILD_SEGMENT_REG_VALUE(3, FALSE, SEG_UCODE):
+        return TRUE;
+        break;
+
+    case BUILD_SEGMENT_REG_VALUE(0, FALSE, SEG_KCODE):
+        return FALSE;
+        break;
+
+    default:
+        pr_err("Invalid saved context code segment register: 0x%x (k=%x, u=%x)!",
+               (unsigned)GET_CPU_CS_REGISTER_VALUE(ctxt->cs),
+               BUILD_SEGMENT_REG_VALUE(0, FALSE, SEG_KCODE),
+               BUILD_SEGMENT_REG_VALUE(3, FALSE, SEG_UCODE));
+        break;
+    }
+
+    /* Should never get here */
+    return -1;
+}
 
 vaddr_t cpu_context_get_PC(const struct cpu_state *ctxt)
 {
@@ -240,3 +398,37 @@ void cpu_context_dump(const struct cpu_state *ctxt)
            (unsigned)GET_CPU_CS_REGISTER_VALUE(ctxt->cs), (unsigned)ctxt->ds,
            (unsigned)ctxt->cpl0_ss, (unsigned)ctxt->error_code);
 }
+
+/* *************************************************************
+ * Function to manage the TSS. This function is not really "public":
+ * it is reserved for the assembler routines defined in
+ * cpu_context_switch.S
+ *
+ * Update the kernel stack address so that IRQs, syscalls and
+ * exceptions return to a correct stack location when coming back
+ * into kernel mode.
+ */
+void cpu_context_update_kernel_tss(struct cpu_state *next_ctxt)
+{
+    /* Does next_ctxt correspond to an interrupted user thread? */
+    if (cpu_context_is_in_user_mode(next_ctxt)) {
+        /*
+         * Yes: "next_ctxt" is an interrupted user thread => we are
+         * going to switch to user mode! Set up the stack address so
+         * that the user thread "next_ctxt" can come back to the
+         * correct stack location when returning to kernel mode.
+         *
+         * This stack location corresponds to the SP of the next user
+         * thread once its context has been transferred on the CPU, ie
+         * once the CPU has executed all the pop/iret instructions of
+         * the context switch with privilege change.
+         */
+        kernel_tss.esp0 = ((vaddr_t)next_ctxt) + sizeof(struct cpu_ustate);
+        /* Note: no need to protect this against IRQs, because IRQs
+           are not allowed to update it by themselves and are not
+           allowed to block */
+    } else {
+        /* No: no need to update the kernel TSS when we stay in
+           kernel mode */
+    }
+}
diff --git a/arch/x86/cpu_context_switch.S b/arch/x86/cpu_context_switch.S
index f26c376..064cc68 100644
--- a/arch/x86/cpu_context_switch.S
+++ b/arch/x86/cpu_context_switch.S
@@ -2,6 +2,17 @@
 
 .text
 
+/**
+ * C function called by the routines below in order to tell the CPU
+ * where the kernel stack will be (needed by the interrupt handlers)
+ * when next_ctxt comes back into kernel mode.
+ *
+ * void cpu_context_update_kernel_tss(struct cpu_state *next_ctxt)
+ *
+ * @see end of cpu_context.c
+ */
+.extern cpu_context_update_kernel_tss
+
 .globl cpu_context_switch
 .type cpu_context_switch, @function
 
@@ -32,6 +43,15 @@ cpu_context_switch:
     /* This is the proper context switch ! We change the stack here */
     movl 68(%esp), %esp
 
+    /* Prepare the kernel TSS in case we are switching to a user
+       thread: we make sure that we will come back into the kernel at
+       a correct stack location */
+    pushl %esp /* Pass the location of the context we are
+                  restoring to the function */
+    call cpu_context_update_kernel_tss
+
+    addl $4, %esp
+
     /* Restore the CPU context */
     popw %gs
     popw %fs
diff --git a/arch/x86/gdt.c b/arch/x86/gdt.c
index 7737611..769eae1 100644
--- a/arch/x86/gdt.c
+++ b/arch/x86/gdt.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 2004 David Decotigny
+/* Copyright (C) 2021 Mathieu Maret
+   Copyright (C) 2004 David Decotigny
    Copyright (C) 1999 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or
@@ -17,7 +18,6 @@
    USA.
 */
 #include "segment.h"
-
 #include "gdt.h"
 
 /**
@@ -111,6 +111,10 @@ static struct x86_segment_descriptor gdt[] = {
     },
     [SEG_KCODE] = BUILD_GDTE(0, 1),
     [SEG_KDATA] = BUILD_GDTE(0, 0),
+    [SEG_UCODE] = BUILD_GDTE(3, 1),
+    [SEG_UDATA] = BUILD_GDTE(3, 0),
+    [SEG_K_TSS] = {0,}, // Used by syscalls and IRQs while in user space;
+                        // initialized by gdtRegisterTSS
 };
 
 int gdtSetup(void)
@@ -144,3 +148,29 @@ int gdtSetup(void)
 
     return 0;
 }
+
+int gdtRegisterTSS(vaddr_t tss_vaddr)
+{
+    uint16_t regval_tss;
+
+    /* Initialize the GDT entry */
+    gdt[SEG_K_TSS] = (struct x86_segment_descriptor){
+        .limit_15_0 = 0x67, /* See Intel x86 vol 3 section 6.2.2 */
+        .base_paged_addr_15_0 = (tss_vaddr) & 0xffff,
+        .base_paged_addr_23_16 = (tss_vaddr >> 16) & 0xff,
+        .segment_type = 0x9, /* See Intel x86 vol 3 figure 6-3 */
+        .descriptor_type = 0, /* (idem) */
+        .dpl = 3, /* Allowed for CPL3 tasks */
+        .present = 1,
+        .limit_19_16 = 0, /* Size of a TSS is < 2^16 ! */
+        .custom = 0, /* Unused */
+        .op_size = 0, /* See Intel x86 vol 3 figure 6-3 */
+        .granularity = 0, /* limit is in bytes */
+        .base_paged_addr_31_24 = (tss_vaddr >> 24) & 0xff};
+
+    /* Load the TSS register into the processor */
+    regval_tss = BUILD_SEGMENT_REG_VALUE(0, FALSE, SEG_K_TSS);
+    asm("ltr %0" : : "r"(regval_tss));
+
+    return 0;
+}
diff --git a/arch/x86/gdt.h b/arch/x86/gdt.h
index d61885a..2e7792f 100644
--- a/arch/x86/gdt.h
+++ b/arch/x86/gdt.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 2004 David Decotigny
+/* Copyright (C) 2021 Mathieu Maret
+   Copyright (C) 2004 David Decotigny
    Copyright (C) 1999 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or
@@ -17,7 +18,7 @@
    USA.
 */
 #pragma once
-
+#include "types.h"
 /**
  * @file gdt.h
 *
@@ -34,3 +35,4 @@
  * address space (ie "flat" virtual space).
  */
 int gdtSetup(void);
+int gdtRegisterTSS(vaddr_t tss_vaddr);
diff --git a/core/cpu_context.h b/core/cpu_context.h
index d719a1d..9d141cc 100644
--- a/core/cpu_context.h
+++ b/core/cpu_context.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 2005 David Decotigny
+/* Copyright (C) 2021 Mathieu Maret
+   Copyright (C) 2005 David Decotigny
    Copyright (C) 2000-2004, The KOS team
 
    This program is free software; you can redistribute it and/or
@@ -84,6 +85,11 @@ int cpu_kstate_init(struct cpu_state **kctxt, cpu_kstate_function_arg1_t *start_
                     vaddr_t start_arg, vaddr_t stack_bottom, size_t stack_size,
                     cpu_kstate_function_arg1_t *exit_func, vaddr_t exit_arg);
 
+/**
+ * Prepare the system to deal with multiple CPU execution contexts
+ */
+int cpu_context_subsystem_setup();
+
 /**
 * Function that performs an immediate context-switch from one
 * kernel/user thread to another one. It stores the current executing
@@ -119,6 +125,14 @@ void cpu_context_exit_to(struct cpu_state *switch_to_ctxt,
 * Public Accessor functions
 */
 
+/**
+ * Return whether the saved context was interrupted in kernel or user mode
+ *
+ * @return TRUE when the context was interrupted while in user mode,
+ * FALSE when in kernel mode, < 0 on error.
+ */
+int cpu_context_is_in_user_mode(const struct cpu_state *ctxt);
+
 /**
 * Return Program Counter stored in the saved kernel/user context
 */
@@ -181,7 +195,8 @@ void cpu_state_detect_kernel_stack_overflow(const struct cpu_state *ctxt,
                                             vaddr_t kernel_stack_bottom,
                                             size_t kernel_stack_size);
 #else
-#define cpu_state_prepare_detect_kernel_stack_overflow(ctxt, stkbottom, stksize) ({/* nop \
+#define cpu_state_prepare_detect_kernel_stack_overflow(ctxt, stkbottom, stksize) \
+    ({/* nop \
       */})
 #define cpu_state_detect_kernel_stack_overflow(ctxt, stkbottom, stksize) ({/* nop */})
 #endif
diff --git a/core/main.c b/core/main.c
index 0aa58c7..cda08ef 100644
--- a/core/main.c
+++ b/core/main.c
@@ -129,6 +129,7 @@ void kmain(unsigned long magic, unsigned long addr)
             // Turns out linux and windows do the same !
             // https://lore.kernel.org/lkml/MWHPR21MB159330952629D36EEDE706B3D7379@MWHPR21MB1593.namprd21.prod.outlook.com/
             if (mmap[i].addr < 0x100000) {
+                printf(" -> skipping\n");
                 continue;
             }
             memAddBank(max(mmap[i].addr, (multiboot_uint64_t)lastUsedByMem),
@@ -161,7 +162,8 @@ void kmain(unsigned long magic, unsigned long addr)
 
     printf("[Setup] allocation system\n");
     areaInit(firstUsedByMem, lastUsedByMem);
-    //allocSetup();
+
+    cpu_context_subsystem_setup();
 
     printf("[Setup] thread system\n");
     kthreadSetup(_stack_bottom, (_stack_top - _stack_bottom + 1));
diff --git a/core/segment.h b/core/segment.h
index a548b1c..bc6d609 100644
--- a/core/segment.h
+++ b/core/segment.h
@@ -39,6 +39,9 @@
 #define SEG_NULL 0  /* NULL segment, unused by the procesor */
 #define SEG_KCODE 1 /* Kernel code segment */
 #define SEG_KDATA 2 /* Kernel data segment */
+#define SEG_UCODE 3 /* User code segment */
+#define SEG_UDATA 4 /* User data segment */
+#define SEG_K_TSS 5 /* Kernel TSS for privilege change (user to kernel) */
 
 /**
 * Helper macro that builds a segment register's value
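
Note, not part of the patch: the (+N) offset annotations in struct
x86_tss can be pinned down at compile time. A minimal sketch, assuming
a C11 compiler (for _Static_assert) and placement right after the
struct definition in cpu_context.c; the expected values come from the
annotations above and from Intel x86 vol 3, figure 6-2:

#include <stddef.h>

/* Layout checks for struct x86_tss: the struct is packed, so every
   offset is exactly as annotated; aligned(128) pads the 104B of
   content up to 128B. */
_Static_assert(offsetof(struct x86_tss, esp0) == 4, "esp0 expected at +4");
_Static_assert(offsetof(struct x86_tss, cr3) == 28, "cr3 expected at +28");
_Static_assert(offsetof(struct x86_tss, es) == 72, "es expected at +72");
_Static_assert(offsetof(struct x86_tss, ldtr) == 96, "ldtr expected at +96");
_Static_assert(offsetof(struct x86_tss, iomap_base_addr) == 102,
               "I/O map base expected at +102");
_Static_assert(sizeof(struct x86_tss) == 128,
               "104B of content padded to the 128B alignment");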
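
Worked example for the selector loaded by gdtRegisterTSS: assuming
BUILD_SEGMENT_REG_VALUE follows the standard x86 selector layout
((index << 3) | (ti << 2) | rpl), BUILD_SEGMENT_REG_VALUE(0, FALSE,
SEG_K_TSS) evaluates to (5 << 3) | (0 << 2) | 0 = 0x28: GDT index 5
(SEG_K_TSS), TI=0 (descriptor taken from the GDT, not an LDT), RPL=0.
This is the value that "ltr" stores in the task register.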
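
The natural follow-up to this WIP is creating the first user thread.
The sketch below is hypothetical: cpu_ustate_init() does not exist in
this patch, and the struct cpu_state field names (eip, eflags, cs, ds)
are only assumed from the accessors used in cpu_context.c. It shows how
struct cpu_ustate and cpu_context_update_kernel_tss are meant to
cooperate: the initial frame sits at the top of the kernel stack, and
the pop/iret sequence of cpu_context_switch.S then drops to CPL3.

/* Hypothetical sketch: build the initial context of a user thread on
   its kernel stack, so that restoring it with cpu_context_switch()
   ends with an iret performing a privilege change down to CPL3. */
int cpu_ustate_init(struct cpu_state **ctxt, vaddr_t user_pc, vaddr_t user_sp,
                    vaddr_t stack_bottom, size_t stack_size)
{
    /* Reserve the frame at the top (highest addresses) of the kernel
       stack: regs first, then the cpl3_esp/cpl3_ss popped by iret */
    struct cpu_ustate *ustate =
        (struct cpu_ustate *)(stack_bottom + stack_size
                              - sizeof(struct cpu_ustate));

    memset(ustate, 0x0, sizeof(struct cpu_ustate));

    /* CPL3 segments: the final iret of the context switch will enter
       user mode */
    ustate->regs.cs = BUILD_SEGMENT_REG_VALUE(3, FALSE, SEG_UCODE);
    ustate->regs.ds = BUILD_SEGMENT_REG_VALUE(3, FALSE, SEG_UDATA);
    ustate->regs.eip = user_pc;
    ustate->regs.eflags = (1 << 9); /* IF set: IRQs enabled in user mode */

    /* The 2 extra values popped by an iret with privilege change
       (see struct cpu_ustate in cpu_context.c) */
    ustate->cpl3_ss = BUILD_SEGMENT_REG_VALUE(3, FALSE, SEG_UDATA);
    ustate->cpl3_esp = user_sp;

    *ctxt = &ustate->regs;
    return 0;
}

With such a frame, the assignment kernel_tss.esp0 = (vaddr_t)next_ctxt
+ sizeof(struct cpu_ustate) made by cpu_context_update_kernel_tss
points exactly one byte past cpl3_ss, i.e. the kernel stack is empty
again by the time the CPU pushes ss/esp/eflags/cs/eip for the next
interrupt or syscall taken in user mode.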