/* Copyright (C) 2005 David Decotigny Copyright (C) 2000-2004, The KOS team Initially taken from SOS */ #include "assert.h" #include "klibc.h" #include "segment.h" #include "cpu_context.h" /** * Here is the definition of a CPU context for IA32 processors. This * is a Matos/SOS convention, not a specification given by the IA32 * spec. However there is a strong constraint related to the x86 * interrupt handling specification: the top of the stack MUST be * compatible with the 'iret' instruction, ie there must be the * err_code (might be 0), eip, cs and eflags of the destination * context in that order (see Intel x86 specs vol 3, figure 5-4). * * @note IMPORTANT: This definition MUST be consistent with the way * the registers are stored on the stack in * irq_wrappers.S/exception_wrappers.S !!! Hence the constraint above. */ struct cpu_state { /* (Lower addresses) */ /* These are Matos/SOS convention */ uint16_t gs; uint16_t fs; uint16_t es; uint16_t ds; uint16_t cpl0_ss; /* This is ALWAYS the Stack Segment of the Kernel context (CPL0) of the interrupted thread, even for a user thread */ uint16_t alignment_padding; /* unused */ uint32_t edi; uint32_t esi; uint32_t esp; uint32_t ebp; uint32_t ebx; uint32_t edx; uint32_t ecx; uint32_t eax; /* MUST NEVER CHANGE (dependent on the IA32 iret instruction) */ uint32_t error_code; vaddr_t eip; uint32_t cs; /* 32bits according to the specs ! However, the CS register is really 16bits long */ uint32_t eflags; /* (Higher addresses) */ } __attribute__((packed)); /** * The CS value pushed on the stack by the CPU upon interrupt, and * needed by the iret instruction, is 32bits long while the real CPU * CS register is 16bits only: this macro simply retrieves the CPU * "CS" register value from the CS value pushed on the stack by the * CPU upon interrupt. * * The remaining 16bits pushed by the CPU should be considered * "reserved" and architecture dependent. IMHO, the specs don't say * anything about them. Considering that some architectures generate * non-zero values for these 16bits (at least Cyrix), we'd better * ignore them. */ #define GET_CPU_CS_REGISTER_VALUE(pushed_ui32_cs_value) ((pushed_ui32_cs_value)&0xffff) /** * Structure of an interrupted Kernel thread's context */ struct cpu_kstate { struct cpu_state regs; } __attribute__((packed)); /** * THE main operation of a kernel thread. This routine calls the * kernel thread function start_func and calls exit_func when * start_func returns. */ static void core_routine(cpu_kstate_function_arg1_t *start_func, void *start_arg, cpu_kstate_function_arg1_t *exit_func, void *exit_arg) __attribute__((noreturn)); static void core_routine(cpu_kstate_function_arg1_t *start_func, void *start_arg, cpu_kstate_function_arg1_t *exit_func, void *exit_arg) { start_func(start_arg); exit_func(exit_arg); assert(!"The exit function of the thread should NOT return !"); for (;;) ; } int cpu_kstate_init(struct cpu_state **ctxt, cpu_kstate_function_arg1_t *start_func, vaddr_t start_arg, vaddr_t stack_bottom, size_t stack_size, cpu_kstate_function_arg1_t *exit_func, vaddr_t exit_arg) { /* We are initializing a Kernel thread's context */ struct cpu_kstate *kctxt; /* This is a critical internal function, so that it is assumed that the caller knows what he does: we legitimally assume that values for ctxt, start_func, stack_* and exit_func are allways VALID ! */ /* Setup the stack. * * On x86, the stack goes downward. Each frame is configured this * way (higher addresses first): * * - (optional unused space. As of gcc 3.3, this space is 24 bytes) * - arg n * - arg n-1 * - ... * - arg 1 * - return instruction address: The address the function returns to * once finished * - local variables * * The remaining of the code should be read from the end upward to * understand how the processor will handle it. */ vaddr_t tmp_vaddr = stack_bottom + stack_size; uint32_t *stack = (uint32_t *)tmp_vaddr; /* If needed, poison the stack */ #ifdef CPU_STATE_DETECT_UNINIT_KERNEL_VARS memset((void *)stack_bottom, CPU_STATE_STACK_POISON, stack_size); #elif defined(CPU_STATE_DETECT_KERNEL_STACK_OVERFLOW) cpu_state_prepare_detect_kernel_stack_overflow(stack_bottom, stack_size); #endif /* Simulate a call to the core_routine() function: prepare its arguments */ *(--stack) = exit_arg; *(--stack) = (uint32_t)exit_func; *(--stack) = start_arg; *(--stack) = (uint32_t)start_func; *(--stack) = 0; /* Return address of core_routine => force page fault */ /* * Setup the initial context structure, so that the CPU will execute * the function core_routine() once this new context has been * restored on CPU */ /* Compute the base address of the structure, which must be located below the previous elements */ tmp_vaddr = ((vaddr_t)stack) - sizeof(struct cpu_kstate); kctxt = (struct cpu_kstate *)tmp_vaddr; /* Initialize the CPU context structure */ memset(kctxt, 0x0, sizeof(struct cpu_kstate)); /* Tell the CPU context structure that the first instruction to execute will be that of the core_routine() function */ kctxt->regs.eip = (uint32_t)core_routine; /* Setup the segment registers */ kctxt->regs.cs = BUILD_SEGMENT_REG_VALUE(0, FALSE, SEG_KCODE); /* Code */ kctxt->regs.ds = BUILD_SEGMENT_REG_VALUE(0, FALSE, SEG_KDATA); /* Data */ kctxt->regs.es = BUILD_SEGMENT_REG_VALUE(0, FALSE, SEG_KDATA); /* Data */ kctxt->regs.cpl0_ss = BUILD_SEGMENT_REG_VALUE(0, FALSE, SEG_KDATA); /* Stack */ /* fs and gs unused for the moment. */ /* The newly created context is initially interruptible */ kctxt->regs.eflags = (1 << 9); /* set IF bit */ /* Finally, update the generic kernel/user thread context */ *ctxt = (struct cpu_state *)kctxt; return 0; } #if defined(CPU_STATE_DETECT_KERNEL_STACK_OVERFLOW) void cpu_state_prepare_detect_kernel_stack_overflow(const struct cpu_state *ctxt, vaddr_t stack_bottom, size_t stack_size) { (void)ctxt; size_t poison_size = CPU_STATE_DETECT_KERNEL_STACK_OVERFLOW; if (poison_size > stack_size) poison_size = stack_size; memset((void *)stack_bottom, CPU_STATE_STACK_POISON, poison_size); } void cpu_state_detect_kernel_stack_overflow(const struct cpu_state *ctxt, vaddr_t stack_bottom, size_t stack_size) { unsigned char *c; size_t i; /* On Matos/SOS, "ctxt" corresponds to the address of the esp register of the saved context in Kernel mode (always, even for the interrupted context of a user thread). Here we make sure that this stack pointer is within the allowed stack area */ assert(((vaddr_t)ctxt) >= stack_bottom); assert(((vaddr_t)ctxt) + sizeof(struct cpu_kstate) <= stack_bottom + stack_size); /* Check that the bottom of the stack has not been altered */ for (c = (unsigned char *)stack_bottom, i = 0; (i < CPU_STATE_DETECT_KERNEL_STACK_OVERFLOW) && (i < stack_size); c++, i++) { assert(CPU_STATE_STACK_POISON == *c); } } #endif /* ======================================================================= * Public Accessor functions */ vaddr_t cpu_context_get_PC(const struct cpu_state *ctxt) { assert(NULL != ctxt); /* This is the PC of the interrupted context (ie kernel or user context). */ return ctxt->eip; } vaddr_t cpu_context_get_SP(const struct cpu_state *ctxt) { assert(NULL != ctxt); /* On Matos/SOS, "ctxt" corresponds to the address of the esp register of the saved context in Kernel mode (always, even for the interrupted context of a user thread). */ return (vaddr_t)ctxt; } void cpu_context_dump(const struct cpu_state *ctxt) { printf("CPU: eip=%x esp=%x eflags=%x cs=%x ds=%x ss=%x err=%x", (unsigned)ctxt->eip, (unsigned)ctxt, (unsigned)ctxt->eflags, (unsigned)GET_CPU_CS_REGISTER_VALUE(ctxt->cs), (unsigned)ctxt->ds, (unsigned)ctxt->cpl0_ss, (unsigned)ctxt->error_code); }