-
Notifications
You must be signed in to change notification settings - Fork 2
15. System Calls API Setup
With the user mode execution mechanism we enabled in the last chapter, it is time to implement a framework for the system calls API. Our design is to put user programs in a separate folder user/
apart from kernel src/
and build user programs into independent ELF binaries. The kernel defines a set of syscalls, each syscall having a unique syscall number and expected arguments. The user side implements a set of user library functions that wrap over these syscalls. User programs link with the user library to run on Hux.
I will use a hello()
syscall as an example throughout this chapter.
Scan through them before going forth:
- System Calls pages: "Possible Methods" section - we pass arguments on user stack
- Syscalls API code implementation of xv6 ✭
We pass arguments on the user stack. It is worth making some argument parsing helper functions @ src/interrupt/syscall.c
:
/** Helpers for getting something from user memory address. */
/**
 * Read a signed 32-bit integer from user address `addr` into `*ret`.
 *
 * The whole 4-byte word must lie between the running process's
 * `stack_low` and `USER_MAX`. Returns true on success; on an invalid
 * address a warning is logged, `*ret` is left untouched and false is
 * returned.
 */
bool
sysarg_addr_int(uint32_t addr, int32_t *ret)
{
    process_t *proc = running_proc();

    /**
     * Compare against `USER_MAX - 4` rather than computing `addr + 4`:
     * the sum wraps around for addresses close to the top of the 32-bit
     * space and would let such addresses slip through the check.
     */
    if (addr < proc->stack_low || addr > USER_MAX - 4) {
        warn("sysarg_addr_int: invalid arg addr %p for %s", addr, proc->name);
        return false;
    }

    *ret = *((int32_t *) addr);
    return true;
}
/**
 * Read an unsigned 32-bit integer from user address `addr` into `*ret`.
 *
 * The whole 4-byte word must lie between the running process's
 * `stack_low` and `USER_MAX`. Returns true on success; on an invalid
 * address a warning is logged, `*ret` is left untouched and false is
 * returned.
 */
bool
sysarg_addr_uint(uint32_t addr, uint32_t *ret)
{
    process_t *proc = running_proc();

    /**
     * Compare against `USER_MAX - 4` rather than computing `addr + 4`:
     * the sum wraps around for addresses close to the top of the 32-bit
     * space and would let such addresses slip through the check.
     */
    if (addr < proc->stack_low || addr > USER_MAX - 4) {
        warn("sysarg_addr_uint: invalid arg addr %p for %s", addr, proc->name);
        return false;
    }

    *ret = *((uint32_t *) addr);
    return true;
}
/**
 * Validate a user buffer of `len` bytes starting at `addr` and, if it is
 * acceptable, store its start in `*mem`.
 *
 * The buffer is rejected when it starts outside [USER_BASE, USER_MAX),
 * runs past USER_MAX, or touches the gap between the process's
 * `heap_high` and `stack_low`. Returns true on success; otherwise logs
 * a warning, leaves `*mem` untouched and returns false.
 */
bool
sysarg_addr_mem(uint32_t addr, char **mem, size_t len)
{
    process_t *proc = running_proc();

    /**
     * Bound `len` by the distance to USER_MAX before forming the end
     * address: a plain `addr + len` can wrap around 32 bits and make an
     * oversized buffer look like it ends at a low, valid address.
     */
    bool valid = addr >= USER_BASE && addr < USER_MAX
                 && len <= USER_MAX - addr;

    if (valid) {
        uint32_t end = addr + (uint32_t) len;   /** Cannot wrap here. */

        valid = !((addr >= proc->heap_high && addr < proc->stack_low)
                  || (end > proc->heap_high && end <= proc->stack_low)
                  || (addr < proc->heap_high && end > proc->heap_high));
    }

    if (!valid) {
        warn("sysarg_addr_mem: invalid mem %p w/ len %d for %s",
             addr, len, proc->name);
        return false;
    }

    *mem = (char *) addr;
    return true;
}
/**
 * Validate a null-terminated user string starting at `addr` and, if it
 * is acceptable, store its start in `*str`.
 *
 * The start must lie in [USER_BASE, USER_MAX) and outside the gap
 * between the process's `heap_high` and `stack_low`. The terminator is
 * searched for up to `heap_high` when the string starts below it, and
 * up to USER_MAX otherwise.
 *
 * Returns the string length (terminator excluded) on success, or -1 if
 * the start address is invalid or no terminator is found in bounds.
 */
int32_t
sysarg_addr_str(uint32_t addr, char **str)
{
    process_t *proc = running_proc();

    if (addr >= USER_MAX || addr < USER_BASE
        || (addr >= proc->heap_high && addr < proc->stack_low)) {
        /** Tag the warning with this function's own name. */
        warn("sysarg_addr_str: invalid str %p for %s",
             addr, proc->name);
        return -1;
    }

    char *start = (char *) addr;
    char *bound = addr < proc->heap_high ? (char *) proc->heap_high
                                         : (char *) USER_MAX;

    for (char *c = start; c < bound; ++c) {
        if (*c == '\0') {
            *str = start;
            return c - start;
        }
    }

    /** Ran into the region bound without seeing a terminator. */
    return -1;
}
/**
* Get syscall arguments on the user stack.
* - state->esp is the current user ESP;
* - 0(state->esp) is the return address, so skip;
* - starting from 4(state->esp) are the arguments, from stack
* bottom -> top are user lib arguments from left -> right.
*/
/**
 * Fetch syscall argument number `n` (counting from 0) as a signed
 * 32-bit integer into `*ret`.
 *
 * The argument is read from the trapped ESP + 4 + 4 * n, the first
 * slot being skipped because it holds the return address. Returns
 * whatever `sysarg_addr_int()` reports for that address.
 */
bool
sysarg_get_int(int8_t n, int32_t *ret)
{
    process_t *cur = running_proc();

    return sysarg_addr_int((cur->trap_state->esp) + 4 + (4 * n), ret);
}
/**
 * Fetch syscall argument number `n` (counting from 0) as an unsigned
 * 32-bit integer into `*ret`.
 *
 * The argument is read from the trapped ESP + 4 + 4 * n. Returns
 * whatever `sysarg_addr_uint()` reports for that address.
 */
bool
sysarg_get_uint(int8_t n, uint32_t *ret)
{
    process_t *cur = running_proc();

    return sysarg_addr_uint((cur->trap_state->esp) + 4 + (4 * n), ret);
}
/**
 * Treat syscall argument number `n` (counting from 0) as a pointer to a
 * byte array of `len` bytes and hand it back through `*mem`.
 *
 * Returns false if the argument itself cannot be fetched (a warning is
 * logged here) or if `sysarg_addr_mem()` rejects the pointed-to range.
 */
bool
sysarg_get_mem(int8_t n, char **mem, size_t len)
{
    uint32_t user_ptr;

    if (sysarg_get_int(n, (int32_t *) &user_ptr))
        return sysarg_addr_mem(user_ptr, mem, len);

    warn("sysarg_get_mem: inner sysarg_get_int failed");
    return false;
}
/**
 * Treat syscall argument number `n` (counting from 0) as a pointer to a
 * null-terminated string and hand it back through `*str`.
 *
 * Returns the value of `sysarg_addr_str()` for that pointer (the parsed
 * length, or -1), and -1 with a warning if the argument itself cannot
 * be fetched.
 */
int32_t
sysarg_get_str(int8_t n, char **str)
{
    uint32_t user_ptr;

    if (sysarg_get_int(n, (int32_t *) &user_ptr))
        return sysarg_addr_str(user_ptr, str);

    warn("sysarg_get_str: inner sysarg_get_int failed");
    return -1;
}
// src/interrupt/syscall.h
/**
 * Address-based helpers: validate a user-space address and read from it.
 * The bool variants return true on success; `sysarg_addr_str` returns
 * the string length, or -1 on failure.
 */
bool sysarg_addr_int(uint32_t addr, int32_t *ret);
bool sysarg_addr_uint(uint32_t addr, uint32_t *ret);
bool sysarg_addr_mem(uint32_t addr, char **mem, size_t len);
int32_t sysarg_addr_str(uint32_t addr, char **str);
/**
 * Argument-based helpers: fetch the n-th (starting from 0-th) syscall
 * argument from the user stack, with the same return conventions as
 * the address-based helpers above.
 */
bool sysarg_get_int(int8_t n, int32_t *ret);
bool sysarg_get_uint(int8_t n, uint32_t *ret);
bool sysarg_get_mem(int8_t n, char **mem, size_t len);
int32_t sysarg_get_str(int8_t n, char **str);
Our central ISR handler stub is not treating syscall traps correctly yet. We will let it invoke a syscall()
wrapper function which checks EAX for the syscall number and dispatches the request to the corresponding end-point handler.
Add these definitions @ src/interrupt/syscall.h
:
/**
 * Individual syscall handler type: void -> int32_t. Handlers take no C
 * parameters; they fetch their arguments through the sysarg helpers.
 */
typedef int32_t (*syscall_t)(void);
/** Syscall unsuccessful return code. */
#define SYS_FAIL_RC (-1)
/** Centralized syscall entry; takes the interrupt state of the trap. */
void syscall(interrupt_state_t *state);
/** List of known syscall numbers. */
#define SYSCALL_HELLO 1
Write a central handler entry wrapper @ src/interrupt/syscall.c
:
/**
 * Centralized syscall handler entry.
 *   - The trap state holds the syscall number in register EAX
 *     and the arguments on user stack;
 *   - Returns a return code back to register EAX.
 *
 * Numbers that are non-positive, not below NUM_SYSCALLS, or that have
 * no handler registered are answered with SYS_FAIL_RC after a warning.
 *
 * User syscall library should do syscalls following this rule.
 */
void
syscall(interrupt_state_t *state)
{
    int32_t syscall_no = state->eax;

    if (syscall_no <= 0 || syscall_no >= NUM_SYSCALLS) {
        warn("syscall: unknown syscall number %d", syscall_no);
        state->eax = SYS_FAIL_RC;
        return;
    }

    /** Look the handler up once; the number is known to be in range. */
    syscall_t handler = syscall_handlers[syscall_no];
    if (handler == NULL) {
        warn("syscall: null handler for syscall # %d", syscall_no);
        state->eax = SYS_FAIL_RC;
        return;
    }

    state->eax = handler();
}
We will implement the syscalls in separate folders, for example, process operations go in src/process/sysproc.c
, user heap memory operations go in src/memory/sysmem.c
, etc. A simple hello syscall would look like this @ src/process/sysproc.c
:
/**
 * int32_t hello(int32_t num, char *mem, int32_t len, char *str);
 *
 * Demo syscall: validates its four arguments and prints them together
 * with the name of the calling process. Returns 0 on success and
 * SYS_FAIL_RC if any argument is invalid or `len` is not positive.
 */
int32_t
syscall_hello(void)
{
    int32_t num, len;
    char *mem, *str;

    if (!sysarg_get_int(0, &num))
        return SYS_FAIL_RC;
    /** `len` (argument 2) is needed before `mem` (argument 1) can be checked. */
    if (!sysarg_get_int(2, &len))
        return SYS_FAIL_RC;
    if (len <= 0)
        return SYS_FAIL_RC;
    if (!sysarg_get_mem(1, &mem, len))
        return SYS_FAIL_RC;
    if (sysarg_get_str(3, &str) < 0)
        return SYS_FAIL_RC;

    process_t *proc = running_proc();
    printf("From syscall_hello handler: Hello, %s!\n", proc->name);
    printf(" num: %d, mem[0]: %c, str: %s\n", num, mem[0], str);
    return 0;
}
// src/process/sysproc.h
/** Handler of the `hello` demo syscall; matches the `syscall_t` shape. */
int32_t syscall_hello(void);
Maintain an array of function pointers of type syscall_t
@ src/interrupt/syscall.c
:
/**
 * Array of individual handlers: void -> int32_t functions, indexed by
 * syscall number. Slots without an initializer are NULL.
 *
 * Uses the standard C99 designated-initializer form `[index] = value`;
 * the form without `=` is an obsolete GNU extension.
 */
static syscall_t syscall_handlers[] = {
    [SYSCALL_HELLO] = syscall_hello
};
/** Number of slots in the handler table (largest syscall number + 1). */
#define NUM_SYSCALLS ((int32_t) (sizeof(syscall_handlers) / sizeof(syscall_t)))
The interrupt handler stub needs to recognize the syscall trap number and invoke syscall()
. Modifications to the handler stub @ src/interrupt/isr.c
:
/**
 * Dump the given interrupt state to the terminal: the running process,
 * the interrupt number and error code, and the EAX, EIP, ESP, DS, CS,
 * SS and EFLAGS values recorded in the state.
 */
static void
_print_interrupt_state(interrupt_state_t *state)
{
    info("interrupt state:");

    process_t *current = running_proc();
    printf(" Current process: %d - %s\n", current->pid, current->name);

    printf(" INT#: %d ERR_CODE: %#010X\n", state->int_no, state->err_code);
    printf(" EAX: %#010X EIP: %#010X ESP: %#010X\n", state->eax,
           state->eip, state->esp);
    printf(" DS: %#010X CS: %#010X SS: %#010X\n", state->ds,
           state->cs, state->ss);
    printf(" EFLAGS: %#010X\n", state->eflags);
}
/**
 * ISR handler written in C.
 *
 * Receives a pointer to a structure of interrupt state and dispatches
 * on the interrupt number: exceptions (0 - 31), IRQ-translated device
 * interrupts (32 - 47), the syscall trap, or anything else (reported
 * as an error). May modify the interrupt state through this pointer if
 * necessary.
 */
void
isr_handler(interrupt_state_t *state)
{
    uint8_t int_no = state->int_no;

    /** An exception interrupt. */
    if (int_no <= 31) {
        if (isr_table[int_no] != NULL)
            isr_table[int_no](state);
        else {
            /** No actual ISR is registered: dump state and report. */
            _print_interrupt_state(state);
            error("isr: missing handler for exception (fault) # %#x", int_no);
        }
        return;
    }

    /** An IRQ-translated interrupt from external device. */
    if (int_no <= 47) {
        uint8_t irq_no = state->int_no - 32;

        if (isr_table[int_no] != NULL)
            isr_table[int_no](state);
        else {
            _print_interrupt_state(state);
            error("isr: missing handler for device interrupt # %#x", int_no);
        }

        /** Send back EOI signal to PIC on either path above. */
        _pic_send_eoi(irq_no);
        return;
    }

    /** Syscall trap. */
    if (int_no == INT_NO_SYSCALL) {
        /** Point proc->trap_state to this trap. */
        running_proc()->trap_state = state;

        /**
         * Call the syscall handler in `syscall.c`.
         *
         * Interrupt state contains the syscall number in EAX and the
         * arguments on the user stack. Returns an integer return code
         * back to EAX.
         */
        syscall(state);
        return;
    }

    /** Unknown interrupt number. */
    _print_interrupt_state(state);
    error("isr: caught unknown interrupt # %#x", int_no);
}
It is better to have all user-side code in a separate user/
folder than mixing them with the kernel src/
. In the last few chapters, the init
process I used as a demonstration is put under src/process/
, which does not make too much sense. Reorganize the folder structure into:
src/
- ...
- kernel.c
user/
- lib/
- syscall.s
- syscall.h
- syslist.s
- init.c (using C code now)
- ... (other user programs)
This way, the user side is both logically and physically separated from the kernel side in our source tree ✭. Many other toy OS projects tend to mix everything together and have a flat folder structure, which I think is not a good practice.
The user syscall library is simply a set of wrappers over all the available syscalls that the system provides.
First, make a list that duplicates the syscall number definitions (instead of directly including kernel headers, again for better user/kernel separation). The list is written in GAS syntax constants @ user/lib/syslist.s
:
/**
 * Syscall trap gate number.
 * NOTE(review): presumably must equal the kernel's INT_NO_SYSCALL, whose
 * value is not visible here - confirm.
 */
INT_NO_SYSCALL = 64
/**
 * List of known syscalls. These numbers duplicate the SYSCALL_* macros
 * of the kernel's `syscall.h` and must be kept in sync with them.
 */
SYSCALL_HELLO = 1
The wrapper implementations will be exactly the same for each syscall (except the syscall number and function name), so we use an assembly macro to do that for us. Code @ user/lib/syscall.s
:
.include "syslist.s"
/**
 * Using an auto-generation macro, since every syscall expects the same
 * thing of putting arguments on stack, setting EAX to the number, etc.
 *
 * Each expansion defines one global function that loads the syscall
 * number into EAX, traps into the kernel and returns. The arguments are
 * left on the stack where the caller pushed them.
 */
.macro SYSCALL_LIBGEN name, no
.global \name
.type \name, @function
\name:
/** Syscall number goes in EAX. */
movl $\no, %eax
/** Trap into the kernel through the syscall gate. */
int $INT_NO_SYSCALL
/** EAX is not touched here after the trap, so it reaches the caller as-is. */
ret
.endm
/** One wrapper per known syscall. */
SYSCALL_LIBGEN hello, SYSCALL_HELLO
And the user library header is simply externing all the function declarations:
// user/lib/syscall.h
#ifndef SYSCALL_H
#define SYSCALL_H
#include <stdint.h>
/**
 * Externed from ASM `syscall.s`.
 *
 * Be sure that all arguments & returns values are 32-bit values, since
 * Hux parses syscall arguments by simply getting 32-bit values on stack.
 */
/**
 * Demo syscall. `mem` points to a buffer of `len` bytes (`len` must be
 * positive) and `str` to a null-terminated string. The kernel handler
 * returns 0 on success and -1 on failure.
 */
extern int32_t hello(int32_t num, char *mem, int32_t len, char *str);
#endif
Our Makefile also needs to be changed to build the user programs correctly. There are several things we need to take care of:
- It is expected that each user program `xxx.c` under `user/` is an independent program and should be compiled & linked into an independent ELF binary `xxx.bin` (with `init.c` being the only exception).
- For a user program, the linker should expect an entry symbol at `main` (the C main function).
- For a user program, the linker must be set to relocate the text section to `USER_BASE` (with `-Ttext 0x20000000`) - that's where the text section gets loaded in our process virtual address space.
Modifications to the Makefile:
# Kernel sources: every C and assembly file found under ./src/.
C_SOURCES=$(shell find ./src/ -name "*.c")
C_OBJECTS=$(patsubst %.c, %.o, $(C_SOURCES))
S_SOURCES=$(shell find ./src/ -name "*.s")
S_OBJECTS=$(patsubst %.s, %.o, $(S_SOURCES))
# The init program: source, object, linked ELF and flat binary.
INIT_SOURCE=./user/init.c
INIT_OBJECT=./user/init.c.o
INIT_LINKED=./user/init.bin
INIT_BINARY=./user/init
# User library: every C and assembly file found under ./user/lib/.
ULIB_C_SOURCES=$(shell find ./user/lib/ -name "*.c")
ULIB_C_OBJECTS=$(patsubst %.c, %.o, $(ULIB_C_SOURCES))
ULIB_S_SOURCES=$(shell find ./user/lib/ -name "*.s")
ULIB_S_OBJECTS=$(patsubst %.s, %.o, $(ULIB_S_SOURCES))
# Other user programs: C files under ./user/ outside lib/, minus init.c.
USER_SOURCES_ALL=$(shell find ./user/ -name "*.c" ! -path "./user/lib/*")
USER_SOURCES=$(filter-out $(INIT_SOURCE), $(USER_SOURCES_ALL))
USER_OBJECTS=$(patsubst %.c, %.c.o, $(USER_SOURCES))
USER_LINKEDS=$(patsubst %.c, %.bin, $(USER_SOURCES))
# Address passed to '-Ttext' when linking user programs.
ADDRSPACE_USER_BASE=0x20000000
# Toolchain. Kernel C flags are the user flags plus '-fstack-protector';
# linking goes through the gcc driver.
ASM=i686-elf-as
ASM_FLAGS=
CC=i686-elf-gcc
C_FLAGS_USER=-c -Wall -Wextra -ffreestanding -O2 -std=gnu99 -Wno-tautological-compare \
-g -fno-omit-frame-pointer
C_FLAGS=$(C_FLAGS_USER) -fstack-protector
LD=i686-elf-gcc
LD_FLAGS=-ffreestanding -O2 -nostdlib
OBJCOPY=i686-elf-objcopy
OBJDUMP=i686-elf-objdump
# Prefix of the progress messages echoed by the rules.
HUX_MSG="[--Hux->]"
#
# Targets for building.
#
# Everything 'all' builds: kernel objects, user library objects, user
# program binaries, the init program, then the kernel image and the
# 'verify' / 'update' targets.
ALL_DEPS := $(S_OBJECTS) $(C_OBJECTS)
ALL_DEPS += $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS) $(USER_LINKEDS) initproc
ALL_DEPS += kernel verify update
all: $(ALL_DEPS)
# Kernel assembly objects.
$(S_OBJECTS): %.o: %.s
@echo
@echo $(HUX_MSG) "Compiling kernel assembly '$<'..."
$(ASM) $(ASM_FLAGS) -o $@ $<
# Kernel C objects, compiled with the kernel flag set.
$(C_OBJECTS): %.o: %.c
@echo
@echo $(HUX_MSG) "Compiling kernel C code '$<'..."
$(CC) $(C_FLAGS) -o $@ $<
# User programs use more specific rules to build into independent binary.
# User library assembly needs '-I ./user/lib/' to find the included list.
$(ULIB_S_OBJECTS): %.o: %.s
@echo
@echo $(HUX_MSG) "Compiling user lib assembly '$<'..."
$(ASM) $(ASM_FLAGS) -I ./user/lib/ -o $@ $<
$(ULIB_C_OBJECTS): %.o: %.c
@echo
@echo $(HUX_MSG) "Compiling user lib C code '$<'..."
$(CC) $(C_FLAGS_USER) -o $@ $<
# Each user program is compiled, linked against the user library with
# entry symbol 'main' and text at ADDRSPACE_USER_BASE, then stripped of
# debug info.
$(USER_LINKEDS): %.bin: %.c $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS)
@echo
@echo $(HUX_MSG) "Compiling & linking user program '$<'..."
$(CC) $(C_FLAGS_USER) -o $<.o $<
$(LD) $(LD_FLAGS) -e main -Ttext $(ADDRSPACE_USER_BASE) -o $@ \
$<.o $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS)
$(OBJCOPY) --strip-debug $@
# Init process goes separately, to allow later embedding into kernel image.
# Built like the other user programs, plus a final objcopy that turns
# the linked ELF into the flat binary INIT_BINARY.
initproc: $(INIT_SOURCE) $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS)
@echo
@echo $(HUX_MSG) "Compiling & linking user 'init' program..."
$(CC) $(C_FLAGS_USER) -o $(INIT_OBJECT) $(INIT_SOURCE)
$(LD) $(LD_FLAGS) -e main -Ttext $(ADDRSPACE_USER_BASE) -o $(INIT_LINKED) \
$(INIT_OBJECT) $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS)
$(OBJCOPY) --strip-debug $(INIT_LINKED)
$(OBJCOPY) --strip-all -O binary $(INIT_LINKED) $(INIT_BINARY)
# Remember to link 'libgcc'. Embeds the init process binary.
#
# '-lgcc' is placed AFTER the kernel objects: the linker processes its
# inputs in command-line order, so a library named before the objects is
# searched before any reference to it exists. It also stays BEFORE
# '-Wl,-b,binary', which switches the input format for what follows.
kernel: $(S_OBJECTS) $(C_OBJECTS) initproc
	@echo
	@echo $(HUX_MSG) "Linking kernel image..."
	$(LD) $(LD_FLAGS) -T scripts/kernel.ld -o $(TARGET_BIN) \
		-Wl,--oformat,elf32-i386 $(S_OBJECTS) $(C_OBJECTS) -lgcc \
		-Wl,-b,binary,$(INIT_BINARY)
	$(OBJCOPY) --only-keep-debug $(TARGET_BIN) $(TARGET_SYM)
	$(OBJCOPY) --strip-debug $(TARGET_BIN)
# Removes every build product: kernel and user library objects, the init
# program's intermediate and final files, user program objects and
# binaries, and the kernel image / ISO / symbol files.
# NOTE(review): 'all', 'initproc' and 'kernel' do not appear to produce
# files of those names either - consider listing them as .PHONY too.
.PHONY: clean
clean:
@echo
@echo $(HUX_MSG) "Cleaning the build..."
rm -f $(S_OBJECTS) $(C_OBJECTS) $(ULIB_S_OBJECTS) $(ULIB_C_OBJECTS) \
$(INIT_OBJECT) $(INIT_LINKED) $(INIT_BINARY) \
$(USER_OBJECTS) $(USER_LINKEDS) \
$(TARGET_BIN) $(TARGET_ISO) $(TARGET_SYM)
Let's make a syscall from user land into the kernel! If we write an init.c
that invokes the hello()
syscall @ user/init.c
:
/**
 * Entry of the init program: makes one `hello()` syscall with sample
 * arguments and then executes `hlt`.
 */
void
main(void)
{
    char mem[8] = "ABCDEFG";

    /** Arguments: num, a 7-byte buffer with its length, and a string. */
    hello(7913, mem, 7, "This is init!");

    /** Executing `hlt` in user mode raises a general protection fault. */
    asm volatile ( "hlt" );
}
This should produce a terminal window as the following after booting up:
(Notice that after the syscall, executing hlt
in user mode causes a general protection fault 0xd
. In the next chapter, we will be implementing useful syscalls which include exit()
, and by then all user programs must end with an exit()
call.)
Current repo structure:
hux-kernel
├── Makefile
├── scripts
│ ├── gdb_init
│ ├── grub.cfg
│ └── kernel.ld
├── src
│ ├── boot
│ │ ├── boot.s
│ │ ├── elf.h
│ │ └── multiboot.h
│ ├── common
│ │ ├── debug.c
│ │ ├── debug.h
│ │ ├── port.c
│ │ ├── port.h
│ │ ├── printf.c
│ │ ├── printf.h
│ │ ├── string.c
│ │ ├── string.h
│ │ ├── types.c
│ │ └── types.h
│ ├── device
│ │ ├── keyboard.c
│ │ ├── keyboard.h
│ │ ├── timer.c
│ │ └── timer.h
│ ├── display
│ │ ├── terminal.c
│ │ ├── terminal.h
│ │ └── vga.h
│ ├── interrupt
│ │ ├── idt-load.s
│ │ ├── idt.c
│ │ ├── idt.h
│ │ ├── isr-stub.s
│ │ ├── isr.c
│ │ ├── isr.h
│ │ ├── syscall.c
│ │ └── syscall.h
│ ├── memory
│ │ ├── gdt-load.s
│ │ ├── gdt.c
│ │ ├── gdt.h
│ │ ├── kheap.c
│ │ ├── kheap.h
│ │ ├── paging.c
│ │ ├── paging.h
│ │ ├── slabs.c
│ │ └── slabs.h
│ ├── process
│ │ ├── layout.h
│ │ ├── process.c
│ │ ├── process.h
│ │ ├── scheduler.c
│ │ ├── scheduler.h
│ │ ├── switch.s
│ │ ├── sysproc.c
│ │ └── sysproc.h
│ └── kernel.c
├── user
│ ├── lib
│ │ ├── syscall.h
│ │ ├── syscall.s
│ │ └── syslist.s
│ └── init.c
Guanzhou Jose Hu @ 2021