Diffstat (limited to 'kernel/api')
-rw-r--r-- | kernel/api/access.c  | 136 |
-rw-r--r-- | kernel/api/binfmt.c  |  88 |
-rw-r--r-- | kernel/api/elf.c     | 905 |
-rw-r--r-- | kernel/api/exec.c    | 110 |
-rw-r--r-- | kernel/api/syscall.c | 757 |
5 files changed, 1996 insertions, 0 deletions
diff --git a/kernel/api/access.c b/kernel/api/access.c new file mode 100644 index 0000000..d56e45d --- /dev/null +++ b/kernel/api/access.c @@ -0,0 +1,136 @@ +#include "errno.h" +#include "globals.h" +#include <mm/mm.h> +#include <util/string.h> + +#include "util/debug.h" + +#include "mm/kmalloc.h" +#include "mm/mman.h" + +#include "api/access.h" +#include "api/syscall.h" + +static inline long userland_address(const void *addr) +{ + return addr >= (void *)USER_MEM_LOW && addr < (void *)USER_MEM_HIGH; +} + +/* + * Check for permissions on [uaddr, uaddr + nbytes), then + * copy nbytes from userland address uaddr to kernel address kaddr. + * Do not access the userland virtual addresses directly; instead, + * use vmmap_read. + */ +long copy_from_user(void *kaddr, const void *uaddr, size_t nbytes) +{ + if (!range_perm(curproc, uaddr, nbytes, PROT_READ)) + { + return -EFAULT; + } + KASSERT(userland_address(uaddr) && !userland_address(kaddr)); + return vmmap_read(curproc->p_vmmap, uaddr, kaddr, nbytes); +} + +/* + * Check for permissions on [uaddr, uaddr + nbytes), then + * copy nbytes from kernel address kaddr to userland address uaddr. + * Do not access the userland virtual addresses directly; instead, + * use vmmap_write. + */ +long copy_to_user(void *uaddr, const void *kaddr, size_t nbytes) +{ + if (!range_perm(curproc, uaddr, nbytes, PROT_WRITE)) + { + return -EFAULT; + } + KASSERT(userland_address(uaddr) && !userland_address(kaddr)); + return vmmap_write(curproc->p_vmmap, uaddr, kaddr, nbytes); +} + +/* + * Duplicate the string identified by ustr into kernel memory. + * The kernel memory string kstr should be allocated using kmalloc. + */ +long user_strdup(argstr_t *ustr, char **kstrp) +{ + KASSERT(!userland_address(ustr)); + KASSERT(userland_address(ustr->as_str)); + + *kstrp = kmalloc(ustr->as_len + 1); + if (!*kstrp) + return -ENOMEM; + long ret = copy_from_user(*kstrp, ustr->as_str, ustr->as_len + 1); + if (ret) + { + kfree(*kstrp); + return ret; + } + return 0; +} + +/* + * Duplicate the string of vectors identified by uvec into kernel memory. + * The vector itself (char**) and each string (char*) should be allocated + * using kmalloc. + */ +long user_vecdup(argvec_t *uvec, char ***kvecp) +{ + KASSERT(!userland_address(uvec)); + KASSERT(userland_address(uvec->av_vec)); + + char **kvec = kmalloc((uvec->av_len + 1) * sizeof(char *)); + *kvecp = kvec; + + if (!kvec) + { + return -ENOMEM; + } + memset(kvec, 0, (uvec->av_len + 1) * sizeof(char *)); + + long ret = 0; + for (size_t i = 0; i < uvec->av_len && !ret; i++) + { + argstr_t argstr; + copy_from_user(&argstr, uvec->av_vec + i, sizeof(argstr_t)); + ret = user_strdup(&argstr, kvec + i); + } + + if (ret) + { + for (size_t i = 0; i < uvec->av_len; i++) + if (kvec[i]) + kfree(kvec[i]); + kfree(kvec); + *kvecp = NULL; + } + + return ret; +} + +/* + * Return 1 if process p has permissions perm for virtual address vaddr; + * otherwise return 0. + * + * Check against the vmarea's protections on the mapping. + */ +long addr_perm(proc_t *p, const void *vaddr, int perm) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} + +/* + * Return 1 if process p has permissions perm for virtual address range [vaddr, + * vaddr + len); otherwise return 0. + * + * Hints: + * You can use addr_perm in your implementation. + * Make sure to consider the case when the range of addresses that is being + * checked is less than a page. 
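[Editor's note: a possible shape for the two permission checks stubbed here, addr_perm above and range_perm just below. This is a minimal sketch only, not part of this diff. It assumes a vmmap_lookup(map, vfn) helper that returns the vmarea_t covering virtual page vfn (or NULL if unmapped) and a vma_prot field holding the mapping's PROT_* bits; verify both against the real vm headers.]

    /* Sketch: addr_perm checks the single page containing vaddr.
     * vmmap_lookup and vma_prot are assumed names -- check the vmmap header. */
    long addr_perm(proc_t *p, const void *vaddr, int perm)
    {
        vmarea_t *vma = vmmap_lookup(p->p_vmmap, ADDR_TO_PN(vaddr));
        if (!vma)
            return 0;
        /* every requested PROT_* bit must be present on the mapping */
        return (vma->vma_prot & perm) == perm;
    }

    /* Sketch: range_perm walks the range page by page; a range shorter than
     * a page still gets one check on its containing page. */
    long range_perm(proc_t *p, const void *vaddr, size_t len, int perm)
    {
        const char *cur = (const char *)PAGE_ALIGN_DOWN(vaddr);
        const char *end = (const char *)vaddr + len;
        for (; cur < end; cur += PAGE_SIZE)
        {
            if (!addr_perm(p, cur, perm))
            {
                return 0;
            }
        }
        return 1;
    }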
+ */ +long range_perm(proc_t *p, const void *vaddr, size_t len, int perm) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return 0; +} diff --git a/kernel/api/binfmt.c b/kernel/api/binfmt.c new file mode 100644 index 0000000..1597fdf --- /dev/null +++ b/kernel/api/binfmt.c @@ -0,0 +1,88 @@ +#include "errno.h" + +#include "main/inits.h" + +#include "fs/fcntl.h" +#include "fs/file.h" +#include "fs/vfs_syscall.h" + +#include "util/debug.h" +#include "util/init.h" +#include "util/list.h" + +#include "mm/kmalloc.h" + +#include "api/binfmt.h" + +typedef struct binfmt +{ + const char *bf_id; + binfmt_load_func_t bf_load; + list_link_t bf_link; +} binfmt_t; + +static list_t binfmt_list = LIST_INITIALIZER(binfmt_list); + +long binfmt_add(const char *id, binfmt_load_func_t loadfunc) +{ + binfmt_t *fmt; + if (NULL == (fmt = kmalloc(sizeof(*fmt)))) + { + return -ENOMEM; + } + + dbg(DBG_EXEC, "Registering binary loader %s\n", id); + + fmt->bf_id = id; + fmt->bf_load = loadfunc; + list_insert_head(&binfmt_list, &fmt->bf_link); + + return 0; +} + +long binfmt_load(const char *filename, char *const *argv, char *const *envp, + uint64_t *rip, uint64_t *rsp) +{ + long fd = do_open(filename, O_RDONLY); + if (fd < 0) + { + dbg(DBG_EXEC, "ERROR: exec failed to open file %s\n", filename); + return fd; + } + file_t *file = fget((int)fd); + long ret = 0; + if (S_ISDIR(file->f_vnode->vn_mode)) + { + ret = -EISDIR; + } + if (!ret && !S_ISREG(file->f_vnode->vn_mode)) + { + ret = -EACCES; + } + fput(&file); + if (ret) + { + do_close((int)fd); + return ret; + } + + list_iterate(&binfmt_list, fmt, binfmt_t, bf_link) + { + dbg(DBG_EXEC, "Trying to exec %s using binary loader %s\n", filename, + fmt->bf_id); + + /* ENOEXE indicates that the given loader is unable to load + * the given file, any other error indicates that the file + * was recognized, but some other error existed which should + * be returned to the user, only if all loaders specify ENOEXEC + * do we actually return ENOEXEC */ + ret = fmt->bf_load(filename, (int)fd, argv, envp, rip, rsp); + if (ret != -ENOEXEC) + { + do_close((int)fd); + } + } + + do_close((int)fd); + return ret; +} diff --git a/kernel/api/elf.c b/kernel/api/elf.c new file mode 100644 index 0000000..5ad4a33 --- /dev/null +++ b/kernel/api/elf.c @@ -0,0 +1,905 @@ +/* + * The elf32 loader (the basis for this file) was modified by twd in 7/2018 so + * that it lays out the address space in a more Unix-like fashion (e.g., the + * stack is at the top of user memory, text is near the bottom). + * + * This loader (and the elf32 loader) are not strictly ABI compliant. See the + * Intel i386 ELF supplement pp 54-59 and AMD64 ABI Draft 0.99.6 page 29 for + * what initial process stacks are supposed to look like after the iret(q) in + * userland_entry is executed. The following would be required (but not + * necessarily sufficient!) for full compliance: + * + * 1) Remove the pointers to argv, envp, and auxv from the initial stack. + * 2) Have __libc_static_entry (static entry) and _ldloadrtld (callee of dynamic + * entry) calculate those pointers and place them on the stack (x86) or in + * registers (x86-64) along with argc as arguments to main. 3) Ensure that the + * stack pointer is 4 byte (x86) or 16 byte (x86-64) aligned by padding the end + * of the arguments being written to the stack with zeros. 4) Have the stack + * pointer point to argc, rather than a garbage return address. 5) Have + * __libc_static_entry and _bootstrap (ld-weenix) respect this change. 
+ */ + +#include "errno.h" +#include "globals.h" + +#include "main/inits.h" + +#include "mm/kmalloc.h" +#include "mm/mm.h" +#include "mm/mman.h" +#include "mm/tlb.h" + +#include "api/binfmt.h" +#include "api/elf.h" + +#include "util/debug.h" +#include "util/string.h" + +#include "fs/fcntl.h" +#include "fs/file.h" +#include "fs/lseek.h" +#include "fs/vfs_syscall.h" + +static long _elf64_platform_check(const Elf64_Ehdr *header) +{ + return (EM_X86_64 == header->e_machine) // machine + && (ELFCLASS64 == header->e_ident[EI_CLASS]) // 32 or 64 bit + && (ELFDATA2LSB == header->e_ident[EI_DATA]); // endianness +} + +/* Helper function for the ELF loader. Maps the specified segment + * of the program header from the given file in to the given address + * space with the given memory offset (in pages). On success returns 0, + * otherwise returns a negative error code for the ELF loader to return. Note + * that since any error returned by this function should cause the ELF loader to + * give up, it is acceptable for the address space to be modified after + * returning an error. Note that memoff can be negative */ +static long _elf64_map_segment(vmmap_t *map, vnode_t *file, int64_t memoff, + const Elf64_Phdr *segment) +{ + /* calculate starting virtual address of segment e*/ + uintptr_t addr; + if (memoff < 0) + { + KASSERT(ADDR_TO_PN(segment->p_vaddr) > (uint64_t)-memoff); + addr = (uintptr_t)segment->p_vaddr - (uintptr_t)PN_TO_ADDR(-memoff); + } + else + { + addr = (uintptr_t)segment->p_vaddr + (uintptr_t)PN_TO_ADDR(memoff); + } + uint64_t off = segment->p_offset; + uint64_t memsz = segment->p_memsz; + uint64_t filesz = segment->p_filesz; + + dbg(DBG_ELF, + "Mapping program segment: type %#x, offset %#16lx," + " vaddr %#16lx, filesz %#lx, memsz %#lx, flags %#x, align %#lx\n", + segment->p_type, segment->p_offset, segment->p_vaddr, segment->p_filesz, + segment->p_memsz, segment->p_flags, segment->p_align); + + /* check for bad data in the segment header */ + if ((segment->p_align % PAGE_SIZE)) + { + dbg(DBG_ELF, "ERROR: segment not aligned on page\n"); + return -ENOEXEC; + } + else if (filesz > memsz) + { + dbg(DBG_ELF, "ERROR: segment file size is greater than memory size\n"); + return -ENOEXEC; + } + else if (PAGE_OFFSET(addr) != PAGE_OFFSET(off)) + { + dbg(DBG_ELF, + "ERROR: segment address and offset are not aligned correctly\n"); + return -ENOEXEC; + } + + /* calculate segment permissions */ + int perms = 0; + if (PF_R & segment->p_flags) + { + perms |= PROT_READ; + } + if (PF_W & segment->p_flags) + { + perms |= PROT_WRITE; + } + if (PF_X & segment->p_flags) + { + perms |= PROT_EXEC; + } + + if (filesz > 0) + { + /* something needs to be mapped from the file */ + /* start from the starting address and include enough pages to + * map all filesz bytes of the file */ + uint64_t lopage = ADDR_TO_PN(addr); + uint64_t npages = ADDR_TO_PN(addr + filesz - 1) - lopage + 1; + off_t fileoff = (off_t)PAGE_ALIGN_DOWN(off); + + if (!vmmap_is_range_empty(map, lopage, npages)) + { + dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n"); + return -ENOEXEC; + } + long ret = vmmap_map(map, file, lopage, npages, perms, + MAP_PRIVATE | MAP_FIXED, fileoff, 0, NULL); + if (ret) + return ret; + dbg(DBG_ELF, + "Mapped segment of length %lu pages at %#lx, memoff = %#lx\n", + npages, addr, memoff); + } + + if (memsz > filesz) + { + /* there is left over memory in the segment which must + * be initialized to 0 (anonymously mapped) */ + uint64_t lopage = ADDR_TO_PN( + addr + + filesz); // the first page 
containing data not stored in the file + uint64_t npages = + ADDR_TO_PN(PAGE_ALIGN_UP(addr + memsz)) - + lopage; // the first page totally unused by memory, minus low page + + /* check for overlapping mappings, considering the case where lopage + * contains file data and the case where it doesn't*/ + if (PAGE_ALIGNED(addr + filesz) && + !vmmap_is_range_empty(map, lopage, npages)) + { + dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n"); + return -ENOEXEC; + } + if (!PAGE_ALIGNED(addr + filesz) && npages > 1 && + !vmmap_is_range_empty(map, lopage + 1, npages - 1)) + { + dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n"); + return -ENOEXEC; + } + long ret = vmmap_map(map, NULL, lopage, npages, perms, + MAP_PRIVATE | MAP_FIXED, 0, 0, NULL); + if (ret) + return ret; + if (!PAGE_ALIGNED(addr + filesz) && filesz > 0) + { + /* In this case, we have accidentally zeroed too much of memory, as + * we zeroed all memory in the page containing addr + filesz. + * However, the remaining part of the data is not a full page, so we + * should not just map in another page (as there could be garbage + * after addr+filesz). For instance, consider the data-bss boundary + * (c.f. Intel x86 ELF supplement pp. 82). + * To fix this, we need to read in the contents of the file manually + * and put them at that user space addr in the anon map we just + * added. */ + void *buf = page_alloc(); + if (!buf) + return -ENOMEM; + + vlock(file); + ret = file->vn_ops->read(file, + (size_t)PAGE_ALIGN_DOWN(off + filesz - 1), + buf, PAGE_OFFSET(addr + filesz)); + if (ret >= 0) + { + KASSERT((uintptr_t)ret == PAGE_OFFSET(addr + filesz)); + ret = vmmap_write(map, PAGE_ALIGN_DOWN(addr + filesz - 1), buf, + PAGE_OFFSET(addr + filesz)); + } + vunlock(file); + page_free(buf); + return ret; + } + } + return 0; +} + +/* Read in the given fd's ELF header into the location pointed to by the given + * argument and does some basic checks that it is a valid ELF file, is an + * executable, and is for the correct platform + * interp is 1 if we are loading an interpreter, 0 otherwise + * Returns 0 on success, -errno on failure. Returns the ELF header in the header + * argument. 
*/ +static long _elf64_load_ehdr(int fd, Elf64_Ehdr *header, int interp) +{ + long ret; + memset(header, 0, sizeof(*header)); + + /* Preliminary check that this is an ELF file */ + ret = do_read(fd, header, sizeof(*header)); + if (ret < 0) + return ret; + if ((ret < SELFMAG) || memcmp(&header->e_ident[0], ELFMAG, SELFMAG) != 0) + { + dbg(DBG_ELF, "ELF load failed: no magic number present\n"); + return -ENOEXEC; + } + if (ret < header->e_ehsize) + { + dbg(DBG_ELF, "ELF load failed: bad file size\n"); + return -ENOEXEC; + } + /* Log information about the file */ + dbg(DBG_ELF, "loading ELF file\n"); + dbgq(DBG_ELF, "ELF Header Information:\n"); + dbgq(DBG_ELF, "Version: %d\n", (int)header->e_ident[EI_VERSION]); + dbgq(DBG_ELF, "Class: %d\n", (int)header->e_ident[EI_CLASS]); + dbgq(DBG_ELF, "Data: %d\n", (int)header->e_ident[EI_DATA]); + dbgq(DBG_ELF, "Type: %d\n", (int)header->e_type); + dbgq(DBG_ELF, "Machine: %d\n", (int)header->e_machine); + + /* Check that the ELF file is executable and targets + * the correct platform */ + if (interp && header->e_type != ET_DYN) + { + dbg(DBG_ELF, + "ELF load failed: interpreter is not a shared object file\n"); + return -ENOEXEC; + } + if (!interp && header->e_type != ET_EXEC) + { + dbg(DBG_ELF, "ELF load failed: not executable ELF\n"); + return -ENOEXEC; + } + if (!_elf64_platform_check(header)) + { + dbg(DBG_ELF, "ELF load failed: incorrect platform\n"); + return -ENOEXEC; + } + return 0; +} + +/* Loads the program header tables from from the ELF file specified by + * the open file descriptor fd. header should point to the header information + * for that ELF file. pht is a buffer of size size. It must be large enough + * to hold the program header tables (whose size can be determined from + * the ELF header). + * + * Returns 0 on success or -errno on error. */ +static long _elf64_load_phtable(int fd, Elf64_Ehdr *header, char *pht, + size_t size) +{ + size_t phtsize = header->e_phentsize * header->e_phnum; + KASSERT(phtsize <= size); + /* header->e_phoff is a uint64_t cast to int. since the max file size on + * s5fs is way smaller than uint32_t, offsets in practice should never + * cause this cast to behave badly, although if weenix ever adds support + * for very large (> 4GB) files, this will be a bug. + */ + long ret = do_lseek(fd, (int)(header->e_phoff), SEEK_SET); + if (ret < 0) + return ret; + + ret = do_read(fd, pht, phtsize); + if (ret < 0) + return ret; + + KASSERT((size_t)ret <= phtsize); + if ((size_t)ret < phtsize) + { + return -ENOEXEC; + } + return 0; +} + +/* Maps the PT_LOAD segments for an ELF file into the given address space. + * vnode should be the open vnode of the ELF file. + * map is the address space to map the ELF file into. + * header is the ELF file's header. + * pht is the full program header table. + * memoff is the difference (in pages) between the desired base address and the + * base address given in the ELF file (usually 0x8048094) + * + * Returns the number of segments loaded on success, -errno on failure. 
*/ +static long _elf64_map_progsegs(vnode_t *vnode, vmmap_t *map, + Elf64_Ehdr *header, char *pht, int64_t memoff) +{ + long ret = 0; + + long loadcount = 0; + for (uint32_t i = 0; i < header->e_phnum; i++) + { + Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize); + if (phtentry->p_type == PT_LOAD) + { + ret = _elf64_map_segment(map, vnode, memoff, phtentry); + if (ret) + return ret; + loadcount++; + } + } + + if (!loadcount) + { + dbg(DBG_ELF, "ERROR: ELF file contained no loadable sections\n"); + return -ENOEXEC; + } + return loadcount; +} + +/* Locates the program header for the interpreter in the given list of program + * headers through the phinterp out-argument. Returns 0 on success (even if + * there is no interpreter) or -errno on error. If there is no interpreter + * section then phinterp is set to NULL. If there is more than one interpreter + * then -EINVAL is returned. */ +static long _elf64_find_phinterp(Elf64_Ehdr *header, char *pht, + Elf64_Phdr **phinterp) +{ + *phinterp = NULL; + + for (uint32_t i = 0; i < header->e_phnum; i++) + { + Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize); + if (phtentry->p_type == PT_INTERP) + { + if (!*phinterp) + { + *phinterp = phtentry; + } + else + { + dbg(DBG_ELF, "ELF load failed: multiple interpreters\n"); + return -EINVAL; + } + } + } + return 0; +} + +/* Calculates the lower and upper virtual addresses that the given program + * header table would load into if _elf64_map_progsegs were called. We traverse + * all the program segments of type PT_LOAD and look at p_vaddr and p_memsz + * Return the low and high vaddrs in the given arguments if they are non-NULL. + * The high vaddr is one plus the highest vaddr used by the program. */ +static void _elf64_calc_progbounds(Elf64_Ehdr *header, char *pht, void **low, + void **high) +{ + Elf64_Addr curlow = (Elf64_Addr)-1; + Elf64_Addr curhigh = 0; + for (uint32_t i = 0; i < header->e_phnum; i++) + { + Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize); + if (phtentry->p_type == PT_LOAD) + { + if (phtentry->p_vaddr < curlow) + { + curlow = phtentry->p_vaddr; + } + if (phtentry->p_vaddr + phtentry->p_memsz > curhigh) + { + curhigh = phtentry->p_vaddr + phtentry->p_memsz; + } + } + } + if (low) + { + *low = (void *)curlow; + } + if (high) + { + *high = (void *)curhigh; + } +} + +/* Calculates the total size of all the arguments that need to be placed on the + * user stack before execution can begin. See AMD64 ABI Draft 0.99.6 page 29 + * Returns total size on success. 
Returns the number of non-NULL entries in + * argv, envp, and auxv in argc, envc, and auxc arguments, respectively */ +static size_t _elf64_calc_argsize(char *const argv[], char *const envp[], + Elf64_auxv_t *auxv, size_t phtsize, + size_t *argc, size_t *envc, size_t *auxc) +{ + size_t size = 0; + size_t i; + /* All strings in argv */ + for (i = 0; argv[i]; i++) + { + size += strlen(argv[i]) + 1; /* null terminator */ + } + if (argc) + { + *argc = i; + } + /* argv itself (+ null terminator) */ + size += (i + 1) * sizeof(char *); + + /* All strings in envp */ + for (i = 0; envp[i] != NULL; i++) + { + size += strlen(envp[i]) + 1; /* null terminator */ + } + if (envc != NULL) + { + *envc = i; + } + /* envp itself (+ null terminator) */ + size += (i + 1) * sizeof(char *); + + /* The only extra-space-consuming entry in auxv is AT_PHDR, as if we find + * that entry we'll need to put the program header table on the stack */ + for (i = 0; auxv[i].a_type != AT_NULL; i++) + { + if (auxv[i].a_type == AT_PHDR) + { + size += phtsize; + } + } + if (auxc) + { + *auxc = i; + } + /* auxv itself (+ null terminator) */ + size += (i + 1) * sizeof(Elf64_auxv_t); + + /* argc - reserving 8 bytes for alignment purposes */ + size += sizeof(int64_t); + /* argv, envp, and auxv pointers (as passed to main) */ + size += 3 * sizeof(void *); + + /* + * cjm5: the above isn't strictly ABI compliant. normally the userspace + * wrappers to main() (__libc_static_entry or _bootstrap for ld-weenix) are + * responsible for calculating *argv, *envp, *and *auxv to pass to main(). + * It's easier to do it here, though. + */ + + return size; +} + +/* Copies the arguments that must be on the stack prior to execution onto the + * user stack. This should never fail. + * arglow: low address on the user stack where we should start the copying + * argsize: total size of everything to go on the stack + * buf: a kernel buffer at least as big as argsize (for convenience) + * argv, envp, auxv: various vectors of stuff (to go on the stack) + * argc, envc, auxc: number of non-NULL entries in argv, envp, auxv, + * respectively (to avoid recomputing them) + * phtsize: the size of the program header table (to avoid recomputing) + * c.f. Intel i386 ELF supplement pp 54-59 and AMD64 ABI Draft 0.99.6 page 29 + */ +static void _elf64_load_args(vmmap_t *map, void *arglow, size_t argsize, + char *buf, char *const argv[], char *const envp[], + Elf64_auxv_t *auxv, size_t argc, size_t envc, + size_t auxc, size_t phtsize) +{ + dbg(DBG_ELF, + "Loading initial stack contents at 0x%p, argc = %lu, envc = %lu, auxc " + "= %lu\n", + arglow, argc, envc, auxc); + + size_t i; + + /* Copy argc: in x86-64, this is an eight-byte value, despite being treated + * as an int in a C main() function. 
See AMD64 ABI Draft 0.99.6 page 29 */ + *((int64_t *)buf) = (int64_t)argc; + + /* Calculate where the strings / tables pointed to by the vectors start */ + size_t veclen = (argc + 1 + envc + 1) * sizeof(char *) + + (auxc + 1) * sizeof(Elf64_auxv_t); + + char *vecstart = + buf + sizeof(int64_t) + + 3 * sizeof(void *); /* Beginning of argv (in kernel buffer) */ + + char *vvecstart = + ((char *)arglow) + sizeof(int64_t) + + 3 * sizeof(void *); /* Beginning of argv (in user space) */ + + char *strstart = vecstart + veclen; /* Beginning of first string pointed to + by argv (in kernel buffer) */ + + /* Beginning of first string pointed to by argv (in user space) */ + char *vstrstart = vvecstart + veclen; + + /* + * cjm5: since the first 6 arguments that can fit in registers are placed + * there in x86-64, __libc_static_entry (and ld-weenix, if it is ever ported + * to x86-64) have to take the following pointers off the stack and move + * them and argc into the first 4 argument registers before calling main(). + */ + + /* Copy over pointer to argv */ + *(char **)(buf + 8) = vvecstart; + /* Copy over pointer to envp */ + *(char **)(buf + 16) = vvecstart + (argc + 1) * sizeof(char *); + /* Copy over pointer to auxv */ + *(char **)(buf + 24) = vvecstart + (argc + 1 + envc + 1) * sizeof(char *); + + /* Copy over argv along with every string in it */ + for (i = 0; i < argc; i++) + { + size_t len = strlen(argv[i]) + 1; + strcpy(strstart, argv[i]); + /* Remember that we need to use the virtual address of the string */ + *(char **)vecstart = vstrstart; + strstart += len; + vstrstart += len; + vecstart += sizeof(char *); + } + /* null terminator of argv */ + *(char **)vecstart = NULL; + vecstart += sizeof(char *); + + /* Copy over envp along with every string in it */ + for (i = 0; i < envc; i++) + { + size_t len = strlen(envp[i]) + 1; + strcpy(strstart, envp[i]); + /* Remember that we need to use the virtual address of the string */ + *(char **)vecstart = vstrstart; + strstart += len; + vstrstart += len; + vecstart += sizeof(char *); + } + /* null terminator of envp */ + *(char **)vecstart = NULL; + vecstart += sizeof(char *); + + /* Copy over auxv along with the program header (if we find it) */ + for (i = 0; i < auxc; i++) + { + /* Copy over the auxv entry */ + memcpy(vecstart, &auxv[i], sizeof(Elf64_auxv_t)); + /* Check if it points to the program header */ + if (auxv[i].a_type == AT_PHDR) + { + /* Copy over the program header table */ + memcpy(strstart, auxv[i].a_un.a_ptr, (size_t)phtsize); + /* And modify the address */ + ((Elf64_auxv_t *)vecstart)->a_un.a_ptr = vstrstart; + } + vecstart += sizeof(Elf64_auxv_t); + } + /* null terminator of auxv */ + ((Elf64_auxv_t *)vecstart)->a_type = NULL; + + /* Finally, we're done copying into the kernel buffer. Now just copy the + * kernel buffer into user space */ + long ret = vmmap_write(map, arglow, buf, argsize); + /* If this failed, we must have set up the address space wrong... 
*/ + KASSERT(!ret); +} + +static long _elf64_load(const char *filename, int fd, char *const argv[], + char *const envp[], uint64_t *rip, uint64_t *rsp) +{ + long ret = 0; + Elf64_Ehdr header; + Elf64_Ehdr interpheader; + + /* variables to clean up on failure */ + vmmap_t *map = NULL; + file_t *file = NULL; + char *pht = NULL; + char *interpname = NULL; + long interpfd = -1; + file_t *interpfile = NULL; + char *interppht = NULL; + Elf64_auxv_t *auxv = NULL; + char *argbuf = NULL; + + uintptr_t entry; + + file = fget(fd); + if (!file) + return -EBADF; + + /* Load and verify the ELF header */ + ret = _elf64_load_ehdr(fd, &header, 0); + if (ret) + goto done; + + map = vmmap_create(); + if (!map) + { + ret = -ENOMEM; + goto done; + } + + // Program header table entry size multiplied by + // number of entries. + size_t phtsize = header.e_phentsize * header.e_phnum; + pht = kmalloc(phtsize); + if (!pht) + { + ret = -ENOMEM; + goto done; + } + /* Read in the program header table */ + ret = _elf64_load_phtable(fd, &header, pht, phtsize); + if (ret) + goto done; + + /* Load the segments in the program header table */ + ret = _elf64_map_progsegs(file->f_vnode, map, &header, pht, 0); + if (ret < 0) + goto done; + + /* Check if program requires an interpreter */ + Elf64_Phdr *phinterp = NULL; + ret = _elf64_find_phinterp(&header, pht, &phinterp); + if (ret) + goto done; + + /* Calculate program bounds for future reference */ + void *proglow; + void *proghigh; + _elf64_calc_progbounds(&header, pht, &proglow, &proghigh); + + entry = (uintptr_t)header.e_entry; + + /* if an interpreter was requested load it */ + if (phinterp) + { + /* read the file name of the interpreter from the binary */ + ret = do_lseek(fd, (int)(phinterp->p_offset), SEEK_SET); + if (ret < 0) + goto done; + + interpname = kmalloc(phinterp->p_filesz); + if (!interpname) + { + ret = -ENOMEM; + goto done; + } + ret = do_read(fd, interpname, phinterp->p_filesz); + if (ret < 0) + goto done; + + if ((size_t)ret != phinterp->p_filesz) + { + ret = -ENOEXEC; + goto done; + } + + /* open the interpreter */ + dbgq(DBG_ELF, "ELF Interpreter: %*s\n", (int)phinterp->p_filesz, + interpname); + interpfd = do_open(interpname, O_RDONLY); + if (interpfd < 0) + { + ret = interpfd; + goto done; + } + kfree(interpname); + interpname = NULL; + + interpfile = fget((int)interpfd); + KASSERT(interpfile); + + /* Load and verify the interpreter ELF header */ + ret = _elf64_load_ehdr((int)interpfd, &interpheader, 1); + if (ret) + goto done; + + size_t interpphtsize = interpheader.e_phentsize * interpheader.e_phnum; + interppht = kmalloc(interpphtsize); + if (!interppht) + { + ret = -ENOMEM; + goto done; + } + /* Read in the program header table */ + ret = _elf64_load_phtable((int)interpfd, &interpheader, interppht, + interpphtsize); + if (ret) + goto done; + + /* Interpreter shouldn't itself need an interpreter */ + Elf64_Phdr *interpphinterp; + ret = _elf64_find_phinterp(&interpheader, interppht, &interpphinterp); + if (ret) + goto done; + + if (interpphinterp) + { + ret = -EINVAL; + goto done; + } + + /* Calculate the interpreter program size */ + void *interplow; + void *interphigh; + _elf64_calc_progbounds(&interpheader, interppht, &interplow, + &interphigh); + uint64_t interpnpages = + ADDR_TO_PN(PAGE_ALIGN_UP(interphigh)) - ADDR_TO_PN(interplow); + + /* Find space for the interpreter */ + /* This is the first pn at which the interpreter will be mapped */ + uint64_t interppagebase = + (uint64_t)vmmap_find_range(map, interpnpages, VMMAP_DIR_HILO); + if 
(interppagebase == ~0UL) + { + ret = -ENOMEM; + goto done; + } + + /* Base address at which the interpreter begins on that page */ + void *interpbase = (void *)((uintptr_t)PN_TO_ADDR(interppagebase) + + PAGE_OFFSET(interplow)); + + /* Offset from "expected base" in number of pages */ + int64_t interpoff = + (int64_t)interppagebase - (int64_t)ADDR_TO_PN(interplow); + + entry = (uintptr_t)interpbase + + ((uintptr_t)interpheader.e_entry - (uintptr_t)interplow); + + /* Load the interpreter program header and map in its segments */ + ret = _elf64_map_progsegs(interpfile->f_vnode, map, &interpheader, + interppht, interpoff); + if (ret < 0) + goto done; + + /* Build the ELF aux table */ + /* Need to hold AT_PHDR, AT_PHENT, AT_PHNUM, AT_ENTRY, AT_BASE, + * AT_PAGESZ, AT_NULL */ + auxv = (Elf64_auxv_t *)kmalloc(7 * sizeof(Elf64_auxv_t)); + if (!auxv) + { + ret = -ENOMEM; + goto done; + } + Elf64_auxv_t *auxvent = auxv; + + /* Add all the necessary entries */ + auxvent->a_type = AT_PHDR; + auxvent->a_un.a_ptr = pht; + auxvent++; + + auxvent->a_type = AT_PHENT; + auxvent->a_un.a_val = header.e_phentsize; + auxvent++; + + auxvent->a_type = AT_PHNUM; + auxvent->a_un.a_val = header.e_phnum; + auxvent++; + + auxvent->a_type = AT_ENTRY; + auxvent->a_un.a_ptr = (void *)header.e_entry; + auxvent++; + + auxvent->a_type = AT_BASE; + auxvent->a_un.a_ptr = interpbase; + auxvent++; + + auxvent->a_type = AT_PAGESZ; + auxvent->a_un.a_val = PAGE_SIZE; + auxvent++; + + auxvent->a_type = AT_NULL; + } + else + { + /* Just put AT_NULL (we don't really need this at all) */ + auxv = (Elf64_auxv_t *)kmalloc(sizeof(Elf64_auxv_t)); + if (!auxv) + { + ret = -ENOMEM; + goto done; + } + auxv->a_type = AT_NULL; + } + + /* Allocate stack at the top of the address space */ + uint64_t stack_lopage = (uint64_t)vmmap_find_range( + map, (DEFAULT_STACK_SIZE / PAGE_SIZE) + 1, VMMAP_DIR_HILO); + if (stack_lopage == ~0UL) + { + ret = -ENOMEM; + goto done; + } + ret = + vmmap_map(map, NULL, stack_lopage, (DEFAULT_STACK_SIZE / PAGE_SIZE) + 1, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, 0, 0, NULL); + KASSERT(0 == ret); + dbg(DBG_ELF, "Mapped Stack at low addr 0x%p, size %#lx\n", + PN_TO_ADDR(stack_lopage), DEFAULT_STACK_SIZE + PAGE_SIZE); + + /* Calculate size needed on user stack for arguments */ + size_t argc, envc, auxc; + size_t argsize = + _elf64_calc_argsize(argv, envp, auxv, phtsize, &argc, &envc, &auxc); + /* Make sure it fits on the stack */ + if (argsize >= DEFAULT_STACK_SIZE) + { + ret = -E2BIG; + goto done; + } + /* Allocate kernel buffer for temporarily storing arguments */ + argbuf = (char *)kmalloc(argsize); + if (!argbuf) + { + ret = -ENOMEM; + goto done; + } + /* Calculate where in user space we start putting the args. */ + // the args go at the beginning (top) of the stack + void *arglow = + (char *)PN_TO_ADDR(stack_lopage) + + (uint64_t)( + ((uint64_t)PN_TO_ADDR((DEFAULT_STACK_SIZE / PAGE_SIZE) + 1)) - + argsize); + + /* Copy everything into the user address space, modifying addresses in + * argv, envp, and auxv to be user addresses as we go. */ + _elf64_load_args(map, arglow, argsize, argbuf, argv, envp, auxv, argc, envc, + auxc, phtsize); + + dbg(DBG_ELF, + "Past the point of no return. Swapping to map at 0x%p, setting brk to " + "0x%p\n", + map, proghigh); + /* the final threshold / What warm unspoken secrets will we learn? / Beyond + * the point of no return ... */ + + /* Give the process the new mappings. 
*/ + vmmap_destroy(&curproc->p_vmmap); + map->vmm_proc = curproc; + curproc->p_vmmap = map; + map = NULL; /* So it doesn't get cleaned up at the end */ + + /* Flush the process pagetables and TLB */ + pt_unmap_range(curproc->p_pml4, USER_MEM_LOW, USER_MEM_HIGH); + tlb_flush_all(); + + /* Set the process break and starting break (immediately after the mapped-in + * text/data/bss from the executable) */ + curproc->p_brk = proghigh; + curproc->p_start_brk = proghigh; + + strncpy(curproc->p_name, filename, PROC_NAME_LEN); + + /* Tell the caller the correct stack pointer and instruction + * pointer to begin execution in user space */ + *rip = (uint64_t)entry; + *rsp = ((uint64_t)arglow) - + 8; /* Space on the user stack for the (garbage) return address */ + /* Note that the return address will be fixed by the userland entry code, + * whether in static or dynamic */ + + /* And we're done */ + ret = 0; + +// https://www.youtube.com/watch?v=PJhXVg2QisM +done: + fput(&file); + if (map) + { + vmmap_destroy(&map); + } + if (pht) + { + kfree(pht); + } + if (interpname) + { + kfree(interpname); + } + if (interpfd >= 0) + { + do_close((int)interpfd); + } + if (interpfile) + { + fput(&interpfile); + } + if (interppht) + { + kfree(interppht); + } + if (auxv) + { + kfree(auxv); + } + if (argbuf) + { + kfree(argbuf); + } + return ret; +} + +void elf64_init(void) { binfmt_add("ELF64", _elf64_load); } diff --git a/kernel/api/exec.c b/kernel/api/exec.c new file mode 100644 index 0000000..e0b66e8 --- /dev/null +++ b/kernel/api/exec.c @@ -0,0 +1,110 @@ +#include "util/debug.h" +#include <util/string.h> + +#include "main/gdt.h" + +#include "api/binfmt.h" +#include "api/exec.h" +#include "api/syscall.h" + +/* Enters userland from the kernel. Call this for a process that has up to now + * been a kernel-only process. Takes the registers to start userland execution + * with. Does not return. Note that the regs passed in should be on the current + * stack of execution. + */ + +void userland_entry(const regs_t regs) +{ + KASSERT(preemption_enabled()); + + dbg(DBG_ELF, ">>>>>>>>>>>>>>> pid: %d\n", curproc->p_pid); + + intr_disable(); + dbg(DBG_ELF, ">>>>>>>>>>>>>>>> intr_disable()\n"); + intr_setipl(IPL_LOW); + dbg(DBG_ELF, ">>>>>>>>>>>>>>>> intr_setipl()\n"); + + __asm__ __volatile__( + "movq %%rax, %%rsp\n\t" /* Move stack pointer up to regs */ + "popq %%r15\n\t" /* Pop all general purpose registers (except rsp, */ + "popq %%r14\n\t" /* which gets popped by iretq) */ + "popq %%r13\n\t" + "popq %%r12\n\t" + "popq %%rbp\n\t" + "popq %%rbx\n\t" + "popq %%r11\n\t" + "popq %%r10\n\t" + "popq %%r9\n\t" + "popq %%r8\n\t" + "popq %%rax\n\t" + "popq %%rcx\n\t" + "popq %%rdx\n\t" + "popq %%rsi\n\t" + "popq %%rdi\n\t" + "add $16, %%rsp\n\t" /* + * Move stack pointer up to the location of the + * arguments automatically pushed by the processor + * on an interrupt + */ + "iretq\n" + /* We're now in userland! */ + : /* No outputs */ + : "a"(®s) /* Forces regs to be in the 'a' register (%rax). 
*/ + ); +} + +long do_execve(const char *filename, char *const *argv, char *const *envp, + struct regs *regs) +{ + uint64_t rip, rsp; + long ret = binfmt_load(filename, argv, envp, &rip, &rsp); + if (ret < 0) + { + return ret; + } + /* Make sure we "return" into the start of the newly loaded binary */ + dbg(DBG_EXEC, "Executing binary with rip 0x%p, rsp 0x%p\n", (void *)rip, + (void *)rsp); + regs->r_rip = rip; + regs->r_rsp = rsp; + return 0; +} + +/* + * The kernel version of execve needs to construct a set of saved user registers + * and fake a return from an interrupt to get to userland. The 64-bit version + * behaves mostly the same as the 32-bit version, but there are a few + * differences. Besides different general purpose registers, there is no longer + * a need for two esp/rsp fields since popa is not valid assembly in 64-bit. The + * only non-null segment registers are now cs and ss, but they are set the same + * as in 32-bit, although the segment descriptors they point to are slightly + * different. + */ +void kernel_execve(const char *filename, char *const *argv, char *const *envp) +{ + uint64_t rip, rsp; + long ret = binfmt_load(filename, argv, envp, &rip, &rsp); + dbg(DBG_EXEC, "ret = %ld\n", ret); + + KASSERT(0 == ret); /* Should never fail to load the first binary */ + + dbg(DBG_EXEC, "Entering userland with rip 0x%p, rsp 0x%p\n", (void *)rip, + (void *)rsp); + /* To enter userland, we build a set of saved registers to "trick" the + * processor into thinking we were in userland before. Yes, it's horrible. + * c.f. http://wiki.osdev.org/index.php?title=Getting_to_Ring_3&oldid=8195 + */ + regs_t regs; + memset(®s, 0, sizeof(regs_t)); + + /* Userland gdt entries (0x3 for ring 3) */ + regs.r_cs = GDT_USER_TEXT | 0x3; + regs.r_ss = GDT_USER_DATA | 0x3; + + /* Userland instruction pointer and stack pointer */ + regs.r_rip = rip; + regs.r_rsp = rsp; + + regs.r_rflags = 0x202; // see 32-bit version + userland_entry(regs); +}
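[Editor's note: for context, a hedged illustration of how kernel_execve is typically reached: the first userland process is started from a kernel thread once the VFS is up. The thread-function signature, the name initproc_run, and the path /sbin/init are placeholders for illustration, not part of this diff.]

    /* Hypothetical kernel-thread body; kernel_execve does not return. */
    static void *initproc_run(long arg1, void *arg2)
    {
        char *const argv[] = {"/sbin/init", NULL};
        char *const envp[] = {NULL};
        kernel_execve("/sbin/init", argv, envp);
        return NULL; /* never reached */
    }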
\ No newline at end of file diff --git a/kernel/api/syscall.c b/kernel/api/syscall.c new file mode 100644 index 0000000..1be5276 --- /dev/null +++ b/kernel/api/syscall.c @@ -0,0 +1,757 @@ +#include "errno.h" +#include "globals.h" +#include "kernel.h" +#include <fs/vfs.h> +#include <util/time.h> + +#include "main/inits.h" +#include "main/interrupt.h" + +#include "mm/kmalloc.h" +#include "mm/mman.h" + +#include "fs/vfs_syscall.h" +#include "fs/vnode.h" + +#include "drivers/tty/tty.h" +#include "test/kshell/kshell.h" + +#include "vm/brk.h" +#include "vm/mmap.h" + +#include "api/access.h" +#include "api/exec.h" +#include "api/syscall.h" +#include "api/utsname.h" + +static long syscall_handler(regs_t *regs); + +static long syscall_dispatch(size_t sysnum, uintptr_t args, regs_t *regs); + +extern size_t active_tty; + +static const char *syscall_strings[49] = { + "syscall", "exit", "fork", "read", "write", "open", + "close", "waitpid", "link", "unlink", "execve", "chdir", + "sleep", "unknown", "lseek", "sync", "nuke", "dup", + "pipe", "ioctl", "unknown", "rmdir", "mkdir", "getdents", + "mmap", "mprotect", "munmap", "rename", "uname", "thr_create", + "thr_cancel", "thr_exit", "thr_yield", "thr_join", "gettid", "getpid", + "unknown", "unkown", "unknown", "errno", "halt", "get_free_mem", + "set_errno", "dup2", "brk", "mount", "umount", "stat", "usleep"}; + +void syscall_init(void) { intr_register(INTR_SYSCALL, syscall_handler); } + +// if condition, set errno to err and return -1 +#define ERROR_OUT(condition, err) \ + if (condition) \ + { \ + curthr->kt_errno = (err); \ + return -1; \ + } + +// if ret < 0, set errno to -ret and return -1 +#define ERROR_OUT_RET(ret) ERROR_OUT(ret < 0, -ret) + +/* + * Be sure to look at other examples of implemented system calls to see how + * this should be done - the general outline is as follows. + * + * - Initialize a read_args_t struct locally in kernel space and copy from + * userland args. + * - Allocate a temporary buffer (a page-aligned block of n pages that are + * enough space to store the number of bytes to read) + * - Call do_read() with the buffer and then copy the buffer to the userland + * args after the system call + * - Make sure to free the temporary buffer allocated + * - Return the number of bytes read, or return -1 and set the current thread's + * errno appropriately using ERROR_OUT_RET. + */ +static long sys_read(read_args_t *args) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +/* + * Be sure to look at other examples of implemented system calls to see how + * this should be done - the general outline is as follows. + * + * This function is very similar to sys_read - see above comments. You'll need + * to use the functions copy_from_user() and do_write(). Make sure to + * allocate a new temporary buffer for the data that is being written. This + * is to ensure that pagefaults within kernel mode do not happen. + */ +static long sys_write(write_args_t *args) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +/* + * This similar to the other system calls that you have implemented above. 
+ * + * The general steps are as follows: + * - Copy the arguments from user memory + * - Check that the count field is at least the size of a dirent_t + * - Use a while loop to read count / sizeof(dirent_t) directory entries into + * the provided dirp and call do_getdent + * - Return the number of bytes read + */ +static long sys_getdents(getdents_args_t *args) +{ + NOT_YET_IMPLEMENTED("VM: ***none***"); + return -1; +} + +#ifdef __MOUNTING__ +static long sys_mount(mount_args_t *arg) +{ + mount_args_t kern_args; + char *source; + char *target; + char *type; + long ret; + + if (copy_from_user(&kern_args, arg, sizeof(kern_args)) < 0) + { + curthr->kt_errno = EFAULT; + return -1; + } + + /* null is okay only for the source */ + source = user_strdup(&kern_args.spec); + if (NULL == (target = user_strdup(&kern_args.dir))) + { + kfree(source); + curthr->kt_errno = EINVAL; + return -1; + } + if (NULL == (type = user_strdup(&kern_args.fstype))) + { + kfree(source); + kfree(target); + curthr->kt_errno = EINVAL; + return -1; + } + + ret = do_mount(source, target, type); + kfree(source); + kfree(target); + kfree(type); + + if (ret) + { + curthr->kt_errno = -ret; + return -1; + } + + return 0; +} + +static long sys_umount(argstr_t *input) +{ + argstr_t kstr; + char *target; + long ret; + + if (copy_from_user(&kstr, input, sizeof(kstr)) < 0) + { + curthr->kt_errno = EFAULT; + return -1; + } + + if (NULL == (target = user_strdup(&kstr))) + { + curthr->kt_errno = EINVAL; + return -1; + } + + ret = do_umount(target); + kfree(target); + + if (ret) + { + curthr->kt_errno = -ret; + return -1; + } + + return 0; +} +#endif + +static long sys_close(int fd) +{ + long ret = do_close(fd); + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_dup(int fd) +{ + long ret = do_dup(fd); + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_dup2(const dup2_args_t *args) +{ + dup2_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + ret = do_dup2(kargs.ofd, kargs.nfd); + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_mkdir(mkdir_args_t *args) +{ + mkdir_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs.path, &path); + ERROR_OUT_RET(ret); + + ret = do_mkdir(path); + kfree(path); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_rmdir(argstr_t *args) +{ + argstr_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs, &path); + ERROR_OUT_RET(ret); + + ret = do_rmdir(path); + kfree(path); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_unlink(argstr_t *args) +{ + argstr_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs, &path); + ERROR_OUT_RET(ret); + + ret = do_unlink(path); + kfree(path); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_link(link_args_t *args) +{ + link_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *to, *from; + ret = user_strdup(&kargs.to, &to); + ERROR_OUT_RET(ret); + + ret = user_strdup(&kargs.from, &from); + if (ret) + { + kfree(to); + ERROR_OUT_RET(ret); + } + + ret = do_link(from, to); + kfree(to); + kfree(from); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_rename(rename_args_t *args) +{ + rename_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char 
*oldpath, *newpath; + ret = user_strdup(&kargs.oldpath, &oldpath); + ERROR_OUT_RET(ret); + + ret = user_strdup(&kargs.newpath, &newpath); + if (ret) + { + kfree(oldpath); + ERROR_OUT_RET(ret); + } + + ret = do_rename(oldpath, newpath); + kfree(oldpath); + kfree(newpath); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_chdir(argstr_t *args) +{ + argstr_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs, &path); + ERROR_OUT_RET(ret); + + ret = do_chdir(path); + kfree(path); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_lseek(lseek_args_t *args) +{ + lseek_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + ret = do_lseek(kargs.fd, kargs.offset, kargs.whence); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_open(open_args_t *args) +{ + open_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs.filename, &path); + ERROR_OUT_RET(ret); + + ret = do_open(path, kargs.flags); + kfree(path); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_munmap(munmap_args_t *args) +{ + munmap_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + ret = do_munmap(kargs.addr, kargs.len); + + ERROR_OUT_RET(ret); + return ret; +} + +static void *sys_mmap(mmap_args_t *arg) +{ + mmap_args_t kargs; + + if (copy_from_user(&kargs, arg, sizeof(mmap_args_t))) + { + curthr->kt_errno = EFAULT; + return MAP_FAILED; + } + + void *ret; + long err = do_mmap(kargs.mma_addr, kargs.mma_len, kargs.mma_prot, + kargs.mma_flags, kargs.mma_fd, kargs.mma_off, &ret); + if (err) + { + curthr->kt_errno = -err; + return MAP_FAILED; + } + return ret; +} + +static pid_t sys_waitpid(waitpid_args_t *args) +{ + waitpid_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + int status; + pid_t pid = do_waitpid(kargs.wpa_pid, &status, kargs.wpa_options); + ERROR_OUT_RET(pid); + + if (kargs.wpa_status) + { + ret = copy_to_user(kargs.wpa_status, &status, sizeof(int)); + ERROR_OUT_RET(ret); + } + + return pid; +} + +static void *sys_brk(void *addr) +{ + void *new_brk; + long ret = do_brk(addr, &new_brk); + if (ret) + { + curthr->kt_errno = -ret; + return (void *)-1; + } + return new_brk; +} + +static void sys_halt(void) { proc_kill_all(); } + +static long sys_stat(stat_args_t *args) +{ + stat_args_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *path; + ret = user_strdup(&kargs.path, &path); + ERROR_OUT_RET(ret); + + stat_t stat_buf; + ret = do_stat(path, &stat_buf); + kfree(path); + ERROR_OUT_RET(ret); + + ret = copy_to_user(kargs.buf, &stat_buf, sizeof(stat_buf)); + ERROR_OUT_RET(ret); + + return ret; +} + +static long sys_pipe(int args[2]) +{ + int kargs[2]; + long ret = do_pipe(kargs); + ERROR_OUT_RET(ret); + + ret = copy_to_user(args, kargs, sizeof(kargs)); + ERROR_OUT_RET(ret); + + return ret; +} + +static long sys_uname(struct utsname *arg) +{ + static const char sysname[] = "Weenix"; + static const char release[] = "1.2"; + /* Version = last compilation time */ + static const char version[] = "#1 " __DATE__ " " __TIME__; + static const char nodename[] = ""; + static const char machine[] = ""; + long ret = 0; + + ret = copy_to_user(arg->sysname, sysname, sizeof(sysname)); + ERROR_OUT_RET(ret); + ret = copy_to_user(arg->release, release, sizeof(release)); + 
ERROR_OUT_RET(ret); + ret = copy_to_user(arg->version, version, sizeof(version)); + ERROR_OUT_RET(ret); + ret = copy_to_user(arg->nodename, nodename, sizeof(nodename)); + ERROR_OUT_RET(ret); + ret = copy_to_user(arg->machine, machine, sizeof(machine)); + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_time(time_t *utloc) +{ + time_t time = do_time(); + if (utloc) + { + long ret = copy_to_user(utloc, &time, sizeof(time_t)); + ERROR_OUT_RET(ret); + } + return time; +} + +static long sys_fork(regs_t *regs) +{ + long ret = do_fork(regs); + ERROR_OUT_RET(ret); + return ret; +} + +static void free_vector(char **vect) +{ + char **temp; + for (temp = vect; *temp; temp++) + { + kfree(*temp); + } + kfree(vect); +} + +static long sys_execve(execve_args_t *args, regs_t *regs) +{ + execve_args_t kargs; + char *filename = NULL; + char **argv = NULL; + char **envp = NULL; + + long ret; + if ((ret = copy_from_user(&kargs, args, sizeof(kargs)))) + goto cleanup; + + if ((ret = user_strdup(&kargs.filename, &filename))) + goto cleanup; + + if (kargs.argv.av_vec && (ret = user_vecdup(&kargs.argv, &argv))) + goto cleanup; + + if (kargs.envp.av_vec && (ret = user_vecdup(&kargs.envp, &envp))) + goto cleanup; + + ret = do_execve(filename, argv, envp, regs); + +cleanup: + if (filename) + kfree(filename); + if (argv) + free_vector(argv); + if (envp) + free_vector(envp); + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_debug(argstr_t *args) +{ + argstr_t kargs; + long ret = copy_from_user(&kargs, args, sizeof(kargs)); + ERROR_OUT_RET(ret); + + char *str; + ret = user_strdup(&kargs, &str); + ERROR_OUT_RET(ret); + dbg(DBG_USER, "%s\n", str); + kfree(str); + return ret; +} + +static long sys_kshell(int ttyid) +{ + // ignoring the ttyid passed in as it always defaults to 0, + // instead using the active_tty value + kshell_t *ksh = kshell_create(active_tty); + ERROR_OUT(!ksh, ENODEV); + + long ret; + while ((ret = kshell_execute_next(ksh)) > 0) + ; + kshell_destroy(ksh); + + ERROR_OUT_RET(ret); + return ret; +} + +static long sys_usleep(usleep_args_t *args) +{ + return do_usleep(args->usec); +} + +static inline void check_curthr_cancelled() +{ + KASSERT(list_empty(&curthr->kt_mutexes)); + long cancelled = curthr->kt_cancelled; + void *retval = curthr->kt_retval; + + if (cancelled) + { + dbg(DBG_SYSCALL, "CANCELLING: thread 0x%p of P%d (%s)\n", curthr, + curproc->p_pid, curproc->p_name); + kthread_exit(retval); + } +} + +static long syscall_handler(regs_t *regs) +{ + size_t sysnum = (size_t)regs->r_rax; + uintptr_t args = (uintptr_t)regs->r_rdx; + + const char *syscall_string; + if (sysnum <= 47) + { + syscall_string = syscall_strings[sysnum]; + } + else + { + if (sysnum == 9001) + { + syscall_string = "debug"; + } + else if (sysnum == 9002) + { + syscall_string = "kshell"; + } + else + { + syscall_string = "unknown"; + } + } + + if (sysnum != SYS_errno) + dbg(DBG_SYSCALL, ">> pid %d, sysnum: %lu (%s), arg: %lu (0x%p)\n", + curproc->p_pid, sysnum, syscall_string, args, (void *)args); + + check_curthr_cancelled(); + long ret = syscall_dispatch(sysnum, args, regs); + check_curthr_cancelled(); + + if (sysnum != SYS_errno) + dbg(DBG_SYSCALL, "<< pid %d, sysnum: %lu (%s), returned: %lu (%#lx)\n", + curproc->p_pid, sysnum, syscall_string, ret, ret); + + regs->r_rax = (uint64_t)ret; + return 0; +} + +static long syscall_dispatch(size_t sysnum, uintptr_t args, regs_t *regs) +{ + switch (sysnum) + { + case SYS_waitpid: + return sys_waitpid((waitpid_args_t *)args); + + case SYS_exit: + do_exit((int)args); + 
panic("exit failed!\n"); + + case SYS_thr_exit: + kthread_exit((void *)args); + panic("thr_exit failed!\n"); + + case SYS_sched_yield: + sched_yield(); + return 0; + + case SYS_fork: + return sys_fork(regs); + + case SYS_getpid: + return curproc->p_pid; + + case SYS_sync: + do_sync(); + return 0; + +#ifdef __MOUNTING__ + case SYS_mount: + return sys_mount((mount_args_t *)args); + + case SYS_umount: + return sys_umount((argstr_t *)args); +#endif + + case SYS_mmap: + return (long)sys_mmap((mmap_args_t *)args); + + case SYS_munmap: + return sys_munmap((munmap_args_t *)args); + + case SYS_open: + return sys_open((open_args_t *)args); + + case SYS_close: + return sys_close((int)args); + + case SYS_read: + return sys_read((read_args_t *)args); + + case SYS_write: + return sys_write((write_args_t *)args); + + case SYS_dup: + return sys_dup((int)args); + + case SYS_dup2: + return sys_dup2((dup2_args_t *)args); + + case SYS_mkdir: + return sys_mkdir((mkdir_args_t *)args); + + case SYS_rmdir: + return sys_rmdir((argstr_t *)args); + + case SYS_unlink: + return sys_unlink((argstr_t *)args); + + case SYS_link: + return sys_link((link_args_t *)args); + + case SYS_rename: + return sys_rename((rename_args_t *)args); + + case SYS_chdir: + return sys_chdir((argstr_t *)args); + + case SYS_getdents: + return sys_getdents((getdents_args_t *)args); + + case SYS_brk: + return (long)sys_brk((void *)args); + + case SYS_lseek: + return sys_lseek((lseek_args_t *)args); + + case SYS_halt: + sys_halt(); + return -1; + + case SYS_set_errno: + curthr->kt_errno = (long)args; + return 0; + + case SYS_errno: + return curthr->kt_errno; + + case SYS_execve: + return sys_execve((execve_args_t *)args, regs); + + case SYS_stat: + return sys_stat((stat_args_t *)args); + + case SYS_pipe: + return sys_pipe((int *)args); + + case SYS_uname: + return sys_uname((struct utsname *)args); + + case SYS_time: + return sys_time((time_t *)args); + + case SYS_debug: + return sys_debug((argstr_t *)args); + + case SYS_kshell: + return sys_kshell((int)args); + + case SYS_usleep: + return sys_usleep((usleep_args_t *)args); + + default: + dbg(DBG_ERROR, "ERROR: unknown system call: %lu (args: 0x%p)\n", + sysnum, (void *)args); + curthr->kt_errno = ENOSYS; + return -1; + } +} |
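[Editor's note: returning to the NOT_YET_IMPLEMENTED stubs earlier in syscall.c (sys_read, sys_write, sys_getdents), below is a minimal sketch of sys_read following the recipe in its comment. The read_args_t field names (fd, buf, nbytes) and the page_alloc_n/page_free_n allocator calls are assumptions, not confirmed by this diff -- check the real headers. sys_write is symmetric, using copy_from_user on the data buffer before do_write.]

    static long sys_read(read_args_t *args)
    {
        read_args_t kargs;
        long ret = copy_from_user(&kargs, args, sizeof(kargs));
        ERROR_OUT_RET(ret);

        if (kargs.nbytes == 0)
        {
            return 0;
        }

        /* temporary kernel buffer: enough whole pages to hold nbytes,
         * so the read never touches userland memory directly */
        size_t npages = (kargs.nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
        void *kbuf = page_alloc_n(npages);
        ERROR_OUT(!kbuf, ENOMEM);

        ret = do_read(kargs.fd, kbuf, kargs.nbytes);
        if (ret >= 0)
        {
            /* copy what was actually read back out to the userland buffer */
            long err = copy_to_user(kargs.buf, kbuf, (size_t)ret);
            if (err)
            {
                ret = err;
            }
        }
        page_free_n(kbuf, npages);
        ERROR_OUT_RET(ret);
        return ret; /* number of bytes read */
    }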