path: root/kernel/api
Diffstat (limited to 'kernel/api')
-rw-r--r--  kernel/api/access.c  | 136
-rw-r--r--  kernel/api/binfmt.c  |  88
-rw-r--r--  kernel/api/elf.c     | 905
-rw-r--r--  kernel/api/exec.c    | 110
-rw-r--r--  kernel/api/syscall.c | 757
5 files changed, 1996 insertions(+), 0 deletions(-)
diff --git a/kernel/api/access.c b/kernel/api/access.c
new file mode 100644
index 0000000..d56e45d
--- /dev/null
+++ b/kernel/api/access.c
@@ -0,0 +1,136 @@
+#include "errno.h"
+#include "globals.h"
+#include <mm/mm.h>
+#include <util/string.h>
+
+#include "util/debug.h"
+
+#include "mm/kmalloc.h"
+#include "mm/mman.h"
+
+#include "api/access.h"
+#include "api/syscall.h"
+
+static inline long userland_address(const void *addr)
+{
+ return addr >= (void *)USER_MEM_LOW && addr < (void *)USER_MEM_HIGH;
+}
+
+/*
+ * Check for permissions on [uaddr, uaddr + nbytes), then
+ * copy nbytes from userland address uaddr to kernel address kaddr.
+ * Do not access the userland virtual addresses directly; instead,
+ * use vmmap_read.
+ */
+long copy_from_user(void *kaddr, const void *uaddr, size_t nbytes)
+{
+ if (!range_perm(curproc, uaddr, nbytes, PROT_READ))
+ {
+ return -EFAULT;
+ }
+ KASSERT(userland_address(uaddr) && !userland_address(kaddr));
+ return vmmap_read(curproc->p_vmmap, uaddr, kaddr, nbytes);
+}
+
+/*
+ * Check for permissions on [uaddr, uaddr + nbytes), then
+ * copy nbytes from kernel address kaddr to userland address uaddr.
+ * Do not access the userland virtual addresses directly; instead,
+ * use vmmap_write.
+ */
+long copy_to_user(void *uaddr, const void *kaddr, size_t nbytes)
+{
+ if (!range_perm(curproc, uaddr, nbytes, PROT_WRITE))
+ {
+ return -EFAULT;
+ }
+ KASSERT(userland_address(uaddr) && !userland_address(kaddr));
+ return vmmap_write(curproc->p_vmmap, uaddr, kaddr, nbytes);
+}
+
+/*
+ * Duplicate the string identified by ustr into kernel memory.
+ * The kernel memory string kstr should be allocated using kmalloc.
+ */
+long user_strdup(argstr_t *ustr, char **kstrp)
+{
+ KASSERT(!userland_address(ustr));
+ KASSERT(userland_address(ustr->as_str));
+
+ *kstrp = kmalloc(ustr->as_len + 1);
+ if (!*kstrp)
+ return -ENOMEM;
+ long ret = copy_from_user(*kstrp, ustr->as_str, ustr->as_len + 1);
+ if (ret)
+ {
+ kfree(*kstrp);
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Duplicate the string of vectors identified by uvec into kernel memory.
+ * The vector itself (char**) and each string (char*) should be allocated
+ * using kmalloc.
+ */
+long user_vecdup(argvec_t *uvec, char ***kvecp)
+{
+ KASSERT(!userland_address(uvec));
+ KASSERT(userland_address(uvec->av_vec));
+
+ char **kvec = kmalloc((uvec->av_len + 1) * sizeof(char *));
+ *kvecp = kvec;
+
+ if (!kvec)
+ {
+ return -ENOMEM;
+ }
+ memset(kvec, 0, (uvec->av_len + 1) * sizeof(char *));
+
+ long ret = 0;
+ for (size_t i = 0; i < uvec->av_len && !ret; i++)
+ {
+ argstr_t argstr;
+ ret = copy_from_user(&argstr, uvec->av_vec + i, sizeof(argstr_t));
+ if (!ret)
+ {
+ ret = user_strdup(&argstr, kvec + i);
+ }
+ }
+
+ if (ret)
+ {
+ for (size_t i = 0; i < uvec->av_len; i++)
+ if (kvec[i])
+ kfree(kvec[i]);
+ kfree(kvec);
+ *kvecp = NULL;
+ }
+
+ return ret;
+}
+
+/*
+ * Return 1 if process p has permissions perm for virtual address vaddr;
+ * otherwise return 0.
+ *
+ * Check against the vmarea's protections on the mapping.
+ */
+long addr_perm(proc_t *p, const void *vaddr, int perm)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
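For orientation, one way addr_perm could be fleshed out is sketched below. It assumes the vmmap_lookup() helper and the vma_prot field from the vmmap interface; it is an illustrative sketch, not the committed implementation.

long addr_perm(proc_t *p, const void *vaddr, int perm)
{
    /* find the vmarea mapping the page containing vaddr */
    vmarea_t *vma = vmmap_lookup(p->p_vmmap, ADDR_TO_PN(vaddr));
    if (!vma)
    {
        return 0;
    }
    /* every requested permission bit must be present in the mapping */
    return (vma->vma_prot & perm) == perm;
}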
+
+/*
+ * Return 1 if process p has permissions perm for virtual address range [vaddr,
+ * vaddr + len); otherwise return 0.
+ *
+ * Hints:
+ * You can use addr_perm in your implementation.
+ * Make sure to consider the case when the range of addresses that is being
+ * checked is less than a page.
+ */
+long range_perm(proc_t *p, const void *vaddr, size_t len, int perm)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
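A similar sketch for range_perm, built on addr_perm and assuming the PAGE_ALIGN_DOWN and PAGE_SIZE macros from mm/page.h. Starting from the page containing vaddr also covers ranges smaller than a page.

long range_perm(proc_t *p, const void *vaddr, size_t len, int perm)
{
    /* walk the range one page at a time, starting at the page containing vaddr */
    const char *cur = (const char *)PAGE_ALIGN_DOWN(vaddr);
    const char *end = (const char *)vaddr + len;
    for (; cur < end; cur += PAGE_SIZE)
    {
        if (!addr_perm(p, cur, perm))
        {
            return 0;
        }
    }
    return 1;
}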
diff --git a/kernel/api/binfmt.c b/kernel/api/binfmt.c
new file mode 100644
index 0000000..1597fdf
--- /dev/null
+++ b/kernel/api/binfmt.c
@@ -0,0 +1,88 @@
+#include "errno.h"
+
+#include "main/inits.h"
+
+#include "fs/fcntl.h"
+#include "fs/file.h"
+#include "fs/vfs_syscall.h"
+
+#include "util/debug.h"
+#include "util/init.h"
+#include "util/list.h"
+
+#include "mm/kmalloc.h"
+
+#include "api/binfmt.h"
+
+typedef struct binfmt
+{
+ const char *bf_id;
+ binfmt_load_func_t bf_load;
+ list_link_t bf_link;
+} binfmt_t;
+
+static list_t binfmt_list = LIST_INITIALIZER(binfmt_list);
+
+long binfmt_add(const char *id, binfmt_load_func_t loadfunc)
+{
+ binfmt_t *fmt;
+ if (NULL == (fmt = kmalloc(sizeof(*fmt))))
+ {
+ return -ENOMEM;
+ }
+
+ dbg(DBG_EXEC, "Registering binary loader %s\n", id);
+
+ fmt->bf_id = id;
+ fmt->bf_load = loadfunc;
+ list_insert_head(&binfmt_list, &fmt->bf_link);
+
+ return 0;
+}
+
+long binfmt_load(const char *filename, char *const *argv, char *const *envp,
+ uint64_t *rip, uint64_t *rsp)
+{
+ long fd = do_open(filename, O_RDONLY);
+ if (fd < 0)
+ {
+ dbg(DBG_EXEC, "ERROR: exec failed to open file %s\n", filename);
+ return fd;
+ }
+ file_t *file = fget((int)fd);
+ long ret = 0;
+ if (S_ISDIR(file->f_vnode->vn_mode))
+ {
+ ret = -EISDIR;
+ }
+ if (!ret && !S_ISREG(file->f_vnode->vn_mode))
+ {
+ ret = -EACCES;
+ }
+ fput(&file);
+ if (ret)
+ {
+ do_close((int)fd);
+ return ret;
+ }
+
+ list_iterate(&binfmt_list, fmt, binfmt_t, bf_link)
+ {
+ dbg(DBG_EXEC, "Trying to exec %s using binary loader %s\n", filename,
+ fmt->bf_id);
+
+ /* ENOEXEC indicates that the given loader is unable to load
+ * the given file; any other error indicates that the file
+ * was recognized, but some other error occurred which should
+ * be returned to the user. Only if all loaders return ENOEXEC
+ * do we actually return ENOEXEC. */
+ ret = fmt->bf_load(filename, (int)fd, argv, envp, rip, rsp);
+ if (ret != -ENOEXEC)
+ {
+ do_close((int)fd);
+ return ret;
+ }
+ }
+
+ do_close((int)fd);
+ return ret;
+}
diff --git a/kernel/api/elf.c b/kernel/api/elf.c
new file mode 100644
index 0000000..5ad4a33
--- /dev/null
+++ b/kernel/api/elf.c
@@ -0,0 +1,905 @@
+/*
+ * The elf32 loader (the basis for this file) was modified by twd in 7/2018 so
+ * that it lays out the address space in a more Unix-like fashion (e.g., the
+ * stack is at the top of user memory, text is near the bottom).
+ *
+ * This loader (like the elf32 loader) is not strictly ABI compliant. See the
+ * Intel i386 ELF supplement pp 54-59 and AMD64 ABI Draft 0.99.6 page 29 for
+ * what initial process stacks are supposed to look like after the iret(q) in
+ * userland_entry is executed. The following would be required (but not
+ * necessarily sufficient!) for full compliance:
+ *
+ * 1) Remove the pointers to argv, envp, and auxv from the initial stack.
+ * 2) Have __libc_static_entry (static entry) and _ldloadrtld (callee of dynamic
+ * entry) calculate those pointers and place them on the stack (x86) or in
+ * registers (x86-64) along with argc as arguments to main.
+ * 3) Ensure that the stack pointer is 4-byte (x86) or 16-byte (x86-64) aligned
+ * by padding the end of the arguments being written to the stack with zeros.
+ * 4) Have the stack pointer point to argc, rather than a garbage return address.
+ * 5) Have __libc_static_entry and _bootstrap (ld-weenix) respect this change.
+ */
+
+#include "errno.h"
+#include "globals.h"
+
+#include "main/inits.h"
+
+#include "mm/kmalloc.h"
+#include "mm/mm.h"
+#include "mm/mman.h"
+#include "mm/tlb.h"
+
+#include "api/binfmt.h"
+#include "api/elf.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+#include "fs/fcntl.h"
+#include "fs/file.h"
+#include "fs/lseek.h"
+#include "fs/vfs_syscall.h"
+
+static long _elf64_platform_check(const Elf64_Ehdr *header)
+{
+ return (EM_X86_64 == header->e_machine) // machine
+ && (ELFCLASS64 == header->e_ident[EI_CLASS]) // 32 or 64 bit
+ && (ELFDATA2LSB == header->e_ident[EI_DATA]); // endianness
+}
+
+/* Helper function for the ELF loader. Maps the specified segment
+ * of the program header from the given file into the given address
+ * space with the given memory offset (in pages). On success returns 0,
+ * otherwise returns a negative error code for the ELF loader to return. Note
+ * that since any error returned by this function should cause the ELF loader to
+ * give up, it is acceptable for the address space to be modified after
+ * returning an error. Note that memoff can be negative */
+static long _elf64_map_segment(vmmap_t *map, vnode_t *file, int64_t memoff,
+ const Elf64_Phdr *segment)
+{
+ /* calculate starting virtual address of segment */
+ uintptr_t addr;
+ if (memoff < 0)
+ {
+ KASSERT(ADDR_TO_PN(segment->p_vaddr) > (uint64_t)-memoff);
+ addr = (uintptr_t)segment->p_vaddr - (uintptr_t)PN_TO_ADDR(-memoff);
+ }
+ else
+ {
+ addr = (uintptr_t)segment->p_vaddr + (uintptr_t)PN_TO_ADDR(memoff);
+ }
+ uint64_t off = segment->p_offset;
+ uint64_t memsz = segment->p_memsz;
+ uint64_t filesz = segment->p_filesz;
+
+ dbg(DBG_ELF,
+ "Mapping program segment: type %#x, offset %#16lx,"
+ " vaddr %#16lx, filesz %#lx, memsz %#lx, flags %#x, align %#lx\n",
+ segment->p_type, segment->p_offset, segment->p_vaddr, segment->p_filesz,
+ segment->p_memsz, segment->p_flags, segment->p_align);
+
+ /* check for bad data in the segment header */
+ if ((segment->p_align % PAGE_SIZE))
+ {
+ dbg(DBG_ELF, "ERROR: segment not aligned on page\n");
+ return -ENOEXEC;
+ }
+ else if (filesz > memsz)
+ {
+ dbg(DBG_ELF, "ERROR: segment file size is greater than memory size\n");
+ return -ENOEXEC;
+ }
+ else if (PAGE_OFFSET(addr) != PAGE_OFFSET(off))
+ {
+ dbg(DBG_ELF,
+ "ERROR: segment address and offset are not aligned correctly\n");
+ return -ENOEXEC;
+ }
+
+ /* calculate segment permissions */
+ int perms = 0;
+ if (PF_R & segment->p_flags)
+ {
+ perms |= PROT_READ;
+ }
+ if (PF_W & segment->p_flags)
+ {
+ perms |= PROT_WRITE;
+ }
+ if (PF_X & segment->p_flags)
+ {
+ perms |= PROT_EXEC;
+ }
+
+ if (filesz > 0)
+ {
+ /* something needs to be mapped from the file */
+ /* start from the starting address and include enough pages to
+ * map all filesz bytes of the file */
+ uint64_t lopage = ADDR_TO_PN(addr);
+ uint64_t npages = ADDR_TO_PN(addr + filesz - 1) - lopage + 1;
+ off_t fileoff = (off_t)PAGE_ALIGN_DOWN(off);
+
+ if (!vmmap_is_range_empty(map, lopage, npages))
+ {
+ dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
+ return -ENOEXEC;
+ }
+ long ret = vmmap_map(map, file, lopage, npages, perms,
+ MAP_PRIVATE | MAP_FIXED, fileoff, 0, NULL);
+ if (ret)
+ return ret;
+ dbg(DBG_ELF,
+ "Mapped segment of length %lu pages at %#lx, memoff = %#lx\n",
+ npages, addr, memoff);
+ }
+
+ if (memsz > filesz)
+ {
+ /* there is left over memory in the segment which must
+ * be initialized to 0 (anonymously mapped) */
+ uint64_t lopage = ADDR_TO_PN(
+ addr +
+ filesz); // the first page containing data not stored in the file
+ uint64_t npages =
+ ADDR_TO_PN(PAGE_ALIGN_UP(addr + memsz)) -
+ lopage; // the first page past the end of the segment, minus lopage
+
+ /* check for overlapping mappings, considering the case where lopage
+ * contains file data and the case where it doesn't */
+ if (PAGE_ALIGNED(addr + filesz) &&
+ !vmmap_is_range_empty(map, lopage, npages))
+ {
+ dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
+ return -ENOEXEC;
+ }
+ if (!PAGE_ALIGNED(addr + filesz) && npages > 1 &&
+ !vmmap_is_range_empty(map, lopage + 1, npages - 1))
+ {
+ dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
+ return -ENOEXEC;
+ }
+ long ret = vmmap_map(map, NULL, lopage, npages, perms,
+ MAP_PRIVATE | MAP_FIXED, 0, 0, NULL);
+ if (ret)
+ return ret;
+ if (!PAGE_ALIGNED(addr + filesz) && filesz > 0)
+ {
+ /* In this case, we have accidentally zeroed too much of memory, as
+ * we zeroed all memory in the page containing addr + filesz.
+ * However, the remaining part of the data is not a full page, so we
+ * should not just map in another page (as there could be garbage
+ * after addr+filesz). For instance, consider the data-bss boundary
+ * (c.f. Intel x86 ELF supplement pp. 82).
+ * To fix this, we need to read in the contents of the file manually
+ * and put them at that user space addr in the anon map we just
+ * added. */
+ void *buf = page_alloc();
+ if (!buf)
+ return -ENOMEM;
+
+ vlock(file);
+ ret = file->vn_ops->read(file,
+ (size_t)PAGE_ALIGN_DOWN(off + filesz - 1),
+ buf, PAGE_OFFSET(addr + filesz));
+ if (ret >= 0)
+ {
+ KASSERT((uintptr_t)ret == PAGE_OFFSET(addr + filesz));
+ ret = vmmap_write(map, PAGE_ALIGN_DOWN(addr + filesz - 1), buf,
+ PAGE_OFFSET(addr + filesz));
+ }
+ vunlock(file);
+ page_free(buf);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/* Reads the given fd's ELF header into the location pointed to by the given
+ * argument and does some basic checks that it is a valid ELF file, is an
+ * executable, and is for the correct platform.
+ * interp is 1 if we are loading an interpreter, 0 otherwise.
+ * Returns 0 on success, -errno on failure. Returns the ELF header in the header
+ * argument. */
+static long _elf64_load_ehdr(int fd, Elf64_Ehdr *header, int interp)
+{
+ long ret;
+ memset(header, 0, sizeof(*header));
+
+ /* Preliminary check that this is an ELF file */
+ ret = do_read(fd, header, sizeof(*header));
+ if (ret < 0)
+ return ret;
+ if ((ret < SELFMAG) || memcmp(&header->e_ident[0], ELFMAG, SELFMAG) != 0)
+ {
+ dbg(DBG_ELF, "ELF load failed: no magic number present\n");
+ return -ENOEXEC;
+ }
+ if (ret < header->e_ehsize)
+ {
+ dbg(DBG_ELF, "ELF load failed: bad file size\n");
+ return -ENOEXEC;
+ }
+ /* Log information about the file */
+ dbg(DBG_ELF, "loading ELF file\n");
+ dbgq(DBG_ELF, "ELF Header Information:\n");
+ dbgq(DBG_ELF, "Version: %d\n", (int)header->e_ident[EI_VERSION]);
+ dbgq(DBG_ELF, "Class: %d\n", (int)header->e_ident[EI_CLASS]);
+ dbgq(DBG_ELF, "Data: %d\n", (int)header->e_ident[EI_DATA]);
+ dbgq(DBG_ELF, "Type: %d\n", (int)header->e_type);
+ dbgq(DBG_ELF, "Machine: %d\n", (int)header->e_machine);
+
+ /* Check that the ELF file is executable and targets
+ * the correct platform */
+ if (interp && header->e_type != ET_DYN)
+ {
+ dbg(DBG_ELF,
+ "ELF load failed: interpreter is not a shared object file\n");
+ return -ENOEXEC;
+ }
+ if (!interp && header->e_type != ET_EXEC)
+ {
+ dbg(DBG_ELF, "ELF load failed: not executable ELF\n");
+ return -ENOEXEC;
+ }
+ if (!_elf64_platform_check(header))
+ {
+ dbg(DBG_ELF, "ELF load failed: incorrect platform\n");
+ return -ENOEXEC;
+ }
+ return 0;
+}
+
+/* Loads the program header tables from the ELF file specified by
+ * the open file descriptor fd. header should point to the header information
+ * for that ELF file. pht is a buffer of size size. It must be large enough
+ * to hold the program header tables (whose size can be determined from
+ * the ELF header).
+ *
+ * Returns 0 on success or -errno on error. */
+static long _elf64_load_phtable(int fd, Elf64_Ehdr *header, char *pht,
+ size_t size)
+{
+ size_t phtsize = header->e_phentsize * header->e_phnum;
+ KASSERT(phtsize <= size);
+ /* header->e_phoff is a uint64_t cast to int. since the max file size on
+ * s5fs is way smaller than uint32_t, offsets in practice should never
+ * cause this cast to behave badly, although if weenix ever adds support
+ * for very large (> 4GB) files, this will be a bug.
+ */
+ long ret = do_lseek(fd, (int)(header->e_phoff), SEEK_SET);
+ if (ret < 0)
+ return ret;
+
+ ret = do_read(fd, pht, phtsize);
+ if (ret < 0)
+ return ret;
+
+ KASSERT((size_t)ret <= phtsize);
+ if ((size_t)ret < phtsize)
+ {
+ return -ENOEXEC;
+ }
+ return 0;
+}
+
+/* Maps the PT_LOAD segments for an ELF file into the given address space.
+ * vnode should be the open vnode of the ELF file.
+ * map is the address space to map the ELF file into.
+ * header is the ELF file's header.
+ * pht is the full program header table.
+ * memoff is the difference (in pages) between the desired base address and the
+ * base address given in the ELF file (usually 0x8048094)
+ *
+ * Returns the number of segments loaded on success, -errno on failure. */
+static long _elf64_map_progsegs(vnode_t *vnode, vmmap_t *map,
+ Elf64_Ehdr *header, char *pht, int64_t memoff)
+{
+ long ret = 0;
+
+ long loadcount = 0;
+ for (uint32_t i = 0; i < header->e_phnum; i++)
+ {
+ Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize);
+ if (phtentry->p_type == PT_LOAD)
+ {
+ ret = _elf64_map_segment(map, vnode, memoff, phtentry);
+ if (ret)
+ return ret;
+ loadcount++;
+ }
+ }
+
+ if (!loadcount)
+ {
+ dbg(DBG_ELF, "ERROR: ELF file contained no loadable sections\n");
+ return -ENOEXEC;
+ }
+ return loadcount;
+}
+
+/* Locates the program header for the interpreter in the given list of program
+ * headers and returns it through the phinterp out-argument. Returns 0 on
+ * success (even if there is no interpreter) or -errno on error. If there is no
+ * interpreter section then phinterp is set to NULL. If there is more than one
+ * interpreter then -EINVAL is returned. */
+static long _elf64_find_phinterp(Elf64_Ehdr *header, char *pht,
+ Elf64_Phdr **phinterp)
+{
+ *phinterp = NULL;
+
+ for (uint32_t i = 0; i < header->e_phnum; i++)
+ {
+ Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize);
+ if (phtentry->p_type == PT_INTERP)
+ {
+ if (!*phinterp)
+ {
+ *phinterp = phtentry;
+ }
+ else
+ {
+ dbg(DBG_ELF, "ELF load failed: multiple interpreters\n");
+ return -EINVAL;
+ }
+ }
+ }
+ return 0;
+}
+
+/* Calculates the lower and upper virtual addresses that the given program
+ * header table would load into if _elf64_map_progsegs were called. We traverse
+ * all the program segments of type PT_LOAD and look at p_vaddr and p_memsz.
+ * Return the low and high vaddrs in the given arguments if they are non-NULL.
+ * The high vaddr is one plus the highest vaddr used by the program. */
+static void _elf64_calc_progbounds(Elf64_Ehdr *header, char *pht, void **low,
+ void **high)
+{
+ Elf64_Addr curlow = (Elf64_Addr)-1;
+ Elf64_Addr curhigh = 0;
+ for (uint32_t i = 0; i < header->e_phnum; i++)
+ {
+ Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize);
+ if (phtentry->p_type == PT_LOAD)
+ {
+ if (phtentry->p_vaddr < curlow)
+ {
+ curlow = phtentry->p_vaddr;
+ }
+ if (phtentry->p_vaddr + phtentry->p_memsz > curhigh)
+ {
+ curhigh = phtentry->p_vaddr + phtentry->p_memsz;
+ }
+ }
+ }
+ if (low)
+ {
+ *low = (void *)curlow;
+ }
+ if (high)
+ {
+ *high = (void *)curhigh;
+ }
+}
+
+/* Calculates the total size of all the arguments that need to be placed on the
+ * user stack before execution can begin. See AMD64 ABI Draft 0.99.6 page 29
+ * Returns the total size on success. Returns the number of non-NULL entries in
+ * argv, envp, and auxv in the argc, envc, and auxc arguments, respectively. */
+static size_t _elf64_calc_argsize(char *const argv[], char *const envp[],
+ Elf64_auxv_t *auxv, size_t phtsize,
+ size_t *argc, size_t *envc, size_t *auxc)
+{
+ size_t size = 0;
+ size_t i;
+ /* All strings in argv */
+ for (i = 0; argv[i]; i++)
+ {
+ size += strlen(argv[i]) + 1; /* null terminator */
+ }
+ if (argc)
+ {
+ *argc = i;
+ }
+ /* argv itself (+ null terminator) */
+ size += (i + 1) * sizeof(char *);
+
+ /* All strings in envp */
+ for (i = 0; envp[i] != NULL; i++)
+ {
+ size += strlen(envp[i]) + 1; /* null terminator */
+ }
+ if (envc != NULL)
+ {
+ *envc = i;
+ }
+ /* envp itself (+ null terminator) */
+ size += (i + 1) * sizeof(char *);
+
+ /* The only extra-space-consuming entry in auxv is AT_PHDR, as if we find
+ * that entry we'll need to put the program header table on the stack */
+ for (i = 0; auxv[i].a_type != AT_NULL; i++)
+ {
+ if (auxv[i].a_type == AT_PHDR)
+ {
+ size += phtsize;
+ }
+ }
+ if (auxc)
+ {
+ *auxc = i;
+ }
+ /* auxv itself (+ null terminator) */
+ size += (i + 1) * sizeof(Elf64_auxv_t);
+
+ /* argc - reserving 8 bytes for alignment purposes */
+ size += sizeof(int64_t);
+ /* argv, envp, and auxv pointers (as passed to main) */
+ size += 3 * sizeof(void *);
+
+ /*
+ * cjm5: the above isn't strictly ABI compliant. normally the userspace
+ * wrappers to main() (__libc_static_entry or _bootstrap for ld-weenix) are
+ * responsible for calculating *argv, *envp, *and *auxv to pass to main().
+ * It's easier to do it here, though.
+ */
+
+ return size;
+}
+
+/* Copies the arguments that must be on the stack prior to execution onto the
+ * user stack. This should never fail.
+ * arglow: low address on the user stack where we should start the copying
+ * argsize: total size of everything to go on the stack
+ * buf: a kernel buffer at least as big as argsize (for convenience)
+ * argv, envp, auxv: various vectors of stuff (to go on the stack)
+ * argc, envc, auxc: number of non-NULL entries in argv, envp, auxv,
+ * respectively (to avoid recomputing them)
+ * phtsize: the size of the program header table (to avoid recomputing)
+ * c.f. Intel i386 ELF supplement pp 54-59 and AMD64 ABI Draft 0.99.6 page 29
+ */
+static void _elf64_load_args(vmmap_t *map, void *arglow, size_t argsize,
+ char *buf, char *const argv[], char *const envp[],
+ Elf64_auxv_t *auxv, size_t argc, size_t envc,
+ size_t auxc, size_t phtsize)
+{
+ dbg(DBG_ELF,
+ "Loading initial stack contents at 0x%p, argc = %lu, envc = %lu, auxc "
+ "= %lu\n",
+ arglow, argc, envc, auxc);
+
+ size_t i;
+
+ /* Copy argc: in x86-64, this is an eight-byte value, despite being treated
+ * as an int in a C main() function. See AMD64 ABI Draft 0.99.6 page 29 */
+ *((int64_t *)buf) = (int64_t)argc;
+
+ /* Calculate where the strings / tables pointed to by the vectors start */
+ size_t veclen = (argc + 1 + envc + 1) * sizeof(char *) +
+ (auxc + 1) * sizeof(Elf64_auxv_t);
+
+ char *vecstart =
+ buf + sizeof(int64_t) +
+ 3 * sizeof(void *); /* Beginning of argv (in kernel buffer) */
+
+ char *vvecstart =
+ ((char *)arglow) + sizeof(int64_t) +
+ 3 * sizeof(void *); /* Beginning of argv (in user space) */
+
+ char *strstart = vecstart + veclen; /* Beginning of first string pointed to
+ by argv (in kernel buffer) */
+
+ /* Beginning of first string pointed to by argv (in user space) */
+ char *vstrstart = vvecstart + veclen;
+
+ /*
+ * cjm5: since the first 6 arguments that can fit in registers are placed
+ * there in x86-64, __libc_static_entry (and ld-weenix, if it is ever ported
+ * to x86-64) have to take the following pointers off the stack and move
+ * them and argc into the first 4 argument registers before calling main().
+ */
+
+ /* Copy over pointer to argv */
+ *(char **)(buf + 8) = vvecstart;
+ /* Copy over pointer to envp */
+ *(char **)(buf + 16) = vvecstart + (argc + 1) * sizeof(char *);
+ /* Copy over pointer to auxv */
+ *(char **)(buf + 24) = vvecstart + (argc + 1 + envc + 1) * sizeof(char *);
+
+ /* Copy over argv along with every string in it */
+ for (i = 0; i < argc; i++)
+ {
+ size_t len = strlen(argv[i]) + 1;
+ strcpy(strstart, argv[i]);
+ /* Remember that we need to use the virtual address of the string */
+ *(char **)vecstart = vstrstart;
+ strstart += len;
+ vstrstart += len;
+ vecstart += sizeof(char *);
+ }
+ /* null terminator of argv */
+ *(char **)vecstart = NULL;
+ vecstart += sizeof(char *);
+
+ /* Copy over envp along with every string in it */
+ for (i = 0; i < envc; i++)
+ {
+ size_t len = strlen(envp[i]) + 1;
+ strcpy(strstart, envp[i]);
+ /* Remember that we need to use the virtual address of the string */
+ *(char **)vecstart = vstrstart;
+ strstart += len;
+ vstrstart += len;
+ vecstart += sizeof(char *);
+ }
+ /* null terminator of envp */
+ *(char **)vecstart = NULL;
+ vecstart += sizeof(char *);
+
+ /* Copy over auxv along with the program header (if we find it) */
+ for (i = 0; i < auxc; i++)
+ {
+ /* Copy over the auxv entry */
+ memcpy(vecstart, &auxv[i], sizeof(Elf64_auxv_t));
+ /* Check if it points to the program header */
+ if (auxv[i].a_type == AT_PHDR)
+ {
+ /* Copy over the program header table */
+ memcpy(strstart, auxv[i].a_un.a_ptr, (size_t)phtsize);
+ /* And modify the address */
+ ((Elf64_auxv_t *)vecstart)->a_un.a_ptr = vstrstart;
+ }
+ vecstart += sizeof(Elf64_auxv_t);
+ }
+ /* null terminator of auxv */
+ ((Elf64_auxv_t *)vecstart)->a_type = AT_NULL;
+
+ /* Finally, we're done copying into the kernel buffer. Now just copy the
+ * kernel buffer into user space */
+ long ret = vmmap_write(map, arglow, buf, argsize);
+ /* If this failed, we must have set up the address space wrong... */
+ KASSERT(!ret);
+}
+
+static long _elf64_load(const char *filename, int fd, char *const argv[],
+ char *const envp[], uint64_t *rip, uint64_t *rsp)
+{
+ long ret = 0;
+ Elf64_Ehdr header;
+ Elf64_Ehdr interpheader;
+
+ /* variables to clean up on failure */
+ vmmap_t *map = NULL;
+ file_t *file = NULL;
+ char *pht = NULL;
+ char *interpname = NULL;
+ long interpfd = -1;
+ file_t *interpfile = NULL;
+ char *interppht = NULL;
+ Elf64_auxv_t *auxv = NULL;
+ char *argbuf = NULL;
+
+ uintptr_t entry;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ /* Load and verify the ELF header */
+ ret = _elf64_load_ehdr(fd, &header, 0);
+ if (ret)
+ goto done;
+
+ map = vmmap_create();
+ if (!map)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ // Program header table entry size multiplied by
+ // number of entries.
+ size_t phtsize = header.e_phentsize * header.e_phnum;
+ pht = kmalloc(phtsize);
+ if (!pht)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Read in the program header table */
+ ret = _elf64_load_phtable(fd, &header, pht, phtsize);
+ if (ret)
+ goto done;
+
+ /* Load the segments in the program header table */
+ ret = _elf64_map_progsegs(file->f_vnode, map, &header, pht, 0);
+ if (ret < 0)
+ goto done;
+
+ /* Check if program requires an interpreter */
+ Elf64_Phdr *phinterp = NULL;
+ ret = _elf64_find_phinterp(&header, pht, &phinterp);
+ if (ret)
+ goto done;
+
+ /* Calculate program bounds for future reference */
+ void *proglow;
+ void *proghigh;
+ _elf64_calc_progbounds(&header, pht, &proglow, &proghigh);
+
+ entry = (uintptr_t)header.e_entry;
+
+ /* if an interpreter was requested load it */
+ if (phinterp)
+ {
+ /* read the file name of the interpreter from the binary */
+ ret = do_lseek(fd, (int)(phinterp->p_offset), SEEK_SET);
+ if (ret < 0)
+ goto done;
+
+ interpname = kmalloc(phinterp->p_filesz);
+ if (!interpname)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ ret = do_read(fd, interpname, phinterp->p_filesz);
+ if (ret < 0)
+ goto done;
+
+ if ((size_t)ret != phinterp->p_filesz)
+ {
+ ret = -ENOEXEC;
+ goto done;
+ }
+
+ /* open the interpreter */
+ dbgq(DBG_ELF, "ELF Interpreter: %*s\n", (int)phinterp->p_filesz,
+ interpname);
+ interpfd = do_open(interpname, O_RDONLY);
+ if (interpfd < 0)
+ {
+ ret = interpfd;
+ goto done;
+ }
+ kfree(interpname);
+ interpname = NULL;
+
+ interpfile = fget((int)interpfd);
+ KASSERT(interpfile);
+
+ /* Load and verify the interpreter ELF header */
+ ret = _elf64_load_ehdr((int)interpfd, &interpheader, 1);
+ if (ret)
+ goto done;
+
+ size_t interpphtsize = interpheader.e_phentsize * interpheader.e_phnum;
+ interppht = kmalloc(interpphtsize);
+ if (!interppht)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Read in the program header table */
+ ret = _elf64_load_phtable((int)interpfd, &interpheader, interppht,
+ interpphtsize);
+ if (ret)
+ goto done;
+
+ /* Interpreter shouldn't itself need an interpreter */
+ Elf64_Phdr *interpphinterp;
+ ret = _elf64_find_phinterp(&interpheader, interppht, &interpphinterp);
+ if (ret)
+ goto done;
+
+ if (interpphinterp)
+ {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* Calculate the interpreter program size */
+ void *interplow;
+ void *interphigh;
+ _elf64_calc_progbounds(&interpheader, interppht, &interplow,
+ &interphigh);
+ uint64_t interpnpages =
+ ADDR_TO_PN(PAGE_ALIGN_UP(interphigh)) - ADDR_TO_PN(interplow);
+
+ /* Find space for the interpreter */
+ /* This is the first pn at which the interpreter will be mapped */
+ uint64_t interppagebase =
+ (uint64_t)vmmap_find_range(map, interpnpages, VMMAP_DIR_HILO);
+ if (interppagebase == ~0UL)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ /* Base address at which the interpreter begins on that page */
+ void *interpbase = (void *)((uintptr_t)PN_TO_ADDR(interppagebase) +
+ PAGE_OFFSET(interplow));
+
+ /* Offset from "expected base" in number of pages */
+ int64_t interpoff =
+ (int64_t)interppagebase - (int64_t)ADDR_TO_PN(interplow);
+
+ entry = (uintptr_t)interpbase +
+ ((uintptr_t)interpheader.e_entry - (uintptr_t)interplow);
+
+ /* Load the interpreter program header and map in its segments */
+ ret = _elf64_map_progsegs(interpfile->f_vnode, map, &interpheader,
+ interppht, interpoff);
+ if (ret < 0)
+ goto done;
+
+ /* Build the ELF aux table */
+ /* Need to hold AT_PHDR, AT_PHENT, AT_PHNUM, AT_ENTRY, AT_BASE,
+ * AT_PAGESZ, AT_NULL */
+ auxv = (Elf64_auxv_t *)kmalloc(7 * sizeof(Elf64_auxv_t));
+ if (!auxv)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ Elf64_auxv_t *auxvent = auxv;
+
+ /* Add all the necessary entries */
+ auxvent->a_type = AT_PHDR;
+ auxvent->a_un.a_ptr = pht;
+ auxvent++;
+
+ auxvent->a_type = AT_PHENT;
+ auxvent->a_un.a_val = header.e_phentsize;
+ auxvent++;
+
+ auxvent->a_type = AT_PHNUM;
+ auxvent->a_un.a_val = header.e_phnum;
+ auxvent++;
+
+ auxvent->a_type = AT_ENTRY;
+ auxvent->a_un.a_ptr = (void *)header.e_entry;
+ auxvent++;
+
+ auxvent->a_type = AT_BASE;
+ auxvent->a_un.a_ptr = interpbase;
+ auxvent++;
+
+ auxvent->a_type = AT_PAGESZ;
+ auxvent->a_un.a_val = PAGE_SIZE;
+ auxvent++;
+
+ auxvent->a_type = AT_NULL;
+ }
+ else
+ {
+ /* Just put AT_NULL (we don't really need this at all) */
+ auxv = (Elf64_auxv_t *)kmalloc(sizeof(Elf64_auxv_t));
+ if (!auxv)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ auxv->a_type = AT_NULL;
+ }
+
+ /* Allocate stack at the top of the address space */
+ uint64_t stack_lopage = (uint64_t)vmmap_find_range(
+ map, (DEFAULT_STACK_SIZE / PAGE_SIZE) + 1, VMMAP_DIR_HILO);
+ if (stack_lopage == ~0UL)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ ret =
+ vmmap_map(map, NULL, stack_lopage, (DEFAULT_STACK_SIZE / PAGE_SIZE) + 1,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, 0, 0, NULL);
+ KASSERT(0 == ret);
+ dbg(DBG_ELF, "Mapped Stack at low addr 0x%p, size %#lx\n",
+ PN_TO_ADDR(stack_lopage), DEFAULT_STACK_SIZE + PAGE_SIZE);
+
+ /* Calculate size needed on user stack for arguments */
+ size_t argc, envc, auxc;
+ size_t argsize =
+ _elf64_calc_argsize(argv, envp, auxv, phtsize, &argc, &envc, &auxc);
+ /* Make sure it fits on the stack */
+ if (argsize >= DEFAULT_STACK_SIZE)
+ {
+ ret = -E2BIG;
+ goto done;
+ }
+ /* Allocate kernel buffer for temporarily storing arguments */
+ argbuf = (char *)kmalloc(argsize);
+ if (!argbuf)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Calculate where in user space we start putting the args. */
+ // the args go at the beginning (top) of the stack
+ void *arglow =
+ (char *)PN_TO_ADDR(stack_lopage) +
+ (uint64_t)(
+ ((uint64_t)PN_TO_ADDR((DEFAULT_STACK_SIZE / PAGE_SIZE) + 1)) -
+ argsize);
+
+ /* Copy everything into the user address space, modifying addresses in
+ * argv, envp, and auxv to be user addresses as we go. */
+ _elf64_load_args(map, arglow, argsize, argbuf, argv, envp, auxv, argc, envc,
+ auxc, phtsize);
+
+ dbg(DBG_ELF,
+ "Past the point of no return. Swapping to map at 0x%p, setting brk to "
+ "0x%p\n",
+ map, proghigh);
+ /* the final threshold / What warm unspoken secrets will we learn? / Beyond
+ * the point of no return ... */
+
+ /* Give the process the new mappings. */
+ vmmap_destroy(&curproc->p_vmmap);
+ map->vmm_proc = curproc;
+ curproc->p_vmmap = map;
+ map = NULL; /* So it doesn't get cleaned up at the end */
+
+ /* Flush the process pagetables and TLB */
+ pt_unmap_range(curproc->p_pml4, USER_MEM_LOW, USER_MEM_HIGH);
+ tlb_flush_all();
+
+ /* Set the process break and starting break (immediately after the mapped-in
+ * text/data/bss from the executable) */
+ curproc->p_brk = proghigh;
+ curproc->p_start_brk = proghigh;
+
+ strncpy(curproc->p_name, filename, PROC_NAME_LEN);
+
+ /* Tell the caller the correct stack pointer and instruction
+ * pointer to begin execution in user space */
+ *rip = (uint64_t)entry;
+ *rsp = ((uint64_t)arglow) -
+ 8; /* Space on the user stack for the (garbage) return address */
+ /* Note that the return address will be fixed by the userland entry code,
+ * whether statically or dynamically linked */
+
+ /* And we're done */
+ ret = 0;
+
+// https://www.youtube.com/watch?v=PJhXVg2QisM
+done:
+ fput(&file);
+ if (map)
+ {
+ vmmap_destroy(&map);
+ }
+ if (pht)
+ {
+ kfree(pht);
+ }
+ if (interpname)
+ {
+ kfree(interpname);
+ }
+ if (interpfd >= 0)
+ {
+ do_close((int)interpfd);
+ }
+ if (interpfile)
+ {
+ fput(&interpfile);
+ }
+ if (interppht)
+ {
+ kfree(interppht);
+ }
+ if (auxv)
+ {
+ kfree(auxv);
+ }
+ if (argbuf)
+ {
+ kfree(argbuf);
+ }
+ return ret;
+}
+
+void elf64_init(void) { binfmt_add("ELF64", _elf64_load); }
diff --git a/kernel/api/exec.c b/kernel/api/exec.c
new file mode 100644
index 0000000..e0b66e8
--- /dev/null
+++ b/kernel/api/exec.c
@@ -0,0 +1,110 @@
+#include "util/debug.h"
+#include <util/string.h>
+
+#include "main/gdt.h"
+
+#include "api/binfmt.h"
+#include "api/exec.h"
+#include "api/syscall.h"
+
+/* Enters userland from the kernel. Call this for a process that has up to now
+ * been a kernel-only process. Takes the registers to start userland execution
+ * with. Does not return. Note that the regs passed in should be on the current
+ * stack of execution.
+ */
+
+void userland_entry(const regs_t regs)
+{
+ KASSERT(preemption_enabled());
+
+ dbg(DBG_ELF, ">>>>>>>>>>>>>>> pid: %d\n", curproc->p_pid);
+
+ intr_disable();
+ dbg(DBG_ELF, ">>>>>>>>>>>>>>>> intr_disable()\n");
+ intr_setipl(IPL_LOW);
+ dbg(DBG_ELF, ">>>>>>>>>>>>>>>> intr_setipl()\n");
+
+ __asm__ __volatile__(
+ "movq %%rax, %%rsp\n\t" /* Move stack pointer up to regs */
+ "popq %%r15\n\t" /* Pop all general purpose registers (except rsp, */
+ "popq %%r14\n\t" /* which gets popped by iretq) */
+ "popq %%r13\n\t"
+ "popq %%r12\n\t"
+ "popq %%rbp\n\t"
+ "popq %%rbx\n\t"
+ "popq %%r11\n\t"
+ "popq %%r10\n\t"
+ "popq %%r9\n\t"
+ "popq %%r8\n\t"
+ "popq %%rax\n\t"
+ "popq %%rcx\n\t"
+ "popq %%rdx\n\t"
+ "popq %%rsi\n\t"
+ "popq %%rdi\n\t"
+ "add $16, %%rsp\n\t" /*
+ * Move stack pointer up to the location of the
+ * arguments automatically pushed by the processor
+ * on an interrupt
+ */
+ "iretq\n"
+ /* We're now in userland! */
+ : /* No outputs */
+ : "a"(&regs) /* Forces regs to be in the 'a' register (%rax). */
+ );
+}
+
+long do_execve(const char *filename, char *const *argv, char *const *envp,
+ struct regs *regs)
+{
+ uint64_t rip, rsp;
+ long ret = binfmt_load(filename, argv, envp, &rip, &rsp);
+ if (ret < 0)
+ {
+ return ret;
+ }
+ /* Make sure we "return" into the start of the newly loaded binary */
+ dbg(DBG_EXEC, "Executing binary with rip 0x%p, rsp 0x%p\n", (void *)rip,
+ (void *)rsp);
+ regs->r_rip = rip;
+ regs->r_rsp = rsp;
+ return 0;
+}
+
+/*
+ * The kernel version of execve needs to construct a set of saved user registers
+ * and fake a return from an interrupt to get to userland. The 64-bit version
+ * behaves mostly the same as the 32-bit version, but there are a few
+ * differences. Besides different general purpose registers, there is no longer
+ * a need for two esp/rsp fields since popa is not valid assembly in 64-bit. The
+ * only non-null segment registers are now cs and ss, but they are set the same
+ * as in 32-bit, although the segment descriptors they point to are slightly
+ * different.
+ */
+void kernel_execve(const char *filename, char *const *argv, char *const *envp)
+{
+ uint64_t rip, rsp;
+ long ret = binfmt_load(filename, argv, envp, &rip, &rsp);
+ dbg(DBG_EXEC, "ret = %ld\n", ret);
+
+ KASSERT(0 == ret); /* Should never fail to load the first binary */
+
+ dbg(DBG_EXEC, "Entering userland with rip 0x%p, rsp 0x%p\n", (void *)rip,
+ (void *)rsp);
+ /* To enter userland, we build a set of saved registers to "trick" the
+ * processor into thinking we were in userland before. Yes, it's horrible.
+ * c.f. http://wiki.osdev.org/index.php?title=Getting_to_Ring_3&oldid=8195
+ */
+ regs_t regs;
+ memset(&regs, 0, sizeof(regs_t));
+
+ /* Userland gdt entries (0x3 for ring 3) */
+ regs.r_cs = GDT_USER_TEXT | 0x3;
+ regs.r_ss = GDT_USER_DATA | 0x3;
+
+ /* Userland instruction pointer and stack pointer */
+ regs.r_rip = rip;
+ regs.r_rsp = rsp;
+
+ regs.r_rflags = 0x202; // see 32-bit version
+ userland_entry(regs);
+}
\ No newline at end of file
diff --git a/kernel/api/syscall.c b/kernel/api/syscall.c
new file mode 100644
index 0000000..1be5276
--- /dev/null
+++ b/kernel/api/syscall.c
@@ -0,0 +1,757 @@
+#include "errno.h"
+#include "globals.h"
+#include "kernel.h"
+#include <fs/vfs.h>
+#include <util/time.h>
+
+#include "main/inits.h"
+#include "main/interrupt.h"
+
+#include "mm/kmalloc.h"
+#include "mm/mman.h"
+
+#include "fs/vfs_syscall.h"
+#include "fs/vnode.h"
+
+#include "drivers/tty/tty.h"
+#include "test/kshell/kshell.h"
+
+#include "vm/brk.h"
+#include "vm/mmap.h"
+
+#include "api/access.h"
+#include "api/exec.h"
+#include "api/syscall.h"
+#include "api/utsname.h"
+
+static long syscall_handler(regs_t *regs);
+
+static long syscall_dispatch(size_t sysnum, uintptr_t args, regs_t *regs);
+
+extern size_t active_tty;
+
+static const char *syscall_strings[49] = {
+ "syscall", "exit", "fork", "read", "write", "open",
+ "close", "waitpid", "link", "unlink", "execve", "chdir",
+ "sleep", "unknown", "lseek", "sync", "nuke", "dup",
+ "pipe", "ioctl", "unknown", "rmdir", "mkdir", "getdents",
+ "mmap", "mprotect", "munmap", "rename", "uname", "thr_create",
+ "thr_cancel", "thr_exit", "thr_yield", "thr_join", "gettid", "getpid",
+ "unknown", "unkown", "unknown", "errno", "halt", "get_free_mem",
+ "set_errno", "dup2", "brk", "mount", "umount", "stat", "usleep"};
+
+void syscall_init(void) { intr_register(INTR_SYSCALL, syscall_handler); }
+
+// if condition, set errno to err and return -1
+#define ERROR_OUT(condition, err) \
+ if (condition) \
+ { \
+ curthr->kt_errno = (err); \
+ return -1; \
+ }
+
+// if ret < 0, set errno to -ret and return -1
+#define ERROR_OUT_RET(ret) ERROR_OUT(ret < 0, -ret)
+
+/*
+ * Be sure to look at other examples of implemented system calls to see how
+ * this should be done - the general outline is as follows.
+ *
+ * - Initialize a read_args_t struct locally in kernel space and copy from
+ * userland args.
+ * - Allocate a temporary buffer (a page-aligned block of n pages large
+ * enough to store the number of bytes to read)
+ * - Call do_read() with the buffer and then copy the buffer to the userland
+ * args after the system call
+ * - Make sure to free the temporary buffer allocated
+ * - Return the number of bytes read, or return -1 and set the current thread's
+ * errno appropriately using ERROR_OUT_RET.
+ */
+static long sys_read(read_args_t *args)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
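The outline above could look roughly like the following sketch, which assumes read_args_t carries fd, buf, and nbytes fields and that page_alloc_n()/page_free_n() provide the page-aligned temporary buffer; it is an illustration, not the committed code.

static long sys_read(read_args_t *args)
{
    read_args_t kargs;
    long ret = copy_from_user(&kargs, args, sizeof(kargs));
    ERROR_OUT_RET(ret);

    if (!kargs.nbytes)
    {
        return 0;
    }

    /* page-aligned temporary buffer big enough for nbytes */
    size_t npages = (kargs.nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
    void *buf = page_alloc_n(npages);
    ERROR_OUT(!buf, ENOMEM);

    ret = do_read(kargs.fd, buf, kargs.nbytes);
    if (ret >= 0)
    {
        /* hand the bytes that were actually read back to userland */
        long copy_ret = copy_to_user(kargs.buf, buf, (size_t)ret);
        if (copy_ret)
        {
            ret = copy_ret;
        }
    }
    page_free_n(buf, npages);

    ERROR_OUT_RET(ret);
    return ret;
}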
+
+/*
+ * Be sure to look at other examples of implemented system calls to see how
+ * this should be done - the general outline is as follows.
+ *
+ * This function is very similar to sys_read - see above comments. You'll need
+ * to use the functions copy_from_user() and do_write(). Make sure to
+ * allocate a new temporary buffer for the data that is being written. This
+ * is to ensure that pagefaults within kernel mode do not happen.
+ */
+static long sys_write(write_args_t *args)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
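A matching sketch for sys_write, under the same assumptions about write_args_t and the page allocator; the user data is copied into the kernel buffer first so that do_write() itself never touches userland memory.

static long sys_write(write_args_t *args)
{
    write_args_t kargs;
    long ret = copy_from_user(&kargs, args, sizeof(kargs));
    ERROR_OUT_RET(ret);

    if (!kargs.nbytes)
    {
        return 0;
    }

    size_t npages = (kargs.nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
    void *buf = page_alloc_n(npages);
    ERROR_OUT(!buf, ENOMEM);

    /* bring the user data into kernel memory, then write it out */
    ret = copy_from_user(buf, kargs.buf, kargs.nbytes);
    if (!ret)
    {
        ret = do_write(kargs.fd, buf, kargs.nbytes);
    }
    page_free_n(buf, npages);

    ERROR_OUT_RET(ret);
    return ret;
}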
+
+/*
+ * This is similar to the other system calls that you have implemented above.
+ *
+ * The general steps are as follows:
+ * - Copy the arguments from user memory
+ * - Check that the count field is at least the size of a dirent_t
+ * - Use a while loop to read count / sizeof(dirent_t) directory entries into
+ * the provided dirp and call do_getdent
+ * - Return the number of bytes read
+ */
+static long sys_getdents(getdents_args_t *args)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
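A sketch of the getdents outline, assuming getdents_args_t carries fd, dirp, and count fields and that do_getdent() returns sizeof(dirent_t) per entry and 0 at the end of the directory; again, an illustration rather than the committed code.

static long sys_getdents(getdents_args_t *args)
{
    getdents_args_t kargs;
    long ret = copy_from_user(&kargs, args, sizeof(kargs));
    ERROR_OUT_RET(ret);

    /* need room for at least one directory entry */
    ERROR_OUT(kargs.count < sizeof(dirent_t), EINVAL);

    size_t nbytes = 0;
    while (nbytes + sizeof(dirent_t) <= kargs.count)
    {
        dirent_t d;
        ret = do_getdent(kargs.fd, &d);
        ERROR_OUT_RET(ret);
        if (!ret)
        {
            break; /* end of directory */
        }
        ret = copy_to_user((char *)kargs.dirp + nbytes, &d, sizeof(d));
        ERROR_OUT_RET(ret);
        nbytes += sizeof(dirent_t);
    }
    return (long)nbytes;
}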
+
+#ifdef __MOUNTING__
+static long sys_mount(mount_args_t *arg)
+{
+ mount_args_t kern_args;
+ char *source;
+ char *target;
+ char *type;
+ long ret;
+
+ if (copy_from_user(&kern_args, arg, sizeof(kern_args)) < 0)
+ {
+ curthr->kt_errno = EFAULT;
+ return -1;
+ }
+
+ /* null is okay only for the source */
+ source = user_strdup(&kern_args.spec);
+ if (NULL == (target = user_strdup(&kern_args.dir)))
+ {
+ kfree(source);
+ curthr->kt_errno = EINVAL;
+ return -1;
+ }
+ if (NULL == (type = user_strdup(&kern_args.fstype)))
+ {
+ kfree(source);
+ kfree(target);
+ curthr->kt_errno = EINVAL;
+ return -1;
+ }
+
+ ret = do_mount(source, target, type);
+ kfree(source);
+ kfree(target);
+ kfree(type);
+
+ if (ret)
+ {
+ curthr->kt_errno = -ret;
+ return -1;
+ }
+
+ return 0;
+}
+
+static long sys_umount(argstr_t *input)
+{
+ argstr_t kstr;
+ char *target;
+ long ret;
+
+ if (copy_from_user(&kstr, input, sizeof(kstr)) < 0)
+ {
+ curthr->kt_errno = EFAULT;
+ return -1;
+ }
+
+ if (NULL == (target = user_strdup(&kstr)))
+ {
+ curthr->kt_errno = EINVAL;
+ return -1;
+ }
+
+ ret = do_umount(target);
+ kfree(target);
+
+ if (ret)
+ {
+ curthr->kt_errno = -ret;
+ return -1;
+ }
+
+ return 0;
+}
+#endif
+
+static long sys_close(int fd)
+{
+ long ret = do_close(fd);
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_dup(int fd)
+{
+ long ret = do_dup(fd);
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_dup2(const dup2_args_t *args)
+{
+ dup2_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+ ret = do_dup2(kargs.ofd, kargs.nfd);
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_mkdir(mkdir_args_t *args)
+{
+ mkdir_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs.path, &path);
+ ERROR_OUT_RET(ret);
+
+ ret = do_mkdir(path);
+ kfree(path);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_rmdir(argstr_t *args)
+{
+ argstr_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs, &path);
+ ERROR_OUT_RET(ret);
+
+ ret = do_rmdir(path);
+ kfree(path);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_unlink(argstr_t *args)
+{
+ argstr_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs, &path);
+ ERROR_OUT_RET(ret);
+
+ ret = do_unlink(path);
+ kfree(path);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_link(link_args_t *args)
+{
+ link_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *to, *from;
+ ret = user_strdup(&kargs.to, &to);
+ ERROR_OUT_RET(ret);
+
+ ret = user_strdup(&kargs.from, &from);
+ if (ret)
+ {
+ kfree(to);
+ ERROR_OUT_RET(ret);
+ }
+
+ ret = do_link(from, to);
+ kfree(to);
+ kfree(from);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_rename(rename_args_t *args)
+{
+ rename_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *oldpath, *newpath;
+ ret = user_strdup(&kargs.oldpath, &oldpath);
+ ERROR_OUT_RET(ret);
+
+ ret = user_strdup(&kargs.newpath, &newpath);
+ if (ret)
+ {
+ kfree(oldpath);
+ ERROR_OUT_RET(ret);
+ }
+
+ ret = do_rename(oldpath, newpath);
+ kfree(oldpath);
+ kfree(newpath);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_chdir(argstr_t *args)
+{
+ argstr_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs, &path);
+ ERROR_OUT_RET(ret);
+
+ ret = do_chdir(path);
+ kfree(path);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_lseek(lseek_args_t *args)
+{
+ lseek_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ ret = do_lseek(kargs.fd, kargs.offset, kargs.whence);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_open(open_args_t *args)
+{
+ open_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs.filename, &path);
+ ERROR_OUT_RET(ret);
+
+ ret = do_open(path, kargs.flags);
+ kfree(path);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_munmap(munmap_args_t *args)
+{
+ munmap_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ ret = do_munmap(kargs.addr, kargs.len);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static void *sys_mmap(mmap_args_t *arg)
+{
+ mmap_args_t kargs;
+
+ if (copy_from_user(&kargs, arg, sizeof(mmap_args_t)))
+ {
+ curthr->kt_errno = EFAULT;
+ return MAP_FAILED;
+ }
+
+ void *ret;
+ long err = do_mmap(kargs.mma_addr, kargs.mma_len, kargs.mma_prot,
+ kargs.mma_flags, kargs.mma_fd, kargs.mma_off, &ret);
+ if (err)
+ {
+ curthr->kt_errno = -err;
+ return MAP_FAILED;
+ }
+ return ret;
+}
+
+static pid_t sys_waitpid(waitpid_args_t *args)
+{
+ waitpid_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ int status;
+ pid_t pid = do_waitpid(kargs.wpa_pid, &status, kargs.wpa_options);
+ ERROR_OUT_RET(pid);
+
+ if (kargs.wpa_status)
+ {
+ ret = copy_to_user(kargs.wpa_status, &status, sizeof(int));
+ ERROR_OUT_RET(ret);
+ }
+
+ return pid;
+}
+
+static void *sys_brk(void *addr)
+{
+ void *new_brk;
+ long ret = do_brk(addr, &new_brk);
+ if (ret)
+ {
+ curthr->kt_errno = -ret;
+ return (void *)-1;
+ }
+ return new_brk;
+}
+
+static void sys_halt(void) { proc_kill_all(); }
+
+static long sys_stat(stat_args_t *args)
+{
+ stat_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs.path, &path);
+ ERROR_OUT_RET(ret);
+
+ stat_t stat_buf;
+ ret = do_stat(path, &stat_buf);
+ kfree(path);
+ ERROR_OUT_RET(ret);
+
+ ret = copy_to_user(kargs.buf, &stat_buf, sizeof(stat_buf));
+ ERROR_OUT_RET(ret);
+
+ return ret;
+}
+
+static long sys_pipe(int args[2])
+{
+ int kargs[2];
+ long ret = do_pipe(kargs);
+ ERROR_OUT_RET(ret);
+
+ ret = copy_to_user(args, kargs, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ return ret;
+}
+
+static long sys_uname(struct utsname *arg)
+{
+ static const char sysname[] = "Weenix";
+ static const char release[] = "1.2";
+ /* Version = last compilation time */
+ static const char version[] = "#1 " __DATE__ " " __TIME__;
+ static const char nodename[] = "";
+ static const char machine[] = "";
+ long ret = 0;
+
+ ret = copy_to_user(arg->sysname, sysname, sizeof(sysname));
+ ERROR_OUT_RET(ret);
+ ret = copy_to_user(arg->release, release, sizeof(release));
+ ERROR_OUT_RET(ret);
+ ret = copy_to_user(arg->version, version, sizeof(version));
+ ERROR_OUT_RET(ret);
+ ret = copy_to_user(arg->nodename, nodename, sizeof(nodename));
+ ERROR_OUT_RET(ret);
+ ret = copy_to_user(arg->machine, machine, sizeof(machine));
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_time(time_t *utloc)
+{
+ time_t time = do_time();
+ if (utloc)
+ {
+ long ret = copy_to_user(utloc, &time, sizeof(time_t));
+ ERROR_OUT_RET(ret);
+ }
+ return time;
+}
+
+static long sys_fork(regs_t *regs)
+{
+ long ret = do_fork(regs);
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static void free_vector(char **vect)
+{
+ char **temp;
+ for (temp = vect; *temp; temp++)
+ {
+ kfree(*temp);
+ }
+ kfree(vect);
+}
+
+static long sys_execve(execve_args_t *args, regs_t *regs)
+{
+ execve_args_t kargs;
+ char *filename = NULL;
+ char **argv = NULL;
+ char **envp = NULL;
+
+ long ret;
+ if ((ret = copy_from_user(&kargs, args, sizeof(kargs))))
+ goto cleanup;
+
+ if ((ret = user_strdup(&kargs.filename, &filename)))
+ goto cleanup;
+
+ if (kargs.argv.av_vec && (ret = user_vecdup(&kargs.argv, &argv)))
+ goto cleanup;
+
+ if (kargs.envp.av_vec && (ret = user_vecdup(&kargs.envp, &envp)))
+ goto cleanup;
+
+ ret = do_execve(filename, argv, envp, regs);
+
+cleanup:
+ if (filename)
+ kfree(filename);
+ if (argv)
+ free_vector(argv);
+ if (envp)
+ free_vector(envp);
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_debug(argstr_t *args)
+{
+ argstr_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *str;
+ ret = user_strdup(&kargs, &str);
+ ERROR_OUT_RET(ret);
+ dbg(DBG_USER, "%s\n", str);
+ kfree(str);
+ return ret;
+}
+
+static long sys_kshell(int ttyid)
+{
+ // ignoring the ttyid passed in as it always defaults to 0,
+ // instead using the active_tty value
+ kshell_t *ksh = kshell_create(active_tty);
+ ERROR_OUT(!ksh, ENODEV);
+
+ long ret;
+ while ((ret = kshell_execute_next(ksh)) > 0)
+ ;
+ kshell_destroy(ksh);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_usleep(usleep_args_t *args)
+{
+ return do_usleep(args->usec);
+}
+
+static inline void check_curthr_cancelled()
+{
+ KASSERT(list_empty(&curthr->kt_mutexes));
+ long cancelled = curthr->kt_cancelled;
+ void *retval = curthr->kt_retval;
+
+ if (cancelled)
+ {
+ dbg(DBG_SYSCALL, "CANCELLING: thread 0x%p of P%d (%s)\n", curthr,
+ curproc->p_pid, curproc->p_name);
+ kthread_exit(retval);
+ }
+}
+
+static long syscall_handler(regs_t *regs)
+{
+ size_t sysnum = (size_t)regs->r_rax;
+ uintptr_t args = (uintptr_t)regs->r_rdx;
+
+ const char *syscall_string;
+ if (sysnum <= 48)
+ {
+ syscall_string = syscall_strings[sysnum];
+ }
+ else
+ {
+ if (sysnum == 9001)
+ {
+ syscall_string = "debug";
+ }
+ else if (sysnum == 9002)
+ {
+ syscall_string = "kshell";
+ }
+ else
+ {
+ syscall_string = "unknown";
+ }
+ }
+
+ if (sysnum != SYS_errno)
+ dbg(DBG_SYSCALL, ">> pid %d, sysnum: %lu (%s), arg: %lu (0x%p)\n",
+ curproc->p_pid, sysnum, syscall_string, args, (void *)args);
+
+ check_curthr_cancelled();
+ long ret = syscall_dispatch(sysnum, args, regs);
+ check_curthr_cancelled();
+
+ if (sysnum != SYS_errno)
+ dbg(DBG_SYSCALL, "<< pid %d, sysnum: %lu (%s), returned: %lu (%#lx)\n",
+ curproc->p_pid, sysnum, syscall_string, ret, ret);
+
+ regs->r_rax = (uint64_t)ret;
+ return 0;
+}
+
+static long syscall_dispatch(size_t sysnum, uintptr_t args, regs_t *regs)
+{
+ switch (sysnum)
+ {
+ case SYS_waitpid:
+ return sys_waitpid((waitpid_args_t *)args);
+
+ case SYS_exit:
+ do_exit((int)args);
+ panic("exit failed!\n");
+
+ case SYS_thr_exit:
+ kthread_exit((void *)args);
+ panic("thr_exit failed!\n");
+
+ case SYS_sched_yield:
+ sched_yield();
+ return 0;
+
+ case SYS_fork:
+ return sys_fork(regs);
+
+ case SYS_getpid:
+ return curproc->p_pid;
+
+ case SYS_sync:
+ do_sync();
+ return 0;
+
+#ifdef __MOUNTING__
+ case SYS_mount:
+ return sys_mount((mount_args_t *)args);
+
+ case SYS_umount:
+ return sys_umount((argstr_t *)args);
+#endif
+
+ case SYS_mmap:
+ return (long)sys_mmap((mmap_args_t *)args);
+
+ case SYS_munmap:
+ return sys_munmap((munmap_args_t *)args);
+
+ case SYS_open:
+ return sys_open((open_args_t *)args);
+
+ case SYS_close:
+ return sys_close((int)args);
+
+ case SYS_read:
+ return sys_read((read_args_t *)args);
+
+ case SYS_write:
+ return sys_write((write_args_t *)args);
+
+ case SYS_dup:
+ return sys_dup((int)args);
+
+ case SYS_dup2:
+ return sys_dup2((dup2_args_t *)args);
+
+ case SYS_mkdir:
+ return sys_mkdir((mkdir_args_t *)args);
+
+ case SYS_rmdir:
+ return sys_rmdir((argstr_t *)args);
+
+ case SYS_unlink:
+ return sys_unlink((argstr_t *)args);
+
+ case SYS_link:
+ return sys_link((link_args_t *)args);
+
+ case SYS_rename:
+ return sys_rename((rename_args_t *)args);
+
+ case SYS_chdir:
+ return sys_chdir((argstr_t *)args);
+
+ case SYS_getdents:
+ return sys_getdents((getdents_args_t *)args);
+
+ case SYS_brk:
+ return (long)sys_brk((void *)args);
+
+ case SYS_lseek:
+ return sys_lseek((lseek_args_t *)args);
+
+ case SYS_halt:
+ sys_halt();
+ return -1;
+
+ case SYS_set_errno:
+ curthr->kt_errno = (long)args;
+ return 0;
+
+ case SYS_errno:
+ return curthr->kt_errno;
+
+ case SYS_execve:
+ return sys_execve((execve_args_t *)args, regs);
+
+ case SYS_stat:
+ return sys_stat((stat_args_t *)args);
+
+ case SYS_pipe:
+ return sys_pipe((int *)args);
+
+ case SYS_uname:
+ return sys_uname((struct utsname *)args);
+
+ case SYS_time:
+ return sys_time((time_t *)args);
+
+ case SYS_debug:
+ return sys_debug((argstr_t *)args);
+
+ case SYS_kshell:
+ return sys_kshell((int)args);
+
+ case SYS_usleep:
+ return sys_usleep((usleep_args_t *)args);
+
+ default:
+ dbg(DBG_ERROR, "ERROR: unknown system call: %lu (args: 0x%p)\n",
+ sysnum, (void *)args);
+ curthr->kt_errno = ENOSYS;
+ return -1;
+ }
+}