path: root/kernel
author    nthnluu <nate1299@me.com>    2024-01-28 21:20:27 -0500
committer nthnluu <nate1299@me.com>    2024-01-28 21:20:27 -0500
commit    c63f340d90800895f007de64b7d2d14624263331 (patch)
tree      2c0849fa597dd6da831c8707b6f2603403778d7b /kernel
Created student weenix repository
Diffstat (limited to 'kernel')
-rw-r--r--kernel/.gitignore20
-rw-r--r--kernel/Makefile94
-rw-r--r--kernel/api/access.c136
-rw-r--r--kernel/api/binfmt.c88
-rw-r--r--kernel/api/elf.c905
-rw-r--r--kernel/api/exec.c110
-rw-r--r--kernel/api/syscall.c757
-rw-r--r--kernel/boot/boot.S174
-rw-r--r--kernel/drivers/Submodules1
-rw-r--r--kernel/drivers/blockdev.c96
-rw-r--r--kernel/drivers/chardev.c43
-rw-r--r--kernel/drivers/cmos.c78
-rw-r--r--kernel/drivers/disk/sata.c512
-rw-r--r--kernel/drivers/keyboard.c208
-rw-r--r--kernel/drivers/memdevs.c108
-rw-r--r--kernel/drivers/pcie.c77
-rw-r--r--kernel/drivers/screen.c513
-rw-r--r--kernel/drivers/tty/ldisc.c120
-rw-r--r--kernel/drivers/tty/tty.c135
-rw-r--r--kernel/drivers/tty/vterminal.c1384
-rw-r--r--kernel/entry/entry.c14
-rw-r--r--kernel/fs/Submodules1
-rw-r--r--kernel/fs/file.c115
-rw-r--r--kernel/fs/namev.c263
-rw-r--r--kernel/fs/open.c67
-rw-r--r--kernel/fs/pipe.c256
-rw-r--r--kernel/fs/ramfs/ramfs.c852
-rw-r--r--kernel/fs/s5fs/s5fs.c860
-rw-r--r--kernel/fs/s5fs/s5fs_subr.c590
-rw-r--r--kernel/fs/vfs.c222
-rw-r--r--kernel/fs/vfs_syscall.c356
-rw-r--r--kernel/fs/vnode.c250
-rw-r--r--kernel/fs/vnode_specials.c176
-rw-r--r--kernel/include/api/access.h19
-rw-r--r--kernel/include/api/binfmt.h12
-rw-r--r--kernel/include/api/elf.h2595
-rw-r--r--kernel/include/api/exec.h12
-rw-r--r--kernel/include/api/syscall.h196
-rw-r--r--kernel/include/api/utsname.h14
-rw-r--r--kernel/include/boot/config.h12
-rw-r--r--kernel/include/boot/multiboot_macros.h6
-rw-r--r--kernel/include/config.h50
-rw-r--r--kernel/include/ctype.h124
-rw-r--r--kernel/include/drivers/blockdev.h99
-rw-r--r--kernel/include/drivers/chardev.h51
-rw-r--r--kernel/include/drivers/cmos.h40
-rw-r--r--kernel/include/drivers/dev.h49
-rw-r--r--kernel/include/drivers/disk/ahci.h325
-rw-r--r--kernel/include/drivers/disk/sata.h14
-rw-r--r--kernel/include/drivers/keyboard.h43
-rw-r--r--kernel/include/drivers/memdevs.h6
-rw-r--r--kernel/include/drivers/pcie.h112
-rw-r--r--kernel/include/drivers/screen.h72
-rw-r--r--kernel/include/drivers/tty/ldisc.h68
-rw-r--r--kernel/include/drivers/tty/tty.h21
-rw-r--r--kernel/include/drivers/tty/vterminal.h249
-rw-r--r--kernel/include/errno.h151
-rw-r--r--kernel/include/fs/dirent.h25
-rw-r--r--kernel/include/fs/fcntl.h18
-rw-r--r--kernel/include/fs/file.h62
-rw-r--r--kernel/include/fs/lseek.h5
-rw-r--r--kernel/include/fs/open.h5
-rw-r--r--kernel/include/fs/pipe.h10
-rw-r--r--kernel/include/fs/ramfs/ramfs.h5
-rw-r--r--kernel/include/fs/s5fs/s5fs.h145
-rw-r--r--kernel/include/fs/s5fs/s5fs_privtest.h6
-rw-r--r--kernel/include/fs/s5fs/s5fs_subr.h53
-rw-r--r--kernel/include/fs/stat.h44
-rw-r--r--kernel/include/fs/vfs.h162
-rw-r--r--kernel/include/fs/vfs_privtest.h3
-rw-r--r--kernel/include/fs/vfs_syscall.h39
-rw-r--r--kernel/include/fs/vnode.h358
-rw-r--r--kernel/include/fs/vnode_specials.h0
-rw-r--r--kernel/include/globals.h11
-rw-r--r--kernel/include/kernel.h79
-rw-r--r--kernel/include/limits.h18
-rw-r--r--kernel/include/main/acpi.h20
-rw-r--r--kernel/include/main/apic.h73
-rw-r--r--kernel/include/main/cpuid.h118
-rw-r--r--kernel/include/main/entry.h3
-rw-r--r--kernel/include/main/gdt.h21
-rw-r--r--kernel/include/main/inits.h15
-rw-r--r--kernel/include/main/interrupt.h117
-rw-r--r--kernel/include/main/io.h46
-rw-r--r--kernel/include/main/smp.h22
-rw-r--r--kernel/include/mm/kmalloc.h7
-rw-r--r--kernel/include/mm/mm.h8
-rw-r--r--kernel/include/mm/mman.h25
-rw-r--r--kernel/include/mm/mobj.h75
-rw-r--r--kernel/include/mm/page.h124
-rw-r--r--kernel/include/mm/pagecache.h9
-rw-r--r--kernel/include/mm/pagetable.h94
-rw-r--r--kernel/include/mm/pframe.h23
-rw-r--r--kernel/include/mm/slab.h96
-rw-r--r--kernel/include/mm/tlb.h35
-rw-r--r--kernel/include/multiboot.h417
-rw-r--r--kernel/include/proc/context.h62
-rw-r--r--kernel/include/proc/core.h15
-rw-r--r--kernel/include/proc/kmutex.h60
-rw-r--r--kernel/include/proc/kthread.h106
-rw-r--r--kernel/include/proc/proc.h200
-rw-r--r--kernel/include/proc/sched.h126
-rw-r--r--kernel/include/proc/spinlock.h37
-rw-r--r--kernel/include/stdarg.h7
-rw-r--r--kernel/include/test/driverstest.h3
-rw-r--r--kernel/include/test/kshell/io.h61
-rw-r--r--kernel/include/test/kshell/kshell.h52
-rw-r--r--kernel/include/test/proctest.h3
-rw-r--r--kernel/include/test/s5fstest.h3
-rw-r--r--kernel/include/test/usertest.h51
-rw-r--r--kernel/include/test/vfstest/vfstest.h156
-rw-r--r--kernel/include/types.h31
-rw-r--r--kernel/include/util/atomic.h31
-rw-r--r--kernel/include/util/bits.h27
-rw-r--r--kernel/include/util/debug.h305
-rw-r--r--kernel/include/util/delay.h73
-rw-r--r--kernel/include/util/gdb.h5
-rw-r--r--kernel/include/util/init.h21
-rw-r--r--kernel/include/util/list.h224
-rw-r--r--kernel/include/util/printf.h87
-rw-r--r--kernel/include/util/string.h93
-rw-r--r--kernel/include/util/time.h25
-rw-r--r--kernel/include/util/timer.h28
-rw-r--r--kernel/include/vm/anon.h9
-rw-r--r--kernel/include/vm/brk.h3
-rw-r--r--kernel/include/vm/mmap.h8
-rw-r--r--kernel/include/vm/pagefault.h11
-rw-r--r--kernel/include/vm/shadow.h11
-rw-r--r--kernel/include/vm/vmmap.h71
-rw-r--r--kernel/link.ld68
-rw-r--r--kernel/main/acpi.c161
-rw-r--r--kernel/main/apic.c648
-rw-r--r--kernel/main/gdt.c129
-rw-r--r--kernel/main/gdt.gdb3
-rw-r--r--kernel/main/interrupt.c1077
-rw-r--r--kernel/main/kmain.c200
-rw-r--r--kernel/main/smp.c138
-rw-r--r--kernel/main/smp_trampoline.S81
-rw-r--r--kernel/mm/memcheck.py158
-rw-r--r--kernel/mm/mobj.c313
-rw-r--r--kernel/mm/page.c658
-rw-r--r--kernel/mm/page.py47
-rw-r--r--kernel/mm/pagecache.c23
-rw-r--r--kernel/mm/pagetable.c873
-rw-r--r--kernel/mm/pagetable.gdb25
-rw-r--r--kernel/mm/pframe.c59
-rw-r--r--kernel/mm/slab.c550
-rw-r--r--kernel/mm/slab.py55
-rw-r--r--kernel/proc/context.c150
-rw-r--r--kernel/proc/fork.c62
-rw-r--r--kernel/proc/kmutex.c88
-rw-r--r--kernel/proc/kthread.c136
-rw-r--r--kernel/proc/kthread.gdb39
-rw-r--r--kernel/proc/proc.c440
-rw-r--r--kernel/proc/proc.py38
-rw-r--r--kernel/proc/sched.c368
-rw-r--r--kernel/proc/spinlock.c21
-rw-r--r--kernel/test/Submodules1
-rw-r--r--kernel/test/driverstest.c288
-rw-r--r--kernel/test/kshell/command.c46
-rw-r--r--kernel/test/kshell/command.h20
-rw-r--r--kernel/test/kshell/commands.c404
-rw-r--r--kernel/test/kshell/commands.h32
-rw-r--r--kernel/test/kshell/io.c78
-rw-r--r--kernel/test/kshell/kshell.c504
-rw-r--r--kernel/test/kshell/priv.h43
-rw-r--r--kernel/test/kshell/tokenizer.c74
-rw-r--r--kernel/test/kshell/tokenizer.h39
-rw-r--r--kernel/test/pipes.c133
-rw-r--r--kernel/test/proctest.c57
-rw-r--r--kernel/test/s5fstest.c251
-rw-r--r--kernel/test/usertest.c174
-rw-r--r--kernel/test/vfstest/vfstest.c1173
-rw-r--r--kernel/test/vmtest.c74
-rw-r--r--kernel/util/debug.c237
-rw-r--r--kernel/util/debug.py77
-rw-r--r--kernel/util/init.c142
-rw-r--r--kernel/util/list.c53
-rw-r--r--kernel/util/list.py32
-rw-r--r--kernel/util/math.c411
-rw-r--r--kernel/util/printf.c996
-rw-r--r--kernel/util/string.c509
-rw-r--r--kernel/util/time.c194
-rw-r--r--kernel/util/timer.c121
-rw-r--r--kernel/vm/anon.c65
-rw-r--r--kernel/vm/brk.c58
-rw-r--r--kernel/vm/mmap.c83
-rw-r--r--kernel/vm/pagefault.c53
-rw-r--r--kernel/vm/shadow.c173
-rw-r--r--kernel/vm/vmmap.c326
-rw-r--r--kernel/vm/vmmap.gdb24
-rw-r--r--kernel/weenix.dbgbin0 -> 1220864 bytes
192 files changed, 31875 insertions, 0 deletions
diff --git a/kernel/.gitignore b/kernel/.gitignore
new file mode 100644
index 0000000..444ef19
--- /dev/null
+++ b/kernel/.gitignore
@@ -0,0 +1,20 @@
+# cscope
+cscope.files
+cscope.in.out
+cscope.out
+cscope.po.out
+
+# ctags
+TAGS
+
+# kernel binaries
+kernel.bin
+symbols.dbg
+weenix.img
+weenix.iso
+.iso/*
+
+gdb-commands
+
+# Empty target stuff
+empty-target
diff --git a/kernel/Makefile b/kernel/Makefile
new file mode 100644
index 0000000..e784a99
--- /dev/null
+++ b/kernel/Makefile
@@ -0,0 +1,94 @@
+LDFLAGS := --build-id=none -z max-page-size=0x1000 -n
+
+include ../Global.mk
+
+CFLAGS += -D__KERNEL__
+
+###
+
+HEAD := $(wildcard include/*/*.h include/*/*/*.h)
+SRCDIR := boot entry main util drivers drivers/disk drivers/tty mm proc fs/ramfs fs/s5fs fs vm api test test/kshell test/vfstest
+
+SRC := $(foreach dr, $(SRCDIR), $(wildcard $(dr)/*.[cS]))
+
+OBJS := $(addsuffix .o,$(basename $(SRC)))
+ASM_FILES :=
+SCRIPTS := $(foreach dr, $(SRCDIR), $(wildcard $(dr)/*.gdb $(dr)/*.py))
+
+BSYMBOLS := symbols.dbg
+KERNEL := kernel.bin
+IMAGE := weenix.img
+ISO_IMAGE := weenix.iso
+GDBCOMM := gdb-commands
+
+.PHONY: all cscope clean
+
+%.a:
+ touch $@
+
+all: $(ISO_IMAGE) $(GDBCOMM) $(ASM_FILES) $(BSYMBOLS)
+
+$(KERNEL) : $(OBJS)
+ @ echo " Linking for \"kernel/$@\"..."
+ @ # entry.o is included from link.ld. boot/boot.o (from boot/boot.S) must be the first object so that the multiboot header is close enough to the front.
+ @ $(LD) $(LDFLAGS) -T link.ld boot/boot.o $(filter-out boot/boot.o entry/entry.o,$^) -o $@
+
+$(BSYMBOLS): $(KERNEL)
+ @ echo " Generating kernel symbols list..."
+ @ readelf -Ws $(KERNEL) | grep -Ev 'SECTION|UND|FILE|Num:|Symbol|^$$' | awk '{printf "0x%s %s\n", $$2, $$8}' > $@
+
+$(ISO_IMAGE): $(KERNEL)
+ @ echo " Creating \"kernel/$@\" from floppy disk image..."
+ @ rm -rf .iso
+ @ mkdir -p .iso/boot/grub
+ @ ln -f $< .iso/boot/$< || cp -f $< .iso/boot/$<
+ @ echo "default=0" > .iso/boot/grub/grub.cfg
+ @ echo "timeout=0" >> .iso/boot/grub/grub.cfg
+ @ echo "menuentry \"$@\" {" >> .iso/boot/grub/grub.cfg
+ @ echo " echo \"Booting $@ from /boot/$<\" " >> .iso/boot/grub/grub.cfg
+ @ echo " echo \"Welcome To 64-bit Weenix!\" " >> .iso/boot/grub/grub.cfg
+ @ echo " multiboot2 /boot/$< " >> .iso/boot/grub/grub.cfg
+ @ echo " acpi -2 " >> .iso/boot/grub/grub.cfg
+
+ @ echo " boot " >> .iso/boot/grub/grub.cfg
+ @ echo " GRUB_GFXMODE=1024x768x32" >> .iso/boot/grub/grub.cfg
+ @ echo "}" >> .iso/boot/grub/grub.cfg
+ @ $(MKRESCUE) -o $@ ./.iso
+
+$(GDBCOMM): $(SCRIPTS)
+ @ echo " Creating gdb command list..."
+ @ $(foreach script, $(SCRIPTS), echo $(abspath $(script)) >> $(dir $(script))$(shell basename $(dir $(script))).gdbcomm; )
+ @ # We suppress an error here if no command files exist
+ -@ cat */*.gdbcomm > $@
+
+%.S: %.c
+ @ echo " Compiling \"kernel/$<\"..."
+ @ $(CC) $(CFLAGS) -S $< -o $@
+
+%.o: %.c
+ @ echo " Compiling \"kernel/$<\"..."
+ @ $(CC) -c $(CFLAGS) $< -o $@
+
+%.o: %.S
+ @ echo " Compiling \"kernel/$<\"..."
+ @ $(CC) -c $(ASFLAGS) $(CFLAGS) $< -o $@
+
+cscope: $(HEAD) $(SRC)
+ @ echo " Updating cscope symbol cross-reference..."
+ @ echo $(HEAD) $(SRC) > cscope.files
+ @ $(CSCOPE) -k -b -q -v > /dev/null
+
+FILTER=`echo "DRIVERS $(DRIVERS)\nVFS $(VFS)\nS5FS $(S5FS)\nVM $(VM)" | grep 1 | cut -f1 -d" " | tr "\n" "|"`PROCS
+nyi:
+ @ echo " Not yet implemented:"
+ @ echo
+ @ find . -name \*.c -printf "%P\n" \
+| xargs grep -Hn "NOT_YET_IMPLEMENTED" \
+| sed -e 's/^\(.*:.*\):.*\"\(.*\): \(.*\)\".*/\2 \1 \3/' \
+| grep -E "^($(FILTER))" \
+| awk '{printf("%25s %30s() %8s\n", $$2, $$3, $$1)}'
+
+clean:
+ @ find . -name "*.o" -type f -delete
+ @ rm -f $(OBJS) $(BSYMBOLS) $(KERNEL) $(IMAGE) $(ISO_IMAGE) $(GDBCOMM) */*.gdbcomm cscope*.out cscope.files
+ @ rm -rf .iso
\ No newline at end of file
diff --git a/kernel/api/access.c b/kernel/api/access.c
new file mode 100644
index 0000000..d56e45d
--- /dev/null
+++ b/kernel/api/access.c
@@ -0,0 +1,136 @@
+#include "errno.h"
+#include "globals.h"
+#include <mm/mm.h>
+#include <util/string.h>
+
+#include "util/debug.h"
+
+#include "mm/kmalloc.h"
+#include "mm/mman.h"
+
+#include "api/access.h"
+#include "api/syscall.h"
+
+static inline long userland_address(const void *addr)
+{
+ return addr >= (void *)USER_MEM_LOW && addr < (void *)USER_MEM_HIGH;
+}
+
+/*
+ * Check for permissions on [uaddr, uaddr + nbytes), then
+ * copy nbytes from userland address uaddr to kernel address kaddr.
+ * Do not access the userland virtual addresses directly; instead,
+ * use vmmap_read.
+ */
+long copy_from_user(void *kaddr, const void *uaddr, size_t nbytes)
+{
+ if (!range_perm(curproc, uaddr, nbytes, PROT_READ))
+ {
+ return -EFAULT;
+ }
+ KASSERT(userland_address(uaddr) && !userland_address(kaddr));
+ return vmmap_read(curproc->p_vmmap, uaddr, kaddr, nbytes);
+}
+
+/*
+ * Check for permissions on [uaddr, uaddr + nbytes), then
+ * copy nbytes from kernel address kaddr to userland address uaddr.
+ * Do not access the userland virtual addresses directly; instead,
+ * use vmmap_write.
+ */
+long copy_to_user(void *uaddr, const void *kaddr, size_t nbytes)
+{
+ if (!range_perm(curproc, uaddr, nbytes, PROT_WRITE))
+ {
+ return -EFAULT;
+ }
+ KASSERT(userland_address(uaddr) && !userland_address(kaddr));
+ return vmmap_write(curproc->p_vmmap, uaddr, kaddr, nbytes);
+}
+
+/*
+ * Duplicate the string identified by ustr into kernel memory.
+ * The kernel memory string kstr should be allocated using kmalloc.
+ */
+long user_strdup(argstr_t *ustr, char **kstrp)
+{
+ KASSERT(!userland_address(ustr));
+ KASSERT(userland_address(ustr->as_str));
+
+ *kstrp = kmalloc(ustr->as_len + 1);
+ if (!*kstrp)
+ return -ENOMEM;
+ long ret = copy_from_user(*kstrp, ustr->as_str, ustr->as_len + 1);
+ if (ret)
+ {
+ kfree(*kstrp);
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Duplicate the string of vectors identified by uvec into kernel memory.
+ * The vector itself (char**) and each string (char*) should be allocated
+ * using kmalloc.
+ */
+long user_vecdup(argvec_t *uvec, char ***kvecp)
+{
+ KASSERT(!userland_address(uvec));
+ KASSERT(userland_address(uvec->av_vec));
+
+ char **kvec = kmalloc((uvec->av_len + 1) * sizeof(char *));
+ *kvecp = kvec;
+
+ if (!kvec)
+ {
+ return -ENOMEM;
+ }
+ memset(kvec, 0, (uvec->av_len + 1) * sizeof(char *));
+
+ long ret = 0;
+ for (size_t i = 0; i < uvec->av_len && !ret; i++)
+ {
+ argstr_t argstr;
+ copy_from_user(&argstr, uvec->av_vec + i, sizeof(argstr_t));
+ ret = user_strdup(&argstr, kvec + i);
+ }
+
+ if (ret)
+ {
+ for (size_t i = 0; i < uvec->av_len; i++)
+ if (kvec[i])
+ kfree(kvec[i]);
+ kfree(kvec);
+ *kvecp = NULL;
+ }
+
+ return ret;
+}
+
+/*
+ * Return 1 if process p has permissions perm for virtual address vaddr;
+ * otherwise return 0.
+ *
+ * Check against the vmarea's protections on the mapping.
+ */
+long addr_perm(proc_t *p, const void *vaddr, int perm)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
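+
+/*
+ * Illustrative sketch only -- not part of the stub above and not necessarily
+ * the intended solution. It assumes the vmmap API exposes vmmap_lookup() and
+ * that vmarea_t keeps its protection bits in a vma_prot field; both names are
+ * taken from typical Weenix headers that are not shown in this diff.
+ */
+long addr_perm_sketch(proc_t *p, const void *vaddr, int perm)
+{
+    /* Find the vmarea that maps the page containing vaddr. */
+    vmarea_t *vma = vmmap_lookup(p->p_vmmap, ADDR_TO_PN(vaddr));
+    if (!vma)
+    {
+        return 0;
+    }
+    /* Permitted only if every requested protection bit is present. */
+    return (vma->vma_prot & perm) == perm;
+}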
+
+/*
+ * Return 1 if process p has permissions perm for virtual address range [vaddr,
+ * vaddr + len); otherwise return 0.
+ *
+ * Hints:
+ * You can use addr_perm in your implementation.
+ * Make sure to consider the case when the range of addresses that is being
+ * checked is less than a page.
+ */
+long range_perm(proc_t *p, const void *vaddr, size_t len, int perm)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
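+
+/*
+ * Illustrative sketch only: range_perm can be built on addr_perm by checking
+ * one address in every page touched by [vaddr, vaddr + len). PAGE_ALIGN_DOWN
+ * and PAGE_SIZE are the macros used elsewhere in this diff; the rest is an
+ * assumption, not the required solution.
+ */
+long range_perm_sketch(proc_t *p, const void *vaddr, size_t len, int perm)
+{
+    /* A range smaller than a page still has to check the page it lives on. */
+    const char *cur = (const char *)PAGE_ALIGN_DOWN(vaddr);
+    const char *end = (const char *)vaddr + len;
+    for (; cur < end; cur += PAGE_SIZE)
+    {
+        if (!addr_perm(p, cur, perm))
+        {
+            return 0;
+        }
+    }
+    return 1;
+}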
diff --git a/kernel/api/binfmt.c b/kernel/api/binfmt.c
new file mode 100644
index 0000000..1597fdf
--- /dev/null
+++ b/kernel/api/binfmt.c
@@ -0,0 +1,88 @@
+#include "errno.h"
+
+#include "main/inits.h"
+
+#include "fs/fcntl.h"
+#include "fs/file.h"
+#include "fs/vfs_syscall.h"
+
+#include "util/debug.h"
+#include "util/init.h"
+#include "util/list.h"
+
+#include "mm/kmalloc.h"
+
+#include "api/binfmt.h"
+
+typedef struct binfmt
+{
+ const char *bf_id;
+ binfmt_load_func_t bf_load;
+ list_link_t bf_link;
+} binfmt_t;
+
+static list_t binfmt_list = LIST_INITIALIZER(binfmt_list);
+
+long binfmt_add(const char *id, binfmt_load_func_t loadfunc)
+{
+ binfmt_t *fmt;
+ if (NULL == (fmt = kmalloc(sizeof(*fmt))))
+ {
+ return -ENOMEM;
+ }
+
+ dbg(DBG_EXEC, "Registering binary loader %s\n", id);
+
+ fmt->bf_id = id;
+ fmt->bf_load = loadfunc;
+ list_insert_head(&binfmt_list, &fmt->bf_link);
+
+ return 0;
+}
+
+long binfmt_load(const char *filename, char *const *argv, char *const *envp,
+ uint64_t *rip, uint64_t *rsp)
+{
+ long fd = do_open(filename, O_RDONLY);
+ if (fd < 0)
+ {
+ dbg(DBG_EXEC, "ERROR: exec failed to open file %s\n", filename);
+ return fd;
+ }
+ file_t *file = fget((int)fd);
+ long ret = 0;
+ if (S_ISDIR(file->f_vnode->vn_mode))
+ {
+ ret = -EISDIR;
+ }
+ if (!ret && !S_ISREG(file->f_vnode->vn_mode))
+ {
+ ret = -EACCES;
+ }
+ fput(&file);
+ if (ret)
+ {
+ do_close((int)fd);
+ return ret;
+ }
+
+ list_iterate(&binfmt_list, fmt, binfmt_t, bf_link)
+ {
+ dbg(DBG_EXEC, "Trying to exec %s using binary loader %s\n", filename,
+ fmt->bf_id);
+
+        /* ENOEXEC indicates that the given loader is unable to load
+         * the given file; any other error indicates that the file
+         * was recognized, but some other error occurred which should
+         * be returned to the user. Only if all loaders return ENOEXEC
+         * do we actually return ENOEXEC. */
+        ret = fmt->bf_load(filename, (int)fd, argv, envp, rip, rsp);
+        if (ret != -ENOEXEC)
+        {
+            do_close((int)fd);
+            return ret;
+        }
+    }
+
+ do_close((int)fd);
+ return ret;
+}
diff --git a/kernel/api/elf.c b/kernel/api/elf.c
new file mode 100644
index 0000000..5ad4a33
--- /dev/null
+++ b/kernel/api/elf.c
@@ -0,0 +1,905 @@
+/*
+ * The elf32 loader (the basis for this file) was modified by twd in 7/2018 so
+ * that it lays out the address space in a more Unix-like fashion (e.g., the
+ * stack is at the top of user memory, text is near the bottom).
+ *
+ * This loader (and the elf32 loader) are not strictly ABI compliant. See the
+ * Intel i386 ELF supplement pp 54-59 and AMD64 ABI Draft 0.99.6 page 29 for
+ * what initial process stacks are supposed to look like after the iret(q) in
+ * userland_entry is executed. The following would be required (but not
+ * necessarily sufficient!) for full compliance:
+ *
+ * 1) Remove the pointers to argv, envp, and auxv from the initial stack.
+ * 2) Have __libc_static_entry (static entry) and _ldloadrtld (callee of dynamic
+ * entry) calculate those pointers and place them on the stack (x86) or in
+ * registers (x86-64) along with argc as arguments to main. 3) Ensure that the
+ * stack pointer is 4 byte (x86) or 16 byte (x86-64) aligned by padding the end
+ * of the arguments being written to the stack with zeros. 4) Have the stack
+ * pointer point to argc, rather than a garbage return address. 5) Have
+ * __libc_static_entry and _bootstrap (ld-weenix) respect this change.
+ */
+
+#include "errno.h"
+#include "globals.h"
+
+#include "main/inits.h"
+
+#include "mm/kmalloc.h"
+#include "mm/mm.h"
+#include "mm/mman.h"
+#include "mm/tlb.h"
+
+#include "api/binfmt.h"
+#include "api/elf.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+#include "fs/fcntl.h"
+#include "fs/file.h"
+#include "fs/lseek.h"
+#include "fs/vfs_syscall.h"
+
+static long _elf64_platform_check(const Elf64_Ehdr *header)
+{
+ return (EM_X86_64 == header->e_machine) // machine
+ && (ELFCLASS64 == header->e_ident[EI_CLASS]) // 32 or 64 bit
+ && (ELFDATA2LSB == header->e_ident[EI_DATA]); // endianness
+}
+
+/* Helper function for the ELF loader. Maps the specified segment
+ * of the program header from the given file in to the given address
+ * space with the given memory offset (in pages). On success returns 0,
+ * otherwise returns a negative error code for the ELF loader to return. Note
+ * that since any error returned by this function should cause the ELF loader to
+ * give up, it is acceptable for the address space to be modified after
+ * returning an error. Note that memoff can be negative */
+static long _elf64_map_segment(vmmap_t *map, vnode_t *file, int64_t memoff,
+ const Elf64_Phdr *segment)
+{
+    /* calculate starting virtual address of the segment */
+ uintptr_t addr;
+ if (memoff < 0)
+ {
+ KASSERT(ADDR_TO_PN(segment->p_vaddr) > (uint64_t)-memoff);
+ addr = (uintptr_t)segment->p_vaddr - (uintptr_t)PN_TO_ADDR(-memoff);
+ }
+ else
+ {
+ addr = (uintptr_t)segment->p_vaddr + (uintptr_t)PN_TO_ADDR(memoff);
+ }
+ uint64_t off = segment->p_offset;
+ uint64_t memsz = segment->p_memsz;
+ uint64_t filesz = segment->p_filesz;
+
+ dbg(DBG_ELF,
+ "Mapping program segment: type %#x, offset %#16lx,"
+ " vaddr %#16lx, filesz %#lx, memsz %#lx, flags %#x, align %#lx\n",
+ segment->p_type, segment->p_offset, segment->p_vaddr, segment->p_filesz,
+ segment->p_memsz, segment->p_flags, segment->p_align);
+
+ /* check for bad data in the segment header */
+ if ((segment->p_align % PAGE_SIZE))
+ {
+ dbg(DBG_ELF, "ERROR: segment not aligned on page\n");
+ return -ENOEXEC;
+ }
+ else if (filesz > memsz)
+ {
+ dbg(DBG_ELF, "ERROR: segment file size is greater than memory size\n");
+ return -ENOEXEC;
+ }
+ else if (PAGE_OFFSET(addr) != PAGE_OFFSET(off))
+ {
+ dbg(DBG_ELF,
+ "ERROR: segment address and offset are not aligned correctly\n");
+ return -ENOEXEC;
+ }
+
+ /* calculate segment permissions */
+ int perms = 0;
+ if (PF_R & segment->p_flags)
+ {
+ perms |= PROT_READ;
+ }
+ if (PF_W & segment->p_flags)
+ {
+ perms |= PROT_WRITE;
+ }
+ if (PF_X & segment->p_flags)
+ {
+ perms |= PROT_EXEC;
+ }
+
+ if (filesz > 0)
+ {
+ /* something needs to be mapped from the file */
+ /* start from the starting address and include enough pages to
+ * map all filesz bytes of the file */
+ uint64_t lopage = ADDR_TO_PN(addr);
+ uint64_t npages = ADDR_TO_PN(addr + filesz - 1) - lopage + 1;
+ off_t fileoff = (off_t)PAGE_ALIGN_DOWN(off);
+
+ if (!vmmap_is_range_empty(map, lopage, npages))
+ {
+ dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
+ return -ENOEXEC;
+ }
+ long ret = vmmap_map(map, file, lopage, npages, perms,
+ MAP_PRIVATE | MAP_FIXED, fileoff, 0, NULL);
+ if (ret)
+ return ret;
+ dbg(DBG_ELF,
+ "Mapped segment of length %lu pages at %#lx, memoff = %#lx\n",
+ npages, addr, memoff);
+ }
+
+ if (memsz > filesz)
+ {
+ /* there is left over memory in the segment which must
+ * be initialized to 0 (anonymously mapped) */
+ uint64_t lopage = ADDR_TO_PN(
+ addr +
+ filesz); // the first page containing data not stored in the file
+ uint64_t npages =
+ ADDR_TO_PN(PAGE_ALIGN_UP(addr + memsz)) -
+ lopage; // the first page totally unused by memory, minus low page
+
+ /* check for overlapping mappings, considering the case where lopage
+         * contains file data and the case where it doesn't */
+ if (PAGE_ALIGNED(addr + filesz) &&
+ !vmmap_is_range_empty(map, lopage, npages))
+ {
+ dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
+ return -ENOEXEC;
+ }
+ if (!PAGE_ALIGNED(addr + filesz) && npages > 1 &&
+ !vmmap_is_range_empty(map, lopage + 1, npages - 1))
+ {
+ dbg(DBG_ELF, "ERROR: ELF file contains overlapping segments\n");
+ return -ENOEXEC;
+ }
+ long ret = vmmap_map(map, NULL, lopage, npages, perms,
+ MAP_PRIVATE | MAP_FIXED, 0, 0, NULL);
+ if (ret)
+ return ret;
+ if (!PAGE_ALIGNED(addr + filesz) && filesz > 0)
+ {
+ /* In this case, we have accidentally zeroed too much of memory, as
+ * we zeroed all memory in the page containing addr + filesz.
+ * However, the remaining part of the data is not a full page, so we
+ * should not just map in another page (as there could be garbage
+ * after addr+filesz). For instance, consider the data-bss boundary
+ * (c.f. Intel x86 ELF supplement pp. 82).
+ * To fix this, we need to read in the contents of the file manually
+ * and put them at that user space addr in the anon map we just
+ * added. */
+ void *buf = page_alloc();
+ if (!buf)
+ return -ENOMEM;
+
+ vlock(file);
+ ret = file->vn_ops->read(file,
+ (size_t)PAGE_ALIGN_DOWN(off + filesz - 1),
+ buf, PAGE_OFFSET(addr + filesz));
+ if (ret >= 0)
+ {
+ KASSERT((uintptr_t)ret == PAGE_OFFSET(addr + filesz));
+ ret = vmmap_write(map, PAGE_ALIGN_DOWN(addr + filesz - 1), buf,
+ PAGE_OFFSET(addr + filesz));
+ }
+ vunlock(file);
+ page_free(buf);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/* Read in the given fd's ELF header into the location pointed to by the given
+ * argument and does some basic checks that it is a valid ELF file, is an
+ * executable, and is for the correct platform
+ * interp is 1 if we are loading an interpreter, 0 otherwise
+ * Returns 0 on success, -errno on failure. Returns the ELF header in the header
+ * argument. */
+static long _elf64_load_ehdr(int fd, Elf64_Ehdr *header, int interp)
+{
+ long ret;
+ memset(header, 0, sizeof(*header));
+
+ /* Preliminary check that this is an ELF file */
+ ret = do_read(fd, header, sizeof(*header));
+ if (ret < 0)
+ return ret;
+ if ((ret < SELFMAG) || memcmp(&header->e_ident[0], ELFMAG, SELFMAG) != 0)
+ {
+ dbg(DBG_ELF, "ELF load failed: no magic number present\n");
+ return -ENOEXEC;
+ }
+ if (ret < header->e_ehsize)
+ {
+ dbg(DBG_ELF, "ELF load failed: bad file size\n");
+ return -ENOEXEC;
+ }
+ /* Log information about the file */
+ dbg(DBG_ELF, "loading ELF file\n");
+ dbgq(DBG_ELF, "ELF Header Information:\n");
+ dbgq(DBG_ELF, "Version: %d\n", (int)header->e_ident[EI_VERSION]);
+ dbgq(DBG_ELF, "Class: %d\n", (int)header->e_ident[EI_CLASS]);
+ dbgq(DBG_ELF, "Data: %d\n", (int)header->e_ident[EI_DATA]);
+ dbgq(DBG_ELF, "Type: %d\n", (int)header->e_type);
+ dbgq(DBG_ELF, "Machine: %d\n", (int)header->e_machine);
+
+ /* Check that the ELF file is executable and targets
+ * the correct platform */
+ if (interp && header->e_type != ET_DYN)
+ {
+ dbg(DBG_ELF,
+ "ELF load failed: interpreter is not a shared object file\n");
+ return -ENOEXEC;
+ }
+ if (!interp && header->e_type != ET_EXEC)
+ {
+ dbg(DBG_ELF, "ELF load failed: not executable ELF\n");
+ return -ENOEXEC;
+ }
+ if (!_elf64_platform_check(header))
+ {
+ dbg(DBG_ELF, "ELF load failed: incorrect platform\n");
+ return -ENOEXEC;
+ }
+ return 0;
+}
+
+/* Loads the program header tables from the ELF file specified by
+ * the open file descriptor fd. header should point to the header information
+ * for that ELF file. pht is a buffer of size size. It must be large enough
+ * to hold the program header tables (whose size can be determined from
+ * the ELF header).
+ *
+ * Returns 0 on success or -errno on error. */
+static long _elf64_load_phtable(int fd, Elf64_Ehdr *header, char *pht,
+ size_t size)
+{
+ size_t phtsize = header->e_phentsize * header->e_phnum;
+ KASSERT(phtsize <= size);
+ /* header->e_phoff is a uint64_t cast to int. since the max file size on
+ * s5fs is way smaller than uint32_t, offsets in practice should never
+ * cause this cast to behave badly, although if weenix ever adds support
+ * for very large (> 4GB) files, this will be a bug.
+ */
+ long ret = do_lseek(fd, (int)(header->e_phoff), SEEK_SET);
+ if (ret < 0)
+ return ret;
+
+ ret = do_read(fd, pht, phtsize);
+ if (ret < 0)
+ return ret;
+
+ KASSERT((size_t)ret <= phtsize);
+ if ((size_t)ret < phtsize)
+ {
+ return -ENOEXEC;
+ }
+ return 0;
+}
+
+/* Maps the PT_LOAD segments for an ELF file into the given address space.
+ * vnode should be the open vnode of the ELF file.
+ * map is the address space to map the ELF file into.
+ * header is the ELF file's header.
+ * pht is the full program header table.
+ * memoff is the difference (in pages) between the desired base address and the
+ * base address given in the ELF file (usually 0x8048094)
+ *
+ * Returns the number of segments loaded on success, -errno on failure. */
+static long _elf64_map_progsegs(vnode_t *vnode, vmmap_t *map,
+ Elf64_Ehdr *header, char *pht, int64_t memoff)
+{
+ long ret = 0;
+
+ long loadcount = 0;
+ for (uint32_t i = 0; i < header->e_phnum; i++)
+ {
+ Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize);
+ if (phtentry->p_type == PT_LOAD)
+ {
+ ret = _elf64_map_segment(map, vnode, memoff, phtentry);
+ if (ret)
+ return ret;
+ loadcount++;
+ }
+ }
+
+ if (!loadcount)
+ {
+ dbg(DBG_ELF, "ERROR: ELF file contained no loadable sections\n");
+ return -ENOEXEC;
+ }
+ return loadcount;
+}
+
+/* Locates the program header for the interpreter in the given list of program
+ * headers through the phinterp out-argument. Returns 0 on success (even if
+ * there is no interpreter) or -errno on error. If there is no interpreter
+ * section then phinterp is set to NULL. If there is more than one interpreter
+ * then -EINVAL is returned. */
+static long _elf64_find_phinterp(Elf64_Ehdr *header, char *pht,
+ Elf64_Phdr **phinterp)
+{
+ *phinterp = NULL;
+
+ for (uint32_t i = 0; i < header->e_phnum; i++)
+ {
+ Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize);
+ if (phtentry->p_type == PT_INTERP)
+ {
+ if (!*phinterp)
+ {
+ *phinterp = phtentry;
+ }
+ else
+ {
+ dbg(DBG_ELF, "ELF load failed: multiple interpreters\n");
+ return -EINVAL;
+ }
+ }
+ }
+ return 0;
+}
+
+/* Calculates the lower and upper virtual addresses that the given program
+ * header table would load into if _elf64_map_progsegs were called. We traverse
+ * all the program segments of type PT_LOAD and look at p_vaddr and p_memsz
+ * Return the low and high vaddrs in the given arguments if they are non-NULL.
+ * The high vaddr is one plus the highest vaddr used by the program. */
+static void _elf64_calc_progbounds(Elf64_Ehdr *header, char *pht, void **low,
+ void **high)
+{
+ Elf64_Addr curlow = (Elf64_Addr)-1;
+ Elf64_Addr curhigh = 0;
+ for (uint32_t i = 0; i < header->e_phnum; i++)
+ {
+ Elf64_Phdr *phtentry = (Elf64_Phdr *)(pht + i * header->e_phentsize);
+ if (phtentry->p_type == PT_LOAD)
+ {
+ if (phtentry->p_vaddr < curlow)
+ {
+ curlow = phtentry->p_vaddr;
+ }
+ if (phtentry->p_vaddr + phtentry->p_memsz > curhigh)
+ {
+ curhigh = phtentry->p_vaddr + phtentry->p_memsz;
+ }
+ }
+ }
+ if (low)
+ {
+ *low = (void *)curlow;
+ }
+ if (high)
+ {
+ *high = (void *)curhigh;
+ }
+}
+
+/* Calculates the total size of all the arguments that need to be placed on the
+ * user stack before execution can begin. See AMD64 ABI Draft 0.99.6 page 29
+ * Returns total size on success. Returns the number of non-NULL entries in
+ * argv, envp, and auxv in argc, envc, and auxc arguments, respectively */
+static size_t _elf64_calc_argsize(char *const argv[], char *const envp[],
+ Elf64_auxv_t *auxv, size_t phtsize,
+ size_t *argc, size_t *envc, size_t *auxc)
+{
+ size_t size = 0;
+ size_t i;
+ /* All strings in argv */
+ for (i = 0; argv[i]; i++)
+ {
+ size += strlen(argv[i]) + 1; /* null terminator */
+ }
+ if (argc)
+ {
+ *argc = i;
+ }
+ /* argv itself (+ null terminator) */
+ size += (i + 1) * sizeof(char *);
+
+ /* All strings in envp */
+ for (i = 0; envp[i] != NULL; i++)
+ {
+ size += strlen(envp[i]) + 1; /* null terminator */
+ }
+ if (envc != NULL)
+ {
+ *envc = i;
+ }
+ /* envp itself (+ null terminator) */
+ size += (i + 1) * sizeof(char *);
+
+ /* The only extra-space-consuming entry in auxv is AT_PHDR, as if we find
+ * that entry we'll need to put the program header table on the stack */
+ for (i = 0; auxv[i].a_type != AT_NULL; i++)
+ {
+ if (auxv[i].a_type == AT_PHDR)
+ {
+ size += phtsize;
+ }
+ }
+ if (auxc)
+ {
+ *auxc = i;
+ }
+ /* auxv itself (+ null terminator) */
+ size += (i + 1) * sizeof(Elf64_auxv_t);
+
+ /* argc - reserving 8 bytes for alignment purposes */
+ size += sizeof(int64_t);
+ /* argv, envp, and auxv pointers (as passed to main) */
+ size += 3 * sizeof(void *);
+
+ /*
+ * cjm5: the above isn't strictly ABI compliant. normally the userspace
+ * wrappers to main() (__libc_static_entry or _bootstrap for ld-weenix) are
+ * responsible for calculating *argv, *envp, and *auxv to pass to main().
+ * It's easier to do it here, though.
+ */
+
+ return size;
+}
+
+/* Copies the arguments that must be on the stack prior to execution onto the
+ * user stack. This should never fail.
+ * arglow: low address on the user stack where we should start the copying
+ * argsize: total size of everything to go on the stack
+ * buf: a kernel buffer at least as big as argsize (for convenience)
+ * argv, envp, auxv: various vectors of stuff (to go on the stack)
+ * argc, envc, auxc: number of non-NULL entries in argv, envp, auxv,
+ * respectively (to avoid recomputing them)
+ * phtsize: the size of the program header table (to avoid recomputing)
+ * c.f. Intel i386 ELF supplement pp 54-59 and AMD64 ABI Draft 0.99.6 page 29
+ */
+static void _elf64_load_args(vmmap_t *map, void *arglow, size_t argsize,
+ char *buf, char *const argv[], char *const envp[],
+ Elf64_auxv_t *auxv, size_t argc, size_t envc,
+ size_t auxc, size_t phtsize)
+{
+ dbg(DBG_ELF,
+ "Loading initial stack contents at 0x%p, argc = %lu, envc = %lu, auxc "
+ "= %lu\n",
+ arglow, argc, envc, auxc);
+
+ size_t i;
+
+ /* Copy argc: in x86-64, this is an eight-byte value, despite being treated
+ * as an int in a C main() function. See AMD64 ABI Draft 0.99.6 page 29 */
+ *((int64_t *)buf) = (int64_t)argc;
+
+ /* Calculate where the strings / tables pointed to by the vectors start */
+ size_t veclen = (argc + 1 + envc + 1) * sizeof(char *) +
+ (auxc + 1) * sizeof(Elf64_auxv_t);
+
+ char *vecstart =
+ buf + sizeof(int64_t) +
+ 3 * sizeof(void *); /* Beginning of argv (in kernel buffer) */
+
+ char *vvecstart =
+ ((char *)arglow) + sizeof(int64_t) +
+ 3 * sizeof(void *); /* Beginning of argv (in user space) */
+
+ char *strstart = vecstart + veclen; /* Beginning of first string pointed to
+ by argv (in kernel buffer) */
+
+ /* Beginning of first string pointed to by argv (in user space) */
+ char *vstrstart = vvecstart + veclen;
+
+ /*
+ * cjm5: since the first 6 arguments that can fit in registers are placed
+ * there in x86-64, __libc_static_entry (and ld-weenix, if it is ever ported
+ * to x86-64) have to take the following pointers off the stack and move
+ * them and argc into the first 4 argument registers before calling main().
+ */
+
+ /* Copy over pointer to argv */
+ *(char **)(buf + 8) = vvecstart;
+ /* Copy over pointer to envp */
+ *(char **)(buf + 16) = vvecstart + (argc + 1) * sizeof(char *);
+ /* Copy over pointer to auxv */
+ *(char **)(buf + 24) = vvecstart + (argc + 1 + envc + 1) * sizeof(char *);
+
+ /* Copy over argv along with every string in it */
+ for (i = 0; i < argc; i++)
+ {
+ size_t len = strlen(argv[i]) + 1;
+ strcpy(strstart, argv[i]);
+ /* Remember that we need to use the virtual address of the string */
+ *(char **)vecstart = vstrstart;
+ strstart += len;
+ vstrstart += len;
+ vecstart += sizeof(char *);
+ }
+ /* null terminator of argv */
+ *(char **)vecstart = NULL;
+ vecstart += sizeof(char *);
+
+ /* Copy over envp along with every string in it */
+ for (i = 0; i < envc; i++)
+ {
+ size_t len = strlen(envp[i]) + 1;
+ strcpy(strstart, envp[i]);
+ /* Remember that we need to use the virtual address of the string */
+ *(char **)vecstart = vstrstart;
+ strstart += len;
+ vstrstart += len;
+ vecstart += sizeof(char *);
+ }
+ /* null terminator of envp */
+ *(char **)vecstart = NULL;
+ vecstart += sizeof(char *);
+
+ /* Copy over auxv along with the program header (if we find it) */
+ for (i = 0; i < auxc; i++)
+ {
+ /* Copy over the auxv entry */
+ memcpy(vecstart, &auxv[i], sizeof(Elf64_auxv_t));
+ /* Check if it points to the program header */
+ if (auxv[i].a_type == AT_PHDR)
+ {
+ /* Copy over the program header table */
+ memcpy(strstart, auxv[i].a_un.a_ptr, (size_t)phtsize);
+ /* And modify the address */
+ ((Elf64_auxv_t *)vecstart)->a_un.a_ptr = vstrstart;
+ }
+ vecstart += sizeof(Elf64_auxv_t);
+ }
+ /* null terminator of auxv */
+    ((Elf64_auxv_t *)vecstart)->a_type = AT_NULL;
+
+ /* Finally, we're done copying into the kernel buffer. Now just copy the
+ * kernel buffer into user space */
+ long ret = vmmap_write(map, arglow, buf, argsize);
+ /* If this failed, we must have set up the address space wrong... */
+ KASSERT(!ret);
+}
+
+static long _elf64_load(const char *filename, int fd, char *const argv[],
+ char *const envp[], uint64_t *rip, uint64_t *rsp)
+{
+ long ret = 0;
+ Elf64_Ehdr header;
+ Elf64_Ehdr interpheader;
+
+ /* variables to clean up on failure */
+ vmmap_t *map = NULL;
+ file_t *file = NULL;
+ char *pht = NULL;
+ char *interpname = NULL;
+ long interpfd = -1;
+ file_t *interpfile = NULL;
+ char *interppht = NULL;
+ Elf64_auxv_t *auxv = NULL;
+ char *argbuf = NULL;
+
+ uintptr_t entry;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ /* Load and verify the ELF header */
+ ret = _elf64_load_ehdr(fd, &header, 0);
+ if (ret)
+ goto done;
+
+ map = vmmap_create();
+ if (!map)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ // Program header table entry size multiplied by
+ // number of entries.
+ size_t phtsize = header.e_phentsize * header.e_phnum;
+ pht = kmalloc(phtsize);
+ if (!pht)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Read in the program header table */
+ ret = _elf64_load_phtable(fd, &header, pht, phtsize);
+ if (ret)
+ goto done;
+
+ /* Load the segments in the program header table */
+ ret = _elf64_map_progsegs(file->f_vnode, map, &header, pht, 0);
+ if (ret < 0)
+ goto done;
+
+ /* Check if program requires an interpreter */
+ Elf64_Phdr *phinterp = NULL;
+ ret = _elf64_find_phinterp(&header, pht, &phinterp);
+ if (ret)
+ goto done;
+
+ /* Calculate program bounds for future reference */
+ void *proglow;
+ void *proghigh;
+ _elf64_calc_progbounds(&header, pht, &proglow, &proghigh);
+
+ entry = (uintptr_t)header.e_entry;
+
+ /* if an interpreter was requested load it */
+ if (phinterp)
+ {
+ /* read the file name of the interpreter from the binary */
+ ret = do_lseek(fd, (int)(phinterp->p_offset), SEEK_SET);
+ if (ret < 0)
+ goto done;
+
+ interpname = kmalloc(phinterp->p_filesz);
+ if (!interpname)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ ret = do_read(fd, interpname, phinterp->p_filesz);
+ if (ret < 0)
+ goto done;
+
+ if ((size_t)ret != phinterp->p_filesz)
+ {
+ ret = -ENOEXEC;
+ goto done;
+ }
+
+ /* open the interpreter */
+ dbgq(DBG_ELF, "ELF Interpreter: %*s\n", (int)phinterp->p_filesz,
+ interpname);
+ interpfd = do_open(interpname, O_RDONLY);
+ if (interpfd < 0)
+ {
+ ret = interpfd;
+ goto done;
+ }
+ kfree(interpname);
+ interpname = NULL;
+
+ interpfile = fget((int)interpfd);
+ KASSERT(interpfile);
+
+ /* Load and verify the interpreter ELF header */
+ ret = _elf64_load_ehdr((int)interpfd, &interpheader, 1);
+ if (ret)
+ goto done;
+
+ size_t interpphtsize = interpheader.e_phentsize * interpheader.e_phnum;
+ interppht = kmalloc(interpphtsize);
+ if (!interppht)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Read in the program header table */
+ ret = _elf64_load_phtable((int)interpfd, &interpheader, interppht,
+ interpphtsize);
+ if (ret)
+ goto done;
+
+ /* Interpreter shouldn't itself need an interpreter */
+ Elf64_Phdr *interpphinterp;
+ ret = _elf64_find_phinterp(&interpheader, interppht, &interpphinterp);
+ if (ret)
+ goto done;
+
+ if (interpphinterp)
+ {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* Calculate the interpreter program size */
+ void *interplow;
+ void *interphigh;
+ _elf64_calc_progbounds(&interpheader, interppht, &interplow,
+ &interphigh);
+ uint64_t interpnpages =
+ ADDR_TO_PN(PAGE_ALIGN_UP(interphigh)) - ADDR_TO_PN(interplow);
+
+ /* Find space for the interpreter */
+ /* This is the first pn at which the interpreter will be mapped */
+ uint64_t interppagebase =
+ (uint64_t)vmmap_find_range(map, interpnpages, VMMAP_DIR_HILO);
+ if (interppagebase == ~0UL)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ /* Base address at which the interpreter begins on that page */
+ void *interpbase = (void *)((uintptr_t)PN_TO_ADDR(interppagebase) +
+ PAGE_OFFSET(interplow));
+
+ /* Offset from "expected base" in number of pages */
+ int64_t interpoff =
+ (int64_t)interppagebase - (int64_t)ADDR_TO_PN(interplow);
+
+ entry = (uintptr_t)interpbase +
+ ((uintptr_t)interpheader.e_entry - (uintptr_t)interplow);
+
+ /* Load the interpreter program header and map in its segments */
+ ret = _elf64_map_progsegs(interpfile->f_vnode, map, &interpheader,
+ interppht, interpoff);
+ if (ret < 0)
+ goto done;
+
+ /* Build the ELF aux table */
+ /* Need to hold AT_PHDR, AT_PHENT, AT_PHNUM, AT_ENTRY, AT_BASE,
+ * AT_PAGESZ, AT_NULL */
+ auxv = (Elf64_auxv_t *)kmalloc(7 * sizeof(Elf64_auxv_t));
+ if (!auxv)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ Elf64_auxv_t *auxvent = auxv;
+
+ /* Add all the necessary entries */
+ auxvent->a_type = AT_PHDR;
+ auxvent->a_un.a_ptr = pht;
+ auxvent++;
+
+ auxvent->a_type = AT_PHENT;
+ auxvent->a_un.a_val = header.e_phentsize;
+ auxvent++;
+
+ auxvent->a_type = AT_PHNUM;
+ auxvent->a_un.a_val = header.e_phnum;
+ auxvent++;
+
+ auxvent->a_type = AT_ENTRY;
+ auxvent->a_un.a_ptr = (void *)header.e_entry;
+ auxvent++;
+
+ auxvent->a_type = AT_BASE;
+ auxvent->a_un.a_ptr = interpbase;
+ auxvent++;
+
+ auxvent->a_type = AT_PAGESZ;
+ auxvent->a_un.a_val = PAGE_SIZE;
+ auxvent++;
+
+ auxvent->a_type = AT_NULL;
+ }
+ else
+ {
+ /* Just put AT_NULL (we don't really need this at all) */
+ auxv = (Elf64_auxv_t *)kmalloc(sizeof(Elf64_auxv_t));
+ if (!auxv)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ auxv->a_type = AT_NULL;
+ }
+
+ /* Allocate stack at the top of the address space */
+ uint64_t stack_lopage = (uint64_t)vmmap_find_range(
+ map, (DEFAULT_STACK_SIZE / PAGE_SIZE) + 1, VMMAP_DIR_HILO);
+ if (stack_lopage == ~0UL)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ ret =
+ vmmap_map(map, NULL, stack_lopage, (DEFAULT_STACK_SIZE / PAGE_SIZE) + 1,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, 0, 0, NULL);
+ KASSERT(0 == ret);
+ dbg(DBG_ELF, "Mapped Stack at low addr 0x%p, size %#lx\n",
+ PN_TO_ADDR(stack_lopage), DEFAULT_STACK_SIZE + PAGE_SIZE);
+
+ /* Calculate size needed on user stack for arguments */
+ size_t argc, envc, auxc;
+ size_t argsize =
+ _elf64_calc_argsize(argv, envp, auxv, phtsize, &argc, &envc, &auxc);
+ /* Make sure it fits on the stack */
+ if (argsize >= DEFAULT_STACK_SIZE)
+ {
+ ret = -E2BIG;
+ goto done;
+ }
+ /* Allocate kernel buffer for temporarily storing arguments */
+ argbuf = (char *)kmalloc(argsize);
+ if (!argbuf)
+ {
+ ret = -ENOMEM;
+ goto done;
+ }
+ /* Calculate where in user space we start putting the args. */
+ // the args go at the beginning (top) of the stack
+ void *arglow =
+ (char *)PN_TO_ADDR(stack_lopage) +
+ (uint64_t)(
+ ((uint64_t)PN_TO_ADDR((DEFAULT_STACK_SIZE / PAGE_SIZE) + 1)) -
+ argsize);
+
+ /* Copy everything into the user address space, modifying addresses in
+ * argv, envp, and auxv to be user addresses as we go. */
+ _elf64_load_args(map, arglow, argsize, argbuf, argv, envp, auxv, argc, envc,
+ auxc, phtsize);
+
+ dbg(DBG_ELF,
+ "Past the point of no return. Swapping to map at 0x%p, setting brk to "
+ "0x%p\n",
+ map, proghigh);
+ /* the final threshold / What warm unspoken secrets will we learn? / Beyond
+ * the point of no return ... */
+
+ /* Give the process the new mappings. */
+ vmmap_destroy(&curproc->p_vmmap);
+ map->vmm_proc = curproc;
+ curproc->p_vmmap = map;
+ map = NULL; /* So it doesn't get cleaned up at the end */
+
+ /* Flush the process pagetables and TLB */
+ pt_unmap_range(curproc->p_pml4, USER_MEM_LOW, USER_MEM_HIGH);
+ tlb_flush_all();
+
+ /* Set the process break and starting break (immediately after the mapped-in
+ * text/data/bss from the executable) */
+ curproc->p_brk = proghigh;
+ curproc->p_start_brk = proghigh;
+
+ strncpy(curproc->p_name, filename, PROC_NAME_LEN);
+
+ /* Tell the caller the correct stack pointer and instruction
+ * pointer to begin execution in user space */
+ *rip = (uint64_t)entry;
+ *rsp = ((uint64_t)arglow) -
+ 8; /* Space on the user stack for the (garbage) return address */
+ /* Note that the return address will be fixed by the userland entry code,
+ * whether in static or dynamic */
+
+ /* And we're done */
+ ret = 0;
+
+// https://www.youtube.com/watch?v=PJhXVg2QisM
+done:
+ fput(&file);
+ if (map)
+ {
+ vmmap_destroy(&map);
+ }
+ if (pht)
+ {
+ kfree(pht);
+ }
+ if (interpname)
+ {
+ kfree(interpname);
+ }
+ if (interpfd >= 0)
+ {
+ do_close((int)interpfd);
+ }
+ if (interpfile)
+ {
+ fput(&interpfile);
+ }
+ if (interppht)
+ {
+ kfree(interppht);
+ }
+ if (auxv)
+ {
+ kfree(auxv);
+ }
+ if (argbuf)
+ {
+ kfree(argbuf);
+ }
+ return ret;
+}
+
+void elf64_init(void) { binfmt_add("ELF64", _elf64_load); }
diff --git a/kernel/api/exec.c b/kernel/api/exec.c
new file mode 100644
index 0000000..e0b66e8
--- /dev/null
+++ b/kernel/api/exec.c
@@ -0,0 +1,110 @@
+#include "util/debug.h"
+#include <util/string.h>
+
+#include "main/gdt.h"
+
+#include "api/binfmt.h"
+#include "api/exec.h"
+#include "api/syscall.h"
+
+/* Enters userland from the kernel. Call this for a process that has up to now
+ * been a kernel-only process. Takes the registers to start userland execution
+ * with. Does not return. Note that the regs passed in should be on the current
+ * stack of execution.
+ */
+
+void userland_entry(const regs_t regs)
+{
+ KASSERT(preemption_enabled());
+
+ dbg(DBG_ELF, ">>>>>>>>>>>>>>> pid: %d\n", curproc->p_pid);
+
+ intr_disable();
+ dbg(DBG_ELF, ">>>>>>>>>>>>>>>> intr_disable()\n");
+ intr_setipl(IPL_LOW);
+ dbg(DBG_ELF, ">>>>>>>>>>>>>>>> intr_setipl()\n");
+
+ __asm__ __volatile__(
+ "movq %%rax, %%rsp\n\t" /* Move stack pointer up to regs */
+ "popq %%r15\n\t" /* Pop all general purpose registers (except rsp, */
+ "popq %%r14\n\t" /* which gets popped by iretq) */
+ "popq %%r13\n\t"
+ "popq %%r12\n\t"
+ "popq %%rbp\n\t"
+ "popq %%rbx\n\t"
+ "popq %%r11\n\t"
+ "popq %%r10\n\t"
+ "popq %%r9\n\t"
+ "popq %%r8\n\t"
+ "popq %%rax\n\t"
+ "popq %%rcx\n\t"
+ "popq %%rdx\n\t"
+ "popq %%rsi\n\t"
+ "popq %%rdi\n\t"
+ "add $16, %%rsp\n\t" /*
+ * Move stack pointer up to the location of the
+ * arguments automatically pushed by the processor
+ * on an interrupt
+ */
+ "iretq\n"
+ /* We're now in userland! */
+ : /* No outputs */
+ : "a"(&regs) /* Forces regs to be in the 'a' register (%rax). */
+ );
+}
+
+long do_execve(const char *filename, char *const *argv, char *const *envp,
+ struct regs *regs)
+{
+ uint64_t rip, rsp;
+ long ret = binfmt_load(filename, argv, envp, &rip, &rsp);
+ if (ret < 0)
+ {
+ return ret;
+ }
+ /* Make sure we "return" into the start of the newly loaded binary */
+ dbg(DBG_EXEC, "Executing binary with rip 0x%p, rsp 0x%p\n", (void *)rip,
+ (void *)rsp);
+ regs->r_rip = rip;
+ regs->r_rsp = rsp;
+ return 0;
+}
+
+/*
+ * The kernel version of execve needs to construct a set of saved user registers
+ * and fake a return from an interrupt to get to userland. The 64-bit version
+ * behaves mostly the same as the 32-bit version, but there are a few
+ * differences. Besides different general purpose registers, there is no longer
+ * a need for two esp/rsp fields since popa is not valid assembly in 64-bit. The
+ * only non-null segment registers are now cs and ss, but they are set the same
+ * as in 32-bit, although the segment descriptors they point to are slightly
+ * different.
+ */
+void kernel_execve(const char *filename, char *const *argv, char *const *envp)
+{
+ uint64_t rip, rsp;
+ long ret = binfmt_load(filename, argv, envp, &rip, &rsp);
+ dbg(DBG_EXEC, "ret = %ld\n", ret);
+
+ KASSERT(0 == ret); /* Should never fail to load the first binary */
+
+ dbg(DBG_EXEC, "Entering userland with rip 0x%p, rsp 0x%p\n", (void *)rip,
+ (void *)rsp);
+ /* To enter userland, we build a set of saved registers to "trick" the
+ * processor into thinking we were in userland before. Yes, it's horrible.
+ * c.f. http://wiki.osdev.org/index.php?title=Getting_to_Ring_3&oldid=8195
+ */
+ regs_t regs;
+ memset(&regs, 0, sizeof(regs_t));
+
+ /* Userland gdt entries (0x3 for ring 3) */
+ regs.r_cs = GDT_USER_TEXT | 0x3;
+ regs.r_ss = GDT_USER_DATA | 0x3;
+
+ /* Userland instruction pointer and stack pointer */
+ regs.r_rip = rip;
+ regs.r_rsp = rsp;
+
+ regs.r_rflags = 0x202; // see 32-bit version
+ userland_entry(regs);
+}
\ No newline at end of file
diff --git a/kernel/api/syscall.c b/kernel/api/syscall.c
new file mode 100644
index 0000000..1be5276
--- /dev/null
+++ b/kernel/api/syscall.c
@@ -0,0 +1,757 @@
+#include "errno.h"
+#include "globals.h"
+#include "kernel.h"
+#include <fs/vfs.h>
+#include <util/time.h>
+
+#include "main/inits.h"
+#include "main/interrupt.h"
+
+#include "mm/kmalloc.h"
+#include "mm/mman.h"
+
+#include "fs/vfs_syscall.h"
+#include "fs/vnode.h"
+
+#include "drivers/tty/tty.h"
+#include "test/kshell/kshell.h"
+
+#include "vm/brk.h"
+#include "vm/mmap.h"
+
+#include "api/access.h"
+#include "api/exec.h"
+#include "api/syscall.h"
+#include "api/utsname.h"
+
+static long syscall_handler(regs_t *regs);
+
+static long syscall_dispatch(size_t sysnum, uintptr_t args, regs_t *regs);
+
+extern size_t active_tty;
+
+static const char *syscall_strings[49] = {
+ "syscall", "exit", "fork", "read", "write", "open",
+ "close", "waitpid", "link", "unlink", "execve", "chdir",
+ "sleep", "unknown", "lseek", "sync", "nuke", "dup",
+ "pipe", "ioctl", "unknown", "rmdir", "mkdir", "getdents",
+ "mmap", "mprotect", "munmap", "rename", "uname", "thr_create",
+ "thr_cancel", "thr_exit", "thr_yield", "thr_join", "gettid", "getpid",
+ "unknown", "unkown", "unknown", "errno", "halt", "get_free_mem",
+ "set_errno", "dup2", "brk", "mount", "umount", "stat", "usleep"};
+
+void syscall_init(void) { intr_register(INTR_SYSCALL, syscall_handler); }
+
+// if condition, set errno to err and return -1
+#define ERROR_OUT(condition, err) \
+ if (condition) \
+ { \
+ curthr->kt_errno = (err); \
+ return -1; \
+ }
+
+// if ret < 0, set errno to -ret and return -1
+#define ERROR_OUT_RET(ret) ERROR_OUT(ret < 0, -ret)
+
+/*
+ * Be sure to look at other examples of implemented system calls to see how
+ * this should be done - the general outline is as follows.
+ *
+ * - Initialize a read_args_t struct locally in kernel space and copy from
+ * userland args.
+ * - Allocate a temporary buffer (a page-aligned block of n pages that are
+ * enough space to store the number of bytes to read)
+ * - Call do_read() with the buffer and then copy the buffer to the userland
+ * args after the system call
+ * - Make sure to free the temporary buffer allocated
+ * - Return the number of bytes read, or return -1 and set the current thread's
+ * errno appropriately using ERROR_OUT_RET.
+ */
+static long sys_read(read_args_t *args)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
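+
+/*
+ * Illustrative sketch of the outline above, not the required solution. The
+ * read_args_t field names (fd, buf, nbytes) and the page_alloc_n() /
+ * page_free_n() helpers are assumptions; their real definitions live in
+ * headers that are not part of this diff.
+ */
+static long sys_read_sketch(read_args_t *args)
+{
+    read_args_t kargs;
+    long ret = copy_from_user(&kargs, args, sizeof(kargs));
+    ERROR_OUT_RET(ret);
+
+    /* Page-aligned temporary buffer large enough for the requested bytes. */
+    size_t npages = (kargs.nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
+    void *buf = page_alloc_n(npages ? npages : 1);
+    ERROR_OUT(!buf, ENOMEM);
+
+    ret = do_read(kargs.fd, buf, kargs.nbytes);
+    if (ret >= 0)
+    {
+        /* Copy out only the bytes that were actually read. */
+        long copy_ret = copy_to_user(kargs.buf, buf, (size_t)ret);
+        if (copy_ret)
+        {
+            ret = copy_ret;
+        }
+    }
+    page_free_n(buf, npages ? npages : 1);
+    ERROR_OUT_RET(ret);
+    return ret;
+}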
+
+/*
+ * Be sure to look at other examples of implemented system calls to see how
+ * this should be done - the general outline is as follows.
+ *
+ * This function is very similar to sys_read - see above comments. You'll need
+ * to use the functions copy_from_user() and do_write(). Make sure to
+ * allocate a new temporary buffer for the data that is being written. This
+ * is to ensure that pagefaults within kernel mode do not happen.
+ */
+static long sys_write(write_args_t *args)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
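+
+/*
+ * Illustrative sketch only; as with the sys_read sketch above, the
+ * write_args_t field names (fd, buf, nbytes) are assumptions, and do_write()
+ * is assumed to mirror do_read()'s signature.
+ */
+static long sys_write_sketch(write_args_t *args)
+{
+    write_args_t kargs;
+    long ret = copy_from_user(&kargs, args, sizeof(kargs));
+    ERROR_OUT_RET(ret);
+
+    size_t npages = (kargs.nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
+    void *buf = page_alloc_n(npages ? npages : 1);
+    ERROR_OUT(!buf, ENOMEM);
+
+    /* Stage the user data in a kernel buffer so do_write() never touches
+     * userland memory directly (avoiding page faults in kernel mode). */
+    ret = copy_from_user(buf, kargs.buf, kargs.nbytes);
+    if (!ret)
+    {
+        ret = do_write(kargs.fd, buf, kargs.nbytes);
+    }
+    page_free_n(buf, npages ? npages : 1);
+    ERROR_OUT_RET(ret);
+    return ret;
+}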
+
+/*
+ * This is similar to the other system calls that you have implemented above.
+ *
+ * The general steps are as follows:
+ * - Copy the arguments from user memory
+ * - Check that the count field is at least the size of a dirent_t
+ * - Use a while loop to read count / sizeof(dirent_t) directory entries into
+ * the provided dirp and call do_getdent
+ * - Return the number of bytes read
+ */
+static long sys_getdents(getdents_args_t *args)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
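+
+/*
+ * Illustrative sketch only. The getdents_args_t field names (fd, dirp, count)
+ * and the do_getdent() return convention assumed here (sizeof(dirent_t) per
+ * entry read, 0 at end of directory, negative on error) come from the outline
+ * above rather than from headers shown in this diff.
+ */
+static long sys_getdents_sketch(getdents_args_t *args)
+{
+    getdents_args_t kargs;
+    long ret = copy_from_user(&kargs, args, sizeof(kargs));
+    ERROR_OUT_RET(ret);
+    ERROR_OUT(kargs.count < sizeof(dirent_t), EINVAL);
+
+    size_t nbytes = 0;
+    while (nbytes + sizeof(dirent_t) <= kargs.count)
+    {
+        dirent_t dirent;
+        ret = do_getdent(kargs.fd, &dirent);
+        ERROR_OUT_RET(ret);
+        if (ret == 0)
+        {
+            break; /* end of directory */
+        }
+        ret = copy_to_user((char *)kargs.dirp + nbytes, &dirent,
+                           sizeof(dirent_t));
+        ERROR_OUT_RET(ret);
+        nbytes += sizeof(dirent_t);
+    }
+    return (long)nbytes;
+}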
+
+#ifdef __MOUNTING__
+static long sys_mount(mount_args_t *arg)
+{
+ mount_args_t kern_args;
+ char *source;
+ char *target;
+ char *type;
+ long ret;
+
+ if (copy_from_user(&kern_args, arg, sizeof(kern_args)) < 0)
+ {
+ curthr->kt_errno = EFAULT;
+ return -1;
+ }
+
+ /* null is okay only for the source */
+ source = user_strdup(&kern_args.spec);
+ if (NULL == (target = user_strdup(&kern_args.dir)))
+ {
+ kfree(source);
+ curthr->kt_errno = EINVAL;
+ return -1;
+ }
+ if (NULL == (type = user_strdup(&kern_args.fstype)))
+ {
+ kfree(source);
+ kfree(target);
+ curthr->kt_errno = EINVAL;
+ return -1;
+ }
+
+ ret = do_mount(source, target, type);
+ kfree(source);
+ kfree(target);
+ kfree(type);
+
+ if (ret)
+ {
+ curthr->kt_errno = -ret;
+ return -1;
+ }
+
+ return 0;
+}
+
+static long sys_umount(argstr_t *input)
+{
+ argstr_t kstr;
+ char *target;
+ long ret;
+
+ if (copy_from_user(&kstr, input, sizeof(kstr)) < 0)
+ {
+ curthr->kt_errno = EFAULT;
+ return -1;
+ }
+
+ if (NULL == (target = user_strdup(&kstr)))
+ {
+ curthr->kt_errno = EINVAL;
+ return -1;
+ }
+
+ ret = do_umount(target);
+ kfree(target);
+
+ if (ret)
+ {
+ curthr->kt_errno = -ret;
+ return -1;
+ }
+
+ return 0;
+}
+#endif
+
+static long sys_close(int fd)
+{
+ long ret = do_close(fd);
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_dup(int fd)
+{
+ long ret = do_dup(fd);
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_dup2(const dup2_args_t *args)
+{
+ dup2_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+ ret = do_dup2(kargs.ofd, kargs.nfd);
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_mkdir(mkdir_args_t *args)
+{
+ mkdir_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs.path, &path);
+ ERROR_OUT_RET(ret);
+
+ ret = do_mkdir(path);
+ kfree(path);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_rmdir(argstr_t *args)
+{
+ argstr_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs, &path);
+ ERROR_OUT_RET(ret);
+
+ ret = do_rmdir(path);
+ kfree(path);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_unlink(argstr_t *args)
+{
+ argstr_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs, &path);
+ ERROR_OUT_RET(ret);
+
+ ret = do_unlink(path);
+ kfree(path);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_link(link_args_t *args)
+{
+ link_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *to, *from;
+ ret = user_strdup(&kargs.to, &to);
+ ERROR_OUT_RET(ret);
+
+ ret = user_strdup(&kargs.from, &from);
+ if (ret)
+ {
+ kfree(to);
+ ERROR_OUT_RET(ret);
+ }
+
+ ret = do_link(from, to);
+ kfree(to);
+ kfree(from);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_rename(rename_args_t *args)
+{
+ rename_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *oldpath, *newpath;
+ ret = user_strdup(&kargs.oldpath, &oldpath);
+ ERROR_OUT_RET(ret);
+
+ ret = user_strdup(&kargs.newpath, &newpath);
+ if (ret)
+ {
+ kfree(oldpath);
+ ERROR_OUT_RET(ret);
+ }
+
+ ret = do_rename(oldpath, newpath);
+ kfree(oldpath);
+ kfree(newpath);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_chdir(argstr_t *args)
+{
+ argstr_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs, &path);
+ ERROR_OUT_RET(ret);
+
+ ret = do_chdir(path);
+ kfree(path);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_lseek(lseek_args_t *args)
+{
+ lseek_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ ret = do_lseek(kargs.fd, kargs.offset, kargs.whence);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_open(open_args_t *args)
+{
+ open_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs.filename, &path);
+ ERROR_OUT_RET(ret);
+
+ ret = do_open(path, kargs.flags);
+ kfree(path);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_munmap(munmap_args_t *args)
+{
+ munmap_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ ret = do_munmap(kargs.addr, kargs.len);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static void *sys_mmap(mmap_args_t *arg)
+{
+ mmap_args_t kargs;
+
+ if (copy_from_user(&kargs, arg, sizeof(mmap_args_t)))
+ {
+ curthr->kt_errno = EFAULT;
+ return MAP_FAILED;
+ }
+
+ void *ret;
+ long err = do_mmap(kargs.mma_addr, kargs.mma_len, kargs.mma_prot,
+ kargs.mma_flags, kargs.mma_fd, kargs.mma_off, &ret);
+ if (err)
+ {
+ curthr->kt_errno = -err;
+ return MAP_FAILED;
+ }
+ return ret;
+}
+
+static pid_t sys_waitpid(waitpid_args_t *args)
+{
+ waitpid_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ int status;
+ pid_t pid = do_waitpid(kargs.wpa_pid, &status, kargs.wpa_options);
+ ERROR_OUT_RET(pid);
+
+ if (kargs.wpa_status)
+ {
+ ret = copy_to_user(kargs.wpa_status, &status, sizeof(int));
+ ERROR_OUT_RET(ret);
+ }
+
+ return pid;
+}
+
+static void *sys_brk(void *addr)
+{
+ void *new_brk;
+ long ret = do_brk(addr, &new_brk);
+ if (ret)
+ {
+ curthr->kt_errno = -ret;
+ return (void *)-1;
+ }
+ return new_brk;
+}
+
+static void sys_halt(void) { proc_kill_all(); }
+
+static long sys_stat(stat_args_t *args)
+{
+ stat_args_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *path;
+ ret = user_strdup(&kargs.path, &path);
+ ERROR_OUT_RET(ret);
+
+ stat_t stat_buf;
+ ret = do_stat(path, &stat_buf);
+ kfree(path);
+ ERROR_OUT_RET(ret);
+
+ ret = copy_to_user(kargs.buf, &stat_buf, sizeof(stat_buf));
+ ERROR_OUT_RET(ret);
+
+ return ret;
+}
+
+static long sys_pipe(int args[2])
+{
+ int kargs[2];
+ long ret = do_pipe(kargs);
+ ERROR_OUT_RET(ret);
+
+ ret = copy_to_user(args, kargs, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ return ret;
+}
+
+static long sys_uname(struct utsname *arg)
+{
+ static const char sysname[] = "Weenix";
+ static const char release[] = "1.2";
+ /* Version = last compilation time */
+ static const char version[] = "#1 " __DATE__ " " __TIME__;
+ static const char nodename[] = "";
+ static const char machine[] = "";
+ long ret = 0;
+
+ ret = copy_to_user(arg->sysname, sysname, sizeof(sysname));
+ ERROR_OUT_RET(ret);
+ ret = copy_to_user(arg->release, release, sizeof(release));
+ ERROR_OUT_RET(ret);
+ ret = copy_to_user(arg->version, version, sizeof(version));
+ ERROR_OUT_RET(ret);
+ ret = copy_to_user(arg->nodename, nodename, sizeof(nodename));
+ ERROR_OUT_RET(ret);
+ ret = copy_to_user(arg->machine, machine, sizeof(machine));
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_time(time_t *utloc)
+{
+ time_t time = do_time();
+ if (utloc)
+ {
+ long ret = copy_to_user(utloc, &time, sizeof(time_t));
+ ERROR_OUT_RET(ret);
+ }
+ return time;
+}
+
+static long sys_fork(regs_t *regs)
+{
+ long ret = do_fork(regs);
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static void free_vector(char **vect)
+{
+ char **temp;
+ for (temp = vect; *temp; temp++)
+ {
+ kfree(*temp);
+ }
+ kfree(vect);
+}
+
+static long sys_execve(execve_args_t *args, regs_t *regs)
+{
+ execve_args_t kargs;
+ char *filename = NULL;
+ char **argv = NULL;
+ char **envp = NULL;
+
+ long ret;
+ if ((ret = copy_from_user(&kargs, args, sizeof(kargs))))
+ goto cleanup;
+
+ if ((ret = user_strdup(&kargs.filename, &filename)))
+ goto cleanup;
+
+ if (kargs.argv.av_vec && (ret = user_vecdup(&kargs.argv, &argv)))
+ goto cleanup;
+
+ if (kargs.envp.av_vec && (ret = user_vecdup(&kargs.envp, &envp)))
+ goto cleanup;
+
+ ret = do_execve(filename, argv, envp, regs);
+
+cleanup:
+ if (filename)
+ kfree(filename);
+ if (argv)
+ free_vector(argv);
+ if (envp)
+ free_vector(envp);
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_debug(argstr_t *args)
+{
+ argstr_t kargs;
+ long ret = copy_from_user(&kargs, args, sizeof(kargs));
+ ERROR_OUT_RET(ret);
+
+ char *str;
+ ret = user_strdup(&kargs, &str);
+ ERROR_OUT_RET(ret);
+ dbg(DBG_USER, "%s\n", str);
+ kfree(str);
+ return ret;
+}
+
+static long sys_kshell(int ttyid)
+{
+ // Ignore the ttyid passed in (it always defaults to 0) and use the
+ // active_tty value instead.
+ kshell_t *ksh = kshell_create(active_tty);
+ ERROR_OUT(!ksh, ENODEV);
+
+ long ret;
+ while ((ret = kshell_execute_next(ksh)) > 0)
+ ;
+ kshell_destroy(ksh);
+
+ ERROR_OUT_RET(ret);
+ return ret;
+}
+
+static long sys_usleep(usleep_args_t *args)
+{
+ return do_usleep(args->usec);
+}
+
+static inline void check_curthr_cancelled()
+{
+ KASSERT(list_empty(&curthr->kt_mutexes));
+ long cancelled = curthr->kt_cancelled;
+ void *retval = curthr->kt_retval;
+
+ if (cancelled)
+ {
+ dbg(DBG_SYSCALL, "CANCELLING: thread 0x%p of P%d (%s)\n", curthr,
+ curproc->p_pid, curproc->p_name);
+ kthread_exit(retval);
+ }
+}
+
+static long syscall_handler(regs_t *regs)
+{
+ size_t sysnum = (size_t)regs->r_rax;
+ uintptr_t args = (uintptr_t)regs->r_rdx;
+
+ const char *syscall_string;
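+ /* sysnum values up to 47 index directly into the syscall_strings table;
+ * the out-of-band values 9001 and 9002 are the Weenix-specific debug and
+ * kshell calls handled below. */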
+ if (sysnum <= 47)
+ {
+ syscall_string = syscall_strings[sysnum];
+ }
+ else
+ {
+ if (sysnum == 9001)
+ {
+ syscall_string = "debug";
+ }
+ else if (sysnum == 9002)
+ {
+ syscall_string = "kshell";
+ }
+ else
+ {
+ syscall_string = "unknown";
+ }
+ }
+
+ if (sysnum != SYS_errno)
+ dbg(DBG_SYSCALL, ">> pid %d, sysnum: %lu (%s), arg: %lu (0x%p)\n",
+ curproc->p_pid, sysnum, syscall_string, args, (void *)args);
+
+ check_curthr_cancelled();
+ long ret = syscall_dispatch(sysnum, args, regs);
+ check_curthr_cancelled();
+
+ if (sysnum != SYS_errno)
+ dbg(DBG_SYSCALL, "<< pid %d, sysnum: %lu (%s), returned: %lu (%#lx)\n",
+ curproc->p_pid, sysnum, syscall_string, ret, ret);
+
+ regs->r_rax = (uint64_t)ret;
+ return 0;
+}
+
+static long syscall_dispatch(size_t sysnum, uintptr_t args, regs_t *regs)
+{
+ switch (sysnum)
+ {
+ case SYS_waitpid:
+ return sys_waitpid((waitpid_args_t *)args);
+
+ case SYS_exit:
+ do_exit((int)args);
+ panic("exit failed!\n");
+
+ case SYS_thr_exit:
+ kthread_exit((void *)args);
+ panic("thr_exit failed!\n");
+
+ case SYS_sched_yield:
+ sched_yield();
+ return 0;
+
+ case SYS_fork:
+ return sys_fork(regs);
+
+ case SYS_getpid:
+ return curproc->p_pid;
+
+ case SYS_sync:
+ do_sync();
+ return 0;
+
+#ifdef __MOUNTING__
+ case SYS_mount:
+ return sys_mount((mount_args_t *)args);
+
+ case SYS_umount:
+ return sys_umount((argstr_t *)args);
+#endif
+
+ case SYS_mmap:
+ return (long)sys_mmap((mmap_args_t *)args);
+
+ case SYS_munmap:
+ return sys_munmap((munmap_args_t *)args);
+
+ case SYS_open:
+ return sys_open((open_args_t *)args);
+
+ case SYS_close:
+ return sys_close((int)args);
+
+ case SYS_read:
+ return sys_read((read_args_t *)args);
+
+ case SYS_write:
+ return sys_write((write_args_t *)args);
+
+ case SYS_dup:
+ return sys_dup((int)args);
+
+ case SYS_dup2:
+ return sys_dup2((dup2_args_t *)args);
+
+ case SYS_mkdir:
+ return sys_mkdir((mkdir_args_t *)args);
+
+ case SYS_rmdir:
+ return sys_rmdir((argstr_t *)args);
+
+ case SYS_unlink:
+ return sys_unlink((argstr_t *)args);
+
+ case SYS_link:
+ return sys_link((link_args_t *)args);
+
+ case SYS_rename:
+ return sys_rename((rename_args_t *)args);
+
+ case SYS_chdir:
+ return sys_chdir((argstr_t *)args);
+
+ case SYS_getdents:
+ return sys_getdents((getdents_args_t *)args);
+
+ case SYS_brk:
+ return (long)sys_brk((void *)args);
+
+ case SYS_lseek:
+ return sys_lseek((lseek_args_t *)args);
+
+ case SYS_halt:
+ sys_halt();
+ return -1;
+
+ case SYS_set_errno:
+ curthr->kt_errno = (long)args;
+ return 0;
+
+ case SYS_errno:
+ return curthr->kt_errno;
+
+ case SYS_execve:
+ return sys_execve((execve_args_t *)args, regs);
+
+ case SYS_stat:
+ return sys_stat((stat_args_t *)args);
+
+ case SYS_pipe:
+ return sys_pipe((int *)args);
+
+ case SYS_uname:
+ return sys_uname((struct utsname *)args);
+
+ case SYS_time:
+ return sys_time((time_t *)args);
+
+ case SYS_debug:
+ return sys_debug((argstr_t *)args);
+
+ case SYS_kshell:
+ return sys_kshell((int)args);
+
+ case SYS_usleep:
+ return sys_usleep((usleep_args_t *)args);
+
+ default:
+ dbg(DBG_ERROR, "ERROR: unknown system call: %lu (args: 0x%p)\n",
+ sysnum, (void *)args);
+ curthr->kt_errno = ENOSYS;
+ return -1;
+ }
+}
diff --git a/kernel/boot/boot.S b/kernel/boot/boot.S
new file mode 100644
index 0000000..bb3cbef
--- /dev/null
+++ b/kernel/boot/boot.S
@@ -0,0 +1,174 @@
+.file "boot.S"
+
+#define ASM_FILE 1
+#include "multiboot.h"
+#include "boot/config.h"
+#undef ASM_FILE
+#define AOUT_KLUDGE MULTIBOOT_AOUT_KLUDGE
+#define PHYSADDR(x) (x - 0xffff800000000000)
+
+.global entry, _start, initial_page_table
+
+.code32
+.set ARCH, 0
+.set CHECKSUM, -(MULTIBOOT2_HEADER_MAGIC + ARCH + (multiboot_header_end - multiboot_header))
+
+/* This header tells GRUB we can be run */
+.section .multiboot
+.align 8
+multiboot_header:
+ .long MULTIBOOT2_HEADER_MAGIC
+ .long ARCH
+ .long multiboot_header_end - multiboot_header
+ .long CHECKSUM
+
+
+.align 8
+address_tag_start:
+ .short MULTIBOOT_HEADER_TAG_ADDRESS
+ .short MULTIBOOT_HEADER_TAG_OPTIONAL
+ .long address_tag_end - address_tag_start
+ .long PHYSADDR(multiboot_header) /* header_addr = beginning of MB header */
+ .long PHYSADDR(k_start) /* load_addr = beginning of .text */
+ .long PHYSADDR(_edata) /* load_end_addr = end of .data */
+ .long PHYSADDR(_end) /* bss_end_addr = end of .bss */
+address_tag_end:
+
+.align 8
+entry_address_tag_start:
+ .short MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS
+ .short MULTIBOOT_HEADER_TAG_OPTIONAL
+ .long entry_address_tag_end - entry_address_tag_start
+ .long PHYSADDR(_start) /* entry_addr */
+entry_address_tag_end:
+
+#if 0
+.align 8
+framebuffer_tag_start:
+ .short 5
+ .short 0
+ .long frame_buffer_tag_end - framebuffer_tag_start
+ .long 0 // 1280
+ .long 0 // 720
+ .long 0 // 32
+frame_buffer_tag_end:
+#endif
+
+.align 8
+ .short MULTIBOOT_HEADER_TAG_END
+ .short 0
+ .long 8
+multiboot_header_end:
+
+
+_start:
+ // disable interrupts during boot
+ cli
+
+ // Set up a temporary boot stack.
+ movl $PHYSADDR(sys_stack_bottom), %esp
+
+ // clear the flags register (EFLAGS)
+ pushl $0
+ popf
+
+ // set base pointer
+ movl %esp, %ebp
+
+ // pushl %eax
+ pushl $0x0
+ pushl %ebx /* Stash the meminfo for later */
+
+ // Set up the gdt
+ lgdt PHYSADDR(GDTPointer)
+
+ // set cr3 = start of PML4
+ mov $PHYSADDR(pml4), %eax
+ mov %eax, %cr3
+
+ // enable PAE
+ mov %cr4, %eax
+ or $0x20, %eax
+ mov %eax, %cr4
+
+ // enable long mode: set EFER.LME (bit 8) and EFER.SCE (bit 0)
+ mov $0xC0000080, %ecx
+ rdmsr
+ or $0x101, %eax
+ wrmsr
+
+ // Enable paging
+ movl %cr0, %eax
+ or $0x80000000, %eax
+ movl %eax, %cr0
+
+ // jump into 64 bit code
+ ljmp $0x08, $PHYSADDR(_trampoline)
+
+.code64
+
+// For reasons unknown, GDB won't set up breakpoints correctly without this
+// trampoline, even though Weenix still runs if you ljmp directly into _start64.
+_trampoline:
+ // paging is enabled at this point, so there is no more need for PHYSADDR() wrappers
+ movabsq $_start64, %rax
+ jmp *%rax
+
+_start64:
+ // move the stack pointer to himem so that it is valid once we delete the low map
+ movq $KERNEL_VMA, %rax
+ addq %rax, %rsp
+ addq %rax, %rbp
+
+ popq %rbx
+ movq %rbx, %r11
+
+ // set up sregs
+ movq $0x0, %rax
+ mov %ax, %ds
+ mov %ax, %es
+ mov %ax, %ss
+ mov %ax, %fs
+ mov %ax, %gs
+
+ mov %r11, %rdi
+ // now we jump into the C entrypoint.
+ call entry
+ cli
+ hlt // when it's done, we are done
+// [+] TODO: we don't actually set the stack pointer anywhere here???
+
+.align 16
+GDT64:
+ GDTNull:
+ .quad 0
+ GDTKernelCode:
+ // base = 0x0, limit = 0x0
+ // flags: present, ring 0, executable, readable, 64bit
+ .word 0, 0
+ .byte 0, 0x9a, 0x20, 0
+ GDTEnd:
+ GDTPointer:
+ .word GDTEnd - GDT64 - 1 // size of gdt - 1
+ .long PHYSADDR(GDT64) // pointer to gdt
+
+.code32
+.data
+sys_stack: // set up a 4KB (0x1000-byte) stack
+ .align 4
+ .skip 0x1000
+sys_stack_bottom:
+
+.align 0x1000
+initial_page_table: // maps first 1GB of RAM to both 0x0000000000000000 and 0xffff800000000000
+pml4:
+ .quad PHYSADDR(pdpt) + 3 // 0x0000000000000000
+ .fill 255,8,0
+ .quad PHYSADDR(pdpt) + 3 // 0xffff800000000000
+ .fill 255,8,0
+pdpt:
+ .quad 0x0000000000000083 // 0
+ .fill 511,8,0
+
+
+
diff --git a/kernel/drivers/Submodules b/kernel/drivers/Submodules
new file mode 100644
index 0000000..dc26997
--- /dev/null
+++ b/kernel/drivers/Submodules
@@ -0,0 +1 @@
+tty disk
diff --git a/kernel/drivers/blockdev.c b/kernel/drivers/blockdev.c
new file mode 100644
index 0000000..5c8eb82
--- /dev/null
+++ b/kernel/drivers/blockdev.c
@@ -0,0 +1,96 @@
+#include "kernel.h"
+#include "util/debug.h"
+#include <drivers/disk/sata.h>
+
+#include "drivers/blockdev.h"
+
+#include "mm/pframe.h"
+#include "fs/s5fs/s5fs.h"
+
+#ifdef NO
+static mobj_ops_t blockdev_mobj_ops = {.get_pframe = NULL,
+ .fill_pframe = blockdev_fill_pframe,
+ .flush_pframe = blockdev_flush_pframe,
+ .destructor = NULL};
+#endif
+
+static list_t blockdevs = LIST_INITIALIZER(blockdevs);
+
+void blockdev_init() { sata_init(); }
+
+long blockdev_register(blockdev_t *dev)
+{
+ if (!dev || dev->bd_id == NULL_DEVID || !dev->bd_ops)
+ {
+ return -1;
+ }
+
+ list_iterate(&blockdevs, bd, blockdev_t, bd_link)
+ {
+ if (dev->bd_id == bd->bd_id)
+ {
+ return -1;
+ }
+ }
+
+#ifdef NO
+ mobj_init(&dev->bd_mobj, MOBJ_BLOCKDEV, &blockdev_mobj_ops);
+#endif
+
+ list_insert_tail(&blockdevs, &dev->bd_link);
+ return 0;
+}
+
+blockdev_t *blockdev_lookup(devid_t id)
+{
+ list_iterate(&blockdevs, bd, blockdev_t, bd_link)
+ {
+ if (id == bd->bd_id)
+ {
+ return bd;
+ }
+ }
+ return NULL;
+}
+
+#ifdef NO
+static long blockdev_fill_pframe(mobj_t *mobj, pframe_t *pf)
+{
+ KASSERT(mobj && pf);
+ KASSERT(pf->pf_pagenum <= (1UL << (8 * sizeof(blocknum_t))));
+ blockdev_t *bd = CONTAINER_OF(mobj, blockdev_t, bd_mobj);
+ return bd->bd_ops->read_block(bd, pf->pf_addr, (blocknum_t)pf->pf_pagenum,
+ 1);
+}
+
+static long blockdev_flush_pframe(mobj_t *mobj, pframe_t *pf)
+{
+ KASSERT(mobj && pf);
+ KASSERT(pf->pf_pagenum <= (1UL << (8 * sizeof(blocknum_t))));
+ dbg(DBG_S5FS, "writing disk block %lu\n", pf->pf_pagenum);
+ blockdev_t *bd = CONTAINER_OF(mobj, blockdev_t, bd_mobj);
+ return bd->bd_ops->write_block(bd, pf->pf_addr, (blocknum_t)pf->pf_pagenum,
+ 1);
+}
+#endif
+
+long blockdev_fill_pframe(mobj_t *mobj, pframe_t *pf)
+{
+ KASSERT(mobj && pf);
+ KASSERT(pf->pf_pagenum <= (1UL << (8 * sizeof(blocknum_t))));
+ blockdev_t *bd = CONTAINER_OF(mobj, s5fs_t, s5f_mobj)->s5f_bdev;
+ KASSERT(pf->pf_loc);
+ return bd->bd_ops->read_block(bd, pf->pf_addr, (blocknum_t)pf->pf_loc,
+ 1);
+}
+
+long blockdev_flush_pframe(mobj_t *mobj, pframe_t *pf)
+{
+ KASSERT(mobj && pf);
+ KASSERT(pf->pf_pagenum <= (1UL << (8 * sizeof(blocknum_t))));
+ dbg(DBG_S5FS, "writing disk block %lu\n", pf->pf_pagenum);
+ blockdev_t *bd = CONTAINER_OF(mobj, s5fs_t, s5f_mobj)->s5f_bdev;
+ KASSERT(pf->pf_loc);
+ return bd->bd_ops->write_block(bd, pf->pf_addr, (blocknum_t)pf->pf_loc,
+ 1);
+}
\ No newline at end of file
diff --git a/kernel/drivers/chardev.c b/kernel/drivers/chardev.c
new file mode 100644
index 0000000..b8eb146
--- /dev/null
+++ b/kernel/drivers/chardev.c
@@ -0,0 +1,43 @@
+#include "drivers/chardev.h"
+#include "drivers/memdevs.h"
+#include "drivers/tty/tty.h"
+#include "kernel.h"
+#include "util/debug.h"
+
+static list_t chardevs = LIST_INITIALIZER(chardevs);
+
+void chardev_init()
+{
+ tty_init();
+ memdevs_init();
+}
+
+long chardev_register(chardev_t *dev)
+{
+ if (!dev || (NULL_DEVID == dev->cd_id) || !(dev->cd_ops))
+ {
+ return -1;
+ }
+ list_iterate(&chardevs, cd, chardev_t, cd_link)
+ {
+ if (dev->cd_id == cd->cd_id)
+ {
+ return -1;
+ }
+ }
+ list_insert_tail(&chardevs, &dev->cd_link);
+ return 0;
+}
+
+chardev_t *chardev_lookup(devid_t id)
+{
+ list_iterate(&chardevs, cd, chardev_t, cd_link)
+ {
+ KASSERT(NULL_DEVID != cd->cd_id);
+ if (id == cd->cd_id)
+ {
+ return cd;
+ }
+ }
+ return NULL;
+}
diff --git a/kernel/drivers/cmos.c b/kernel/drivers/cmos.c
new file mode 100644
index 0000000..5f6ed34
--- /dev/null
+++ b/kernel/drivers/cmos.c
@@ -0,0 +1,78 @@
+#include "drivers/cmos.h"
+
+int cmos_update_flag_set()
+{
+ outb(CMOS_ADDR, CMOS_REG_STAT_A);
+ return (inb(CMOS_DATA) & 0x80);
+}
+
+unsigned char cmos_read_register(int reg)
+{
+ outb(CMOS_ADDR, reg);
+ return inb(CMOS_DATA);
+}
+
+int rtc_time_match(rtc_time_t a, rtc_time_t b)
+{
+ return (a.second == b.second) && (a.minute == b.minute) &&
+ (a.hour == b.hour) && (a.day == b.day) && (a.month == b.month) &&
+ (a.year == b.year) && (a.__century == b.__century);
+}
+
+rtc_time_t __get_rtc_time()
+{
+ rtc_time_t tm;
+
+ while (cmos_update_flag_set())
+ ;
+
+ tm.second = cmos_read_register(CMOS_REG_SECOND);
+ tm.minute = cmos_read_register(CMOS_REG_MINUTE);
+ tm.hour = cmos_read_register(CMOS_REG_HOUR);
+ tm.day = cmos_read_register(CMOS_REG_DAY);
+ tm.month = cmos_read_register(CMOS_REG_MONTH);
+ tm.year = cmos_read_register(CMOS_REG_YEAR);
+ tm.__century = cmos_read_register(CMOS_REG_CENTURY);
+
+ return tm;
+}
+
+/* Our ticks -> time calculation is suspect enough that we just get the time
+ * from the CMOS RTC. */
+rtc_time_t rtc_get_time()
+{
+ // Check the result of CMOS twice to ensure we didn't get a torn read.
+ rtc_time_t tm_a;
+ rtc_time_t tm_b;
+
+ do
+ {
+ tm_a = __get_rtc_time();
+ tm_b = __get_rtc_time();
+ } while (!rtc_time_match(tm_a, tm_b));
+
+ unsigned char cmos_settings = cmos_read_register(CMOS_REG_STAT_B);
+
+ // Convert from BCD
+ if (!(cmos_settings & 0x04))
+ {
+ tm_a.second = (tm_a.second & 0x0F) + ((tm_a.second / 16) * 10);
+ tm_a.minute = (tm_a.minute & 0x0F) + ((tm_a.minute / 16) * 10);
+ tm_a.hour = ((tm_a.hour & 0x0F) + (((tm_a.hour & 0x70) / 16) * 10)) |
+ (tm_a.hour & 0x80);
+ tm_a.day = (tm_a.day & 0x0F) + ((tm_a.day / 16) * 10);
+ tm_a.month = (tm_a.month & 0x0F) + ((tm_a.month / 16) * 10);
+ tm_a.year = (tm_a.year & 0x0F) + ((tm_a.year / 16) * 10);
+ tm_a.__century = (tm_a.__century & 0x0F) + ((tm_a.__century / 16) * 10);
+ }
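+ // Example: a BCD-encoded minute of 0x42 decodes to
+ // (0x42 & 0x0F) + (0x42 / 16) * 10 = 2 + 40 = 42.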
+
+ // Convert 12-hour clock to 24-hour clock:
+ if (!(cmos_settings & 0x02) && (tm_a.hour & 0x80))
+ {
+ tm_a.hour = ((tm_a.hour & 0x7F) + 12) % 24;
+ }
+
+ tm_a.year += (tm_a.__century * 100);
+
+ return tm_a;
+}
\ No newline at end of file
diff --git a/kernel/drivers/disk/sata.c b/kernel/drivers/disk/sata.c
new file mode 100644
index 0000000..00ac63d
--- /dev/null
+++ b/kernel/drivers/disk/sata.c
@@ -0,0 +1,512 @@
+#include <drivers/blockdev.h>
+#include <drivers/disk/ahci.h>
+#include <drivers/disk/sata.h>
+#include <drivers/pcie.h>
+#include <errno.h>
+#include <mm/kmalloc.h>
+#include <mm/page.h>
+#include <util/debug.h>
+#include <util/string.h>
+
+#define ENABLE_NATIVE_COMMAND_QUEUING 1
+
+#define bdev_to_ata_disk(bd) (CONTAINER_OF((bd), ata_disk_t, bdev))
+#define SATA_SECTORS_PER_BLOCK (SATA_BLOCK_SIZE / ATA_SECTOR_SIZE)
+
+#define SATA_PCI_CLASS 0x1 /* 0x1 = mass storage device */
+#define SATA_PCI_SUBCLASS 0x6 /* 0x6 = sata */
+#define SATA_AHCI_INTERFACE 0x1 /* 0x1 = ahci */
+
+static hba_t *hba; /* host bus adapter */
+
+/* If NCQ, this is an outstanding tag bitmap.
+ * If standard, this is an outstanding command slot bitmap. */
+static uint32_t outstanding_requests[AHCI_MAX_NUM_PORTS] = {0};
+
+/* Each command slot on each port has a waitqueue for a thread waiting on a
+ * command to finish execution. */
+static ktqueue_t outstanding_request_queues[AHCI_MAX_NUM_PORTS]
+ [AHCI_COMMAND_HEADERS_PER_LIST];
+
+/* Each port has a waitqueue for a thread waiting on a new command slot to open
+ * up. */
+static ktqueue_t command_slot_queues[AHCI_MAX_NUM_PORTS];
+
+long sata_read_block(blockdev_t *bdev, char *buf, blocknum_t block,
+ size_t block_count);
+long sata_write_block(blockdev_t *bdev, const char *buf, blocknum_t block,
+ size_t block_count);
+
+/* sata_disk_ops - Block device operations for SATA devices. */
+static blockdev_ops_t sata_disk_ops = {
+ .read_block = sata_read_block,
+ .write_block = sata_write_block,
+};
+
+/* find_cmdslot - Checks various bitmaps to find the lowest index command slot
+ * that is free for a given port. */
+inline long find_cmdslot(hba_port_t *port)
+{
+ /* From 1.3.1: Free command slot will have corresponding bit clear in both
+ * px_sact and px_ci. To be safe, also check against our local copy of
+ * outstanding requests, in case a recently completed command is clear in
+ * the port's actual descriptor, but has not been processed by Weenix yet.
+ */
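+ /* For example, if slots 0 and 2 are busy the combined bitmap is 0b0101;
+ * its complement ends in ...11111010, so ctz selects slot 1. */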
+ return __builtin_ctz(~(port->px_sact | port->px_ci |
+ outstanding_requests[PORT_INDEX(hba, port)]));
+}
+
+/* ensure_mapped - Wrapper for pt_map_range(). */
+void ensure_mapped(void *addr, size_t size)
+{
+ pt_map_range(pt_get(), (uintptr_t)PAGE_ALIGN_DOWN(addr) - PHYS_OFFSET,
+ (uintptr_t)PAGE_ALIGN_DOWN(addr),
+ (uintptr_t)PAGE_ALIGN_UP((uintptr_t)addr + size),
+ PT_WRITE | PT_PRESENT, PT_WRITE | PT_PRESENT);
+}
+
+kmutex_t because_qemu_doesnt_emulate_ahci_ncq_correctly;
+
+/* ahci_do_operation - Sends a command to the HBA to initiate a disk operation.
+ */
+long ahci_do_operation(hba_port_t *port, ssize_t lba, uint16_t count, void *buf,
+ int write)
+{
+ kmutex_lock(&because_qemu_doesnt_emulate_ahci_ncq_correctly);
+ KASSERT(count && buf);
+ // KASSERT(lba >= 0 && lba < (1L << 48));
+ KASSERT(lba >= 0 && lba < 1L << 23); //8388608
+
+ /* Obtain the port and the physical system memory in question. */
+ size_t port_index = PORT_INDEX(hba, port);
+
+ uint8_t ipl = intr_setipl(IPL_HIGH);
+
+ uint64_t physbuf = pt_virt_to_phys((uintptr_t)buf);
+
+ /* Get an available command slot. */
+ long command_slot;
+ while ((command_slot = find_cmdslot(port)) == -1)
+ {
+ sched_sleep_on(command_slot_queues + port_index);
+ }
+
+ /* Get corresponding command_header in the port's command_list. */
+ command_list_t *command_list =
+ (command_list_t *)(port->px_clb + PHYS_OFFSET);
+ command_header_t *command_header =
+ command_list->command_headers + command_slot;
+ memset(command_header, 0, sizeof(command_header_t));
+
+ /* Command setup: Header. */
+ command_header->cfl = sizeof(h2d_register_fis_t) / sizeof(uint32_t);
+ command_header->write = (uint8_t)write;
+ command_header->prdtl = (uint16_t)(
+ ALIGN_UP_POW_2(count, AHCI_SECTORS_PER_PRDT) / AHCI_SECTORS_PER_PRDT);
+ KASSERT(command_header->prdtl);
+
+ /* Command setup: Table. */
+ command_table_t *command_table =
+ (command_table_t *)(command_header->ctba + PHYS_OFFSET);
+ memset(command_table, 0, sizeof(command_table_t));
+
+ /* Command setup: Physical region descriptor table. */
+ prd_t *prdt = command_table->prdt;
+ /* Note that this loop body only runs when the transfer is too large for a
+ * single PRDT entry to describe (i.e., prdtl > 1). */
+ for (unsigned i = 0; i < command_header->prdtl - 1U; i++)
+ {
+ prdt->dbc = AHCI_MAX_PRDT_SIZE - 1;
+ prdt->dba = physbuf; /* Data from physical buffer. */
+ prdt->i = 1; /* Set interrupt on completion. */
+ physbuf +=
+ AHCI_MAX_PRDT_SIZE; /* Advance physical buffer for next prd. */
+ prdt++;
+ }
+ prdt->dbc = (uint32_t)(count % AHCI_SECTORS_PER_PRDT) * ATA_SECTOR_SIZE - 1;
+ prdt->dba = (uint64_t)physbuf;
+
+ /* Set up the particular h2d_register_fis command (the only one we use). */
+ h2d_register_fis_t *command_fis = &command_table->cfis.h2d_register_fis;
+ command_fis->fis_type = fis_type_h2d_register;
+ command_fis->c = 1;
+ command_fis->device = ATA_DEVICE_LBA_MODE;
+ command_fis->lba = (uint32_t)lba;
+ command_fis->lba_exp = (uint32_t)(lba >> 24);
+
+ /* NCQ: Allows the hardware to queue commands in its *own* order,
+ * independent of software delivery. */
+#if ENABLE_NATIVE_COMMAND_QUEUING
+ if (hba->ghc.cap.sncq)
+ {
+ /* For NCQ, sector count is stored in features. */
+ command_fis->features = (uint8_t)count;
+ command_fis->features_exp = (uint8_t)(count >> 8);
+
+ /* For NCQ, bits 7:3 of sector_count field specify NCQ tag. */
+ command_fis->sector_count = (uint16_t)(command_slot << 3);
+
+ /* Choose the appropriate NCQ read/write command. */
+ command_fis->command = (uint8_t)(write ? ATA_WRITE_FPDMA_QUEUED_COMMAND
+ : ATA_READ_FPDMA_QUEUED_COMMAND);
+ }
+ else
+ {
+ command_fis->sector_count = count;
+
+ command_fis->command = (uint8_t)(write ? ATA_WRITE_DMA_EXT_COMMAND
+ : ATA_READ_DMA_EXT_COMMAND);
+ }
+#else
+ /* For regular commands, simply set the command type and the sector count.
+ */
+ command_fis->sector_count = count;
+ command_fis->command =
+ (uint8_t)(write ? ATA_WRITE_DMA_EXT_COMMAND : ATA_READ_DMA_EXT_COMMAND);
+#endif
+
+ dbg(DBG_DISK, "initiating request on slot %ld to %s sectors [%lu, %lu)\n",
+ command_slot, write ? "write" : "read", lba, lba + count);
+
+ /* Locally mark that we sent out a command on the given command slot of the
+ * given port. */
+ outstanding_requests[port_index] |= (1 << command_slot);
+
+ /* Explicitly notify the port that a command is available for execution. */
+ port->px_sact |= (1 << command_slot);
+ port->px_ci |= (1 << command_slot);
+
+ /* Sleep until the command has been serviced. */
+ KASSERT(!curthr->kt_retval);
+
+ dbg(DBG_DISK,
+ "initiating request on slot %ld to %s sectors [%lu, %lu)...sleeping\n",
+ command_slot, write ? "write" : "read", lba, lba + count);
+ sched_sleep_on(outstanding_request_queues[port_index] + command_slot);
+ intr_setipl(ipl);
+ dbg(DBG_DISK, "completed request on slot %ld to %s sectors [%lu, %lu)\n",
+ command_slot, write ? "write" : "read", lba, lba + count);
+ kmutex_unlock(&because_qemu_doesnt_emulate_ahci_ncq_correctly);
+
+ long ret = (long)curthr->kt_retval;
+
+ return ret;
+}
+
+/* start_cmd - Start a port's DMA engines. See 10.3 of 1.3.1. */
+static inline void start_cmd(hba_port_t *port)
+{
+ while (port->px_cmd.cr)
+ ; /* Wait for command list DMA to stop running. */
+ port->px_cmd.fre = 1; /* Enable posting received FIS. */
+ port->px_cmd.st = 1; /* Enable processing the command list. */
+}
+
+/* stop_cmd - Stop a port's DMA engines. See 10.3 of 1.3.1. */
+static inline void stop_cmd(hba_port_t *port)
+{
+ port->px_cmd.st = 0; /* Stop processing the command list. */
+ while (port->px_cmd.cr)
+ ; /* Wait for command list DMA to stop running. */
+ port->px_cmd.fre = 0; /* Stop posting received FIS. */
+ while (port->px_cmd.fr)
+ ; /* Wait for FIS receive DMA to stop running. */
+}
+
+/* ahci_initialize_port */
+static void ahci_initialize_port(hba_port_t *port, unsigned int port_number,
+ uintptr_t ahci_base)
+{
+ dbg(DBG_DISK, "Initializing AHCI Port %d\n", port_number);
+
+ /* Pretty sure this is unnecessary. */
+ // port->px_serr = port->px_serr;
+
+ /* Make sure the port is not doing any DMA. */
+ stop_cmd(port);
+
+ /* Pretty sure this is unnecessary. */
+ // port->px_serr = (unsigned) -1;
+
+ /* Determine and set the command list and received FIS base addresses in the
+ * port's descriptor. */
+ command_list_t *command_list =
+ (command_list_t *)AHCI_COMMAND_LIST_ARRAY_BASE(ahci_base) + port_number;
+ received_fis_t *received_fis =
+ (received_fis_t *)AHCI_RECEIVED_FIS_ARRAY_BASE(ahci_base) + port_number;
+
+ port->px_clb = (uint64_t)command_list - PHYS_OFFSET;
+ port->px_fb = (uint64_t)received_fis - PHYS_OFFSET;
+ port->px_ie =
+ px_interrupt_enable_all_enabled; /* FLAG: Weenix does not need to enable
+ * all interrupts. Aside from dhrs and
+ * sdbs, I think we could either
+ * disable others,
+ * or tell the handler to panic if
+ * other interrupts are encountered. */
+ port->px_is =
+ px_interrupt_status_clear; /* RWC: Read / Write '1' to Clear. */
+
+ /* Determine and set the command tables.
+ * For each header, set its corresponding table and set up its queue. */
+ command_table_t *port_command_table_array_base =
+ (command_table_t *)AHCI_COMMAND_TABLE_ARRAY_BASE(ahci_base) +
+ port_number * AHCI_COMMAND_HEADERS_PER_LIST;
+ for (unsigned i = 0; i < AHCI_COMMAND_HEADERS_PER_LIST; i++)
+ {
+ command_list->command_headers[i].ctba =
+ (uint64_t)(port_command_table_array_base + i) - PHYS_OFFSET;
+ sched_queue_init(outstanding_request_queues[port_number] + i);
+ }
+
+ /* Start the queue to wait for an open command slot. */
+ sched_queue_init(command_slot_queues + port_number);
+
+ /* For SATA disks, allocate, setup, and register the disk / block device. */
+ if (port->px_sig == SATA_SIG_ATA)
+ {
+ dbg(DBG_DISK, "\tAdding SATA Disk Drive at Port %d\n", port_number);
+ ata_disk_t *disk = kmalloc(sizeof(ata_disk_t));
+ disk->port = port;
+ disk->bdev.bd_id = MKDEVID(DISK_MAJOR, port_number);
+ disk->bdev.bd_ops = &sata_disk_ops;
+ list_link_init(&disk->bdev.bd_link);
+ long ret = blockdev_register(&disk->bdev);
+ KASSERT(!ret);
+ }
+ else
+ {
+ /* FLAG: Should we just check sig first and save some work on unknown
+ * devices? */
+ dbg(DBG_DISK, "\tunknown device signature: 0x%x\n", port->px_sig);
+ }
+
+ /* Start the port's DMA engines and allow it to start servicing commands. */
+ start_cmd(port);
+
+ /* RWC: Write back to clear errors one more time. FLAG: WHY?! */
+ // port->px_serr = port->px_serr;
+}
+
+/* ahci_initialize_hba - Called at initialization to set up hba-related fields.
+ */
+void ahci_initialize_hba()
+{
+ kmutex_init(&because_qemu_doesnt_emulate_ahci_ncq_correctly);
+
+ /* Get the HBA controller for the SATA device. */
+ pcie_device_t *dev =
+ pcie_lookup(SATA_PCI_CLASS, SATA_PCI_SUBCLASS, SATA_AHCI_INTERFACE);
+ KASSERT(dev && "Could not find AHCI Controller");
+
+ /* Set bit 2 to enable memory and I/O requests.
+ * This actually doesn't seem to be necessary...
+ * See: 2.1.2, AHCI SATA 1.3.1. */
+ // dev->standard.command |= 0x4;
+
+ /* Traverse the pcie_device_t's capabilities to look for an MSI capability.
+ */
+ KASSERT(dev->standard.capabilities_ptr & PCI_CAPABILITY_PTR_MASK);
+ pci_capability_t *cap =
+ (pci_capability_t *)((uintptr_t)dev + (dev->standard.capabilities_ptr &
+ PCI_CAPABILITY_PTR_MASK));
+ while (cap->id != PCI_MSI_CAPABILITY_ID)
+ {
+ KASSERT(cap->next_cap && "couldn't find msi control for ahci device");
+ cap = (pci_capability_t *)((uintptr_t)dev +
+ (cap->next_cap & PCI_CAPABILITY_PTR_MASK));
+ }
+ msi_capability_t *msi_cap = (msi_capability_t *)cap;
+
+ /* Set MSI Enable to turn on MSI. */
+ msi_cap->control.msie = 1;
+
+ /* For more info on MSI, consult Intel 3A 10.11.1, and also 2.3 of the 1.3.1
+ * spec. */
+
+ /* Set up MSI for processor 1, with interrupt vector INTR_DISK_PRIMARY.
+ * TODO: Check MSI setup details to determine if MSI can be handled more
+ * efficiently in SMP.
+ */
+ if (msi_cap->control.c64)
+ {
+ msi_cap->address_data.ad64.addr = MSI_ADDRESS_FOR(1);
+ msi_cap->address_data.ad64.data = MSI_DATA_FOR(INTR_DISK_PRIMARY);
+ }
+ else
+ {
+ msi_cap->address_data.ad32.addr = MSI_ADDRESS_FOR(1);
+ msi_cap->address_data.ad32.data = MSI_DATA_FOR(INTR_DISK_PRIMARY);
+ }
+
+ dbg(DBG_DISK, "Found AHCI Controller\n");
+
+ /* bar = base address register. The last bar points to base memory for the
+ * host bus adapter. */
+ hba = (hba_t *)(PHYS_OFFSET + dev->standard.bar[5]);
+
+ /* Create a page table mapping for the hba. */
+ ensure_mapped(hba, sizeof(hba_t));
+
+ /* This seems to do nothing, because interrupt_line is never set, and MSIE
+ * is set. */
+ // intr_map(dev->standard.interrupt_line, INTR_DISK_PRIMARY);
+
+ /* Allocate space for what will become the command lists and received FISs
+ * for each port. */
+ uintptr_t ahci_base = (uintptr_t)page_alloc_n(AHCI_SIZE_PAGES);
+ memset((void *)ahci_base, 0, AHCI_SIZE_PAGES * PAGE_SIZE);
+
+ KASSERT(ahci_base);
+ /* Set AHCI Enable bit.
+ * Actually this bit appears to be read-only (see 3.1.2 AE and 3.1.1 SAM).
+ * I do get a "mis-aligned write" complaint when I try to manually set it.
+ */
+ KASSERT(hba->ghc.ghc.ae);
+
+ /* Temporarily clear Interrupt Enable bit before setting up ports. */
+ hba->ghc.ghc.ie = 0;
+
+ dbg(DBG_DISK, "ahci ncq supported: %s\n",
+ hba->ghc.cap.sncq ? "true" : "false");
+
+ /* Initialize each of the available ports. */
+ uint32_t ports_implemented = hba->ghc.pi;
+ KASSERT(ports_implemented);
+ while (ports_implemented)
+ {
+ unsigned port_number = __builtin_ctz(ports_implemented);
+ ports_implemented &= ~(1 << port_number);
+ ahci_initialize_port(hba->ports + port_number, port_number, ahci_base);
+ }
+
+ /* Clear any outstanding interrupts from any ports. */
+ hba->ghc.is = (uint32_t)-1;
+
+ /* Restore Interrupt Enable bit. */
+ hba->ghc.ghc.ie = 1;
+}
+
+/* ahci_interrupt_handler - Service an interrupt that was raised by the HBA.
+ */
+static long ahci_interrupt_handler(regs_t *regs)
+{
+ /* Check interrupt status bitmap for ports to service. */
+ while (hba->ghc.is)
+ {
+ /* Get a port from the global interrupt status bitmap. */
+ unsigned port_index = __builtin_ctz(hba->ghc.is);
+
+ /* Get the port descriptor from the HBA's ports array. */
+ hba_port_t *port = hba->ports + port_index;
+
+ /* Beware: If a register is marked "RWC" in the spec, you must clear it
+ * by writing 1. This is rather understated in the specification. */
+
+ /* Clear the cause of the interrupt.
+ * See 5.6.2 and 5.6.4 in the 1.3.1 spec for confirmation of the FIS and
+ * corresponding interrupt that are used depending on the type of
+ * command.
+ */
+
+#if ENABLE_NATIVE_COMMAND_QUEUING
+ if (hba->ghc.cap.sncq)
+ {
+ KASSERT(port->px_is.bits.sdbs);
+ port->px_is.bits.sdbs = 1;
+ }
+ else
+ {
+ KASSERT(port->px_is.bits.dhrs);
+ port->px_is.bits.dhrs = 1;
+ }
+#else
+ KASSERT(port->px_is.bits.dhrs);
+ port->px_is.bits.dhrs = 1;
+#endif
+
+ /* Clear the port's bit on the global interrupt status bitmap, to
+ * indicate we have handled it. */
+ /* Note: Changed from ~ to regular, because this register is RWC. */
+ hba->ghc.is &= (1 << port_index);
+
+ /* Get the list of commands still outstanding. */
+#if ENABLE_NATIVE_COMMAND_QUEUING
+ /* If NCQ, use SACT register. */
+ uint32_t active = hba->ghc.cap.sncq ? port->px_sact : port->px_ci;
+#else
+ /* If not NCQ, use CI register. */
+ uint32_t active = port->px_ci;
+#endif
+
+ /* Compare the active commands against those we actually sent out to get
+ * completed commands. */
+ uint32_t completed = outstanding_requests[port_index] &
+ ~(outstanding_requests[port_index] & active);
+ /* Handle each completed command: */
+ while (completed)
+ {
+ uint32_t slot = __builtin_ctz(completed);
+
+ /* Wake up the thread that was waiting on that command. */
+ kthread_t *thr;
+ sched_wakeup_on(&outstanding_request_queues[port_index][slot],
+ &thr);
+
+ /* Mark the command as available. */
+ completed &= ~(1 << slot);
+ outstanding_requests[port_index] &= ~(1 << slot);
+
+ /* TODO: Wake up threads that were waiting for a command slot to
+ * free up on the port. */
+ }
+ }
+ return 0;
+}
+
+void sata_init()
+{
+ intr_register(INTR_DISK_PRIMARY, ahci_interrupt_handler);
+ ahci_initialize_hba();
+}
+
+/**
+ * Read the given number of blocks from a block device starting at
+ * a given block number into a buffer.
+ *
+ * To do this, you will need to call ahci_do_operation(). SATA devices
+ * conduct operations in terms of sectors, rather than blocks, thus
+ * you will need to convert the arguments passed in to be in terms of
+ * sectors.
+ *
+ * @param bdev block device to read from
+ * @param buf buffer to write to
+ * @param block block number to start reading at
+ * @param block_count the number of blocks to read
+ * @return 0 on success and <0 on error
+ */
+long sata_read_block(blockdev_t *bdev, char *buf, blocknum_t block,
+ size_t block_count)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+ return -1;
+}
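+/* A minimal sketch of the block-to-sector conversion described above, kept as
+ * a comment so the stub above stays unimplemented. It assumes the
+ * bdev_to_ata_disk() and SATA_SECTORS_PER_BLOCK helpers defined at the top of
+ * this file and is an illustration, not necessarily the intended solution:
+ *
+ * ata_disk_t *disk = bdev_to_ata_disk(bdev);
+ * return ahci_do_operation(disk->port,
+ * (ssize_t)block * SATA_SECTORS_PER_BLOCK,
+ * (uint16_t)(block_count * SATA_SECTORS_PER_BLOCK),
+ * buf, 0);
+ */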
+
+/**
+ * Writes a given number of blocks from a buffer to a block device
+ * starting at a given block. This function should be very similar to what
+ * is done in sata_read_block, save for the write argument that is passed to
+ * ahci_do_operation().
+ *
+ * @param bdev block device to write to
+ * @param buf buffer to read from
+ * @param block block number to start writing at
+ * @param block_count the number of blocks to write
+ * @return 0 on success and <0 on error
+ */
+long sata_write_block(blockdev_t *bdev, const char *buf, blocknum_t block,
+ size_t block_count)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+ return -1;
+}
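+/* A write sketch would mirror the read sketch above, passing 1 for the write
+ * argument and casting away const on buf before handing it to
+ * ahci_do_operation(); again, only an illustration. */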
diff --git a/kernel/drivers/keyboard.c b/kernel/drivers/keyboard.c
new file mode 100644
index 0000000..c0c4b5e
--- /dev/null
+++ b/kernel/drivers/keyboard.c
@@ -0,0 +1,208 @@
+#include "drivers/keyboard.h"
+
+#include "drivers/tty/tty.h"
+
+#include "main/interrupt.h"
+#include "main/io.h"
+
+#define IRQ_KEYBOARD 1
+
+/* Indicates that one of these is "being held down" */
+#define SHIFT_MASK 0x1
+#define CTRL_MASK 0x2
+/* Indicates that an escape code was the previous key received */
+#define ESC_MASK 0x4
+static int curmask = 0;
+
+/* Where to read from to get scancodes */
+#define KEYBOARD_IN_PORT 0x60
+#define KEYBOARD_CMD_PORT 0x61
+
+/* Scancodes for special keys */
+#define LSHIFT 0x2a
+#define RSHIFT 0x36
+#define CTRL 0x1d
+/* Right ctrl is escaped */
+/* Our keyboard driver totally ignores ALT */
+
+#define ESC0 0xe0
+#define ESC1 0xe1
+
+/* If the scancode & BREAK_MASK, it's a break code; otherwise, it's a make code
+ */
+#define BREAK_MASK 0x80
+
+#define NORMAL_KEY_HIGH 0x39
+
+/* Some sneaky value to indicate we don't actually pass anything to the terminal
+ */
+#define NO_CHAR 0xff
+
+#define F1_SCANCODE 0x3b
+#define F12_SCANCODE (F1_SCANCODE + 11)
+
+/* Scancode tables copied from
+ http://www.win.tue.nl/~aeb/linux/kbd/scancodes-1.html */
+
+/* The scancode table for "normal" scancodes - from 02 to 39 */
+/* Unsupported chars are symbolized by \0 */
+static const char *normal_scancodes =
+ "\0" /* Error */
+ "\e" /* Escape key */
+ "1234567890-=" /* Top row */
+ "\b" /* Backspace */
+ "\tqwertyuiop[]\n" /* Next row - ish */
+ "\0" /* Left ctrl */
+ "asdfghjkl;\'`"
+ "\0" /* Lshift */
+ "\\"
+ "zxcvbnm,./"
+ "\0\0\0" /* Rshift, prtscrn, Lalt */
+ " "; /* Space bar */
+/* As above, but if shift is pressed */
+static const char *shift_scancodes =
+ "\0"
+ "\e"
+ "!@#$%^&*()_+"
+ "\b"
+ "\tQWERTYUIOP{}\n"
+ "\0"
+ "ASDFGHJKL:\"~"
+ "\0"
+ "|"
+ "ZXCVBNM<>?"
+ "\0\0\0"
+ " ";
+
+static keyboard_char_handler_t keyboard_handler = NULL;
+
+/* This is the function we register with the interrupt handler - it reads the
+ * scancode and, if appropriate, calls the tty's receive_char function */
+static long keyboard_intr_handler(regs_t *regs)
+{
+ uint8_t sc; /* The scancode we receive */
+ int break_code; /* Was it a break code */
+ /* the resulting character (NO_CHAR means it will be ignored) */
+ uint8_t c = NO_CHAR;
+ /* Get the scancode */
+ sc = inb(KEYBOARD_IN_PORT);
+ /* Separate out the break code */
+ break_code = sc & BREAK_MASK;
+ sc &= ~BREAK_MASK;
+
+ /* dbg(DBG_KB, ("scancode 0x%x, break 0x%x\n", sc, break_code)); */
+
+ /* The order of this conditional is very, very tricky - be careful when
+ * editing! */
+
+ /* Most break codes are ignored */
+ if (break_code)
+ {
+ /* Shift/ctrl release */
+ if (sc == LSHIFT || sc == RSHIFT)
+ {
+ curmask &= ~SHIFT_MASK;
+ }
+ else if (sc == CTRL)
+ {
+ curmask &= ~CTRL_MASK;
+ }
+ }
+ /* Check for the special keys */
+ else if (sc == LSHIFT || sc == RSHIFT)
+ {
+ curmask |= SHIFT_MASK;
+ }
+ else if (sc == CTRL)
+ {
+ curmask |= CTRL_MASK;
+ }
+ /* All escaped keys past this point (anything except right shift and right
+ * ctrl) will be ignored */
+ else if (curmask & ESC_MASK)
+ {
+ /* Escape mask only lasts for one key */
+ curmask &= ~ESC_MASK;
+ }
+ /* Now check for escape code */
+ else if (sc == ESC0 || sc == ESC1)
+ {
+ curmask |= ESC_MASK;
+ }
+
+ else if (sc >= F1_SCANCODE && sc <= F12_SCANCODE)
+ {
+ c = (uint8_t)(F1 + (sc - F1_SCANCODE));
+ }
+ /* Check for Ctrl+Shift + the scroll-down key, which scrolls down a page */
+ else if ((curmask & CTRL_MASK) && (curmask & SHIFT_MASK) &&
+ sc == SCROLL_DOWN)
+ {
+ c = SCROLL_DOWN_PAGE;
+ }
+
+ else if ((curmask & CTRL_MASK) && (curmask & SHIFT_MASK) &&
+ sc == SCROLL_UP)
+ {
+ c = SCROLL_UP_PAGE;
+ }
+
+ else if ((curmask & CTRL_MASK) && sc == SCROLL_DOWN)
+ {
+ c = SCROLL_DOWN;
+ }
+ /* Check for Ctrl + the scroll-up key, which scrolls up */
+ else if ((curmask & CTRL_MASK) && sc == SCROLL_UP)
+ {
+ c = SCROLL_UP;
+ }
+ /* Ignore scancodes too high to be found in the lookup tables */
+ else if (sc > NORMAL_KEY_HIGH)
+ {
+ /* ignore */
+ }
+ /* Control characters */
+ else if (curmask & CTRL_MASK)
+ {
+ /* Because of the way ASCII works, the control chars are based on the
+ * values of the shifted chars produced without control */
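+ /* Example: Ctrl+C: shift_scancodes yields 'C' (0x43), and
+ * 0x43 - 0x40 = 0x03 (ETX). */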
+ c = (uint8_t)shift_scancodes[sc];
+ /* Range of chars that have corresponding control chars */
+ if (c >= 0x40 && c < 0x60)
+ {
+ c -= 0x40;
+ }
+ else
+ {
+ c = NO_CHAR;
+ }
+ }
+ /* Capitals */
+ else if (curmask & SHIFT_MASK)
+ {
+ c = (uint8_t)shift_scancodes[sc];
+ }
+ else
+ {
+ c = (uint8_t)normal_scancodes[sc];
+ }
+
+ if (c != NO_CHAR)
+ {
+ keyboard_handler(c);
+ }
+ else
+ {
+ // panic("get rid of me: char was: %c (%d) (%x)\n", c, c, c);
+ }
+ dbg(DBG_KB, "received scancode 0x%x; resolved to char 0x%x\n", sc, c);
+ return 0;
+}
+
+void keyboard_init(keyboard_char_handler_t handler)
+{
+ intr_map(IRQ_KEYBOARD, INTR_KEYBOARD);
+ intr_register(INTR_KEYBOARD, keyboard_intr_handler);
+ keyboard_handler = handler;
+}
diff --git a/kernel/drivers/memdevs.c b/kernel/drivers/memdevs.c
new file mode 100644
index 0000000..4898614
--- /dev/null
+++ b/kernel/drivers/memdevs.c
@@ -0,0 +1,108 @@
+#include "errno.h"
+#include "globals.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+#include "mm/kmalloc.h"
+#include "mm/mobj.h"
+
+#include "drivers/chardev.h"
+
+#include "vm/anon.h"
+
+#include "fs/vnode.h"
+
+static ssize_t null_read(chardev_t *dev, size_t pos, void *buf, size_t count);
+
+static ssize_t null_write(chardev_t *dev, size_t pos, const void *buf,
+ size_t count);
+
+static ssize_t zero_read(chardev_t *dev, size_t pos, void *buf, size_t count);
+
+static long zero_mmap(vnode_t *file, mobj_t **ret);
+
+chardev_ops_t null_dev_ops = {.read = null_read,
+ .write = null_write,
+ .mmap = NULL,
+ .fill_pframe = NULL,
+ .flush_pframe = NULL};
+
+chardev_ops_t zero_dev_ops = {.read = zero_read,
+ .write = null_write,
+ .mmap = zero_mmap,
+ .fill_pframe = NULL,
+ .flush_pframe = NULL};
+
+/**
+ * The char device code needs to know about these mem devices, so create
+ * chardev_t's for null and zero, fill them in, and register them.
+ *
+ * Use kmalloc, MEM_NULL_DEVID, MEM_ZERO_DEVID, and chardev_register.
+ * See dev.h for device ids to use with MKDEVID.
+ */
+void memdevs_init()
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+}
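+/* A minimal sketch of the registration described above, assuming
+ * MEM_NULL_DEVID / MEM_ZERO_DEVID come from dev.h as the docstring says and
+ * that list_link_init() is available here as it is elsewhere in the drivers;
+ * kmalloc error handling is omitted, so treat this as an illustration only:
+ *
+ * chardev_t *null_dev = kmalloc(sizeof(chardev_t));
+ * null_dev->cd_id = MEM_NULL_DEVID;
+ * null_dev->cd_ops = &null_dev_ops;
+ * list_link_init(&null_dev->cd_link);
+ * chardev_register(null_dev);
+ * // ...and likewise for MEM_ZERO_DEVID with zero_dev_ops.
+ */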
+
+/**
+ * Reads a given number of bytes from the null device into a
+ * buffer. Any read performed on the null device should read 0 bytes.
+ *
+ * @param dev the null device
+ * @param pos the offset to read from; should be ignored
+ * @param buf the buffer to read into
+ * @param count the maximum number of bytes to read
+ * @return the number of bytes read, which should be 0
+ */
+static ssize_t null_read(chardev_t *dev, size_t pos, void *buf, size_t count)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+ return -ENOMEM;
+}
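+/* Sketch: per the contract above, a complete null_read can simply
+ * `return 0;` without touching buf. */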
+
+/**
+ * Writes a given number of bytes to the null device from a
+ * buffer. Writing to the null device should _ALWAYS_ be successful
+ * and write the maximum number of bytes.
+ *
+ * @param dev the null device
+ * @param pos the offset to write to; should be ignored
+ * @param buf buffer to read from
+ * @param count the maximum number of bytes to write
+ * @return the number of bytes written, which should be `count`
+ */
+static ssize_t null_write(chardev_t *dev, size_t pos, const void *buf,
+ size_t count)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+ return -ENOMEM;
+}
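+/* Sketch: the write always "succeeds", so returning (ssize_t)count satisfies
+ * the contract above. */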
+
+/**
+ * Reads a given number of bytes from the zero device into a
+ * buffer. Any read from the zero device should be a series of zeros.
+ *
+ * @param dev the zero device
+ * @param pos the offset to start reading from; should be ignored
+ * @param buf the buffer to write to
+ * @param count the maximum number of bytes to read
+ * @return the number of bytes read. Hint: should always read the maximum
+ * number of bytes
+ */
+static ssize_t zero_read(chardev_t *dev, size_t pos, void *buf, size_t count)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+ return 0;
+}
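+/* Sketch: `memset(buf, 0, count); return (ssize_t)count;` fills the buffer
+ * with zeros per the contract above (illustration only). */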
+
+/**
+ * Unlike in s5fs_mmap(), you can't necessarily use the file's underlying mobj.
+ * Instead, you should simply provide an anonymous object to ret.
+ */
+static long zero_mmap(vnode_t *file, mobj_t **ret)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
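+/* Sketch: assign *ret to a newly created anonymous object from vm/anon.h and
+ * return 0; no constructor name is assumed here since none appears in this
+ * file. */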
diff --git a/kernel/drivers/pcie.c b/kernel/drivers/pcie.c
new file mode 100644
index 0000000..6003eab
--- /dev/null
+++ b/kernel/drivers/pcie.c
@@ -0,0 +1,77 @@
+#include "drivers/pcie.h"
+#include <drivers/pcie.h>
+#include <main/acpi.h>
+#include <mm/kmalloc.h>
+#include <mm/pagetable.h>
+#include <util/debug.h>
+
+#define MCFG_SIGNATURE (*(uint32_t *)"MCFG")
+static uintptr_t pcie_base_addr;
+
+typedef struct pcie_table
+{
+ pcie_device_t devices[PCI_NUM_BUSES][PCI_NUM_DEVICES_PER_BUS]
+ [PCI_NUM_FUNCTIONS_PER_DEVICE];
+} pcie_table_t;
+
+static pcie_table_t *pcie_table;
+
+#define PCIE_DEV(bus, device, func) \
+ (&pcie_table->devices[(bus)][(device)][(func)])
+static list_t pcie_wrapper_list;
+
+void pci_init(void)
+{
+ // TODO document; needs -machine type=q35 flag in qemu!
+ void *table = acpi_table(MCFG_SIGNATURE, 0);
+ KASSERT(table);
+ pcie_base_addr = *(uintptr_t *)((uintptr_t)table + 44) + PHYS_OFFSET;
+ pcie_table = (pcie_table_t *)pcie_base_addr;
+ pt_map_range(pt_get(), pcie_base_addr - PHYS_OFFSET, pcie_base_addr,
+ pcie_base_addr + PAGE_SIZE_1GB, PT_WRITE | PT_PRESENT,
+ PT_WRITE | PT_PRESENT);
+
+ list_init(&pcie_wrapper_list);
+ for (unsigned bus = 0; bus < PCI_NUM_BUSES; bus++)
+ {
+ for (unsigned device = 0; device < PCI_NUM_DEVICES_PER_BUS; device++)
+ {
+ /* The multi-function bit selects how many functions to probe; the bound
+ * must match the function dimension of the device table. */
+ unsigned int max_functions =
+ (PCIE_DEV(bus, device, 0)->standard.header_type & 0x80)
+ ? PCI_NUM_FUNCTIONS_PER_DEVICE
+ : 1;
+ for (unsigned function = 0; function < max_functions; function++)
+ {
+ pcie_device_t *dev = PCIE_DEV(bus, device, function);
+ if (!dev->standard.vendor_id ||
+ dev->standard.vendor_id == (uint16_t)-1)
+ continue;
+ pcie_device_wrapper_t *wrapper =
+ kmalloc(sizeof(pcie_device_wrapper_t));
+ wrapper->dev = dev;
+ wrapper->class = dev->standard.class;
+ wrapper->subclass = dev->standard.subclass;
+ wrapper->interface = dev->standard.prog_if;
+ list_link_init(&wrapper->link);
+ list_insert_tail(&pcie_wrapper_list, &wrapper->link);
+ }
+ }
+ }
+}
+
+pcie_device_t *pcie_lookup(uint8_t class, uint8_t subclass, uint8_t interface)
+{
+ list_iterate(&pcie_wrapper_list, wrapper, pcie_device_wrapper_t, link)
+ {
+ /* verify the class subclass and interface are correct */
+ if (((class == PCI_LOOKUP_WILDCARD) || (wrapper->class == class)) &&
+ ((subclass == PCI_LOOKUP_WILDCARD) ||
+ (wrapper->subclass == subclass)) &&
+ ((interface == PCI_LOOKUP_WILDCARD) ||
+ (wrapper->interface == interface)))
+ {
+ return wrapper->dev;
+ }
+ }
+ return NULL;
+}
diff --git a/kernel/drivers/screen.c b/kernel/drivers/screen.c
new file mode 100644
index 0000000..a14ad08
--- /dev/null
+++ b/kernel/drivers/screen.c
@@ -0,0 +1,513 @@
+#include <boot/config.h>
+#include <boot/multiboot_macros.h>
+#include <drivers/screen.h>
+#include <multiboot.h>
+#include <types.h>
+#include <util/debug.h>
+#include <util/string.h>
+
+#ifdef __VGABUF___
+
+#define BITMAP_HEIGHT 13
+
+// https://stackoverflow.com/questions/2156572/c-header-file-with-bitmapped-fonts
+unsigned const char bitmap_letters[95][BITMAP_HEIGHT] = {
+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00}, // space :32
+ {0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18}, // ! :33
+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36,
+ 0x36},
+ {0x00, 0x00, 0x00, 0x66, 0x66, 0xff, 0x66, 0x66, 0xff, 0x66, 0x66, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x18, 0x7e, 0xff, 0x1b, 0x1f, 0x7e, 0xf8, 0xd8, 0xff, 0x7e,
+ 0x18},
+ {0x00, 0x00, 0x0e, 0x1b, 0xdb, 0x6e, 0x30, 0x18, 0x0c, 0x76, 0xdb, 0xd8,
+ 0x70},
+ {0x00, 0x00, 0x7f, 0xc6, 0xcf, 0xd8, 0x70, 0x70, 0xd8, 0xcc, 0xcc, 0x6c,
+ 0x38},
+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x1c, 0x0c,
+ 0x0e},
+ {0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x18,
+ 0x0c},
+ {0x00, 0x00, 0x30, 0x18, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18,
+ 0x30},
+ {0x00, 0x00, 0x00, 0x00, 0x99, 0x5a, 0x3c, 0xff, 0x3c, 0x5a, 0x99, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0xff, 0xff, 0x18, 0x18, 0x18, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x30, 0x18, 0x1c, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x60, 0x60, 0x30, 0x30, 0x18, 0x18, 0x0c, 0x0c, 0x06, 0x06, 0x03,
+ 0x03},
+ {0x00, 0x00, 0x3c, 0x66, 0xc3, 0xe3, 0xf3, 0xdb, 0xcf, 0xc7, 0xc3, 0x66,
+ 0x3c},
+ {0x00, 0x00, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x78, 0x38,
+ 0x18},
+ {0x00, 0x00, 0xff, 0xc0, 0xc0, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x03, 0xe7,
+ 0x7e},
+ {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x07, 0x7e, 0x07, 0x03, 0x03, 0xe7,
+ 0x7e},
+ {0x00, 0x00, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xff, 0xcc, 0x6c, 0x3c, 0x1c,
+ 0x0c},
+ {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x07, 0xfe, 0xc0, 0xc0, 0xc0, 0xc0,
+ 0xff},
+ {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xc7, 0xfe, 0xc0, 0xc0, 0xc0, 0xe7,
+ 0x7e},
+ {0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x18, 0x0c, 0x06, 0x03, 0x03, 0x03,
+ 0xff},
+ {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xe7, 0x7e, 0xe7, 0xc3, 0xc3, 0xe7,
+ 0x7e},
+ {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x03, 0x7f, 0xe7, 0xc3, 0xc3, 0xe7,
+ 0x7e},
+ {0x00, 0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x30, 0x18, 0x1c, 0x1c, 0x00, 0x00, 0x1c, 0x1c, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x60, 0x30, 0x18, 0x0c,
+ 0x06},
+ {0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x03, 0x06, 0x0c, 0x18, 0x30,
+ 0x60},
+ {0x00, 0x00, 0x18, 0x00, 0x00, 0x18, 0x18, 0x0c, 0x06, 0x03, 0xc3, 0xc3,
+ 0x7e},
+ {0x00, 0x00, 0x3f, 0x60, 0xcf, 0xdb, 0xd3, 0xdd, 0xc3, 0x7e, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xff, 0xc3, 0xc3, 0xc3, 0x66, 0x3c,
+ 0x18},
+ {0x00, 0x00, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7,
+ 0xfe},
+ {0x00, 0x00, 0x7e, 0xe7, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xe7,
+ 0x7e},
+ {0x00, 0x00, 0xfc, 0xce, 0xc7, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc7, 0xce,
+ 0xfc},
+ {0x00, 0x00, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xfc, 0xc0, 0xc0, 0xc0, 0xc0,
+ 0xff},
+ {0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xfc, 0xc0, 0xc0, 0xc0,
+ 0xff},
+ {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xcf, 0xc0, 0xc0, 0xc0, 0xc0, 0xe7,
+ 0x7e},
+ {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xff, 0xc3, 0xc3, 0xc3, 0xc3,
+ 0xc3},
+ {0x00, 0x00, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x7e},
+ {0x00, 0x00, 0x7c, 0xee, 0xc6, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06},
+ {0x00, 0x00, 0xc3, 0xc6, 0xcc, 0xd8, 0xf0, 0xe0, 0xf0, 0xd8, 0xcc, 0xc6,
+ 0xc3},
+ {0x00, 0x00, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
+ 0xc0},
+ {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xff, 0xff, 0xe7,
+ 0xc3},
+ {0x00, 0x00, 0xc7, 0xc7, 0xcf, 0xcf, 0xdf, 0xdb, 0xfb, 0xf3, 0xf3, 0xe3,
+ 0xe3},
+ {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xe7,
+ 0x7e},
+ {0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7,
+ 0xfe},
+ {0x00, 0x00, 0x3f, 0x6e, 0xdf, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66,
+ 0x3c},
+ {0x00, 0x00, 0xc3, 0xc6, 0xcc, 0xd8, 0xf0, 0xfe, 0xc7, 0xc3, 0xc3, 0xc7,
+ 0xfe},
+ {0x00, 0x00, 0x7e, 0xe7, 0x03, 0x03, 0x07, 0x7e, 0xe0, 0xc0, 0xc0, 0xe7,
+ 0x7e},
+ {0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0xff},
+ {0x00, 0x00, 0x7e, 0xe7, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3,
+ 0xc3},
+ {0x00, 0x00, 0x18, 0x3c, 0x3c, 0x66, 0x66, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3,
+ 0xc3},
+ {0x00, 0x00, 0xc3, 0xe7, 0xff, 0xff, 0xdb, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3,
+ 0xc3},
+ {0x00, 0x00, 0xc3, 0x66, 0x66, 0x3c, 0x3c, 0x18, 0x3c, 0x3c, 0x66, 0x66,
+ 0xc3},
+ {0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x66, 0x66,
+ 0xc3},
+ {0x00, 0x00, 0xff, 0xc0, 0xc0, 0x60, 0x30, 0x7e, 0x0c, 0x06, 0x03, 0x03,
+ 0xff},
+ {0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ 0x3c},
+ {0x00, 0x03, 0x03, 0x06, 0x06, 0x0c, 0x0c, 0x18, 0x18, 0x30, 0x30, 0x60,
+ 0x60},
+ {0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+ 0x3c},
+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c,
+ 0x18},
+ {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x30,
+ 0x70},
+ {0x00, 0x00, 0x7f, 0xc3, 0xc3, 0x7f, 0x03, 0xc3, 0x7e, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0xfe, 0xc3, 0xc3, 0xc3, 0xc3, 0xfe, 0xc0, 0xc0, 0xc0, 0xc0,
+ 0xc0},
+ {0x00, 0x00, 0x7e, 0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x7f, 0xc3, 0xc3, 0xc3, 0xc3, 0x7f, 0x03, 0x03, 0x03, 0x03,
+ 0x03},
+ {0x00, 0x00, 0x7f, 0xc0, 0xc0, 0xfe, 0xc3, 0xc3, 0x7e, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x30, 0x30, 0x30, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x30, 0x33,
+ 0x1e},
+ {0x7e, 0xc3, 0x03, 0x03, 0x7f, 0xc3, 0xc3, 0xc3, 0x7e, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xfe, 0xc0, 0xc0, 0xc0,
+ 0xc0},
+ {0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x18,
+ 0x00},
+ {0x38, 0x6c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x00, 0x00, 0x0c,
+ 0x00},
+ {0x00, 0x00, 0xc6, 0xcc, 0xf8, 0xf0, 0xd8, 0xcc, 0xc6, 0xc0, 0xc0, 0xc0,
+ 0xc0},
+ {0x00, 0x00, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x78},
+ {0x00, 0x00, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0xfe, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xfc, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00,
+ 0x00},
+ {0xc0, 0xc0, 0xc0, 0xfe, 0xc3, 0xc3, 0xc3, 0xc3, 0xfe, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x03, 0x03, 0x03, 0x7f, 0xc3, 0xc3, 0xc3, 0xc3, 0x7f, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xe0, 0xfe, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0xfe, 0x03, 0x03, 0x7e, 0xc0, 0xc0, 0x7f, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x1c, 0x36, 0x30, 0x30, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x30,
+ 0x00},
+ {0x00, 0x00, 0x7e, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x18, 0x3c, 0x3c, 0x66, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0xc3, 0xe7, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, 0x00, 0x00, 0x00,
+ 0x00},
+ {0xc0, 0x60, 0x60, 0x30, 0x18, 0x3c, 0x66, 0x66, 0xc3, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0xff, 0x60, 0x30, 0x18, 0x0c, 0x06, 0xff, 0x00, 0x00, 0x00,
+ 0x00},
+ {0x00, 0x00, 0x0f, 0x18, 0x18, 0x18, 0x38, 0xf0, 0x38, 0x18, 0x18, 0x18,
+ 0x0f},
+ {0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+ 0x18},
+ {0x00, 0x00, 0xf0, 0x18, 0x18, 0x18, 0x1c, 0x0f, 0x1c, 0x18, 0x18, 0x18,
+ 0xf0},
+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x8f, 0xf1, 0x60, 0x00, 0x00,
+ 0x00},
+};
+
+#define DOUBLE_BUFFERING 0
+
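+/* Branchless select: evaluates to x when condition is nonzero and y otherwise,
+ * avoiding a per-pixel branch in the drawing loops below. */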
+#define BITWISE_TERNARY(condition, x, y) \
+ (!!(condition) * (x) + !(condition) * (y))
+
+static uint32_t *fb;
+static uint32_t fb_width;
+static uint32_t fb_height;
+static uint32_t fb_pitch;
+
+static uint32_t *fb_buffer;
+
+void screen_init()
+{
+ static long inited = 0;
+ if (inited)
+ return;
+ inited = 1;
+
+ struct multiboot_tag_framebuffer *fb_tag = NULL;
+ for (struct multiboot_tag *tag =
+ (struct multiboot_tag *)((uintptr_t)(mb_tag + 1) + PHYS_OFFSET);
+ tag->type != MULTIBOOT_TAG_TYPE_END; tag += TAG_SIZE(tag->size))
+ {
+ if (tag->type != MULTIBOOT_TAG_TYPE_FRAMEBUFFER)
+ {
+ continue;
+ }
+ fb_tag = (struct multiboot_tag_framebuffer *)tag;
+ break;
+ }
+ KASSERT(fb_tag);
+
+ fb = (uint32_t *)(PHYS_OFFSET + fb_tag->common.framebuffer_addr);
+ fb_width = fb_tag->common.framebuffer_width;
+ fb_height = fb_tag->common.framebuffer_height;
+ fb_pitch = fb_tag->common.framebuffer_pitch;
+ KASSERT(fb_pitch == fb_width * sizeof(uint32_t));
+ KASSERT(fb_tag->common.framebuffer_bpp == 32);
+ KASSERT(fb_tag->common.framebuffer_type == 1);
+ KASSERT(fb_tag->framebuffer_red_field_position == 0x10);
+ KASSERT(fb_tag->framebuffer_green_field_position == 0x08);
+ KASSERT(fb_tag->framebuffer_blue_field_position == 0x00);
+ KASSERT(fb_tag->framebuffer_red_mask_size);
+ KASSERT(fb_tag->framebuffer_green_mask_size == 8);
+ KASSERT(fb_tag->framebuffer_blue_mask_size == 8);
+
+ size_t npages = 0;
+ for (uintptr_t page = (uintptr_t)PAGE_ALIGN_DOWN(fb);
+ page < (uintptr_t)PAGE_ALIGN_UP(fb + fb_width * fb_height);
+ page += PAGE_SIZE)
+ {
+ page_mark_reserved((void *)(page - PHYS_OFFSET));
+ npages++;
+ }
+
+ struct multiboot_tag_vbe *vbe_info = NULL;
+ for (struct multiboot_tag *tag =
+ (struct multiboot_tag *)((uintptr_t)(mb_tag + 1) + PHYS_OFFSET);
+ tag->type != MULTIBOOT_TAG_TYPE_END; tag += TAG_SIZE(tag->size))
+ {
+ if (tag->type != MULTIBOOT_TAG_TYPE_VBE)
+ {
+ continue;
+ }
+ vbe_info = (struct multiboot_tag_vbe *)tag;
+ break;
+ }
+ KASSERT(vbe_info);
+
+#if DOUBLE_BUFFERING
+ fb_buffer = page_alloc_n(npages);
+ KASSERT(fb_buffer && "couldn't allocate double buffer for screen");
+#else
+ fb_buffer = fb;
+#endif
+ pt_map_range(pt_get(), (uintptr_t)fb - PHYS_OFFSET, (uintptr_t)fb,
+ (uintptr_t)PAGE_ALIGN_UP(fb + fb_width * fb_height),
+ PT_PRESENT | PT_WRITE, PT_PRESENT | PT_WRITE);
+ pt_set(pt_get());
+ for (uint32_t i = 0; i < fb_width * fb_height; i++)
+ fb_buffer[i] = 0x008A2BE2;
+ screen_flush();
+}
+
+inline size_t screen_get_width() { return fb_width; }
+
+inline size_t screen_get_height() { return fb_height; }
+
+inline size_t screen_get_character_width() { return SCREEN_CHARACTER_WIDTH; }
+
+inline size_t screen_get_character_height() { return SCREEN_CHARACTER_HEIGHT; }
+
+inline void screen_draw_string(size_t x, size_t y, const char *s, size_t len,
+ color_t color)
+{
+ uint32_t *pos = fb_buffer + y * fb_width + x;
+ while (len--)
+ {
+ const char c = *s++;
+ if (c < ' ' || c > '~')
+ continue;
+ const unsigned char *bitmap = bitmap_letters[c - ' '];
+
+ size_t bm_row = BITMAP_HEIGHT;
+ while (bm_row--)
+ {
+ unsigned char cols = bitmap[bm_row];
+ *pos = BITWISE_TERNARY(cols & 0x80, color.value, *pos);
+ pos++;
+ *pos = BITWISE_TERNARY(cols & 0x40, color.value, *pos);
+ pos++;
+ *pos = BITWISE_TERNARY(cols & 0x20, color.value, *pos);
+ pos++;
+ *pos = BITWISE_TERNARY(cols & 0x10, color.value, *pos);
+ pos++;
+ *pos = BITWISE_TERNARY(cols & 0x08, color.value, *pos);
+ pos++;
+ *pos = BITWISE_TERNARY(cols & 0x04, color.value, *pos);
+ pos++;
+ *pos = BITWISE_TERNARY(cols & 0x02, color.value, *pos);
+ pos++;
+ *pos = BITWISE_TERNARY(cols & 0x01, color.value, *pos);
+ pos++;
+ pos += fb_width - 8;
+ }
+ pos = pos - fb_width * BITMAP_HEIGHT + SCREEN_CHARACTER_WIDTH;
+ }
+}
+
+inline void screen_draw_horizontal(uint32_t *pos, size_t count, color_t color)
+{
+ // while(count--) *pos++ = color.value;
+ __asm__ volatile("cld; rep stosl;" ::"a"(color.value), "D"(pos), "c"(count)
+ : "cc");
+}
+
+inline void screen_copy_horizontal(uint32_t *from, uint32_t *to, size_t count)
+{
+ __asm__ volatile("cld; rep movsl;" ::"S"(from), "D"(to), "c"(count)
+ : "cc");
+}
+
+inline void screen_draw_rect(size_t x, size_t y, size_t width, size_t height,
+ color_t color)
+{
+ uint32_t *top = fb_buffer + y * fb_width + x;
+ screen_draw_horizontal(top, width, color);
+ screen_draw_horizontal(top + height * fb_width, width, color);
+ while (height--)
+ {
+ *top = *(top + width) = color.value;
+ top += fb_width;
+ }
+}
+
+inline void screen_fill(color_t color)
+{
+ __asm__ volatile("cld; rep stosl;" ::"a"(color.value), "D"(fb_buffer),
+ "c"(fb_width * fb_height)
+ : "cc");
+}
+
+inline void screen_fill_rect(size_t x, size_t y, size_t width, size_t height,
+ color_t color)
+{
+ uint32_t *top = fb_buffer + y * fb_width + x;
+ while (height--)
+ {
+ screen_draw_horizontal(top, width, color);
+ top += fb_width;
+ }
+}
+
+inline void screen_copy_rect(size_t fromx, size_t fromy, size_t width,
+ size_t height, size_t tox, size_t toy)
+{
+ uint32_t *from = fb_buffer + fromy * fb_width + fromx;
+ uint32_t *to = fb_buffer + toy * fb_width + tox;
+ while (height--)
+ {
+ screen_copy_horizontal(from, to, width);
+ from += fb_width;
+ to += fb_width;
+ }
+}
+
+inline void screen_flush()
+{
+#if DOUBLE_BUFFERING
+ __asm__ volatile("cld; rep movsl;" ::"S"(fb_buffer), "D"(fb),
+ "c"(fb_width * fb_height)
+ : "cc");
+#endif
+}
+
+static char *shutdown_message = "Weenix has halted cleanly!";
+void screen_print_shutdown()
+{
+ color_t background = {.value = 0x00000000};
+ color_t foreground = {.value = 0x00FFFFFF};
+ screen_fill(background);
+ size_t str_len = strlen(shutdown_message);
+ size_t str_width = str_len * screen_get_character_width();
+ size_t str_height = screen_get_character_height();
+ screen_draw_string((screen_get_width() - str_width) >> 1,
+ (screen_get_height() - str_height) >> 1,
+ shutdown_message, str_len, foreground);
+}
+
+#else
+
+#include "config.h"
+#include "drivers/screen.h"
+#include "main/io.h"
+
+/* Port addresses for the CRT controller */
+#define CRT_CONTROL_ADDR 0x3d4
+#define CRT_CONTROL_DATA 0x3d5
+
+/* Addresses we can pass to the CRT_CONTROL_ADDR port */
+#define CURSOR_HIGH 0x0e
+#define CURSOR_LOW 0x0f
+
+static uintptr_t vga_textbuffer_phys = 0xB8000;
+static uint16_t *vga_textbuffer;
+static uint16_t vga_blank_screen[VGA_HEIGHT][VGA_WIDTH];
+uint16_t vga_blank_row[VGA_WIDTH];
+
+void vga_enable_cursor()
+{
+ outb(0x3D4, 0x0A);
+ outb(0x3D5, (inb(0x3D5) & 0xC0) | 0);
+
+ outb(0x3D4, 0x0B);
+ outb(0x3D5, (inb(0x3D5) & 0xE0) | 15);
+}
+
+void vga_disable_cursor()
+{
+ outb(0x3D4, 0x0A);
+ outb(0x3D5, 0x20);
+}
+
+void vga_init()
+{
+ /* map the VGA textbuffer (vaddr) to the VGA textbuffer physical address */
+ size_t pages =
+ ADDR_TO_PN(PAGE_ALIGN_UP((uintptr_t)sizeof(vga_blank_screen)));
+ vga_textbuffer = page_alloc_n(pages);
+ KASSERT(vga_textbuffer);
+
+ pt_map_range(pt_get(), (uintptr_t)vga_textbuffer_phys,
+ (uintptr_t)vga_textbuffer,
+ (uintptr_t)vga_textbuffer + ((uintptr_t)PN_TO_ADDR(pages)),
+ PT_PRESENT | PT_WRITE, PT_PRESENT | PT_WRITE);
+ pt_set(pt_get());
+
+ for (size_t i = 0; i < VGA_WIDTH; i++)
+ {
+ vga_blank_row[i] = (VGA_DEFAULT_ATTRIB << 8) | ' ';
+ }
+ for (size_t i = 0; i < VGA_HEIGHT; i++)
+ {
+ memcpy(&vga_blank_screen[i], vga_blank_row, VGA_LINE_SIZE);
+ }
+
+ vga_enable_cursor();
+ vga_clear_screen();
+}
+
+void vga_set_cursor(size_t row, size_t col)
+{
+ uint16_t pos = (row * VGA_WIDTH) + col;
+ outb(0x3D4, 0x0F);
+ outb(0x3D5, (uint8_t)(pos & 0xFF));
+ outb(0x3D4, 0x0E);
+ outb(0x3D5, (uint8_t)((pos >> 8) & 0xFF));
+}
+
+void vga_clear_screen()
+{
+ memcpy(vga_textbuffer, vga_blank_screen, sizeof(vga_blank_screen));
+}
+
+void vga_write_char_at(size_t row, size_t col, uint16_t v)
+{
+ KASSERT(row < VGA_HEIGHT && col < VGA_WIDTH);
+ vga_textbuffer[(row * VGA_WIDTH) + col] = v;
+}
+
+static char *shutdown_message = "Weenix has halted cleanly!";
+void screen_print_shutdown()
+{
+ vga_disable_cursor();
+ vga_clear_screen();
+ int x = (VGA_WIDTH - strlen(shutdown_message)) / 2;
+ int y = VGA_HEIGHT / 2;
+
+ for (size_t i = 0; i < strlen(shutdown_message); i++)
+ {
+ vga_write_char_at(y, x + i,
+ (VGA_DEFAULT_ATTRIB << 8) | shutdown_message[i]);
+ }
+}
+
+#endif \ No newline at end of file
diff --git a/kernel/drivers/tty/ldisc.c b/kernel/drivers/tty/ldisc.c
new file mode 100644
index 0000000..d1044f2
--- /dev/null
+++ b/kernel/drivers/tty/ldisc.c
@@ -0,0 +1,120 @@
+#include "drivers/tty/ldisc.h"
+#include <drivers/keyboard.h>
+#include <drivers/tty/tty.h>
+#include <errno.h>
+#include <util/bits.h>
+#include <util/debug.h>
+#include <util/string.h>
+
+#define ldisc_to_tty(ldisc) CONTAINER_OF((ldisc), tty_t, tty_ldisc)
+
+/**
+ * Initialize the line discipline. Don't forget to wipe the buffer associated
+ * with the line discipline clean.
+ *
+ * @param ldisc line discipline.
+ */
+void ldisc_init(ldisc_t *ldisc)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+}
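+
+/*
+ * A minimal sketch of what ldisc_init might do (not the official solution;
+ * apart from ldisc_buffer and ldisc_head, which the comments in this file
+ * mention, the field names below are hypothetical placeholders for whatever
+ * ldisc.h actually declares):
+ *
+ *     memset(ldisc->ldisc_buffer, 0, LDISC_BUFFER_SIZE);
+ *     ldisc->ldisc_head = 0;
+ *     // ...reset the tail/cooked indices and the "full" flag, and
+ *     // initialize the read wait queue, whatever those members are named.
+ */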
+
+/**
+ * While there are no new characters to be read from the line discipline's
+ * buffer, put the current thread to sleep on the line discipline's
+ * read queue. Note that this sleep can be cancelled. What conditions must be met
+ * for there to be no characters to be read?
+ *
+ * @param ldisc the line discipline
+ * @return 0 if there are new characters to be read or the ldisc is full.
+ * If the sleep was interrupted, return what
+ * `sched_cancellable_sleep_on` returned (i.e. -EINTR)
+ */
+long ldisc_wait_read(ldisc_t *ldisc)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+ return -1;
+}
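+
+/*
+ * A hedged sketch of the waiting pattern described above (not the official
+ * solution; the emptiness/full tests and the ldisc_read_queue field name are
+ * hypothetical placeholders):
+ *
+ *     while (<no cooked characters to read> && <ldisc not full>)
+ *     {
+ *         long ret = sched_cancellable_sleep_on(&ldisc->ldisc_read_queue);
+ *         if (ret)
+ *             return ret; // e.g. -EINTR if the sleep was cancelled
+ *     }
+ *     return 0;
+ */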
+
+/**
+ * Reads `count` bytes (at max) from the line discipline's buffer into the
+ * provided buffer. Keep in mind that the ldisc's buffer is circular.
+ *
+ * If you encounter a new line symbol before you have read `count` bytes, you
+ * should stop copying and return the bytes read until now.
+ *
+ * If you encounter an `EOT`, you should stop reading and you should NOT include
+ * the `EOT` in the count of the number of bytes read.
+ *
+ * @param ldisc the line discipline
+ * @param buf the buffer to read into.
+ * @param count the maximum number of bytes to read from ldisc.
+ * @return the number of bytes read from the ldisc.
+ */
+size_t ldisc_read(ldisc_t *ldisc, char *buf, size_t count)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+ return 0;
+}
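+
+/*
+ * Illustration only: the heart of ldisc_read is a bounded copy out of a
+ * circular buffer that stops early at a newline (and at EOT without counting
+ * it). The standalone toy below is not Weenix code -- every name in it is
+ * made up -- but it demonstrates the wrap-around copy pattern and can be
+ * compiled on its own:
+ *
+ *     #include <stddef.h>
+ *
+ *     #define TOY_BUF_SIZE 8
+ *     #define TOY_EOT 0x04
+ *
+ *     // Copy at most count bytes from the circular buffer cbuf, starting at
+ *     // *tail and never passing head. Stops after '\n' (copied here; check
+ *     // the handout for the required convention) or at EOT (consumed but
+ *     // neither copied nor counted).
+ *     static size_t toy_circular_read(const char *cbuf, size_t *tail,
+ *                                     size_t head, char *out, size_t count)
+ *     {
+ *         size_t nread = 0;
+ *         while (nread < count && *tail != head)
+ *         {
+ *             char c = cbuf[*tail];
+ *             *tail = (*tail + 1) % TOY_BUF_SIZE;
+ *             if (c == TOY_EOT)
+ *                 break;
+ *             out[nread++] = c;
+ *             if (c == '\n')
+ *                 break;
+ *         }
+ *         return nread;
+ *     }
+ */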
+
+/**
+ * Place the character received into the ldisc's buffer. You should also update
+ * relevant fields of the struct.
+ *
+ * An easier way of handling new characters is making sure that you always have
+ * one byte left in the line discipline. This way, if the new character you
+ * received is a new line symbol (user hit enter), you can still place the new
+ * line symbol into the buffer; if the new character is not a new line symbol,
+ * you shouldn't place it into the buffer so that you can leave the space for
+ * a new line symbol in the future.
+ *
+ * If the line discipline is full, all incoming characters should be ignored.
+ *
+ * Here are some special cases to consider:
+ * 1. If the character is a backspace:
+ * * if there is a character to remove you must also emit a `\b` to
+ * the vterminal.
+ * 2. If the character is the end of transmission (EOT) character (typing ctrl-d)
+ * 3. If the character is the end of text (ETX) character (typing ctrl-c)
+ * 4. If your buffer is almost full and what you received is not a new line
+ * symbol
+ *
+ * If you did receive a new line symbol, you should wake up the thread that is
+ * sleeping on the wait queue of the line discipline. You should also
+ * emit a `\n` to the vterminal by using `vterminal_write`.
+ *
+ * If you encounter the `EOT` character, you should add it to the buffer,
+ * cook the buffer, and wake up the reader (but do not emit an `\n` character
+ * to the vterminal)
+ *
+ * In the case of `ETX`, you should cause the input line to be effectively
+ * transformed into a cooked blank line. You should clear the uncooked portion
+ * of the line by adjusting ldisc_head.
+ *
+ * Finally, if none of the above cases apply, you should fall back to
+ * `vterminal_key_pressed`.
+ *
+ * Don't forget to write the corresponding characters to the virtual terminal
+ * when it applies!
+ *
+ * @param ldisc the line discipline
+ * @param c the new character
+ */
+void ldisc_key_pressed(ldisc_t *ldisc, char c)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+}
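+
+/*
+ * A hedged outline of the case analysis described above (not the official
+ * solution; the "full" test, the tail/cooked bookkeeping, and the exact names
+ * of the control-character constants are assumptions about ldisc.h and
+ * keyboard.h):
+ *
+ *     if (<ldisc is full>)
+ *         return;              // drop the character entirely
+ *     backspace: remove the last uncooked char, if any, and echo "\b"
+ *     EOT (ctrl-d): buffer it, cook the line, wake readers, no "\n" echoed
+ *     ETX (ctrl-c): turn the input into a cooked blank line via ldisc_head
+ *     newline:      buffer it, cook the line, wake readers, echo "\n"
+ *     otherwise:    buffer it (always leaving room for a future newline)
+ *                   and call vterminal_key_pressed()
+ */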
+
+/**
+ * Copy the raw part of the line discipline buffer into the buffer provided.
+ *
+ * @param ldisc the line discipline
+ * @param s the character buffer to write to
+ * @return the number of bytes copied
+ */
+size_t ldisc_get_current_line_raw(ldisc_t *ldisc, char *s)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+ return 0;
+}
diff --git a/kernel/drivers/tty/tty.c b/kernel/drivers/tty/tty.c
new file mode 100644
index 0000000..a08df13
--- /dev/null
+++ b/kernel/drivers/tty/tty.c
@@ -0,0 +1,135 @@
+#include "drivers/tty/tty.h"
+#include "drivers/chardev.h"
+#include "drivers/dev.h"
+#include "drivers/keyboard.h"
+#include "kernel.h"
+#include "mm/kmalloc.h"
+#include "util/debug.h"
+#include <errno.h>
+
+#ifndef NTERMS
+#define NTERMS 3
+#endif
+
+ssize_t tty_read(chardev_t *cdev, size_t pos, void *buf, size_t count);
+ssize_t tty_write(chardev_t *cdev, size_t pos, const void *buf, size_t count);
+
+chardev_ops_t tty_cdev_ops = {.read = tty_read,
+ .write = tty_write,
+ .mmap = NULL,
+ .fill_pframe = NULL,
+ .flush_pframe = NULL};
+
+tty_t *ttys[NTERMS] = {NULL};
+
+size_t active_tty;
+
+static void tty_receive_char_multiplexer(uint8_t c);
+
+void tty_init()
+{
+ for (unsigned i = 0; i < NTERMS; i++)
+ {
+ tty_t *tty = ttys[i] = kmalloc(sizeof(tty_t));
+ vterminal_init(&tty->tty_vterminal);
+ ldisc_init(&tty->tty_ldisc);
+
+ tty->tty_cdev.cd_id = MKDEVID(TTY_MAJOR, i);
+ list_link_init(&tty->tty_cdev.cd_link);
+ tty->tty_cdev.cd_ops = &tty_cdev_ops;
+
+ kmutex_init(&tty->tty_write_mutex);
+ kmutex_init(&tty->tty_read_mutex);
+
+ long ret = chardev_register(&tty->tty_cdev);
+ KASSERT(!ret);
+ }
+ active_tty = 0;
+ vterminal_make_active(&ttys[active_tty]->tty_vterminal);
+ KASSERT(ttys[active_tty]);
+
+ keyboard_init(tty_receive_char_multiplexer);
+}
+
+/**
+ * Reads from the tty to the buffer.
+ *
+ * You should first lock the read mutex of the tty. You should
+ * then wait until there is something in the line discipline's buffer and only
+ * read from the ldisc's buffer if there are new characters.
+ *
+ * To prevent being preempted, you should set IPL using INTR_KEYBOARD
+ * correctly and revert it once you are done.
+ *
+ * @param cdev the character device that represents tty
+ * @param pos the position to start reading from; should be ignored
+ * @param buf the buffer to read into
+ * @param count the maximum number of bytes to read
+ * @return the number of bytes actually read into the buffer
+ */
+ssize_t tty_read(chardev_t *cdev, size_t pos, void *buf, size_t count)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+ return -1;
+}
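+
+/*
+ * A hedged sketch of the locking/IPL pattern described above (not the
+ * official solution; the IPL helpers are described generically because their
+ * exact names live elsewhere in the kernel, kmutex_lock/kmutex_unlock are
+ * assumed from kmutex.h, and the error handling is elided):
+ *
+ *     tty_t *tty = CONTAINER_OF(cdev, tty_t, tty_cdev);
+ *     kmutex_lock(&tty->tty_read_mutex);
+ *     <raise the IPL to INTR_KEYBOARD>
+ *     ssize_t ret = ldisc_wait_read(&tty->tty_ldisc);
+ *     if (!ret)
+ *         ret = (ssize_t)ldisc_read(&tty->tty_ldisc, buf, count);
+ *     <restore the previous IPL>
+ *     kmutex_unlock(&tty->tty_read_mutex);
+ *     return ret;
+ */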
+
+/**
+ * Writes to the tty from the buffer.
+ *
+ * You should first lock the write mutex of the tty. Then you can use
+ * `vterminal_write` to write to the terminal. Don't forget to use IPL to
+ * guard this from preemption!
+ *
+ * @param cdev the character device that represents tty
+ * @param pos the position to start writing to; should be ignored
+ * @param buf the buffer to read from
+ * @param count the maximum number of bytes to write to the terminal
+ * @return the number of bytes actually written
+ */
+ssize_t tty_write(chardev_t *cdev, size_t pos, const void *buf, size_t count)
+{
+ NOT_YET_IMPLEMENTED("DRIVERS: ***none***");
+ return -1;
+}
+
+static void tty_receive_char_multiplexer(uint8_t c)
+{
+ tty_t *tty = ttys[active_tty];
+
+ if (c >= F1 && c <= F12)
+ {
+ if (c - F1 < NTERMS)
+ {
+ /* TODO: this is totally unsafe... Fix it */
+ active_tty = (unsigned)c - F1;
+ tty = ttys[active_tty];
+ vterminal_make_active(&tty->tty_vterminal);
+ }
+ return;
+ }
+ if (c == CR)
+ c = LF;
+ else if (c == DEL)
+ c = BS;
+
+ vterminal_t *vt = &tty->tty_vterminal;
+ switch ((unsigned)c)
+ {
+ case SCROLL_DOWN:
+ case SCROLL_UP:
+ // vterminal_scroll(vt, c == SCROLL_DOWN ? 1 : -1);
+ break;
+ case SCROLL_DOWN_PAGE:
+ case SCROLL_UP_PAGE:
+ // vterminal_scroll(vt, c == SCROLL_DOWN_PAGE ? vt->vt_height :
+ // -vt->vt_height);
+ break;
+ case ESC:
+ // vterminal_scroll_to_bottom(vt);
+ break;
+ default:
+ ldisc_key_pressed(&tty->tty_ldisc, c);
+ // vterminal_key_pressed(vt);
+ break;
+ }
+}
diff --git a/kernel/drivers/tty/vterminal.c b/kernel/drivers/tty/vterminal.c
new file mode 100644
index 0000000..9ac3421
--- /dev/null
+++ b/kernel/drivers/tty/vterminal.c
@@ -0,0 +1,1384 @@
+#include <drivers/keyboard.h>
+#include <drivers/tty/ldisc.h>
+#include <drivers/tty/tty.h>
+#include <drivers/tty/vterminal.h>
+#include <errno.h>
+#include <mm/kmalloc.h>
+#include <util/debug.h>
+#include <util/string.h>
+
+/*
+
+vterminal.c manages the display of the terminal screen: printing the keys
+pressed, printing command output, managing the cursor position, and so on.
+
+vterminal_write is called by functions in tty.c and ldisc.c, namely tty_write
+and ldisc_key_pressed. vterminal_write then calls vtconsole_write, which takes
+care of processing the characters with the help of vtconsole_process.
+vtconsole_process and vtconsole_append are responsible for printing the
+characters corresponding to the keys pressed onto the console.
+
+vtconsole_append also manages the position of the cursor while the uncooked
+part of the buffer is being printed. There are multiple other functions defined
+in this file which help in displaying the cursor on the console. The console
+also supports scrolling, which is handled by vtconsole_scroll. vterminal_clear
+is used to clear the contents of the console.
+
+The functions vterminal_make_active, vterminal_init, vtconsole, paint_callback
+and cursor_move_callback are responsible for carrying out the necessary
+initialization and initial display of the console.
+
+*/
+
+#define vterminal_to_tty(vterminal) \
+ CONTAINER_OF((vterminal), tty_t, tty_vterminal)
+
+#ifdef __VGABUF___
+
+/*
+Without VGABUF turned on, the terminal is treated as a simple device: characters sent
+to it are displayed. It does the right thing with newlines and backspaces,
+but doesn't handle any other control characters. The VGA handles all sorts of other things,
+but we also have to explicitly tell it to scroll. VGABUF allows Weenix to toggle between
+VGA text mode (which understands text) and VGA buffer mode (which is pixel based).
+*/
+
+#define VT_LINE_POSITION(vt, line) \
+ ((vt)->vt_line_positions[((vt)->vt_line_offset + (vt)->vt_height + \
+ (line)) % \
+ (vt)->vt_height])
+
+#define VT_OFFSCREEN ((size_t)-1)
+
+static long vterminal_add_chunk(vterminal_t *vt);
+
+static vterminal_t *active_vt = NULL;
+
+void vterminal_init(vterminal_t *vt)
+{
+ vt->vt_width = screen_get_width() / screen_get_character_width();
+ vt->vt_height = screen_get_height() / screen_get_character_height();
+ list_init(&vt->vt_history_chunks);
+ vt->vt_line_positions = kmalloc(sizeof(size_t) * vt->vt_height * 2);
+ KASSERT(vt->vt_line_positions);
+ vt->vt_line_widths = vt->vt_line_positions + vt->vt_height;
+
+ list_init(&vt->vt_history_chunks);
+ long success = vterminal_add_chunk(vt);
+ KASSERT(success && !list_empty(&vt->vt_history_chunks));
+
+ vterminal_clear(vt);
+}
+
+static void vterminal_seek_to_pos(vterminal_t *vt, size_t pos,
+ vterminal_history_chunk_t **chunk,
+ size_t *offset)
+{
+ if (pos > vt->vt_len)
+ {
+ *chunk = NULL;
+ *offset = 0;
+ return;
+ }
+ *offset = pos % VT_CHARS_PER_HISTORY_CHUNK;
+ size_t n_chunks = vt->vt_len / VT_CHARS_PER_HISTORY_CHUNK;
+ size_t iterations = pos / VT_CHARS_PER_HISTORY_CHUNK;
+ if (iterations > n_chunks >> 1)
+ {
+ iterations = n_chunks - iterations;
+ list_iterate_reverse(&vt->vt_history_chunks, chunk_iter,
+ vterminal_history_chunk_t, link)
+ {
+ if (!iterations--)
+ {
+ *chunk = chunk_iter;
+ return;
+ }
+ }
+ }
+ else
+ {
+ list_iterate(&vt->vt_history_chunks, chunk_iter,
+ vterminal_history_chunk_t, link)
+ {
+ if (!iterations--)
+ {
+ *chunk = chunk_iter;
+ return;
+ }
+ }
+ }
+}
+
+static inline long vterminal_seek_to_offset(vterminal_t *vt,
+ vterminal_history_chunk_t **chunk,
+ size_t *offset)
+{
+ while (*offset >= VT_CHARS_PER_HISTORY_CHUNK)
+ {
+ if (*chunk ==
+ list_tail(&vt->vt_history_chunks, vterminal_history_chunk_t, link))
+ return 0;
+ *chunk = list_next(*chunk, vterminal_history_chunk_t, link);
+ *offset -= VT_CHARS_PER_HISTORY_CHUNK;
+ }
+ return 1;
+}
+
+size_t vterminal_calculate_line_width_forward(vterminal_t *vt, size_t pos)
+{
+ vterminal_history_chunk_t *chunk;
+ size_t offset;
+ vterminal_seek_to_pos(vt, pos, &chunk, &offset);
+ if (!chunk)
+ return 0;
+ size_t width = 0;
+ while (pos + width < vt->vt_len && chunk->chars[offset++] != LF)
+ {
+ width++;
+ if (!vterminal_seek_to_offset(vt, &chunk, &offset))
+ break;
+ }
+ return width;
+}
+static void vterminal_redraw_lines(vterminal_t *vt, size_t start, size_t end)
+{
+ KASSERT(start < vt->vt_height && start < end && end <= vt->vt_height);
+
+ size_t pos = VT_LINE_POSITION(vt, start);
+ vterminal_history_chunk_t *chunk;
+ size_t offset;
+ vterminal_seek_to_pos(vt, pos, &chunk, &offset);
+
+ color_t cursor = {.value = 0x00D3D3D3};
+ color_t background = {.value = 0x00000000};
+ color_t foreground = {.value = 0x00FFFFFF};
+
+ size_t screen_y = screen_get_character_height() * start;
+
+ size_t line = start;
+ while (line < end && pos <= vt->vt_len &&
+ vterminal_seek_to_offset(vt, &chunk, &offset))
+ {
+ KASSERT(pos == VT_LINE_POSITION(vt, line));
+
+ size_t cur_width = vt->vt_line_widths[line];
+ size_t new_width, next_pos;
+ if (line + 1 < vt->vt_height &&
+ (next_pos = VT_LINE_POSITION(vt, line + 1)) != VT_OFFSCREEN)
+ {
+ new_width = next_pos - pos - 1;
+ }
+ else
+ {
+ new_width = vterminal_calculate_line_width_forward(vt, pos);
+ }
+ vt->vt_line_widths[line] = new_width;
+
+ screen_fill_rect(
+ 0, screen_y,
+ MAX(cur_width, new_width) * screen_get_character_width(),
+ screen_get_character_height(), background);
+ if (pos <= vt->vt_cursor_pos && vt->vt_cursor_pos <= pos + new_width)
+ {
+ screen_fill_rect(
+ (vt->vt_cursor_pos - pos) * screen_get_character_width(),
+ screen_y, screen_get_character_width(),
+ screen_get_character_height(), cursor);
+ vt->vt_line_widths[line]++;
+ }
+ size_t drawn = 0;
+ while (drawn != new_width)
+ {
+ size_t to_draw =
+ MIN(VT_CHARS_PER_HISTORY_CHUNK - offset, new_width - drawn);
+ screen_draw_string(drawn * screen_get_character_width(), screen_y,
+ chunk->chars + offset, to_draw, foreground);
+ drawn += to_draw;
+ offset += to_draw;
+ if (!vterminal_seek_to_offset(vt, &chunk, &offset))
+ {
+ vterminal_seek_to_offset(vt, &chunk, &offset);
+ KASSERT(drawn == new_width);
+ break;
+ }
+ }
+
+ pos += new_width + 1;
+ KASSERT(chunk->chars[offset] == LF || pos >= vt->vt_len);
+
+ offset++;
+ line++;
+ screen_y += screen_get_character_height();
+ }
+ while (line < end)
+ {
+ // dbg(DBG_TEMP, "clearing line %lu\n", line);
+ screen_fill_rect(
+ 0, screen_y,
+ vt->vt_line_widths[line] * screen_get_character_width(),
+ screen_get_character_height(), background);
+ vt->vt_line_widths[line] = 0;
+ line++;
+ screen_y += screen_get_character_height();
+ }
+}
+
+void vterminal_make_active(vterminal_t *vt)
+{
+ KASSERT(vt);
+ if (active_vt == vt)
+ return;
+ active_vt = vt;
+ for (size_t line = 0; line < vt->vt_height; line++)
+ {
+ vt->vt_line_widths[line] = vt->vt_width;
+ }
+ color_t background = {.value = 0x00000000};
+ screen_fill_rect(
+ vt->vt_width * screen_get_character_width(), 0,
+ screen_get_width() - vt->vt_width * screen_get_character_width(),
+ screen_get_height(), background);
+ screen_fill_rect(
+ 0, vt->vt_height * screen_get_character_height(), screen_get_width(),
+ screen_get_height() - vt->vt_height * screen_get_character_height(),
+ background);
+ vterminal_redraw_lines(vt, 0, vt->vt_height);
+}
+
+size_t vterminal_calculate_line_width_backward(vterminal_t *vt, size_t pos)
+{
+ if (!pos)
+ return 0;
+ vterminal_history_chunk_t *chunk;
+ size_t offset;
+ vterminal_seek_to_pos(vt, pos - 1, &chunk, &offset);
+ size_t width = 0;
+ while (chunk->chars[offset] != LF)
+ {
+ width++;
+ if (offset == 0)
+ {
+ if (chunk == list_head(&vt->vt_history_chunks,
+ vterminal_history_chunk_t, link))
+ break;
+ chunk = list_prev(chunk, vterminal_history_chunk_t, link);
+ offset = VT_CHARS_PER_HISTORY_CHUNK;
+ }
+ offset--;
+ }
+ return width;
+}
+
+static inline void vterminal_get_last_visible_line_information(vterminal_t *vt,
+ size_t *position,
+ size_t *width)
+{
+ for (long line = vt->vt_height - 1; line >= 0; line--)
+ {
+ if (VT_LINE_POSITION(vt, line) != VT_OFFSCREEN)
+ {
+ *position = VT_LINE_POSITION(vt, line);
+ *width = vterminal_calculate_line_width_forward(vt, *position);
+ return;
+ }
+ }
+ panic("should always find last visible line information");
+}
+
+static inline long vterminal_scrolled_to_bottom(vterminal_t *vt)
+{
+ size_t position;
+ size_t width;
+ vterminal_get_last_visible_line_information(vt, &position, &width);
+ return position + width == vt->vt_len;
+}
+
+void vterminal_scroll_to_bottom(vterminal_t *vt)
+{
+ if (vterminal_scrolled_to_bottom(vt))
+ return;
+ vt->vt_line_offset = 0;
+ VT_LINE_POSITION(vt, 0) = vt->vt_len + 1;
+ vterminal_scroll(vt, -vt->vt_height);
+ for (size_t line = vt->vt_height - vt->vt_line_offset; line < vt->vt_height;
+ line++)
+ {
+ VT_LINE_POSITION(vt, line) = VT_OFFSCREEN;
+ }
+}
+
+void vterminal_scroll_draw(vterminal_t *vt, long count)
+{
+ if (count > 0)
+ {
+ if ((size_t)count > vt->vt_height)
+ count = vt->vt_height;
+ size_t copy_distance = count * screen_get_character_height();
+ size_t screen_y = 0;
+ for (size_t line = 0; line < vt->vt_height - count; line++)
+ {
+ screen_copy_rect(0, screen_y + copy_distance,
+ MAX(vt->vt_line_widths[line],
+ vt->vt_line_widths[line + count]) *
+ screen_get_character_width(),
+ screen_get_character_height(), 0, screen_y);
+ vt->vt_line_widths[line] = vt->vt_line_widths[line + count];
+ screen_y += screen_get_character_height();
+ }
+ vterminal_redraw_lines(vt, vt->vt_height - count, vt->vt_height);
+ }
+ else if (count < 0)
+ {
+ count *= -1;
+ if ((size_t)count > vt->vt_height)
+ count = vt->vt_height;
+ size_t copy_distance = count * screen_get_character_height();
+ size_t screen_y =
+ (vt->vt_height - count) * screen_get_character_height();
+ for (size_t line = vt->vt_height - count; line >= (size_t)count;
+ line--)
+ {
+ screen_copy_rect(0, screen_y - copy_distance,
+ MAX(vt->vt_line_widths[line],
+ vt->vt_line_widths[line - count]) *
+ screen_get_character_width(),
+ screen_get_character_height(), 0, screen_y);
+ vt->vt_line_widths[line] = vt->vt_line_widths[line - count];
+ screen_y -= screen_get_character_height();
+ }
+ vterminal_redraw_lines(vt, 0, (size_t)count);
+ }
+}
+
+void vterminal_scroll(vterminal_t *vt, long count)
+{
+ long n_scrolls = 0;
+ if (count < 0)
+ {
+ size_t first_line_position = VT_LINE_POSITION(vt, 0);
+ while (count++ && first_line_position)
+ {
+ size_t width = vterminal_calculate_line_width_backward(
+ vt, first_line_position - 1);
+ size_t top_line_position = first_line_position - width - 1;
+ VT_LINE_POSITION(vt, -1) = top_line_position;
+ if (!vt->vt_line_offset)
+ vt->vt_line_offset = vt->vt_height;
+ vt->vt_line_offset--;
+ n_scrolls++;
+ first_line_position = top_line_position;
+ }
+ if (n_scrolls)
+ {
+ vterminal_scroll_draw(vt, -n_scrolls);
+ }
+ }
+ else if (count > 0)
+ {
+ size_t last_line_position;
+ size_t last_line_width;
+ vterminal_get_last_visible_line_information(vt, &last_line_position,
+ &last_line_width);
+ while (count-- && last_line_position + last_line_width < vt->vt_len)
+ {
+ size_t bottom_line_position =
+ last_line_position + last_line_width + 1;
+ VT_LINE_POSITION(vt, 0) = bottom_line_position;
+ vt->vt_line_offset++;
+ if ((unsigned)vt->vt_line_offset == vt->vt_height)
+ vt->vt_line_offset = 0;
+ n_scrolls++;
+ last_line_position = bottom_line_position;
+ last_line_width =
+ vterminal_calculate_line_width_forward(vt, last_line_position);
+ }
+ if (n_scrolls)
+ {
+ vterminal_scroll_draw(vt, n_scrolls);
+ }
+ }
+}
+
+void vterminal_clear(vterminal_t *vt)
+{
+ list_iterate(&vt->vt_history_chunks, chunk, vterminal_history_chunk_t,
+ link)
+ {
+ if (chunk != list_tail(&vt->vt_history_chunks,
+ vterminal_history_chunk_t, link))
+ {
+ list_remove(&chunk->link);
+ page_free_n(chunk, VT_PAGES_PER_HISTORY_CHUNK);
+ }
+ else
+ {
+ memset(chunk, 0, VT_CHARS_PER_HISTORY_CHUNK);
+ }
+ }
+ vt->vt_len = 0;
+ for (size_t i = 0; i < vt->vt_height; i++)
+ {
+ vt->vt_line_widths[i] = 0;
+ vt->vt_line_positions[i] = VT_OFFSCREEN;
+ }
+ vt->vt_line_offset = 0;
+ vt->vt_cursor_pos = 0;
+ vt->vt_input_pos = 0;
+ VT_LINE_POSITION(vt, 0) = 0;
+}
+
+static long vterminal_add_chunk(vterminal_t *vt)
+{
+ vterminal_history_chunk_t *chunk = page_alloc_n(VT_PAGES_PER_HISTORY_CHUNK);
+ if (!chunk)
+ {
+ chunk =
+ list_head(&vt->vt_history_chunks, vterminal_history_chunk_t, link);
+ if (chunk ==
+ list_tail(&vt->vt_history_chunks, vterminal_history_chunk_t, link))
+ return 0;
+ list_remove(&chunk->link);
+
+ // TODO what if the first chunk that we're removing is visible? lol
+ for (size_t i = 0; i < vt->vt_height; i++)
+ {
+ KASSERT(vt->vt_line_positions[i] >= VT_CHARS_PER_HISTORY_CHUNK &&
+ "NYI");
+ vt->vt_line_positions[i] -= VT_CHARS_PER_HISTORY_CHUNK;
+ }
+ KASSERT(vt->vt_input_pos >= VT_CHARS_PER_HISTORY_CHUNK &&
+ vt->vt_cursor_pos >= VT_CHARS_PER_HISTORY_CHUNK &&
+ vt->vt_len >= VT_CHARS_PER_HISTORY_CHUNK && "NYI");
+ vt->vt_input_pos -= VT_CHARS_PER_HISTORY_CHUNK;
+ vt->vt_cursor_pos -= VT_CHARS_PER_HISTORY_CHUNK;
+ vt->vt_len -= VT_CHARS_PER_HISTORY_CHUNK;
+ }
+
+ memset(chunk, 0, sizeof(vterminal_history_chunk_t));
+
+ list_link_init(&chunk->link);
+ list_insert_tail(&vt->vt_history_chunks, &chunk->link);
+
+ return 1;
+}
+
+static inline long vterminal_allocate_to_offset(
+ vterminal_t *vt, vterminal_history_chunk_t **chunk, size_t *offset)
+{
+ if (!vterminal_seek_to_offset(vt, chunk, offset))
+ {
+ if (!vterminal_add_chunk(vt))
+ {
+ return 0;
+ }
+ return vterminal_seek_to_offset(vt, chunk, offset);
+ }
+ return 1;
+}
+
+size_t vterminal_write(vterminal_t *vt, const char *buf, size_t len)
+{
+ size_t written = 0;
+
+ size_t last_line_width =
+ vterminal_calculate_line_width_backward(vt, vt->vt_len);
+ size_t last_line_idx;
+ size_t last_line_position = VT_OFFSCREEN;
+ for (last_line_idx = vt->vt_height - 1;; last_line_idx--)
+ {
+ if ((last_line_position = VT_LINE_POSITION(vt, last_line_idx)) !=
+ VT_OFFSCREEN)
+ {
+ break;
+ }
+ }
+ KASSERT(last_line_idx < vt->vt_height);
+
+ vterminal_history_chunk_t *chunk;
+ size_t offset;
+ vterminal_seek_to_pos(vt, vt->vt_len, &chunk, &offset);
+
+ size_t last_line_idx_initial = (size_t)last_line_idx;
+
+ long need_to_scroll = last_line_position + last_line_width == vt->vt_len;
+ size_t n_scroll_downs = 0;
+ while (len--)
+ {
+ char c = *(buf++);
+ written++;
+ if (c != LF)
+ {
+ chunk->chars[offset++] = c;
+ vt->vt_len++;
+ last_line_width++;
+ if (!vterminal_allocate_to_offset(vt, &chunk, &offset))
+ goto done;
+ }
+ if (last_line_width == vt->vt_width)
+ {
+ c = LF;
+ }
+ if (c == LF)
+ {
+ chunk->chars[offset++] = LF;
+ vt->vt_len++;
+ if (!vterminal_allocate_to_offset(vt, &chunk, &offset))
+ goto done;
+
+ if (need_to_scroll)
+ {
+ KASSERT(last_line_position + last_line_width + 1 == vt->vt_len);
+ if (last_line_idx == vt->vt_height - 1)
+ {
+ vt->vt_line_offset++;
+ n_scroll_downs++;
+ if ((unsigned)vt->vt_line_offset == vt->vt_height)
+ vt->vt_line_offset = 0;
+ if (last_line_idx_initial)
+ last_line_idx_initial--;
+ }
+ else
+ {
+ last_line_idx++;
+ }
+ last_line_width = 0;
+ last_line_position = VT_LINE_POSITION(vt, last_line_idx) =
+ vt->vt_len;
+ }
+ }
+ }
+
+ last_line_idx++;
+done:
+ vt->vt_input_pos = vt->vt_len;
+ vt->vt_cursor_pos = vt->vt_len;
+
+ if (need_to_scroll)
+ {
+ if (active_vt == vt)
+ {
+ if (last_line_idx >= vt->vt_height &&
+ n_scroll_downs < vt->vt_height)
+ {
+ vterminal_scroll_draw(vt, n_scroll_downs);
+ last_line_idx = vt->vt_height;
+ }
+ vterminal_redraw_lines(vt, last_line_idx_initial,
+ MIN(last_line_idx, vt->vt_height));
+ }
+ else
+ {
+ vterminal_scroll(vt, n_scroll_downs);
+ }
+ }
+ return written;
+}
+
+static void vterminal_free_from_position_to_end(vterminal_t *vt, size_t pos)
+{
+ vterminal_history_chunk_t *chunk;
+ size_t offset;
+ vterminal_seek_to_pos(vt, vt->vt_input_pos, &chunk, &offset);
+ while (chunk !=
+ list_tail(&vt->vt_history_chunks, vterminal_history_chunk_t, link))
+ {
+ vterminal_history_chunk_t *to_remove =
+ list_tail(&vt->vt_history_chunks, vterminal_history_chunk_t, link);
+ list_remove(&to_remove->link);
+ page_free_n(to_remove, VT_PAGES_PER_HISTORY_CHUNK);
+ }
+ vt->vt_len = pos;
+ for (size_t line = 0; line < vt->vt_height; line++)
+ {
+ if (VT_LINE_POSITION(vt, line) > vt->vt_len)
+ {
+ VT_LINE_POSITION(vt, line) = VT_OFFSCREEN;
+ vterminal_redraw_lines(vt, line, line + 1);
+ }
+ }
+}
+
+void vterminal_key_pressed(vterminal_t *vt)
+{
+ KASSERT(active_vt == vt);
+ vterminal_scroll_to_bottom(vt);
+ char buf[LDISC_BUFFER_SIZE];
+ size_t len =
+ ldisc_get_current_line_raw(&vterminal_to_tty(vt)->tty_ldisc, buf);
+ size_t initial_input_pos = vt->vt_input_pos;
+ vterminal_free_from_position_to_end(vt, initial_input_pos);
+ vterminal_write(vt, buf, len);
+
+ vt->vt_input_pos = initial_input_pos;
+}
+
+#endif
+
+#define VGA_SCREEN_WIDTH 80
+#define VGA_SCREEN_HEIGHT 25
+
+#define VGACOLOR_BLACK 0X0
+#define VGACOLOR_BLUE 0X1
+#define VGACOLOR_GREEN 0X2
+#define VGACOLOR_CYAN 0X3
+#define VGACOLOR_RED 0X4
+#define VGACOLOR_MAGENTA 0X5
+#define VGACOLOR_BROWN 0X6
+#define VGACOLOR_LIGHT_GRAY 0X7
+#define VGACOLOR_GRAY 0X8
+#define VGACOLOR_LIGHT_BLUE 0X9
+#define VGACOLOR_LIGHT_GREEN 0XA
+#define VGACOLOR_LIGHT_CYAN 0XB
+#define VGACOLOR_LIGHT_RED 0XC
+#define VGACOLOR_LIGHT_MAGENTA 0XD
+#define VGACOLOR_LIGHT_YELLOW 0XE
+#define VGACOLOR_WHITE 0XF
+
+/* --- Constructor/Destructor ----------------------------------------------- */
+
+// vtconsole constructor/init function
+vtconsole_t *vtconsole(vtconsole_t *vtc, int width, int height,
+ vtc_paint_handler_t on_paint,
+ vtc_cursor_handler_t on_move)
+{
+ vtc->width = width;
+ vtc->height = height;
+
+ vtansi_parser_t ap;
+ ap.state = VTSTATE_ESC;
+ ap.index = 0;
+ memset(ap.stack, 0, sizeof(vtansi_arg_t) * VTC_ANSI_PARSER_STACK_SIZE);
+ vtc->ansiparser = ap;
+
+ vtc->attr = VTC_DEFAULT_ATTR;
+
+ vtc->buffer = kmalloc(width * height * sizeof(vtcell_t));
+
+ vtc->tabs = kmalloc(LDISC_BUFFER_SIZE * sizeof(int));
+ vtc->tab_index = 0;
+
+ vtc->cursor = (vtcursor_t){0, 0};
+
+ vtc->on_paint = on_paint;
+ vtc->on_move = on_move;
+
+ vtconsole_clear(vtc, 0, 0, width, height - 1);
+
+ return vtc;
+}
+
+// function to free the vtconsole/vterminal buffer
+void vtconsole_delete(vtconsole_t *vtc)
+{
+ kfree(vtc->buffer);
+ kfree(vtc->tabs);
+ kfree(vtc);
+}
+
+/* --- Internal methods ---------------------------------------------------- */
+
+// function to clear everything on the vterminal
+void vtconsole_clear(vtconsole_t *vtc, int fromx, int fromy, int tox, int toy)
+{
+ for (int i = fromx + fromy * vtc->width; i < tox + toy * vtc->width; i++)
+ {
+ vtcell_t *cell = &vtc->buffer[i];
+
+ cell->attr = VTC_DEFAULT_ATTR;
+ cell->c = ' ';
+
+ if (vtc->on_paint)
+ {
+ vtc->on_paint(vtc, cell, i % vtc->width, i / vtc->width);
+ }
+ }
+}
+
+// helper function for vtconsole_newline to scroll down the screen.
+void vtconsole_scroll(vtconsole_t *vtc, int lines)
+{
+ if (lines == 0)
+ return;
+
+ lines = lines > vtc->height ? vtc->height : lines;
+
+ // Scroll the screen by number of $lines.
+ for (int i = 0; i < ((vtc->width * vtc->height) - (vtc->width * lines));
+ i++)
+ {
+ vtc->buffer[i] = vtc->buffer[i + (vtc->width * lines)];
+
+ if (vtc->on_paint)
+ {
+ vtc->on_paint(vtc, &vtc->buffer[i], i % vtc->width, i / vtc->width);
+ }
+ }
+
+ // Clear the last $lines.
+ for (int i = ((vtc->width * vtc->height) - (vtc->width * lines));
+ i < vtc->width * vtc->height; i++)
+ {
+ vtcell_t *cell = &vtc->buffer[i];
+ cell->attr = VTC_DEFAULT_ATTR;
+ cell->c = ' ';
+
+ if (vtc->on_paint)
+ {
+ vtc->on_paint(vtc, &vtc->buffer[i], i % vtc->width, i / vtc->width);
+ }
+ }
+
+ // Move the cursor up $lines
+ if (vtc->cursor.y > 0)
+ {
+ vtc->cursor.y -= lines;
+
+ if (vtc->cursor.y < 0)
+ vtc->cursor.y = 0;
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+ }
+}
+
+// Append a new line
+void vtconsole_newline(vtconsole_t *vtc)
+{
+ vtc->cursor.x = 0;
+ vtc->cursor.y++;
+
+ if (vtc->cursor.y == vtc->height)
+ {
+ vtconsole_scroll(vtc, 1);
+ }
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+}
+
+// Append character to the console buffer.
+void vtconsole_append(vtconsole_t *vtc, char c)
+{
+ if (c == '\n')
+ {
+ vtconsole_newline(vtc);
+ }
+ else if (c == '\r')
+ {
+ vtc->cursor.x = 0;
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+ }
+ else if (c == '\t')
+ {
+ int n = 8 - (vtc->cursor.x % 8);
+ // storing all the tabs and their size encountered.
+ vtc->tabs[vtc->tab_index % LDISC_BUFFER_SIZE] = n;
+ vtc->tab_index++;
+
+ for (int i = 0; i < n; i++)
+ {
+ vtconsole_append(vtc, ' ');
+ }
+ }
+ else if (c == '\b')
+ {
+ if (vtc->cursor.x > 0)
+ {
+ vtc->cursor.x--;
+ }
+ else
+ {
+ vtc->cursor.y--;
+ vtc->cursor.x = vtc->width - 1;
+ }
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+
+ int i = (vtc->width * vtc->cursor.y) + vtc->cursor.x;
+ vtcell_t *cell = &vtc->buffer[i];
+ cell->attr = VTC_DEFAULT_ATTR;
+ cell->c = ' ';
+ vtc->on_paint(vtc, &vtc->buffer[i], i % vtc->width, i / vtc->width);
+ }
+ else
+ {
+ if (vtc->cursor.x >= vtc->width)
+ vtconsole_newline(vtc);
+
+ vtcell_t *cell =
+ &vtc->buffer[vtc->cursor.x + vtc->cursor.y * vtc->width];
+ cell->c = c;
+ cell->attr = vtc->attr;
+
+ if (vtc->on_paint)
+ {
+ vtc->on_paint(vtc, cell, vtc->cursor.x, vtc->cursor.y);
+ }
+
+ vtc->cursor.x++;
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+ }
+}
+
+// Helper function for vtconsole_process to move the cursor P1 rows up
+void vtconsole_csi_cuu(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ if (count == 1 && !stack[0].empty)
+ {
+ int attr = stack[0].value;
+ vtc->cursor.y = MAX(MIN(vtc->cursor.y - attr, vtc->height - 1), 1);
+ }
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+}
+
+// Helper function for vtconsole_process to move the cursor P1 rows down
+void vtconsole_csi_cud(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ if (count == 1 && !stack[0].empty)
+ {
+ int attr = stack[0].value;
+ vtc->cursor.y = MAX(MIN(vtc->cursor.y + attr, vtc->height - 1), 1);
+ }
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+}
+
+// Helper function for vtconsole_process to move the cursor P1 columns right
+void vtconsole_csi_cuf(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ if (count == 1 && !stack[0].empty)
+ {
+ int attr = stack[0].value;
+ vtc->cursor.x = MAX(MIN(vtc->cursor.x + attr, vtc->width - 1), 1);
+ }
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+}
+
+// Helper function for vtconsole_process to move the cursor P1 columns left
+void vtconsole_csi_cub(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ if (count == 1 && !stack[0].empty)
+ {
+ int attr = stack[0].value;
+ vtc->cursor.x = MAX(MIN(vtc->cursor.x - attr, vtc->width - 1), 1);
+ }
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+}
+
+// Helper function for vtconsole_process to place the cursor to the first
+// column of line P1 rows down from current
+void vtconsole_csi_cnl(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ if (count == 1 && !stack[0].empty)
+ {
+ int attr = stack[0].value;
+ vtc->cursor.y = MAX(MIN(vtc->cursor.y + attr, vtc->height - 1), 1);
+ vtc->cursor.x = 0;
+ }
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+}
+
+// Helper function for vtconsole_process to place the cursor to the first
+// column of line P1 rows up from current
+void vtconsole_csi_cpl(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ if (count == 1 && !stack[0].empty)
+ {
+ int attr = stack[0].value;
+ vtc->cursor.y = MAX(MIN(vtc->cursor.y - attr, vtc->height - 1), 1);
+ vtc->cursor.x = 0;
+ }
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+}
+
+// Helper function of vtconsole_process to move the cursor to column P1
+void vtconsole_csi_cha(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ if (count == 1 && !stack[0].empty)
+ {
+ int attr = stack[0].value;
+ vtc->cursor.x = MAX(MIN(attr, vtc->width - 1), 1);
+ }
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+}
+
+// Moves the cursor to row n, column m. The values are 1-based, and default
+// to 1 (the top-left corner) if omitted.
+void vtconsole_csi_cup(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ if (count == 1 && stack[0].empty)
+ {
+ vtc->cursor.x = 0;
+ vtc->cursor.y = 0;
+ }
+ else if (count == 2)
+ {
+ if (stack[0].empty)
+ {
+ vtc->cursor.y = 0;
+ }
+ else
+ {
+ vtc->cursor.y = MIN(stack[0].value - 1, vtc->height - 1);
+ }
+
+ if (stack[1].empty)
+ {
+ vtc->cursor.x = 0;
+ }
+ else
+ {
+ vtc->cursor.x = MIN(stack[1].value - 1, vtc->width - 1);
+ }
+ }
+
+ if (vtc->on_move)
+ {
+ vtc->on_move(vtc, &vtc->cursor);
+ }
+}
+
+// Clears part of the screen.
+void vtconsole_csi_ed(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ (void)(count);
+
+ vtcursor_t cursor = vtc->cursor;
+
+ if (stack[0].empty)
+ {
+ vtconsole_clear(vtc, cursor.x, cursor.y, vtc->width, vtc->height - 1);
+ }
+ else
+ {
+ int attr = stack[0].value;
+
+ if (attr == 0)
+ vtconsole_clear(vtc, cursor.x, cursor.y, vtc->width,
+ vtc->height - 1);
+ else if (attr == 1)
+ vtconsole_clear(vtc, 0, 0, cursor.x, cursor.y);
+ else if (attr == 2)
+ vtconsole_clear(vtc, 0, 0, vtc->width, vtc->height - 1);
+ }
+}
+
+// Erases part of the line.
+void vtconsole_csi_el(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ (void)(count);
+
+ vtcursor_t cursor = vtc->cursor;
+
+ if (stack[0].empty)
+ {
+ vtconsole_clear(vtc, cursor.x, cursor.y, vtc->width, cursor.y);
+ }
+ else
+ {
+ int attr = stack[0].value;
+
+ if (attr == 0)
+ vtconsole_clear(vtc, cursor.x, cursor.y, vtc->width, cursor.y);
+ else if (attr == 1)
+ vtconsole_clear(vtc, 0, cursor.y, cursor.x, cursor.y);
+ else if (attr == 2)
+ vtconsole_clear(vtc, 0, cursor.y, vtc->width, cursor.y);
+ }
+}
+
+// Sets the appearance of the following characters
+void vtconsole_csi_sgr(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ for (int i = 0; i < count; i++)
+ {
+ if (stack[i].empty || stack[i].value == 0)
+ {
+ vtc->attr = VTC_DEFAULT_ATTR;
+ }
+ else
+ {
+ int attr = stack[i].value;
+
+ if (attr == 1) // Increased intensity
+ {
+ vtc->attr.bright = 1;
+ }
+ else if (attr >= 30 && attr <= 37) // Set foreground color
+ {
+ vtc->attr.fg = attr - 30;
+ }
+ else if (attr >= 40 && attr <= 47) // Set background color
+ {
+ vtc->attr.bg = attr - 40;
+ }
+ }
+ }
+}
+
+void vtconsole_csi_l(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ if (count != 1)
+ {
+ return;
+ }
+ if (stack[0].empty || stack[0].value != 25)
+ {
+ return;
+ }
+
+ vga_disable_cursor();
+}
+
+void vtconsole_csi_h(vtconsole_t *vtc, vtansi_arg_t *stack, int count)
+{
+ if (count != 1)
+ {
+ return;
+ }
+
+ if (stack[0].empty || stack[0].value != 25)
+ {
+ return;
+ }
+
+ vga_enable_cursor();
+}
+
+// vtconsole_append is called by vtconsole_process to process and print the
+// keys pressed onto the console.
+void vtconsole_process(vtconsole_t *vtc, char c)
+{
+ vtansi_parser_t *parser = &vtc->ansiparser;
+
+ switch (parser->state)
+ {
+ case VTSTATE_ESC:
+ if (c == '\033')
+ {
+ parser->state = VTSTATE_BRACKET;
+
+ parser->index = 0;
+
+ parser->stack[parser->index].value = 0;
+ parser->stack[parser->index].empty = 1;
+ }
+ else
+ {
+ parser->state = VTSTATE_ESC;
+ vtconsole_append(vtc, c);
+ }
+ break;
+
+ case VTSTATE_BRACKET:
+ if (c == '[')
+ {
+ parser->state = VTSTATE_ATTR;
+ }
+ else
+ {
+ parser->state = VTSTATE_ESC;
+ vtconsole_append(vtc, c);
+ }
+ break;
+ case VTSTATE_ATTR:
+ if (c >= '0' && c <= '9')
+ {
+ parser->stack[parser->index].value *= 10;
+ parser->stack[parser->index].value += (c - '0');
+ parser->stack[parser->index].empty = 0;
+ }
+ else if (c == '?')
+ {
+ /* questionable (aka wrong) */
+ break;
+ }
+ else
+ {
+ if ((parser->index) < VTC_ANSI_PARSER_STACK_SIZE)
+ {
+ parser->index++;
+ }
+
+ parser->stack[parser->index].value = 0;
+ parser->stack[parser->index].empty = 1;
+
+ parser->state = VTSTATE_ENDVAL;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (parser->state == VTSTATE_ENDVAL)
+ {
+ if (c == ';')
+ {
+ parser->state = VTSTATE_ATTR;
+ }
+ else
+ {
+ switch (c)
+ {
+ case 'A':
+ /* Cursor up P1 rows */
+ vtconsole_csi_cuu(vtc, parser->stack, parser->index);
+ break;
+ case 'B':
+ /* Cursor down P1 rows */
+ vtconsole_csi_cud(vtc, parser->stack, parser->index);
+ break;
+ case 'C':
+ /* Cursor right P1 columns */
+ vtconsole_csi_cuf(vtc, parser->stack, parser->index);
+ break;
+ case 'D':
+ /* Cursor left P1 columns */
+ vtconsole_csi_cub(vtc, parser->stack, parser->index);
+ break;
+ case 'E':
+ /* Cursor to first column of line P1 rows down from current
+ */
+ vtconsole_csi_cnl(vtc, parser->stack, parser->index);
+ break;
+ case 'F':
+ /* Cursor to first column of line P1 rows up from current */
+ vtconsole_csi_cpl(vtc, parser->stack, parser->index);
+ break;
+ case 'G':
+ /* Cursor to column P1 */
+ vtconsole_csi_cha(vtc, parser->stack, parser->index);
+ break;
+ case 'd':
+ /* Cursor to row P1 (not handled) */
+ break;
+ case 'H':
+ /* Moves the cursor to row n, column m. */
+ vtconsole_csi_cup(vtc, parser->stack, parser->index);
+ break;
+ case 'J':
+ /* Clears part of the screen. */
+ vtconsole_csi_ed(vtc, parser->stack, parser->index);
+ break;
+ case 'K':
+ /* Erases part of the line. */
+ vtconsole_csi_el(vtc, parser->stack, parser->index);
+ break;
+ case 'm':
+ /* Sets the appearance of the following characters */
+ vtconsole_csi_sgr(vtc, parser->stack, parser->index);
+ break;
+ case 'l':
+ vtconsole_csi_l(vtc, parser->stack, parser->index);
+ break;
+ case 'h':
+ vtconsole_csi_h(vtc, parser->stack, parser->index);
+ break;
+ }
+
+ parser->state = VTSTATE_ESC;
+ }
+ }
+}
+
+// vtconsole_putchar is called from vterminal_key_pressed
+void vtconsole_putchar(vtconsole_t *vtc, char c) { vtconsole_process(vtc, c); }
+
+// vtconsole_write is called from vterminal_write
+void vtconsole_write(vtconsole_t *vtc, const char *buffer, uint32_t size)
+{
+ // looping through the whole size of the buffer
+ for (uint32_t i = 0; i < size; i++)
+ {
+ // acquiring the ldisc associated with the vtconsole/vterminal
+ ldisc_t *new_ldisc = &vterminal_to_tty(vtc)->tty_ldisc;
+
+ // checking if the character is a backspace and the last entered character was a tab
+ if (buffer[i] == '\b' && new_ldisc->ldisc_buffer[(new_ldisc->ldisc_head)] == '\t')
+ {
+ // calling vtconsole_process 'n' times,
+ // where 'n' is the size of the tab.
+ for (int j = 0; j < vtc->tabs[(vtc->tab_index - 1) % LDISC_BUFFER_SIZE]; j++)
+ {
+ vtconsole_process(vtc, buffer[i]);
+ }
+ vtc->tab_index--;
+ }
+ else
+ {
+ vtconsole_process(vtc, buffer[i]);
+ }
+ }
+}
+
+// called by vterminal_make_active to redraw the console.
+void vtconsole_redraw(vtconsole_t *vtc)
+{
+ for (int i = 0; i < (vtc->width * vtc->height); i++)
+ {
+ if (vtc->on_paint)
+ {
+ vtc->on_paint(vtc, &vtc->buffer[i], i % vtc->width, i / vtc->width);
+ }
+ }
+}
+
+#define VGA_COLOR(__fg, __bg) (__bg << 4 | __fg)
+#define VGA_ENTRY(__c, __fg, __bg) \
+ ((((__bg)&0XF) << 4 | ((__fg)&0XF)) << 8 | ((__c)&0XFF))
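+
+/*
+ * Added commentary: VGA_ENTRY packs one text-mode cell as
+ * [bg: bits 15..12][fg: bits 11..8][character: bits 7..0]. For example,
+ * VGA_ENTRY('A', VGACOLOR_WHITE, VGACOLOR_BLACK) == 0x0F41.
+ */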
+
+// helper function for paint_callback.
+void vga_cell(unsigned int x, unsigned int y, unsigned short entry)
+{
+ if (x < VGA_SCREEN_WIDTH)
+ {
+ if (y < VGA_SCREEN_HEIGHT)
+ {
+ vga_write_char_at(y, x, entry);
+ }
+ }
+}
+
+static char colors[] = {
+ [VTCOLOR_BLACK] = VGACOLOR_BLACK,
+ [VTCOLOR_RED] = VGACOLOR_RED,
+ [VTCOLOR_GREEN] = VGACOLOR_GREEN,
+ [VTCOLOR_YELLOW] = VGACOLOR_BROWN,
+ [VTCOLOR_BLUE] = VGACOLOR_BLUE,
+ [VTCOLOR_MAGENTA] = VGACOLOR_MAGENTA,
+ [VTCOLOR_CYAN] = VGACOLOR_CYAN,
+ [VTCOLOR_GREY] = VGACOLOR_LIGHT_GRAY,
+};
+
+static char brightcolors[] = {
+ [VTCOLOR_BLACK] = VGACOLOR_GRAY,
+ [VTCOLOR_RED] = VGACOLOR_LIGHT_RED,
+ [VTCOLOR_GREEN] = VGACOLOR_LIGHT_GREEN,
+ [VTCOLOR_YELLOW] = VGACOLOR_LIGHT_YELLOW,
+ [VTCOLOR_BLUE] = VGACOLOR_LIGHT_BLUE,
+ [VTCOLOR_MAGENTA] = VGACOLOR_LIGHT_MAGENTA,
+ [VTCOLOR_CYAN] = VGACOLOR_LIGHT_CYAN,
+ [VTCOLOR_GREY] = VGACOLOR_WHITE,
+};
+
+static vterminal_t *active_vt = NULL;
+
+// used for initializing the vtconsoles.
+void paint_callback(vtconsole_t *vtc, vtcell_t *cell, int x, int y)
+{
+ if (vtc != active_vt)
+ {
+ return;
+ }
+
+ if (cell->attr.bright)
+ {
+ vga_cell(x, y,
+ VGA_ENTRY(cell->c, brightcolors[cell->attr.fg],
+ colors[cell->attr.bg]));
+ }
+ else
+ {
+ vga_cell(
+ x, y,
+ VGA_ENTRY(cell->c, colors[cell->attr.fg], colors[cell->attr.bg]));
+ }
+}
+
+// used for initializing the vtconsoles.
+void cursor_move_callback(vtconsole_t *vtc, vtcursor_t *cur)
+{
+ if (vtc != active_vt)
+ {
+ return;
+ }
+ vga_set_cursor(cur->y, cur->x);
+}
+
+// initialization function for vterminal which calls the vtconsole constructor
+void vterminal_init(vtconsole_t *vt)
+{
+ vtconsole(vt, VGA_SCREEN_WIDTH, VGA_SCREEN_HEIGHT, paint_callback,
+ cursor_move_callback);
+}
+
+// Used in tty.c to make a vterminal active and working.
+void vterminal_make_active(vterminal_t *vt)
+{
+ active_vt = vt;
+ vtconsole_redraw(vt);
+ vga_set_cursor(vt->cursor.y, vt->cursor.x);
+}
+
+// called by ldisc_key_pressed from ldisc.c
+void vterminal_key_pressed(vterminal_t *vt)
+{
+ char buf[LDISC_BUFFER_SIZE];
+ size_t len =
+ ldisc_get_current_line_raw(&vterminal_to_tty(vt)->tty_ldisc, buf);
+ vtconsole_putchar(vt, buf[len - 1]);
+}
+
+void vterminal_scroll_to_bottom(vterminal_t *vt) { KASSERT(0); }
+
+// ldisc_key_pressed calls this vterminal_write if VGABUF is not specified.
+size_t vterminal_write(vterminal_t *vt, const char *buf, size_t len)
+{
+ vtconsole_write(vt, buf, len);
+ return len;
+}
+
+// could be used in ldisc_key_pressed
+size_t vterminal_echo_input(vterminal_t *vt, const char *buf, size_t len)
+{
+ vtconsole_write(vt, buf, len);
+ return len;
+}
diff --git a/kernel/entry/entry.c b/kernel/entry/entry.c
new file mode 100644
index 0000000..2bc23a8
--- /dev/null
+++ b/kernel/entry/entry.c
@@ -0,0 +1,14 @@
+/* entry.c */
+#include "main/entry.h"
+#include "types.h"
+
+#include "multiboot.h"
+
+struct multiboot_tag *mb_tag;
+
+void entry(void *bootinfo_addr)
+{
+ mb_tag = bootinfo_addr;
+ kmain();
+ __asm__("cli\n\thlt");
+}
diff --git a/kernel/fs/Submodules b/kernel/fs/Submodules
new file mode 100644
index 0000000..a6a93cb
--- /dev/null
+++ b/kernel/fs/Submodules
@@ -0,0 +1 @@
+ramfs s5fs
diff --git a/kernel/fs/file.c b/kernel/fs/file.c
new file mode 100644
index 0000000..4e79a3d
--- /dev/null
+++ b/kernel/fs/file.c
@@ -0,0 +1,115 @@
+#include "fs/file.h"
+#include "fs/vfs.h"
+#include "fs/vnode.h"
+#include "kernel.h"
+#include "mm/slab.h"
+#include "util/debug.h"
+#include "util/string.h"
+
+static slab_allocator_t *file_allocator;
+
+void file_init(void)
+{
+ file_allocator = slab_allocator_create("file", sizeof(file_t));
+}
+
+void fref(file_t *f)
+{
+ KASSERT(f->f_mode <= FMODE_MAX_VALUE && f->f_vnode);
+
+ f->f_refcount++;
+
+ if (f->f_vnode)
+ {
+ dbg(DBG_FREF, "fref: 0x%p, 0x%p ino %u, up to %lu\n", f,
+ f->f_vnode->vn_fs, f->f_vnode->vn_vno, f->f_refcount);
+ }
+ else
+ {
+ dbg(DBG_FREF, "fref: 0x%p up to %lu\n", f, f->f_refcount);
+ }
+}
+
+/*
+ * Create a file, initialize its members, vref the vnode, call acquire() on the
+ * vnode if the function pointer is non-NULL, and set the file descriptor in
+ * curproc->p_files.
+ *
+ * On successful return, the vnode's refcount should be incremented by one,
+ * the file's refcount should be 1, and curproc->p_files[fd] should point to
+ * the file being returned.
+ */
+file_t *fcreate(int fd, vnode_t *vnode, unsigned int mode)
+{
+ KASSERT(!curproc->p_files[fd]);
+ file_t *file = slab_obj_alloc(file_allocator);
+ if (!file)
+ return NULL;
+ memset(file, 0, sizeof(file_t));
+ file->f_mode = mode;
+
+ vref(file->f_vnode = vnode);
+ if (vnode->vn_ops->acquire)
+ vnode->vn_ops->acquire(vnode, file);
+
+ curproc->p_files[fd] = file;
+ fref(file);
+ return file;
+}
+
+/*
+ * Perform bounds checking on the fd, use curproc->p_files to get the file,
+ * fref it if it exists, and return.
+ */
+file_t *fget(int fd)
+{
+ if (fd < 0 || fd >= NFILES)
+ return NULL;
+ file_t *file = curproc->p_files[fd];
+ if (file)
+ fref(file);
+ return file;
+}
+
+/*
+ * Decrement the refcount, and set *filep to NULL.
+ *
+ * If the refcount drops to 0, call release on the vnode if the function pointer
+ * is non-null, vput() file's vnode, and free the file memory.
+ *
+ * Regardless of the ending refcount, *filep == NULL on return.
+ */
+void fput(file_t **filep)
+{
+ file_t *file = *filep;
+ *filep = NULL;
+
+ KASSERT(file && file->f_mode <= FMODE_MAX_VALUE);
+ KASSERT(file->f_refcount > 0);
+ if (file->f_refcount != 1)
+ KASSERT(file->f_vnode);
+
+ file->f_refcount--;
+
+ if (file->f_vnode)
+ {
+ dbg(DBG_FREF, "fput: 0x%p, 0x%p ino %u, down to %lu\n", file,
+ file->f_vnode->vn_fs, file->f_vnode->vn_vno, file->f_refcount);
+ }
+ else
+ {
+ dbg(DBG_FREF, "fput: 0x%p down to %lu\n", file, file->f_refcount);
+ }
+
+ if (!file->f_refcount)
+ {
+ if (file->f_vnode)
+ {
+ vlock(file->f_vnode);
+ if (file->f_vnode->vn_ops->release)
+ file->f_vnode->vn_ops->release(file->f_vnode, file);
+ vput_locked(&file->f_vnode);
+ }
+ slab_obj_free(file_allocator, file);
+ }
+}
diff --git a/kernel/fs/namev.c b/kernel/fs/namev.c
new file mode 100644
index 0000000..9e55892
--- /dev/null
+++ b/kernel/fs/namev.c
@@ -0,0 +1,263 @@
+#include "errno.h"
+#include "globals.h"
+#include "kernel.h"
+#include <fs/dirent.h>
+
+#include "util/debug.h"
+#include "util/string.h"
+
+#include "fs/fcntl.h"
+#include "fs/stat.h"
+#include "fs/vfs.h"
+#include "fs/vnode.h"
+
+/*
+ * Get the parent of a directory. dir must not be locked.
+ */
+long namev_get_parent(vnode_t *dir, vnode_t **out)
+{
+ vlock(dir);
+ long ret = namev_lookup(dir, "..", 2, out);
+ vunlock(dir);
+ return ret;
+}
+
+/*
+ * Determines if vnode a is a descendant of vnode b.
+ * Returns 1 if true, 0 otherwise.
+ */
+long namev_is_descendant(vnode_t *a, vnode_t *b)
+{
+ vref(a);
+ vnode_t *cur = a;
+ vnode_t *next = NULL;
+ while (cur != NULL)
+ {
+ if (cur->vn_vno == b->vn_vno)
+ {
+ vput(&cur);
+ return 1;
+ }
+ else if (cur->vn_vno == cur->vn_fs->fs_root->vn_vno)
+ {
+ /* we've reached the root node. */
+ vput(&cur);
+ return 0;
+ }
+
+ /* backup the filesystem tree */
+ namev_get_parent(cur, &next);
+ vnode_t *tmp = cur;
+ cur = next;
+ vput(&tmp);
+ }
+
+ return 0;
+}
+
+/* Wrapper around dir's vnode operation lookup. dir must be locked on entry and
+ * upon return.
+ *
+ * Upon success, return 0 and return the found vnode using res_vnode, or:
+ * - ENOTDIR: dir does not have a lookup operation or is not a directory
+ * - Propagate errors from the vnode operation lookup
+ *
+ * Hints:
+ * Take a look at ramfs_lookup(), which adds a reference to res_vnode but does
+ * not touch any locks. In most cases, this means res_vnode will be unlocked
+ * upon return. However, there is a case where res_vnode would actually be
+ * locked after calling dir's lookup function (i.e. looking up '.'). You
+ * shouldn't deal with any locking in namev_lookup(), but you should be aware of
+ * this special case when writing other functions that use namev_lookup().
+ * Because you are the one writing nearly all of the calls to namev_lookup(), it
+ * is up to you both how you handle all inputs (i.e. dir or name is null,
+ * namelen is 0), and whether namev_lookup() even gets called with a bad input.
+ */
+long namev_lookup(vnode_t *dir, const char *name, size_t namelen,
+ vnode_t **res_vnode)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return 0;
+}
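+
+/*
+ * A hedged sketch of the wrapper described above (not the official solution;
+ * S_ISDIR on a vn_mode field and the argument order of the lookup vnode
+ * operation are assumptions about vnode.h/ramfs_lookup):
+ *
+ *     if (!S_ISDIR(dir->vn_mode) || !dir->vn_ops->lookup)
+ *         return -ENOTDIR;
+ *     return dir->vn_ops->lookup(dir, name, namelen, res_vnode);
+ */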
+
+/*
+ * Find the next meaningful token in a string representing a path.
+ *
+ * Returns the token and sets `len` to be the token's length.
+ *
+ * Once all tokens have been returned, the next char* returned is either NULL
+ * or "" (the empty string). In order to handle both, if you're calling
+ * this in a loop, we suggest terminating the loop once the value returned
+ * in len is 0
+ *
+ * Example usage:
+ * - "/dev/null"
+ * ==> *search would point to the first character of "/null"
+ * ==> *len would be 3 (as "dev" is of length 3)
+ * ==> namev_tokenize would return a pointer to the
+ * first character of "dev/null"
+ *
+ * - "a/b/c"
+ * ==> *search would point to the first character of "/b/c"
+ * ==> *len would be 1 (as "a" is of length 1)
+ * ==> namev_tokenize would return a pointer to the first character
+ * of "a/b/c"
+ *
+ * We highly suggest testing this function outside of Weenix; for instance
+ * using an online compiler or compiling and testing locally to fully
+ * understand its behavior. See handout for an example.
+ */
+static const char *namev_tokenize(const char **search, size_t *len)
+{
+ const char *begin;
+
+ if (*search == NULL)
+ {
+ *len = 0;
+ return NULL;
+ }
+
+ KASSERT(NULL != *search);
+
+ /* Skip initial '/' to find the beginning of the token. */
+ while (**search == '/')
+ {
+ (*search)++;
+ }
+
+ /* Determine the length of the token by searching for either the
+ * next '/' or the end of the path. */
+ begin = *search;
+ *len = 0;
+ while (**search && **search != '/')
+ {
+ (*len)++;
+ (*search)++;
+ }
+
+ if (!**search)
+ {
+ *search = NULL;
+ }
+
+ return begin;
+}
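+
+/*
+ * As the comment above suggests, namev_tokenize is easy to exercise outside
+ * of Weenix. A standalone harness (illustration only, not kernel code) could
+ * look like this, after pasting namev_tokenize in and dropping the KASSERT:
+ *
+ *     #include <stdio.h>
+ *     #include <stddef.h>
+ *
+ *     int main(void)
+ *     {
+ *         const char *search = "/a/.././//b/ccc/";
+ *         size_t len;
+ *         const char *tok;
+ *         while ((tok = namev_tokenize(&search, &len)) && len)
+ *             printf("token: %.*s (len %zu)\n", (int)len, tok, len);
+ *         return 0;
+ *     }
+ *
+ * which prints the components "a", "..", ".", "b", "ccc" in order.
+ */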
+
+/*
+ * Parse path and return in `res_vnode` the vnode corresponding to the directory
+ * containing the basename (last element) of path. `base` must not be locked on
+ * entry or on return. `res_vnode` must not be locked on return. Return via `name`
+ * and `namelen` the basename of path.
+ *
+ * Return 0 on success, or:
+ * - EINVAL: path refers to an empty string
+ * - Propagate errors from namev_lookup()
+ *
+ * Hints:
+ * - When *calling* namev_dir(), if it is unclear what to pass as the `base`, you
+ * should use `curproc->p_cwd` (think about why this makes sense).
+ * - `curproc` is a global variable that represents the current running process
+ * (a proc_t struct), which has a field called p_cwd.
+ * - The first parameter, base, is the vnode from which to start resolving
+ * path, unless path starts with a '/', in which case you should start at
+ * the root vnode, vfs_root_fs.fs_root.
+ * - Use namev_lookup() to handle each individual lookup. When looping, be
+ * careful about locking and refcounts, and make sure to clean up properly
+ * upon failure.
+ * - namev_lookup() should return with the found vnode unlocked, unless the
+ * found vnode is the same as the given directory (e.g. "/./."). Be mindful
+ * of this special case, and any locking/refcounting that comes with it.
+ * - When parsing the path, you do not need to implement hand-over-hand
+ * locking. That is, when calling `namev_lookup(dir, path, pathlen, &out)`,
+ * it is safe to put away and unlock dir before locking `out`.
+ * - You are encouraged to use namev_tokenize() to help parse path.
+ * - Whether you're using the provided base or the root vnode, you will have
+ * to explicitly lock and reference your starting vnode before using it.
+ * - Don't allocate memory to return name. Just set name to point into the
+ * correct part of path.
+ *
+ * Example usage:
+ * - "/a/.././//b/ccc/" ==> res_vnode = vnode for b, name = "ccc", namelen = 3
+ * - "tmp/..//." ==> res_vnode = base, name = ".", namelen = 1
+ * - "/dev/null" ==> rev_vnode = vnode for /dev, name = "null", namelen = 4
+ * For more examples of expected behavior, you can try out the command line
+ * utilities `dirname` and `basename` on your virtual machine or a Brown
+ * department machine.
+ */
+long namev_dir(vnode_t *base, const char *path, vnode_t **res_vnode,
+ const char **name, size_t *namelen)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return 0;
+}
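+
+/*
+ * Illustrative sketch only (not the stencil solution): one possible shape for
+ * namev_dir, following the hints above. It leans on namev_tokenize and
+ * namev_lookup; the exact locking discipline expected by namev_lookup should
+ * be checked against your own implementation.
+ */
+#if 0
+static long namev_dir_sketch(vnode_t *base, const char *path,
+                             vnode_t **res_vnode, const char **name,
+                             size_t *namelen)
+{
+    if (!path || !*path)
+    {
+        return -EINVAL;
+    }
+
+    /* Start at the root for absolute paths, otherwise at base. */
+    vnode_t *dir = (path[0] == '/') ? vfs_root_fs.fs_root : base;
+    vref(dir);
+
+    const char *cur;
+    size_t curlen;
+    cur = namev_tokenize(&path, &curlen);
+
+    /* Resolve every component except the last, which is the basename. */
+    while (path)
+    {
+        size_t nextlen;
+        const char *next = namev_tokenize(&path, &nextlen);
+        if (!nextlen)
+        {
+            break; /* only trailing slashes remained; cur is the basename */
+        }
+
+        vnode_t *child;
+        vlock(dir);
+        long ret = namev_lookup(dir, cur, curlen, &child);
+        vunlock(dir);
+        vput(&dir);
+        if (ret < 0)
+        {
+            return ret;
+        }
+        dir = child;
+        cur = next;
+        curlen = nextlen;
+    }
+
+    *res_vnode = dir; /* referenced, unlocked */
+    *name = cur;      /* points into path; nothing is allocated */
+    *namelen = curlen;
+    return 0;
+}
+#endif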
+
+/*
+ * Open the file specified by `base` and `path`, or create it, if necessary.
+ * Return the file's vnode via `res_vnode`, which should be returned unlocked
+ * and with an added reference.
+ *
+ * Return 0 on success, or:
+ * - EINVAL: O_CREAT is specified but path implies a directory
+ * - ENAMETOOLONG: path basename is too long
+ * - ENOTDIR: Attempting to open a regular file as a directory
+ * - Propagate errors from namev_dir() and namev_lookup()
+ *
+ * Hints:
+ * - A path ending in '/' implies that the basename is a directory.
+ * - Use namev_dir() to get the directory containing the basename.
+ * - Use namev_lookup() to try to obtain the desired vnode.
+ * - If namev_lookup() fails and O_CREAT is specified in oflags, use
+ * the parent directory's vnode operation mknod to create the vnode.
+ * Use the basename info from namev_dir(), and the mode and devid
+ * provided to namev_open().
+ * - Use the macro S_ISDIR() to check if a vnode actually is a directory.
+ * - Use the macro NAME_LEN to check the basename length. Check out
+ * ramfs_mknod() to confirm that the name should be null-terminated.
+ */
+long namev_open(vnode_t *base, const char *path, int oflags, int mode,
+ devid_t devid, struct vnode **res_vnode)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return 0;
+}
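+
+/*
+ * Illustrative sketch only: a possible outline of namev_open based on the
+ * hints above. Whether mknod expects the parent locked, and where exactly the
+ * O_CREAT/EINVAL check belongs, are assumptions to verify against your code.
+ */
+#if 0
+static long namev_open_sketch(vnode_t *base, const char *path, int oflags,
+                              int mode, devid_t devid,
+                              struct vnode **res_vnode)
+{
+    size_t pathlen = strlen(path);
+    long wants_dir = pathlen && path[pathlen - 1] == '/';
+
+    /* O_CREAT cannot be used to create a directory through this path. */
+    if ((oflags & O_CREAT) && wants_dir)
+    {
+        return -EINVAL;
+    }
+
+    vnode_t *dir;
+    const char *name;
+    size_t namelen;
+    long ret = namev_dir(base, path, &dir, &name, &namelen);
+    if (ret < 0)
+    {
+        return ret;
+    }
+    if (namelen >= NAME_LEN)
+    {
+        vput(&dir);
+        return -ENAMETOOLONG;
+    }
+
+    vnode_t *vn = NULL;
+    vlock(dir);
+    ret = namev_lookup(dir, name, namelen, &vn);
+    if (ret == -ENOENT && (oflags & O_CREAT))
+    {
+        ret = dir->vn_ops->mknod(dir, name, namelen, mode, devid, &vn);
+    }
+    vunlock(dir);
+    vput(&dir);
+    if (ret < 0)
+    {
+        return ret;
+    }
+
+    /* A trailing '/' means the caller expected a directory. */
+    if (wants_dir && !S_ISDIR(vn->vn_mode))
+    {
+        vput(&vn);
+        return -ENOTDIR;
+    }
+
+    *res_vnode = vn; /* unlocked, with the reference from lookup/mknod */
+    return 0;
+}
+#endif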
+
+/*
+ * Wrapper around namev_open with O_RDONLY and 0 mode/devid
+ */
+long namev_resolve(vnode_t *base, const char *path, vnode_t **res_vnode)
+{
+ return namev_open(base, path, O_RDONLY, 0, 0, res_vnode);
+}
+
+#ifdef __GETCWD__
+/* Finds the name of 'entry' in the directory 'dir'. The name is written
+ * to the given buffer. On success 0 is returned. If 'dir' does not
+ * contain 'entry' then -ENOENT is returned. If the given buffer cannot
+ * hold the result, it is filled with as many characters as possible,
+ * followed by a null terminator, and -ERANGE is returned.
+ *
+ * Files can be uniquely identified within a file system by their
+ * inode numbers. */
+int lookup_name(vnode_t *dir, vnode_t *entry, char *buf, size_t size)
+{
+ NOT_YET_IMPLEMENTED("GETCWD: ***none***");
+ return -ENOENT;
+}
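+
+/*
+ * Illustrative sketch only: scan 'dir' with its readdir operation and match
+ * on inode number, as described above. The locking around readdir is an
+ * assumption to check against your vnode locking rules.
+ */
+#if 0
+static int lookup_name_sketch(vnode_t *dir, vnode_t *entry, char *buf,
+                              size_t size)
+{
+    dirent_t d;
+    size_t off = 0;
+    ssize_t ret;
+
+    vlock(dir);
+    while ((ret = dir->vn_ops->readdir(dir, off, &d)) > 0)
+    {
+        off += ret;
+        if ((ino_t)d.d_ino == entry->vn_vno)
+        {
+            vunlock(dir);
+            if (strlen(d.d_name) + 1 > size)
+            {
+                if (size)
+                {
+                    strncpy(buf, d.d_name, size - 1);
+                    buf[size - 1] = '\0';
+                }
+                return -ERANGE;
+            }
+            strcpy(buf, d.d_name);
+            return 0;
+        }
+    }
+    vunlock(dir);
+    return -ENOENT;
+}
+#endif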
+
+/* Used to find the absolute path of the directory 'dir'. Since
+ * directories cannot have more than one link there is always
+ * a unique solution. The path is written to the given buffer.
+ * On success 0 is returned. On error this function returns a
+ * negative error code. See the man page for getcwd(3) for
+ * possible errors. Even if an error code is returned the buffer
+ * will be filled with a valid string which has some partial
+ * information about the wanted path. */
+ssize_t lookup_dirpath(vnode_t *dir, char *buf, size_t osize)
+{
+ NOT_YET_IMPLEMENTED("GETCWD: ***none***");
+
+ return -ENOENT;
+}
+#endif /* __GETCWD__ */
diff --git a/kernel/fs/open.c b/kernel/fs/open.c
new file mode 100644
index 0000000..fa6fe12
--- /dev/null
+++ b/kernel/fs/open.c
@@ -0,0 +1,67 @@
+#include "errno.h"
+#include "fs/fcntl.h"
+#include "fs/file.h"
+#include "fs/vfs.h"
+#include "fs/vfs_syscall.h"
+#include "fs/vnode.h"
+#include "globals.h"
+#include "util/debug.h"
+#include <fs/vnode.h>
+
+// NOTE: IF DOING MULTI-THREADED PROCS, NEED TO SYNCHRONIZE ACCESS TO FILE
+// DESCRIPTORS, AND, MORE GENERALLY SPEAKING, p_files, IN PARTICULAR IN THIS
+// FUNCTION AND ITS CALLERS.
+/*
+ * Go through curproc->p_files and find the first null entry.
+ * If one exists, set fd to that index and return 0.
+ *
+ * Error cases get_empty_fd is responsible for generating:
+ * - EMFILE: no empty file descriptor
+ */
+long get_empty_fd(int *fd)
+{
+ for (*fd = 0; *fd < NFILES; (*fd)++)
+ {
+ if (!curproc->p_files[*fd])
+ {
+ return 0;
+ }
+ }
+ *fd = -1;
+ return -EMFILE;
+}
+
+/*
+ * Open the file at the provided path with the specified flags.
+ *
+ * Returns the file descriptor on success, or error cases:
+ * - EINVAL: Invalid oflags
+ * - EISDIR: Trying to open a directory with write access
+ * - ENXIO: Blockdev or chardev vnode does not have an actual underlying device
+ * - ENOMEM: Not enough kernel memory (if fcreate() fails)
+ *
+ * Hints:
+ * 1) Use get_empty_fd() to get an available fd.
+ * 2) Use namev_open() with oflags, mode S_IFREG, and devid 0.
+ * 3) Check for EISDIR and ENXIO errors.
+ * 4) Convert oflags (O_RDONLY, O_WRONLY, O_RDWR, O_APPEND) into corresponding
+ * file access flags (FMODE_READ, FMODE_WRITE, FMODE_APPEND).
+ * 5) Use fcreate() to create and initialize the corresponding file descriptor
+ * with the vnode from 2) and the mode from 4).
+ *
+ * When checking oflags, you only need to check that the read and write
+ * permissions are consistent. However, because O_RDONLY is 0 and O_RDWR is 2,
+ * there's no way to tell if both were specified. So, you really only need
+ * to check if O_WRONLY and O_RDWR were specified.
+ *
+ * If O_TRUNC is specified and the vnode represents a regular file, make sure to
+ * call the vnode's truncate routine (to reduce the size of the file to 0).
+ *
+ * If a vnode represents a chardev or blockdev, then the appropriate field of
+ * the vnode->vn_dev union will point to the device. Otherwise, the union will be NULL.
+ */
+long do_open(const char *filename, int oflags)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
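+
+/*
+ * Illustrative sketch of the oflags sanity check and conversion described
+ * above (a helper one might factor out of do_open); the FMODE_* constants
+ * come from fs/file.h.
+ */
+#if 0
+static long oflags_to_fmode(int oflags, int *fmode)
+{
+    /* O_RDONLY is 0, so only the two write flags can conflict. */
+    if ((oflags & O_WRONLY) && (oflags & O_RDWR))
+    {
+        return -EINVAL;
+    }
+
+    if (oflags & O_WRONLY)
+    {
+        *fmode = FMODE_WRITE;
+    }
+    else if (oflags & O_RDWR)
+    {
+        *fmode = FMODE_READ | FMODE_WRITE;
+    }
+    else
+    {
+        *fmode = FMODE_READ;
+    }
+    if (oflags & O_APPEND)
+    {
+        *fmode |= FMODE_APPEND;
+    }
+    return 0;
+}
+#endif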
diff --git a/kernel/fs/pipe.c b/kernel/fs/pipe.c
new file mode 100644
index 0000000..b1d365f
--- /dev/null
+++ b/kernel/fs/pipe.c
@@ -0,0 +1,256 @@
+/*
+ * FILE: pipe.c
+ * AUTH: eric
+ * DESC: Implementation of pipe(2) system call.
+ * DATE: Thu Dec 26 17:08:34 2013
+ */
+
+#include "errno.h"
+#include "globals.h"
+
+#include "fs/file.h"
+#include "fs/pipe.h"
+#include "fs/stat.h"
+#include "fs/vfs.h"
+#include "fs/vfs_syscall.h"
+#include "fs/vnode.h"
+
+#include "mm/kmalloc.h"
+#include "mm/slab.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+#define PIPE_BUF_SIZE 4096
+
+static void pipe_read_vnode(fs_t *fs, vnode_t *vnode);
+
+static void pipe_delete_vnode(fs_t *fs, vnode_t *vnode);
+
+static fs_ops_t pipe_fsops = {.read_vnode = pipe_read_vnode,
+ .delete_vnode = pipe_delete_vnode,
+ .umount = NULL};
+
+static fs_t pipe_fs = {.fs_dev = "pipe",
+ .fs_type = "pipe",
+ .fs_ops = &pipe_fsops,
+ .fs_root = NULL,
+ .fs_i = NULL};
+
+static long pipe_read(vnode_t *vnode, size_t pos, void *buf, size_t count);
+
+static long pipe_write(vnode_t *vnode, size_t pos, const void *buf,
+ size_t count);
+
+static long pipe_stat(vnode_t *vnode, stat_t *ss);
+
+static long pipe_acquire(vnode_t *vnode, file_t *file);
+
+static long pipe_release(vnode_t *vnode, file_t *file);
+
+static vnode_ops_t pipe_vops = {
+ .read = pipe_read,
+ .write = pipe_write,
+ .mmap = NULL,
+ .mknod = NULL,
+ .lookup = NULL,
+ .link = NULL,
+ .unlink = NULL,
+ .mkdir = NULL,
+ .rmdir = NULL,
+ .readdir = NULL,
+ .stat = pipe_stat,
+ .acquire = pipe_acquire,
+ .release = pipe_release,
+ .get_pframe = NULL,
+ .fill_pframe = NULL,
+ .flush_pframe = NULL,
+};
+
+/* struct pipe defines some data specific to pipes. One of these
+ should be present in the vn_i field of each pipe vnode. */
+typedef struct pipe
+{
+ /* Buffer for data in the pipe, which has been written but not yet read. */
+ char *pv_buf;
+ /*
+ * Position of the head and number of characters in the buffer. You can
+ * write in characters at position head so long as size does not grow beyond
+ * the pipe buffer size.
+ */
+ off_t pv_head;
+ size_t pv_size;
+ /* Number of file descriptors using this pipe for read and write. */
+ int pv_readers;
+ int pv_writers;
+ /*
+ * Mutexes for reading and writing. Without these, readers might get non-
+ * contiguous reads in a single call (for example, if they empty the buffer
+ * but still have more to read, then the writer continues writing, waking up
+ * a different thread first) and similarly for writers.
+ */
+ kmutex_t pv_rdlock;
+ kmutex_t pv_wrlock;
+ /*
+ * Waitqueues for threads attempting to read from an empty buffer, or
+ * write to a full buffer. When the pipe becomes non-empty (or non-full)
+ * then the corresponding waitq should be broadcasted on to make sure all
+ * of the threads get a chance to go.
+ */
+ ktqueue_t pv_read_waitq;
+ ktqueue_t pv_write_waitq;
+} pipe_t;
+
+#define VNODE_TO_PIPE(vn) ((pipe_t *)((vn)->vn_i))
+
+static slab_allocator_t *pipe_allocator = NULL;
+static int next_pno = 0;
+
+void pipe_init(void)
+{
+ pipe_allocator = slab_allocator_create("pipe", sizeof(pipe_t));
+ KASSERT(pipe_allocator);
+}
+
+/*
+ * Create a pipe struct here. You are going to need to allocate all
+ * of the necessary structs and buffers, and then initialize all of
+ * the necessary fields (head, size, readers, writers, and the locks
+ * and queues.)
+ */
+static pipe_t *pipe_create(void)
+{
+ NOT_YET_IMPLEMENTED("PIPES: ***none***");
+ return NULL;
+}
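+
+/*
+ * Illustrative sketch only. It assumes the usual Weenix helpers
+ * slab_obj_alloc()/slab_obj_free() (mm/slab.h) and sched_queue_init()
+ * (proc/sched.h) for allocating from pipe_allocator and initializing the
+ * wait queues; verify those names in your tree before relying on them.
+ */
+#if 0
+static pipe_t *pipe_create_sketch(void)
+{
+    pipe_t *pipe = slab_obj_alloc(pipe_allocator);
+    if (!pipe)
+    {
+        return NULL;
+    }
+
+    pipe->pv_buf = kmalloc(PIPE_BUF_SIZE);
+    if (!pipe->pv_buf)
+    {
+        slab_obj_free(pipe_allocator, pipe);
+        return NULL;
+    }
+
+    pipe->pv_head = 0;
+    pipe->pv_size = 0;
+    pipe->pv_readers = 0;
+    pipe->pv_writers = 0;
+    kmutex_init(&pipe->pv_rdlock);
+    kmutex_init(&pipe->pv_wrlock);
+    sched_queue_init(&pipe->pv_read_waitq);
+    sched_queue_init(&pipe->pv_write_waitq);
+    return pipe;
+}
+#endif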
+
+/*
+ * Free all necessary memory.
+ */
+static void pipe_destroy(pipe_t *pipe)
+{
+ NOT_YET_IMPLEMENTED("PIPES: ***none***");
+}
+
+/* pipefs vnode operations */
+static void pipe_read_vnode(fs_t *fs, vnode_t *vnode)
+{
+ vnode->vn_ops = &pipe_vops;
+ vnode->vn_mode = S_IFIFO;
+ vnode->vn_len = 0;
+ vnode->vn_i = NULL;
+}
+
+static void pipe_delete_vnode(fs_t *fs, vnode_t *vnode)
+{
+ pipe_t *p = VNODE_TO_PIPE(vnode);
+ if (p)
+ {
+ pipe_destroy(p);
+ }
+}
+
+/*
+ * Gets a new vnode representing a pipe. We don't do all of this
+ * setup in pipe_read_vnode because creating the pipe data involves
+ * memory allocation, which might fail. Instead, the work is split
+ * into two steps: pipe_read_vnode does the basic vnode setup, and
+ * then pipe_create allocates the pipe data, at which point we can
+ * safely vput the allocated vnode if pipe_create fails.
+ */
+static vnode_t *pget(void)
+{
+ NOT_YET_IMPLEMENTED("PIPES: ***none***");
+ return NULL;
+}
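+
+/*
+ * Illustrative sketch of the two-step setup described above. Whether vget can
+ * fail and return NULL here is an assumption; adjust to your vget semantics.
+ */
+#if 0
+static vnode_t *pget_sketch(void)
+{
+    vnode_t *vn = vget(&pipe_fs, (ino_t)next_pno++);
+    if (!vn)
+    {
+        return NULL;
+    }
+
+    pipe_t *pipe = pipe_create();
+    if (!pipe)
+    {
+        vput(&vn);
+        return NULL;
+    }
+    /* pipe_read_vnode left vn_i NULL; attach the pipe data now. */
+    vn->vn_i = pipe;
+    return vn;
+}
+#endif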
+
+/*
+ * An implementation of the pipe(2) system call. You really
+ * only have to worry about a few things:
+ * o Running out of memory when allocating the vnode, at which
+ * point you should fail with ENOMEM;
+ * o Running out of file descriptors, in which case you should
+ * fail with EMFILE.
+ * Once all of the structures are set up, just put the read-end
+ * file descriptor of the pipe into pipefd[0], and the write-end
+ * descriptor into pipefd[1].
+ */
+int do_pipe(int pipefd[2])
+{
+ NOT_YET_IMPLEMENTED("PIPES: ***none***");
+ return -ENOTSUP;
+}
+
+/*
+ * When reading from a pipe, you should make sure there are enough characters in
+ * the buffer to read. If there are, grab them and move up the tail by
+ * subtracting from size. offset is ignored. Also, remember to take the reader
+ * lock to prevent other threads from reading while you are waiting for more
+ * characters.
+ *
+ * This might block, e.g. if there are no or not enough characters to read.
+ * It might be the case that there are no more writers and we aren't done
+ * reading. However, in situations like this, there is no way to open the pipe
+ * for writing again so no more writers will ever put characters in the pipe.
+ * The reader should just take as much as it needs (or barring that, as much as
+ * it can get) and return with a partial buffer.
+ */
+static long pipe_read(vnode_t *vnode, size_t pos, void *buf, size_t count)
+{
+ NOT_YET_IMPLEMENTED("PIPES: ***none***");
+ return -EINVAL;
+}
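+
+/*
+ * Illustrative sketch of the ring-buffer arithmetic implied by pv_head and
+ * pv_size: the writer writes at pv_head, so the oldest unread byte sits
+ * pv_size bytes behind it (mod PIPE_BUF_SIZE). Blocking, locking, and waitq
+ * handling are deliberately omitted here.
+ */
+#if 0
+static size_t pipe_copy_out(pipe_t *pipe, char *dst, size_t n)
+{
+    size_t copied = 0;
+    while (copied < n && pipe->pv_size > 0)
+    {
+        size_t tail =
+            (pipe->pv_head + PIPE_BUF_SIZE - pipe->pv_size) % PIPE_BUF_SIZE;
+        /* Copy one contiguous chunk, up to the end of the buffer. */
+        size_t chunk =
+            MIN(n - copied, MIN(pipe->pv_size, PIPE_BUF_SIZE - tail));
+        memcpy(dst + copied, pipe->pv_buf + tail, chunk);
+        pipe->pv_size -= chunk;
+        copied += chunk;
+    }
+    return copied;
+}
+#endif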
+
+/*
+ * Writing to a pipe is the dual of reading: if there is room, we can write our
+ * data and go, but if not, we have to wait until there is more room and alert
+ * any potential readers. Like above, you should take the writer lock to make
+ * sure your write is contiguous.
+ *
+ * If there are no more readers, we have a broken pipe, and should fail with
+ * the EPIPE error number.
+ */
+static long pipe_write(vnode_t *vnode, size_t pos, const void *buf,
+ size_t count)
+{
+ NOT_YET_IMPLEMENTED("PIPES: ***none***");
+ return -EINVAL;
+}
+
+/*
+ * It's still possible to stat a pipe using the fstat call, which takes a file
+ * descriptor. Pipes don't carry much information, though. The only fields
+ * that matter here are st_mode and st_ino; the others should be zeroed out.
+ */
+static long pipe_stat(vnode_t *vnode, stat_t *ss)
+{
+ NOT_YET_IMPLEMENTED("PIPES: ***none***");
+ return -EINVAL;
+}
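+
+/*
+ * Illustrative sketch only: as noted above, st_mode and st_ino are the only
+ * fields that carry real information for a pipe.
+ */
+#if 0
+static long pipe_stat_sketch(vnode_t *vnode, stat_t *ss)
+{
+    memset(ss, 0, sizeof(stat_t));
+    ss->st_mode = S_IFIFO;
+    ss->st_ino = (ssize_t)vnode->vn_vno;
+    return 0;
+}
+#endif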
+
+/*
+ * If someone is opening the read end of the pipe, we need to increment
+ * the reader count, and the same for the writer count if a file open
+ * for writing is acquiring this vnode. This count needs to be accurate
+ * for correct reading and writing behavior.
+ */
+static long pipe_acquire(vnode_t *vnode, file_t *file)
+{
+ NOT_YET_IMPLEMENTED("PIPES: ***none***");
+ return 0;
+}
+
+/*
+ * Subtract from the reader or writer count as necessary here. If either
+ * count hits zero, you are going to need to wake up the other group of
+ * threads so they can either return with their partial read or notice
+ * the broken pipe.
+ */
+static long pipe_release(vnode_t *vnode, file_t *file)
+{
+ NOT_YET_IMPLEMENTED("PIPES: ***none***");
+ return 0;
+}
diff --git a/kernel/fs/ramfs/ramfs.c b/kernel/fs/ramfs/ramfs.c
new file mode 100644
index 0000000..72547c4
--- /dev/null
+++ b/kernel/fs/ramfs/ramfs.c
@@ -0,0 +1,852 @@
+/*
+ * This is a special filesystem designed to be a test filesystem before s5fs has
+ * been written. It is an in-memory filesystem that supports almost all of the
+ * vnode operations. It has the following restrictions:
+ *
+ * o File sizes are limited to a single page (4096 bytes) in order
+ * to keep the code simple.
+ *
+ * o There is no support for fill_pframe, etc.
+ *
+ * o There is a maximum directory size limit
+ *
+ * o There is a maximum number of files/directories limit
+ */
+
+#include "fs/ramfs/ramfs.h"
+#include "errno.h"
+#include "fs/dirent.h"
+#include "fs/stat.h"
+#include "fs/vfs.h"
+#include "fs/vnode.h"
+#include "globals.h"
+#include "kernel.h"
+#include "mm/kmalloc.h"
+#include "mm/slab.h"
+#include "util/debug.h"
+#include "util/string.h"
+
+/*
+ * Filesystem operations
+ */
+static void ramfs_read_vnode(fs_t *fs, vnode_t *vn);
+
+static void ramfs_delete_vnode(fs_t *fs, vnode_t *vn);
+
+static long ramfs_umount(fs_t *fs);
+
+static fs_ops_t ramfs_ops = {.read_vnode = ramfs_read_vnode,
+ .delete_vnode = ramfs_delete_vnode,
+ .umount = ramfs_umount};
+
+/*
+ * vnode operations
+ */
+static ssize_t ramfs_read(vnode_t *file, size_t offset, void *buf,
+ size_t count);
+
+static ssize_t ramfs_write(vnode_t *file, size_t offset, const void *buf,
+ size_t count);
+
+/* getpage */
+static ssize_t ramfs_create(vnode_t *dir, const char *name, size_t name_len,
+ vnode_t **result);
+
+static ssize_t ramfs_mknod(struct vnode *dir, const char *name, size_t name_len,
+ int mode, devid_t devid, struct vnode **out);
+
+static ssize_t ramfs_lookup(vnode_t *dir, const char *name, size_t namelen,
+ vnode_t **out);
+
+static long ramfs_link(vnode_t *dir, const char *name, size_t namelen,
+ vnode_t *child);
+
+static ssize_t ramfs_unlink(vnode_t *dir, const char *name, size_t name_len);
+
+static ssize_t ramfs_rename(vnode_t *olddir, const char *oldname,
+ size_t oldnamelen, vnode_t *newdir,
+ const char *newname, size_t newnamelen);
+
+static ssize_t ramfs_mkdir(vnode_t *dir, const char *name, size_t name_len,
+ struct vnode **out);
+
+static ssize_t ramfs_rmdir(vnode_t *dir, const char *name, size_t name_len);
+
+static ssize_t ramfs_readdir(vnode_t *dir, size_t offset, struct dirent *d);
+
+static ssize_t ramfs_stat(vnode_t *file, stat_t *buf);
+
+static void ramfs_truncate_file(vnode_t *file);
+
+static vnode_ops_t ramfs_dir_vops = {.read = NULL,
+ .write = NULL,
+ .mmap = NULL,
+ .mknod = ramfs_mknod,
+ .lookup = ramfs_lookup,
+ .link = ramfs_link,
+ .unlink = ramfs_unlink,
+ .rename = ramfs_rename,
+ .mkdir = ramfs_mkdir,
+ .rmdir = ramfs_rmdir,
+ .readdir = ramfs_readdir,
+ .stat = ramfs_stat,
+ .acquire = NULL,
+ .release = NULL,
+ .get_pframe = NULL,
+ .fill_pframe = NULL,
+ .flush_pframe = NULL,
+ .truncate_file = NULL};
+
+static vnode_ops_t ramfs_file_vops = {.read = ramfs_read,
+ .write = ramfs_write,
+ .mmap = NULL,
+ .mknod = NULL,
+ .lookup = NULL,
+ .link = NULL,
+ .unlink = NULL,
+ .mkdir = NULL,
+ .rmdir = NULL,
+ .stat = ramfs_stat,
+ .acquire = NULL,
+ .release = NULL,
+ .get_pframe = NULL,
+ .fill_pframe = NULL,
+ .flush_pframe = NULL,
+ .truncate_file = ramfs_truncate_file};
+
+/*
+ * The ramfs 'inode' structure
+ */
+typedef struct ramfs_inode
+{
+ size_t rf_size; /* Total file size */
+ ino_t rf_ino; /* Inode number */
+ char *rf_mem; /* Memory for this file (1 page) */
+ ssize_t rf_mode; /* Type of file */
+ ssize_t rf_linkcount; /* Number of links to this file */
+} ramfs_inode_t;
+
+#define RAMFS_TYPE_DATA 0
+#define RAMFS_TYPE_DIR 1
+#define RAMFS_TYPE_CHR 2
+#define RAMFS_TYPE_BLK 3
+
+#define VNODE_TO_RAMFSINODE(vn) ((ramfs_inode_t *)(vn)->vn_i)
+#define VNODE_TO_RAMFS(vn) ((ramfs_t *)(vn)->vn_fs->fs_i)
+#define VNODE_TO_DIRENT(vn) ((ramfs_dirent_t *)VNODE_TO_RAMFSINODE(vn)->rf_mem)
+
+/*
+ * ramfs filesystem structure
+ */
+#define RAMFS_MAX_FILES 64
+
+typedef struct ramfs
+{
+ ramfs_inode_t *rfs_inodes[RAMFS_MAX_FILES]; /* Array of all files */
+} ramfs_t;
+
+/*
+ * For directories, we simply store an array of (ino, name) pairs in the
+ * memory portion of the inode.
+ */
+typedef struct ramfs_dirent
+{
+ ssize_t rd_ino; /* Inode number of this entry */
+ char rd_name[NAME_LEN]; /* Name of this entry */
+} ramfs_dirent_t;
+
+#define RAMFS_MAX_DIRENT ((size_t)(PAGE_SIZE / sizeof(ramfs_dirent_t)))
+
+/* Helper functions */
+static ssize_t ramfs_alloc_inode(fs_t *fs, ssize_t type, devid_t devid)
+{
+ ramfs_t *rfs = (ramfs_t *)fs->fs_i;
+ KASSERT((RAMFS_TYPE_DATA == type) || (RAMFS_TYPE_DIR == type) ||
+ (RAMFS_TYPE_CHR == type) || (RAMFS_TYPE_BLK == type));
+ /* Find a free inode */
+ ssize_t i;
+ for (i = 0; i < RAMFS_MAX_FILES; i++)
+ {
+ if (NULL == rfs->rfs_inodes[i])
+ {
+ ramfs_inode_t *inode;
+ if (NULL == (inode = kmalloc(sizeof(ramfs_inode_t))))
+ {
+ return -ENOSPC;
+ }
+
+ if (RAMFS_TYPE_CHR == type || RAMFS_TYPE_BLK == type)
+ {
+ /* Don't need any space in memory, so put devid in here */
+ inode->rf_mem = (char *)(uint64_t)devid;
+ }
+ else
+ {
+ /* We allocate space for the file's contents immediately */
+ if (NULL == (inode->rf_mem = page_alloc()))
+ {
+ kfree(inode);
+ return -ENOSPC;
+ }
+ memset(inode->rf_mem, 0, PAGE_SIZE);
+ }
+ inode->rf_size = 0;
+ inode->rf_ino = i;
+ inode->rf_mode = type;
+ inode->rf_linkcount = 1;
+
+ /* Install in table and return */
+ rfs->rfs_inodes[i] = inode;
+ return i;
+ }
+ }
+ return -ENOSPC;
+}
+
+/*
+ * Function implementations
+ */
+
+long ramfs_mount(struct fs *fs)
+{
+ /* Allocate filesystem */
+ ramfs_t *rfs = kmalloc(sizeof(ramfs_t));
+ if (NULL == rfs)
+ {
+ return -ENOMEM;
+ }
+
+ memset(rfs->rfs_inodes, 0, sizeof(rfs->rfs_inodes));
+
+ fs->fs_i = rfs;
+ fs->fs_ops = &ramfs_ops;
+
+ /* Set up root inode */
+ ssize_t root_ino;
+ if (0 > (root_ino = ramfs_alloc_inode(fs, RAMFS_TYPE_DIR, 0)))
+ {
+ return root_ino;
+ }
+
+ slab_allocator_t *allocator =
+ slab_allocator_create("ramfs_node", sizeof(vnode_t));
+ fs->fs_vnode_allocator = allocator;
+ KASSERT(allocator);
+
+ KASSERT(0 == root_ino);
+ ramfs_inode_t *root = rfs->rfs_inodes[root_ino];
+
+ /* Set up '.' and '..' in the root directory */
+ ramfs_dirent_t *rootdent = (ramfs_dirent_t *)root->rf_mem;
+ rootdent->rd_ino = 0;
+ strcpy(rootdent->rd_name, ".");
+ rootdent++;
+ rootdent->rd_ino = 0;
+ strcpy(rootdent->rd_name, "..");
+
+ /* Increase root inode size accordingly */
+ root->rf_size = 2 * sizeof(ramfs_dirent_t);
+
+ /* Put the root in the inode table */
+ rfs->rfs_inodes[0] = root;
+
+ /* And vget the root vnode */
+ fs->fs_root = vget(fs, 0);
+
+ return 0;
+}
+
+static void ramfs_read_vnode(fs_t *fs, vnode_t *vn)
+{
+ ramfs_t *rfs = VNODE_TO_RAMFS(vn);
+ ramfs_inode_t *inode = rfs->rfs_inodes[vn->vn_vno];
+ KASSERT(inode && inode->rf_ino == vn->vn_vno);
+
+ inode->rf_linkcount++;
+
+ vn->vn_i = inode;
+ vn->vn_len = inode->rf_size;
+
+ switch (inode->rf_mode)
+ {
+ case RAMFS_TYPE_DATA:
+ vn->vn_mode = S_IFREG;
+ vn->vn_ops = &ramfs_file_vops;
+ break;
+ case RAMFS_TYPE_DIR:
+ vn->vn_mode = S_IFDIR;
+ vn->vn_ops = &ramfs_dir_vops;
+ break;
+ case RAMFS_TYPE_CHR:
+ vn->vn_mode = S_IFCHR;
+ vn->vn_ops = NULL;
+ vn->vn_devid = (devid_t)(uint64_t)(inode->rf_mem);
+ break;
+ case RAMFS_TYPE_BLK:
+ vn->vn_mode = S_IFBLK;
+ vn->vn_ops = NULL;
+ vn->vn_devid = (devid_t)(uint64_t)(inode->rf_mem);
+ break;
+ default:
+ panic("inode %ld has unknown/invalid type %ld!!\n",
+ (ssize_t)vn->vn_vno, (ssize_t)inode->rf_mode);
+ }
+}
+
+static void ramfs_delete_vnode(fs_t *fs, vnode_t *vn)
+{
+ ramfs_inode_t *inode = VNODE_TO_RAMFSINODE(vn);
+ ramfs_t *rfs = VNODE_TO_RAMFS(vn);
+
+ if (0 == --inode->rf_linkcount)
+ {
+ KASSERT(rfs->rfs_inodes[vn->vn_vno] == inode);
+
+ rfs->rfs_inodes[vn->vn_vno] = NULL;
+ if (inode->rf_mode == RAMFS_TYPE_DATA ||
+ inode->rf_mode == RAMFS_TYPE_DIR)
+ {
+ page_free(inode->rf_mem);
+ }
+ /* otherwise, inode->rf_mem is a devid */
+
+ kfree(inode);
+ }
+}
+
+static ssize_t ramfs_umount(fs_t *fs)
+{
+ /* We don't need to do any flushing or anything as everything is in memory.
+ * Just free all of our allocated memory */
+ ramfs_t *rfs = (ramfs_t *)fs->fs_i;
+
+ vput(&fs->fs_root);
+
+ /* Free all the inodes */
+ ssize_t i;
+ for (i = 0; i < RAMFS_MAX_FILES; i++)
+ {
+ if (NULL != rfs->rfs_inodes[i])
+ {
+ if (NULL != rfs->rfs_inodes[i]->rf_mem &&
+ (rfs->rfs_inodes[i]->rf_mode == RAMFS_TYPE_DATA ||
+ rfs->rfs_inodes[i]->rf_mode == RAMFS_TYPE_DIR))
+ {
+ page_free(rfs->rfs_inodes[i]->rf_mem);
+ }
+ kfree(rfs->rfs_inodes[i]);
+ }
+ }
+
+ return 0;
+}
+
+static ssize_t ramfs_create(vnode_t *dir, const char *name, size_t name_len,
+ vnode_t **result)
+{
+ vnode_t *vn;
+ size_t i;
+ ramfs_dirent_t *entry;
+
+ /* Look for space in the directory */
+ entry = VNODE_TO_DIRENT(dir);
+ for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++)
+ {
+ if (!entry->rd_name[0])
+ {
+ break;
+ }
+ }
+
+ if (i == RAMFS_MAX_DIRENT)
+ {
+ return -ENOSPC;
+ }
+
+ /* Allocate an inode */
+ ssize_t ino;
+ if (0 > (ino = ramfs_alloc_inode(dir->vn_fs, RAMFS_TYPE_DATA, 0)))
+ {
+ return ino;
+ }
+
+ /* Get a vnode, set entry in directory */
+ vn = vget(dir->vn_fs, (ino_t)ino);
+
+ entry->rd_ino = vn->vn_vno;
+ strncpy(entry->rd_name, name, MIN(name_len, NAME_LEN - 1));
+ entry->rd_name[MIN(name_len, NAME_LEN - 1)] = '\0';
+
+ VNODE_TO_RAMFSINODE(dir)->rf_size += sizeof(ramfs_dirent_t);
+
+ *result = vn;
+
+ return 0;
+}
+
+static ssize_t ramfs_mknod(struct vnode *dir, const char *name, size_t name_len,
+ int mode, devid_t devid, struct vnode **out)
+{
+ size_t i;
+ ramfs_dirent_t *entry;
+
+ /* Look for space in the directory */
+ entry = VNODE_TO_DIRENT(dir);
+ for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++)
+ {
+ if (!entry->rd_name[0])
+ {
+ break;
+ }
+ }
+
+ if (i == RAMFS_MAX_DIRENT)
+ {
+ return -ENOSPC;
+ }
+
+ ssize_t ino;
+ if (S_ISCHR(mode))
+ {
+ ino = ramfs_alloc_inode(dir->vn_fs, RAMFS_TYPE_CHR, devid);
+ }
+ else if (S_ISBLK(mode))
+ {
+ ino = ramfs_alloc_inode(dir->vn_fs, RAMFS_TYPE_BLK, devid);
+ }
+ else if (S_ISREG(mode))
+ {
+ ino = ramfs_alloc_inode(dir->vn_fs, RAMFS_TYPE_DATA, devid);
+ }
+ else
+ {
+ panic("Invalid mode!\n");
+ }
+
+ if (ino < 0)
+ {
+ return ino;
+ }
+
+ /* Set entry in directory */
+ entry->rd_ino = ino;
+ strncpy(entry->rd_name, name, MIN(name_len, NAME_LEN - 1));
+ entry->rd_name[MIN(name_len, NAME_LEN - 1)] = '\0';
+
+ VNODE_TO_RAMFSINODE(dir)->rf_size += sizeof(ramfs_dirent_t);
+
+ vnode_t *child = vget(dir->vn_fs, ino);
+
+ dbg(DBG_VFS, "creating ino(%ld), vno(%d) with path: %s\n", ino,
+ child->vn_vno, entry->rd_name);
+
+ KASSERT(child);
+ *out = child;
+ return 0;
+}
+
+static ssize_t ramfs_lookup(vnode_t *dir, const char *name, size_t namelen,
+ vnode_t **out)
+{
+ size_t i;
+ ramfs_inode_t *inode = VNODE_TO_RAMFSINODE(dir);
+ ramfs_dirent_t *entry = (ramfs_dirent_t *)inode->rf_mem;
+
+ for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++)
+ {
+ if (name_match(entry->rd_name, name, namelen))
+ {
+ if (dir->vn_vno != entry->rd_ino)
+ {
+ fs_t *fs = (dir)->vn_fs;
+ *out = vget(fs, entry->rd_ino);
+ }
+ else
+ {
+ vref(dir);
+ *out = dir;
+ }
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static ssize_t ramfs_find_dirent(vnode_t *dir, const char *name,
+ size_t namelen)
+{
+ size_t i;
+ ramfs_inode_t *inode = VNODE_TO_RAMFSINODE(dir);
+ ramfs_dirent_t *entry = (ramfs_dirent_t *)inode->rf_mem;
+
+ for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++)
+ {
+ if (name_match(entry->rd_name, name, namelen))
+ {
+ return entry->rd_ino;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static ssize_t ramfs_append_dirent(vnode_t *dir, const char *name,
+ size_t namelen, vnode_t *child)
+{
+ size_t i;
+ ramfs_dirent_t *entry;
+
+ KASSERT(child->vn_fs == dir->vn_fs);
+
+ /* Look for space in the directory */
+ entry = VNODE_TO_DIRENT(dir);
+ for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++)
+ {
+ if (name_match(entry->rd_name, name, namelen))
+ {
+ return -EEXIST;
+ }
+
+ if (!entry->rd_name[0])
+ {
+ break;
+ }
+ }
+
+ if (i == RAMFS_MAX_DIRENT)
+ {
+ return -ENOSPC;
+ }
+
+ /* Set entry in parent */
+ entry->rd_ino = child->vn_vno;
+ strncpy(entry->rd_name, name, MIN(namelen, NAME_LEN - 1));
+ entry->rd_name[MIN(namelen, NAME_LEN - 1)] = '\0';
+
+ VNODE_TO_RAMFSINODE(dir)->rf_size += sizeof(ramfs_dirent_t);
+
+ /* Increase linkcount */
+ VNODE_TO_RAMFSINODE(child)->rf_linkcount++;
+
+ return 0;
+}
+
+static ssize_t ramfs_delete_dirent(vnode_t *dir, const char *name,
+ size_t namelen, vnode_t *child)
+{
+ int found = 0;
+ size_t i;
+ ramfs_dirent_t *entry = VNODE_TO_DIRENT(dir);
+ for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++)
+ {
+ if (name_match(entry->rd_name, name, namelen))
+ {
+ found = 1;
+ entry->rd_name[0] = '\0';
+ break;
+ }
+ }
+
+ if (!found)
+ {
+ return -EEXIST;
+ }
+
+ VNODE_TO_RAMFSINODE(dir)->rf_size -= sizeof(ramfs_dirent_t);
+ VNODE_TO_RAMFSINODE(child)->rf_linkcount--;
+
+ return 0;
+}
+
+static long ramfs_link(vnode_t *dir, const char *name, size_t namelen,
+ vnode_t *child)
+{
+ return ramfs_append_dirent(dir, name, namelen, child);
+}
+
+static ssize_t ramfs_unlink(vnode_t *dir, const char *name, size_t namelen)
+{
+ ssize_t ret;
+
+ long ino = ramfs_find_dirent(dir, name, namelen);
+ if (ino < 0)
+ {
+ return ino;
+ }
+
+ vnode_t *child = vget_locked(dir->vn_fs, (ino_t)ino);
+ KASSERT(!S_ISDIR(child->vn_mode) && "handled at VFS level");
+
+ ret = ramfs_delete_dirent(dir, name, namelen, child);
+ KASSERT(ret == 0);
+
+ vput_locked(&child);
+
+ return 0;
+}
+
+static ssize_t ramfs_rename(vnode_t *olddir, const char *oldname,
+ size_t oldnamelen, vnode_t *newdir,
+ const char *newname, size_t newnamelen)
+{
+ long ino = ramfs_find_dirent(olddir, oldname, oldnamelen);
+ if (ino < 0)
+ {
+ return ino;
+ }
+
+ vnode_t *oldvn = vget_locked(olddir->vn_fs, (ino_t)ino);
+ if (S_ISDIR(oldvn->vn_mode))
+ {
+ vput_locked(&oldvn);
+ return -EPERM;
+ }
+
+ /* Determine if an entry corresponding to `newname` already exists */
+ ino = ramfs_find_dirent(newdir, newname, newnamelen);
+    if (ino != -ENOENT)
+    {
+        vput_locked(&oldvn);
+        if (ino < 0)
+        {
+            return ino;
+        }
+        return -EEXIST;
+    }
+
+ ssize_t ret = ramfs_append_dirent(newdir, newname, newnamelen, oldvn);
+ if (ret < 0)
+ {
+ vput_locked(&oldvn);
+ return ret;
+ }
+
+ ret = ramfs_delete_dirent(olddir, oldname, oldnamelen, oldvn);
+ vput_locked(&oldvn);
+
+ return ret;
+}
+
+static ssize_t ramfs_mkdir(vnode_t *dir, const char *name, size_t name_len,
+ struct vnode **out)
+{
+ size_t i;
+ ramfs_dirent_t *entry;
+
+ /* Look for space in the directory */
+ entry = VNODE_TO_DIRENT(dir);
+ for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++)
+ {
+ if (!entry->rd_name[0])
+ {
+ break;
+ }
+ }
+
+ if (i == RAMFS_MAX_DIRENT)
+ {
+ return -ENOSPC;
+ }
+
+ /* Allocate an inode */
+ ssize_t ino;
+ if (0 > (ino = ramfs_alloc_inode(dir->vn_fs, RAMFS_TYPE_DIR, 0)))
+ {
+ return ino;
+ }
+
+ /* Set entry in parent */
+ entry->rd_ino = ino;
+ strncpy(entry->rd_name, name, MIN(name_len, NAME_LEN - 1));
+ entry->rd_name[MIN(name_len, NAME_LEN - 1)] = '\0';
+
+ VNODE_TO_RAMFSINODE(dir)->rf_size += sizeof(ramfs_dirent_t);
+
+ /* Set up '.' and '..' in the directory */
+ entry = (ramfs_dirent_t *)VNODE_TO_RAMFS(dir)->rfs_inodes[ino]->rf_mem;
+ entry->rd_ino = ino;
+ strcpy(entry->rd_name, ".");
+ entry++;
+ entry->rd_ino = dir->vn_vno;
+ strcpy(entry->rd_name, "..");
+
+ /* Increase inode size accordingly */
+ VNODE_TO_RAMFS(dir)->rfs_inodes[ino]->rf_size = 2 * sizeof(ramfs_dirent_t);
+
+ /* This probably can't fail... (unless OOM :/) */
+ *out = vget(dir->vn_fs, ino);
+
+ return 0;
+}
+
+static ssize_t ramfs_rmdir(vnode_t *dir, const char *name, size_t name_len)
+{
+ size_t i;
+ ramfs_dirent_t *entry;
+
+ KASSERT(!name_match(".", name, name_len) &&
+ !name_match("..", name, name_len));
+
+ long ino = ramfs_find_dirent(dir, name, name_len);
+ if (ino < 0)
+ {
+ return ino;
+ }
+
+ vnode_t *child = vget_locked(dir->vn_fs, (ino_t)ino);
+ if (!S_ISDIR(child->vn_mode))
+ {
+ vput_locked(&child);
+ return -ENOTDIR;
+ }
+
+ /* We have to make sure that this directory is empty */
+ entry = VNODE_TO_DIRENT(child);
+ for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++)
+ {
+ if (!strcmp(entry->rd_name, ".") || !strcmp(entry->rd_name, ".."))
+ {
+ continue;
+ }
+
+ if (entry->rd_name[0])
+ {
+ vput_locked(&child);
+ return -ENOTEMPTY;
+ }
+ }
+
+ /* Finally, remove the entry from the parent directory */
+ entry = VNODE_TO_DIRENT(dir);
+ for (i = 0; i < RAMFS_MAX_DIRENT; i++, entry++)
+ {
+ if (name_match(entry->rd_name, name, name_len))
+ {
+ entry->rd_name[0] = '\0';
+ break;
+ }
+ }
+ VNODE_TO_RAMFSINODE(dir)->rf_size -= sizeof(ramfs_dirent_t);
+
+ VNODE_TO_RAMFSINODE(child)->rf_linkcount--;
+ vput_locked(&child);
+
+ return 0;
+}
+
+static ssize_t ramfs_read(vnode_t *file, size_t offset, void *buf,
+ size_t count)
+{
+ ssize_t ret;
+ ramfs_inode_t *inode = VNODE_TO_RAMFSINODE(file);
+
+ KASSERT(!S_ISDIR(file->vn_mode));
+
+ if (offset > inode->rf_size)
+ {
+ ret = 0;
+ }
+ else if (offset + count > inode->rf_size)
+ {
+ ret = inode->rf_size - offset;
+ }
+ else
+ {
+ ret = count;
+ }
+
+ memcpy(buf, inode->rf_mem + offset, ret);
+ return ret;
+}
+
+static ssize_t ramfs_write(vnode_t *file, size_t offset, const void *buf,
+ size_t count)
+{
+ ssize_t ret;
+ ramfs_inode_t *inode = VNODE_TO_RAMFSINODE(file);
+
+ KASSERT(!S_ISDIR(file->vn_mode));
+
+ ret = MIN((size_t)count, (size_t)PAGE_SIZE - offset);
+ memcpy(inode->rf_mem + offset, buf, ret);
+
+ KASSERT(file->vn_len == inode->rf_size);
+ file->vn_len = MAX(file->vn_len, offset + ret);
+ inode->rf_size = file->vn_len;
+
+ return ret;
+}
+
+static ssize_t ramfs_readdir(vnode_t *dir, size_t offset, struct dirent *d)
+{
+ ssize_t ret = 0;
+ ramfs_dirent_t *dir_entry, *targ_entry;
+
+ KASSERT(S_ISDIR(dir->vn_mode));
+ KASSERT(0 == offset % sizeof(ramfs_dirent_t));
+
+ dir_entry = VNODE_TO_DIRENT(dir);
+ dir_entry = (ramfs_dirent_t *)(((char *)dir_entry) + offset);
+ targ_entry = dir_entry;
+
+ while ((offset < (size_t)(RAMFS_MAX_DIRENT * sizeof(ramfs_dirent_t))) &&
+ (!targ_entry->rd_name[0]))
+ {
+ ++targ_entry;
+ offset += sizeof(ramfs_dirent_t);
+ }
+
+ if (offset >= (size_t)(RAMFS_MAX_DIRENT * sizeof(ramfs_dirent_t)))
+ {
+ return 0;
+ }
+
+ ret = sizeof(ramfs_dirent_t) +
+ (targ_entry - dir_entry) * sizeof(ramfs_dirent_t);
+
+ d->d_ino = targ_entry->rd_ino;
+ d->d_off = 0; /* unused */
+ strncpy(d->d_name, targ_entry->rd_name, NAME_LEN - 1);
+ d->d_name[NAME_LEN - 1] = '\0';
+ return ret;
+}
+
+static ssize_t ramfs_stat(vnode_t *file, stat_t *buf)
+{
+ ramfs_inode_t *i = VNODE_TO_RAMFSINODE(file);
+ memset(buf, 0, sizeof(stat_t));
+ buf->st_mode = file->vn_mode;
+ buf->st_ino = (ssize_t)file->vn_vno;
+ buf->st_dev = 0;
+ if (file->vn_mode == S_IFCHR || file->vn_mode == S_IFBLK)
+ {
+ buf->st_rdev = (ssize_t)i->rf_mem;
+ }
+ buf->st_nlink = i->rf_linkcount - 1;
+ buf->st_size = (ssize_t)i->rf_size;
+ buf->st_blksize = (ssize_t)PAGE_SIZE;
+ buf->st_blocks = 1;
+
+ return 0;
+}
+
+static void ramfs_truncate_file(vnode_t *file)
+{
+ KASSERT(S_ISREG(file->vn_mode) && "This routine should only be called for regular files");
+ ramfs_inode_t *i = VNODE_TO_RAMFSINODE(file);
+ i->rf_size = 0;
+ file->vn_len = 0;
+ memset(i->rf_mem, 0, PAGE_SIZE);
+} \ No newline at end of file
diff --git a/kernel/fs/s5fs/s5fs.c b/kernel/fs/s5fs/s5fs.c
new file mode 100644
index 0000000..3790c1a
--- /dev/null
+++ b/kernel/fs/s5fs/s5fs.c
@@ -0,0 +1,860 @@
+#include "errno.h"
+#include "globals.h"
+#include "kernel.h"
+#include <mm/slab.h>
+
+#include "util/debug.h"
+#include "util/printf.h"
+#include "util/string.h"
+
+#include "proc/kmutex.h"
+
+#include "fs/dirent.h"
+#include "fs/file.h"
+#include "fs/s5fs/s5fs.h"
+#include "fs/s5fs/s5fs_subr.h"
+#include "fs/stat.h"
+
+#include "mm/kmalloc.h"
+
+static long s5_check_super(s5_super_t *super);
+
+static long s5fs_check_refcounts(fs_t *fs);
+
+static void s5fs_read_vnode(fs_t *fs, vnode_t *vn);
+
+static void s5fs_delete_vnode(fs_t *fs, vnode_t *vn);
+
+static long s5fs_umount(fs_t *fs);
+
+static void s5fs_sync(fs_t *fs);
+
+static ssize_t s5fs_read(vnode_t *vnode, size_t pos, void *buf, size_t len);
+
+static ssize_t s5fs_write(vnode_t *vnode, size_t pos, const void *buf,
+ size_t len);
+
+static long s5fs_mmap(vnode_t *file, mobj_t **ret);
+
+static long s5fs_mknod(struct vnode *dir, const char *name, size_t namelen,
+ int mode, devid_t devid, struct vnode **out);
+
+static long s5fs_lookup(vnode_t *dir, const char *name, size_t namelen,
+ vnode_t **out);
+
+static long s5fs_link(vnode_t *dir, const char *name, size_t namelen,
+ vnode_t *child);
+
+static long s5fs_unlink(vnode_t *vdir, const char *name, size_t namelen);
+
+static long s5fs_rename(vnode_t *olddir, const char *oldname, size_t oldnamelen,
+ vnode_t *newdir, const char *newname,
+ size_t newnamelen);
+
+static long s5fs_mkdir(vnode_t *dir, const char *name, size_t namelen,
+ struct vnode **out);
+
+static long s5fs_rmdir(vnode_t *parent, const char *name, size_t namelen);
+
+static long s5fs_readdir(vnode_t *vnode, size_t pos, struct dirent *d);
+
+static long s5fs_stat(vnode_t *vnode, stat_t *ss);
+
+static void s5fs_truncate_file(vnode_t *vnode);
+
+static long s5fs_release(vnode_t *vnode, file_t *file);
+
+static long s5fs_get_pframe(vnode_t *vnode, size_t pagenum, long forwrite,
+ pframe_t **pfp);
+
+static long s5fs_fill_pframe(vnode_t *vnode, pframe_t *pf);
+
+static long s5fs_flush_pframe(vnode_t *vnode, pframe_t *pf);
+
+fs_ops_t s5fs_fsops = {.read_vnode = s5fs_read_vnode,
+ .delete_vnode = s5fs_delete_vnode,
+ .umount = s5fs_umount,
+ .sync = s5fs_sync};
+
+static vnode_ops_t s5fs_dir_vops = {.read = NULL,
+ .write = NULL,
+ .mmap = NULL,
+ .mknod = s5fs_mknod,
+ .lookup = s5fs_lookup,
+ .link = s5fs_link,
+ .unlink = s5fs_unlink,
+ .rename = s5fs_rename,
+ .mkdir = s5fs_mkdir,
+ .rmdir = s5fs_rmdir,
+ .readdir = s5fs_readdir,
+ .stat = s5fs_stat,
+ .acquire = NULL,
+ .release = NULL,
+ .get_pframe = s5fs_get_pframe,
+ .fill_pframe = s5fs_fill_pframe,
+ .flush_pframe = s5fs_flush_pframe,
+ .truncate_file = NULL};
+
+static vnode_ops_t s5fs_file_vops = {.read = s5fs_read,
+ .write = s5fs_write,
+ .mmap = s5fs_mmap,
+ .mknod = NULL,
+ .lookup = NULL,
+ .link = NULL,
+ .unlink = NULL,
+ .mkdir = NULL,
+ .rmdir = NULL,
+ .readdir = NULL,
+ .stat = s5fs_stat,
+ .acquire = NULL,
+ .release = NULL,
+ .get_pframe = s5fs_get_pframe,
+ .fill_pframe = s5fs_fill_pframe,
+ .flush_pframe = s5fs_flush_pframe,
+ .truncate_file = s5fs_truncate_file};
+
+
+static mobj_ops_t s5fs_mobj_ops = {.get_pframe = NULL,
+ .fill_pframe = blockdev_fill_pframe,
+ .flush_pframe = blockdev_flush_pframe,
+ .destructor = NULL};
+
+/*
+ * Initialize the passed-in fs_t. The only members of fs_t that are initialized
+ * before the call to s5fs_mount are fs_dev and fs_type ("s5fs"). You must
+ * initialize everything else: fs_vnode_allocator, fs_i, fs_ops, fs_root.
+ *
+ * Initialize the block device for the s5fs_t that is created, and copy
+ * the super block from disk into memory.
+ */
+long s5fs_mount(fs_t *fs)
+{
+ int num;
+
+ KASSERT(fs);
+
+ if (sscanf(fs->fs_dev, "disk%d", &num) != 1)
+ {
+ return -EINVAL;
+ }
+
+ blockdev_t *dev = blockdev_lookup(MKDEVID(DISK_MAJOR, num));
+ if (!dev)
+ return -EINVAL;
+
+ slab_allocator_t *allocator =
+ slab_allocator_create("s5_node", sizeof(s5_node_t));
+ fs->fs_vnode_allocator = allocator;
+
+ s5fs_t *s5fs = (s5fs_t *)kmalloc(sizeof(s5fs_t));
+
+ if (!s5fs)
+ {
+ slab_allocator_destroy(fs->fs_vnode_allocator);
+ fs->fs_vnode_allocator = NULL;
+ return -ENOMEM;
+ }
+
+ mobj_init(&s5fs->s5f_mobj, MOBJ_FS, &s5fs_mobj_ops);
+ s5fs->s5f_bdev = dev;
+
+#ifndef OLD
+ pframe_t *pf;
+ s5_get_meta_disk_block(s5fs, S5_SUPER_BLOCK, 0, &pf);
+ memcpy(&s5fs->s5f_super, pf->pf_addr, sizeof(s5_super_t));
+ s5_release_disk_block(&pf);
+#endif
+
+ if (s5_check_super(&s5fs->s5f_super))
+ {
+ kfree(s5fs);
+ slab_allocator_destroy(fs->fs_vnode_allocator);
+ fs->fs_vnode_allocator = NULL;
+ return -EINVAL;
+ }
+
+ kmutex_init(&s5fs->s5f_mutex);
+
+ s5fs->s5f_fs = fs;
+
+ fs->fs_i = s5fs;
+ fs->fs_ops = &s5fs_fsops;
+ fs->fs_root = vget(fs, s5fs->s5f_super.s5s_root_inode);
+ // vunlock(fs->fs_root);
+
+ return 0;
+}
+
+/* Initialize a vnode and inode by reading its corresponding inode info from
+ * disk.
+ *
+ * Hints:
+ * - To read the inode from disk, you will need to use the following:
+ * - VNODE_TO_S5NODE to obtain the s5_node_t with the inode corresponding
+ * to the provided vnode
+ * - FS_TO_S5FS to obtain the s5fs object
+ * - S5_INODE_BLOCK(vn->vn_vno) to determine the block number of the block that
+ * contains the inode info
+ * - s5_get_disk_block and s5_release_disk_block to handle the disk block
+ * - S5_INODE_OFFSET to find the desired inode within the disk block
+ * containing it (returns the offset that the inode is stored within the block)
+ * - You should initialize the s5_node_t's inode field by reading directly from
+ * the inode on disk by using the page frame returned from s5_get_disk_block. Also
+ * make sure to initialize the dirtied_inode field.
+ * - Using the inode info, you need to initialize the following vnode fields:
+ * vn_len, vn_mode, and vn_ops using the fields found in the s5_inode struct.
+ * - See stat.h for vn_mode values.
+ * - For character and block devices:
+ * 1) Initialize vn_devid by reading the inode's s5_indirect_block field.
+ * 2) Set vn_ops to NULL.
+ */
+static void s5fs_read_vnode(fs_t *fs, vnode_t *vn)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+}
+
+/* Clean up the inode corresponding to the given vnode.
+ *
+ * Hints:
+ * - This function is called in the following way:
+ * mobj_put -> vnode_destructor -> s5fs_delete_vnode.
+ * - Cases to consider:
+ * 1) The inode is no longer in use (linkcount == 0), so free it using
+ * s5_free_inode.
+ * 2) The inode is dirty, so write it back to disk.
+ * 3) The inode is unchanged, so do nothing.
+ */
+static void s5fs_delete_vnode(fs_t *fs, vnode_t *vn)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+}
+
+/*
+ * See umount in vfs.h
+ *
+ * Check reference counts and the super block.
+ * Put the fs_root.
+ * Write the super block out to disk.
+ * Flush the underlying memory object.
+ */
+static long s5fs_umount(fs_t *fs)
+{
+ s5fs_t *s5fs = FS_TO_S5FS(fs);
+ blockdev_t *bd = s5fs->s5f_bdev;
+
+ if (s5fs_check_refcounts(fs))
+ {
+ panic(
+ "s5fs_umount: WARNING: linkcount corruption "
+ "discovered in fs on block device with major %d "
+ "and minor %d!!\n",
+ MAJOR(bd->bd_id), MINOR(bd->bd_id));
+ }
+ if (s5_check_super(&s5fs->s5f_super))
+ {
+ panic(
+ "s5fs_umount: WARNING: corrupted superblock "
+ "discovered on fs on block device with major %d "
+ "and minor %d!!\n",
+ MAJOR(bd->bd_id), MINOR(bd->bd_id));
+ }
+
+ vput(&fs->fs_root);
+
+ s5fs_sync(fs);
+ kfree(s5fs);
+ return 0;
+}
+
+static void s5fs_sync(fs_t *fs)
+{
+#ifdef FIXME
+ s5fs_t *s5fs = FS_TO_S5FS(fs);
+ #ifdef OLD
+ mobj_t *mobj = S5FS_TO_VMOBJ(s5fs);
+ #endif
+ mobj_t *mobj = 0; // XXX FIX ME
+
+ mobj_lock(mobj);
+
+ pframe_t *pf;
+ mobj_get_pframe(mobj, S5_SUPER_BLOCK, 1, &pf);
+ memcpy(pf->pf_addr, &s5fs->s5f_super, sizeof(s5_super_t));
+ pframe_release(&pf);
+
+ mobj_flush(S5FS_TO_VMOBJ(s5fs));
+ mobj_unlock(S5FS_TO_VMOBJ(s5fs));
+#endif
+}
+
+/* Wrapper around s5_read_file. */
+static ssize_t s5fs_read(vnode_t *vnode, size_t pos, void *buf, size_t len)
+{
+ KASSERT(!S_ISDIR(vnode->vn_mode) && "should be handled at the VFS level");
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/* Wrapper around s5_write_file. */
+static ssize_t s5fs_write(vnode_t *vnode, size_t pos, const void *buf,
+ size_t len)
+{
+ KASSERT(!S_ISDIR(vnode->vn_mode) && "should be handled at the VFS level");
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/*
+ * Any error handling should have been done before this function was called.
+ * Simply add a reference to the underlying mobj and return it through ret.
+ */
+static long s5fs_mmap(vnode_t *file, mobj_t **ret)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
+
+/* Allocate and initialize an inode and its corresponding vnode.
+ *
+ * dir - The directory in which to make the new inode
+ * name - The name of the new inode
+ * namelen - Name length
+ * mode - vn_mode of the new inode, see S_IF{} macros in stat.h
+ * devid - devid of the new inode for special devices
+ * out - Upon success, out must point to the newly created vnode
+ * Upon failure, out must be unchanged
+ *
+ * Return 0 on success, or:
+ * - ENOTSUP: mode is not S_IFCHR, S_IFBLK, or S_IFREG
+ * - Propagate errors from s5_alloc_inode and s5_link
+ *
+ * Hints:
+ * - Use mode to determine the S5_TYPE_{} for the inode.
+ * - Use s5_alloc_inode to allocate a new inode.
+ * - Use vget to obtain the vnode corresponding to the newly created inode.
+ * - Use s5_link to link the newly created inode/vnode to the parent directory.
+ * - You will need to clean up the vnode using vput in the case that
+ * the link operation fails.
+ */
+static long s5fs_mknod(struct vnode *dir, const char *name, size_t namelen,
+ int mode, devid_t devid, struct vnode **out)
+{
+ KASSERT(S_ISDIR(dir->vn_mode) && "should be handled at the VFS level");
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/* Search for a given entry within a directory.
+ *
+ * dir - The directory in which to search
+ * name - The name to search for
+ * namelen - Name length
+ * ret - Upon success, ret must point to the found vnode
+ *
+ * Return 0 on success, or:
+ * - Propagate errors from s5_find_dirent
+ *
+ * Hints:
+ * - Use s5_find_dirent, vget, and vref.
+ * - vref can be used in the case where the vnode you're looking for happens
+ * to be dir itself.
+ */
+long s5fs_lookup(vnode_t *dir, const char *name, size_t namelen,
+ vnode_t **ret)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/* Wrapper around s5_link.
+ *
+ * Return whatever s5_link returns, or:
+ * - EISDIR: child is a directory
+ */
+static long s5fs_link(vnode_t *dir, const char *name, size_t namelen,
+ vnode_t *child)
+{
+ KASSERT(S_ISDIR(dir->vn_mode) && "should be handled at the VFS level");
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/* Remove the directory entry in dir corresponding to name and namelen.
+ *
+ * Return 0 on success, or:
+ * - Propagate errors from s5_find_dirent
+ *
+ * Hints:
+ * - Use s5_find_dirent and s5_remove_dirent.
+ * - You will probably want to use vget_locked and vput_locked to protect the
+ * found vnode. Make sure your implementation of s5_remove_dirent knows what
+ * to expect.
+ */
+static long s5fs_unlink(vnode_t *dir, const char *name, size_t namelen)
+{
+ KASSERT(S_ISDIR(dir->vn_mode) && "should be handled at the VFS level");
+ KASSERT(!name_match(".", name, namelen));
+ KASSERT(!name_match("..", name, namelen));
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/* Change the name or location of a file.
+ *
+ * olddir - The directory in which the file currently resides
+ * oldname - The old name of the file
+ * oldnamelen - Length of the old name
+ * newdir - The directory in which to place the file
+ * newname - The new name of the file
+ * newnamelen - Length of the new name
+ *
+ * Return 0 on success, or:
+ * - ENAMETOOLONG: newname is >= NAME_LEN
+ * - ENOTDIR: newdir is not a directory
+ * - EISDIR: newname is a directory
+ * - Propagate errors from s5_find_dirent and s5_link
+ *
+ * Steps:
+ * 1) Use s5_find_dirent and vget_locked to obtain the vnode corresponding to oldname.
+ * 2) If newdir already contains an entry for newname:
+ *      a) Compare inode numbers and do nothing if oldname and newname refer to the same inode
+ *      b) Check if newname is a directory
+ *      c) Remove the previously existing entry for newname using s5_remove_dirent
+ *      d) Link the new dirent using s5_link
+ * 3) If there is no entry for newname, use s5_link to add a link to the old node at newname
+ * 4) Use s5_remove_dirent to remove oldname's entry in olddir
+ *
+ *
+ * Hints:
+ * - olddir and newdir should be locked on entry and not unlocked during the
+ * duration of this function. Any other vnodes locked should be unlocked and
+ * put before return.
+ * - Be careful with locking! Because you are making changes to the vnodes,
+ * you should always be using vget_locked and vput_locked. Be sure to clean
+ * up properly in error/special cases.
+ * - You DO NOT need to support renaming of directories in Weenix. If you were to
+ *   support it in the s5fs layer (which is not extra credit), you could follow the
+ *   same routine outlined in the steps above.
+ */
+static long s5fs_rename(vnode_t *olddir, const char *oldname, size_t oldnamelen,
+ vnode_t *newdir, const char *newname,
+ size_t newnamelen)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/* Create a directory.
+ *
+ * dir - The directory in which to create the new directory
+ * name - The name of the new directory
+ * namelen - Name length of the new directory
+ * out - On success, must point to the new directory, unlocked
+ * On failure, must be unchanged
+ *
+ * Return 0 on success, or:
+ * - Propagate errors from s5_alloc_inode and s5_link
+ *
+ * Steps:
+ * 1) Allocate an inode.
+ * 2) Get the child directory vnode.
+ * 3) Create the "." entry.
+ * 4) Create the ".." entry.
+ * 5) Create the name/namelen entry in the parent (that corresponds
+ * to the new directory)
+ *
+ * Hints:
+ * - If you run into any errors, you must undo previous steps.
+ * - You may assume/assert that undo operations do not fail.
+ * - It may help to assert that linkcounts are correct.
+ */
+static long s5fs_mkdir(vnode_t *dir, const char *name, size_t namelen,
+ struct vnode **out)
+{
+ KASSERT(S_ISDIR((dir)->vn_mode) && "should be handled at the VFS level");
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/* Remove a directory.
+ *
+ * Return 0 on success, or:
+ * - ENOTDIR: The specified entry is not a directory
+ * - ENOTEMPTY: The directory to be removed has entries besides "." and ".."
+ * - Propagate errors from s5_find_dirent
+ *
+ * Hints:
+ * - If you are confident you are managing directory entries properly, you can
+ * check for ENOTEMPTY by simply checking the length of the directory to be
+ * removed. An empty directory has two entries: "." and "..".
+ * - Remove the three entries created in s5fs_mkdir.
+ */
+static long s5fs_rmdir(vnode_t *parent, const char *name, size_t namelen)
+{
+ KASSERT(!name_match(".", name, namelen));
+ KASSERT(!name_match("..", name, namelen));
+ KASSERT(S_ISDIR(parent->vn_mode) && "should be handled at the VFS level");
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/* Read a directory entry.
+ *
+ * vnode - The directory from which to read an entry
+ * pos - The position within the directory to start reading from
+ * d - Caller-allocated dirent that must be properly initialized on
+ * successful return
+ *
+ * Return bytes read on success, or:
+ * - Propagate errors from s5_read_file
+ *
+ * Hints:
+ * - Use s5_read_file to read an s5_dirent_t. To do so, you can create a local
+ * s5_dirent_t variable and use that as the buffer to pass into s5_read_file.
+ * - Be careful that you read into an s5_dirent_t and populate the provided
+ * dirent_t properly.
+ */
+static long s5fs_readdir(vnode_t *vnode, size_t pos, struct dirent *d)
+{
+ KASSERT(S_ISDIR(vnode->vn_mode) && "should be handled at the VFS level");
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/* Get file status.
+ *
+ * vnode - The vnode of the file in question
+ * ss - Caller-allocated stat_t struct that must be initialized on success
+ *
+ * This function should not fail.
+ *
+ * Hint:
+ * - Initialize st_blocks using s5_inode_blocks.
+ * - Initialize st_mode using the corresponding vnode modes in stat.h.
+ * - Initialize st_rdev with the devid of special devices.
+ * - Initialize st_ino with the inode number.
+ * - Initialize st_nlink with the linkcount.
+ * - Initialize st_blksize with S5_BLOCK_SIZE.
+ * - Initialize st_size with the size of the file.
+ * - Initialize st_dev with the bd_id of the s5fs block device.
+ * - Set all other fields to 0.
+ */
+static long s5fs_stat(vnode_t *vnode, stat_t *ss)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
+/**
+ * Truncate the vnode and inode length to be 0.
+ *
+ * file - the vnode, whose size should be truncated
+ *
+ * This routine should only be called from do_open via
+ * vn_ops in the case that a regular file is opened with the
+ * O_TRUNC flag specified.
+ */
+static void s5fs_truncate_file(vnode_t *file)
+{
+ KASSERT(S_ISREG(file->vn_mode) && "This routine should only be called for regular files");
+ file->vn_len = 0;
+ s5_node_t* s5_node = VNODE_TO_S5NODE(file);
+ s5_inode_t* s5_inode = &s5_node->inode;
+ // setting the size of the inode to be 0 as well
+ s5_inode->s5_un.s5_size = 0;
+ s5_node->dirtied_inode = 1;
+
+ // Call subroutine to free the blocks that were used
+ vlock(file);
+ s5_remove_blocks(s5_node);
+ vunlock(file);
+}
+
+#ifdef OLD
+/*
+ * Wrapper around mobj_get_pframe. Remember to lock the memory object around
+ * the call to mobj_get_pframe. Assert that the get_pframe does not fail.
+ */
+inline void s5_get_disk_block(s5fs_t *s5fs, blocknum_t blocknum, long forwrite,
+ pframe_t **pfp)
+{
+ mobj_lock(S5FS_TO_VMOBJ(s5fs));
+ long ret = mobj_get_pframe(S5FS_TO_VMOBJ(s5fs), blocknum, forwrite, pfp);
+ mobj_unlock(S5FS_TO_VMOBJ(s5fs));
+ KASSERT(!ret && *pfp);
+}
+#endif
+
+/*
+ * Wrapper around device's read_block function; first looks up block in file-system cache.
+ * If not there, allocates and fills a page frame.
+ * Used for meta blocks, thus location is passed in.
+ */
+inline void s5_get_meta_disk_block(s5fs_t *s5fs, uint64_t blocknum, long forwrite,
+ pframe_t **pfp)
+{
+ mobj_lock(&s5fs->s5f_mobj);
+ mobj_find_pframe(&s5fs->s5f_mobj, blocknum, pfp);
+ if (*pfp)
+ {
+ // block is cached
+ mobj_unlock(&s5fs->s5f_mobj);
+ return;
+ }
+ mobj_create_pframe(&s5fs->s5f_mobj, blocknum, blocknum, pfp);
+ pframe_t *pf = *pfp;
+ pf->pf_addr = page_alloc();
+ KASSERT(pf->pf_addr);
+
+ blockdev_t *bd = s5fs->s5f_bdev;
+ long ret = bd->bd_ops->read_block(bd, pf->pf_addr, (blocknum_t)pf->pf_loc, 1);
+ pf->pf_dirty |= forwrite; // needed?
+ KASSERT (!ret);
+ mobj_unlock(&s5fs->s5f_mobj);
+ KASSERT(!ret && *pfp);
+}
+
+/*
+ * Wrapper around device's read_block function; allocates and fills a page frame.
+ * Assumes cache has already been searched.
+ * Used for file blocks, thus file block number is supplied.
+ */
+static inline void s5_get_file_disk_block(vnode_t *vnode, uint64_t blocknum, uint64_t loc, long forwrite,
+ pframe_t **pfp)
+{
+ //mobj_lock(&vnode->vn_mobj);
+ mobj_create_pframe(&vnode->vn_mobj, blocknum, loc, pfp);
+ //mobj_unlock(&vnode->vn_mobj);
+ pframe_t *pf = *pfp;
+ pf->pf_addr = page_alloc();
+ KASSERT(pf->pf_addr);
+ blockdev_t *bd = VNODE_TO_S5FS(vnode)->s5f_bdev;
+ long ret = bd->bd_ops->read_block(bd, pf->pf_addr, pf->pf_loc, 1);
+ pf->pf_dirty |= forwrite; // needed?
+ KASSERT (!ret);
+}
+
+/* Wrapper around pframe_release.
+ *
+ * Note: All pframe_release does is unlock the pframe. Why aren't we actually
+ * writing anything back yet? Because the pframe remains associated with
+ * whatever mobj we provided when we originally called mobj_get_pframe. If
+ * anyone tries to access the pframe later, Weenix will just give them the
+ * cached page frame from the mobj. If the pframe is ever freed (most likely on
+ * shutdown), then it will be written back to disk: mobj_flush_pframe ->
+ * blockdev_flush_pframe.
+ */
+inline void s5_release_disk_block(pframe_t **pfp) { pframe_release(pfp); }
+
+/*
+ * This is where the abstraction of vnode file block/page --> disk block is
+ * finally implemented. Check that the requested page lies within vnode->vn_len.
+ *
+ * Of course, you will want to use s5_file_block_to_disk_block. Pay attention
+ * to what the forwrite argument to s5fs_get_pframe means for the alloc argument
+ * in s5_file_block_to_disk_block.
+ *
+ * If the disk block for the corresponding file block is sparse, you should use
+ * mobj_default_get_pframe on the vnode's own memory object. This will trickle
+ * down to s5fs_fill_pframe if the pframe is not already resident.
+ *
+ * Otherwise, if the disk block is NOT sparse, you will want to simply use
+ * s5_get_disk_block. NOTE: in this case, you also need to make sure you free
+ * the pframe that resides in the vnode itself for the requested pagenum. To
+ * do so, you will want to use mobj_find_pframe and mobj_free_pframe.
+ *
+ * Given the above design, s5fs itself does not need to implement
+ * flush_pframe. Any pframe that will be written to (forwrite = 1) should always
+ * have a disk block backing it on successful return. Thus, the page frame will
+ * reside in the block device of the filesystem, where the flush_pframe is
+ * already implemented. We do, however, need to implement fill_pframe for sparse
+ * blocks.
+ */
+static long s5fs_get_pframe(vnode_t *vnode, uint64_t pagenum, long forwrite,
+ pframe_t **pfp)
+{
+#ifdef OLD
+ if (vnode->vn_len <= pagenum * PAGE_SIZE)
+ return -EINVAL;
+ long loc =
+ s5_file_block_to_disk_block(VNODE_TO_S5NODE(vnode), pagenum, forwrite);
+ if (loc < 0)
+ return loc;
+ if (loc)
+ {
+ mobj_find_pframe(&vnode->vn_mobj, pagenum, pfp);
+ if (*pfp)
+ {
+ mobj_free_pframe(&vnode->vn_mobj, pfp);
+ }
+ s5_get_disk_block(VNODE_TO_S5FS(vnode), (blocknum_t)loc, forwrite, pfp);
+ return 0;
+ }
+ else
+ {
+ KASSERT(!forwrite);
+ return mobj_default_get_pframe(&vnode->vn_mobj, pagenum, forwrite, pfp);
+ }
+#endif
+
+ if (vnode->vn_len <= pagenum * PAGE_SIZE)
+ return -EINVAL;
+ mobj_find_pframe(&vnode->vn_mobj, pagenum, pfp);
+ if (*pfp)
+ {
+ // block is cached
+ return 0;
+ }
+ int new;
+ long loc = s5_file_block_to_disk_block(VNODE_TO_S5NODE(vnode), pagenum, forwrite, &new);
+ if (loc < 0)
+ return loc;
+ if (loc) {
+ // block is mapped
+ if (new) {
+ // block didn't previously exist, thus its current contents are meaningless
+ *pfp = s5_cache_and_clear_block(&vnode->vn_mobj, pagenum, loc);
+ } else {
+ // block must be read from disk
+ s5_get_file_disk_block(vnode, pagenum, loc, forwrite, pfp);
+ }
+ return 0;
+ }
+ else
+ {
+ // block is in a sparse region of the file
+ KASSERT(!forwrite);
+ return mobj_default_get_pframe(&vnode->vn_mobj, pagenum, forwrite, pfp);
+ }
+}
+
+/*
+ * According to the documentation for s5fs_get_pframe, this only gets called when
+ * the file block for a given page number is sparse. In other words, pf
+ * corresponds to a sparse block.
+ */
+static long s5fs_fill_pframe(vnode_t *vnode, pframe_t *pf)
+{
+ memset(pf->pf_addr, 0, PAGE_SIZE);
+ return 0;
+}
+
+/*
+ * Verify the superblock. 0 on success; -1 on failure.
+ */
+static long s5_check_super(s5_super_t *super)
+{
+ if (!(super->s5s_magic == S5_MAGIC &&
+ (super->s5s_free_inode < super->s5s_num_inodes ||
+ super->s5s_free_inode == (uint32_t)-1) &&
+ super->s5s_root_inode < super->s5s_num_inodes))
+ {
+ return -1;
+ }
+ if (super->s5s_version != S5_CURRENT_VERSION)
+ {
+ dbg(DBG_PRINT,
+ "Filesystem is version %d; "
+ "only version %d is supported.\n",
+ super->s5s_version, S5_CURRENT_VERSION);
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Calculate refcounts on the filesystem.
+ */
+static void calculate_refcounts(int *counts, vnode_t *vnode)
+{
+ long ret;
+
+ size_t pos = 0;
+ dirent_t dirent;
+ vnode_t *child;
+
+ while ((ret = s5fs_readdir(vnode, pos, &dirent)) > 0)
+ {
+ counts[dirent.d_ino]++;
+ dbg(DBG_S5FS, "incrementing count of inode %d to %d\n", dirent.d_ino,
+ counts[dirent.d_ino]);
+ if (counts[dirent.d_ino] == 1)
+ {
+ child = vget_locked(vnode->vn_fs, dirent.d_ino);
+ if (S_ISDIR(child->vn_mode))
+ {
+ calculate_refcounts(counts, child);
+ }
+ vput_locked(&child);
+ }
+ pos += ret;
+ }
+
+ KASSERT(!ret);
+}
+
+/*
+ * Verify refcounts on the filesystem. 0 on success; -1 on failure.
+ */
+long s5fs_check_refcounts(fs_t *fs)
+{
+ s5fs_t *s5fs = (s5fs_t *)fs->fs_i;
+ int *refcounts;
+ long ret = 0;
+
+ refcounts = kmalloc(s5fs->s5f_super.s5s_num_inodes * sizeof(int));
+ KASSERT(refcounts);
+ memset(refcounts, 0, s5fs->s5f_super.s5s_num_inodes * sizeof(int));
+
+ vlock(fs->fs_root);
+ refcounts[fs->fs_root->vn_vno]++;
+ calculate_refcounts(refcounts, fs->fs_root);
+ refcounts[fs->fs_root->vn_vno]--;
+
+ vunlock(fs->fs_root);
+
+ dbg(DBG_PRINT,
+ "Checking refcounts of s5fs filesystem on block "
+ "device with major %d, minor %d\n",
+ MAJOR(s5fs->s5f_bdev->bd_id), MINOR(s5fs->s5f_bdev->bd_id));
+
+ for (uint32_t i = 0; i < s5fs->s5f_super.s5s_num_inodes; i++)
+ {
+ if (!refcounts[i])
+ {
+ continue;
+ }
+
+ vnode_t *vn = vget(fs, i);
+ KASSERT(vn);
+ s5_node_t *sn = VNODE_TO_S5NODE(vn);
+
+ if (refcounts[i] != sn->inode.s5_linkcount)
+ {
+ dbg(DBG_PRINT, " Inode %d, expecting %d, found %d\n", i,
+ refcounts[i], sn->inode.s5_linkcount);
+ ret = -1;
+ }
+ vput(&vn);
+ }
+
+ dbg(DBG_PRINT,
+ "Refcount check of s5fs filesystem on block "
+ "device with major %d, minor %d completed %s.\n",
+ MAJOR(s5fs->s5f_bdev->bd_id), MINOR(s5fs->s5f_bdev->bd_id),
+ (ret ? "UNSUCCESSFULLY" : "successfully"));
+
+ kfree(refcounts);
+ return ret;
+}
+
+static long s5fs_flush_pframe(vnode_t *vnode, pframe_t *pf) {
+ return blockdev_flush_pframe(&((s5fs_t *)vnode->vn_fs->fs_i)->s5f_mobj, pf);
+}
\ No newline at end of file
diff --git a/kernel/fs/s5fs/s5fs_subr.c b/kernel/fs/s5fs/s5fs_subr.c
new file mode 100644
index 0000000..c972d7c
--- /dev/null
+++ b/kernel/fs/s5fs/s5fs_subr.c
@@ -0,0 +1,590 @@
+#include "fs/s5fs/s5fs_subr.h"
+#include "drivers/blockdev.h"
+#include "errno.h"
+#include "fs/s5fs/s5fs.h"
+#include "fs/stat.h"
+#include "fs/vfs.h"
+#include "fs/vnode.h"
+#include "kernel.h"
+#include "mm/pframe.h"
+#include "proc/kmutex.h"
+#include "util/debug.h"
+#include "util/string.h"
+#include <fs/s5fs/s5fs.h>
+
+static void s5_free_block(s5fs_t *s5fs, blocknum_t block);
+
+static long s5_alloc_block(s5fs_t *s5fs);
+
+static inline void s5_lock_super(s5fs_t *s5fs)
+{
+ kmutex_lock(&s5fs->s5f_mutex);
+}
+
+static inline void s5_unlock_super(s5fs_t *s5fs)
+{
+ kmutex_unlock(&s5fs->s5f_mutex);
+}
+
+/* Helper function to obtain inode info from disk given an inode number.
+ *
+ * s5fs - The file system (it will usually be obvious what to pass for this
+ * parameter)
+ * ino - Inode number to fetch
+ * forwrite - Set if you intend to write any fields in the s5_inode_t, clear
+ * if you only intend to read
+ * pfp - Return parameter for a page frame that will contain the disk
+ * block of the desired inode
+ * inodep - Return parameter for the s5_inode_t corresponding to the desired
+ * inode
+ */
+static inline void s5_get_inode(s5fs_t *s5fs, ino_t ino, long forwrite,
+ pframe_t **pfp, s5_inode_t **inodep)
+{
+ s5_get_meta_disk_block(s5fs, S5_INODE_BLOCK(ino), forwrite, pfp);
+ *inodep = (s5_inode_t *)(*pfp)->pf_addr + S5_INODE_OFFSET(ino);
+ KASSERT((*inodep)->s5_number == ino);
+}
+
+/* Release an inode by releasing the page frame of the disk block containing the
+ * inode. See comments above s5_release_disk_block to see why we don't write
+ * anything back yet.
+ *
+ * pfp - The page frame containing the inode
+ * inodep - The inode to be released
+ *
+ * On return, pfp and inodep both point to NULL.
+ */
+static inline void s5_release_inode(pframe_t **pfp, s5_inode_t **inodep)
+{
+ KASSERT((s5_inode_t *)(*pfp)->pf_addr +
+ S5_INODE_OFFSET((*inodep)->s5_number) ==
+ *inodep);
+ *inodep = NULL;
+ s5_release_disk_block(pfp);
+}
+
+/* Helper function to obtain a specific block of a file.
+ *
+ * sn - The s5_node representing the file in question
+ *  blocknum  - The index of the desired block relative to the beginning of
+ *              the file, e.g. byte offset 8000 falls within block 1 of the
+ *              file, even though it may not be block 1 of the disk
+ * forwrite - Set if you intend to write to the block, clear if you only intend
+ * to read
+ * pfp - Return parameter for a page frame containing the block data
+ */
+static inline long s5_get_file_block(s5_node_t *sn, size_t blocknum,
+ long forwrite, pframe_t **pfp)
+{
+ return sn->vnode.vn_mobj.mo_ops.get_pframe(&sn->vnode.vn_mobj, blocknum,
+ forwrite, pfp);
+}
+
+/* Release the page frame associated with a file block. See comments above
+ * s5_release_disk_block to see why we don't write anything back yet.
+ *
+ * On return, pfp points to NULL.
+ */
+static inline void s5_release_file_block(pframe_t **pfp)
+{
+ pframe_release(pfp);
+}
+
+#ifdef OLD
+/* Given a file and a file block number, return the disk block number of the
+ * desired file block.
+ *
+ * sn - The s5_node representing the file
+ * file_blocknum - The offset of the desired block relative to the beginning of
+ * the file
+ * alloc - If set, allocate the block / indirect block as necessary
+ * If clear, don't allocate sparse blocks
+ *
+ * Return a disk block number on success, or:
+ * - 0: The block is sparse, and alloc is clear, OR
+ * The indirect block would contain the block, but the indirect block is
+ * sparse, and alloc is clear
+ * - EINVAL: The specified block number is greater than or equal to
+ * S5_MAX_FILE_BLOCKS
+ * - Propagate errors from s5_alloc_block.
+ *
+ * Hints:
+ * - Use the file inode's s5_direct_blocks and s5_indirect_block to perform the
+ * translation.
+ * - Use s5_alloc_block to allocate blocks.
+ * - Be sure to mark the inode as dirty when appropriate, i.e. when you are
+ * making changes to the actual s5_inode_t struct. Hint: Does allocating a
+ * direct block dirty the inode? What about allocating the indirect block?
+ * Finally, what about allocating a block pointed to by the indirect block?
+ * - Cases to consider:
+ *    1) file_blocknum < S5_NDIRECT_BLOCKS
+ * 2) Indirect block is not allocated but alloc is set. Be careful not to
+ * leak a block in an error case!
+ * 3) Indirect block is allocated. The desired block may be sparse, and you
+ * may have to allocate it.
+ * 4) The indirect block has not been allocated and alloc is clear.
+ */
+long s5_file_block_to_disk_block(s5_node_t *sn, size_t file_blocknum,
+ int alloc)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+#endif
+
+
+long s5_file_block_to_disk_block(s5_node_t *sn, size_t file_blocknum,
+ int alloc, int *newp)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
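+/*
+ * Illustrative sketch (not compiled): one possible shape for the direct/
+ * indirect translation described above, under the newer signature. The
+ * *newp convention (set when a data block is freshly allocated) is inferred
+ * from how s5fs_get_pframe uses it above, and the `dirtied_inode` flag is an
+ * assumption about s5_node_t; adjust to match your tree.
+ */
+#if 0
+long s5_file_block_to_disk_block(s5_node_t *sn, size_t file_blocknum,
+                                 int alloc, int *newp)
+{
+    s5_inode_t *inode = &sn->inode;
+    s5fs_t *s5fs = VNODE_TO_S5FS(&sn->vnode);
+
+    if (newp)
+        *newp = 0;
+    if (file_blocknum >= S5_MAX_FILE_BLOCKS)
+        return -EINVAL;
+
+    if (file_blocknum < S5_NDIRECT_BLOCKS)
+    {
+        long blk = inode->s5_direct_blocks[file_blocknum];
+        if (blk || !alloc)
+            return blk;
+        blk = s5_alloc_block(s5fs);
+        if (blk < 0)
+            return blk;
+        inode->s5_direct_blocks[file_blocknum] = (uint32_t)blk;
+        sn->dirtied_inode = 1;
+        if (newp)
+            *newp = 1;
+        return blk;
+    }
+
+    /* Indirect case: the indirect block itself may need to be allocated. */
+    size_t idx = file_blocknum - S5_NDIRECT_BLOCKS;
+    if (!inode->s5_indirect_block)
+    {
+        if (!alloc)
+            return 0;
+        long ind = s5_alloc_block(s5fs); /* comes back zero-filled */
+        if (ind < 0)
+            return ind;
+        inode->s5_indirect_block = (uint32_t)ind;
+        sn->dirtied_inode = 1;
+    }
+
+    pframe_t *pf;
+    s5_get_meta_disk_block(s5fs, inode->s5_indirect_block, alloc, &pf);
+    uint32_t *entries = pf->pf_addr;
+    long blk = entries[idx];
+    if (!blk && alloc)
+    {
+        blk = s5_alloc_block(s5fs);
+        if (blk < 0)
+        {
+            s5_release_disk_block(&pf);
+            return blk;
+        }
+        entries[idx] = (uint32_t)blk;
+        pf->pf_dirty = 1;
+        if (newp)
+            *newp = 1;
+    }
+    s5_release_disk_block(&pf);
+    return blk;
+}
+#endif
+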
+pframe_t *s5_cache_and_clear_block(mobj_t *mo, long block, long loc) {
+ pframe_t *pf;
+ mobj_create_pframe(mo, block, loc, &pf);
+ pf->pf_addr = page_alloc();
+ memset(pf->pf_addr, 0, PAGE_SIZE);
+ pf->pf_dirty = 1; // XXX do this later
+ return pf;
+}
+
+/* Read from a file.
+ *
+ * sn - The s5_node representing the file to read from
+ * pos - The position to start reading from
+ * buf - The buffer to read into
+ * len - The number of bytes to read
+ *
+ * Return the number of bytes read, or:
+ * - Propagate errors from s5_get_file_block (do not return a partial
+ * read). As in, if s5_get_file_block returns an error,
+ * the call to s5_read_file should fail.
+ *
+ * Hints:
+ * - Do not directly call s5_file_block_to_disk_block. To obtain pframes with
+ * the desired blocks, use s5_get_file_block and s5_release_file_block.
+ * - Be sure to handle all edge cases regarding pos and len relative to the
+ * length of the actual file. (If pos is greater than or equal to the length
+ * of the file, then s5_read_file should return 0).
+ */
+ssize_t s5_read_file(s5_node_t *sn, size_t pos, char *buf, size_t len)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
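+/*
+ * Illustrative sketch (not compiled), using only the helpers defined in this
+ * file: clamp the request to vn_len, then copy out block by block.
+ */
+#if 0
+ssize_t s5_read_file(s5_node_t *sn, size_t pos, char *buf, size_t len)
+{
+    vnode_t *vnode = &sn->vnode;
+    if (pos >= (size_t)vnode->vn_len)
+        return 0;
+    if (pos + len > (size_t)vnode->vn_len)
+        len = (size_t)vnode->vn_len - pos;
+
+    size_t nread = 0;
+    while (nread < len)
+    {
+        size_t blocknum = (pos + nread) / S5_BLOCK_SIZE;
+        size_t offset = (pos + nread) % S5_BLOCK_SIZE;
+        size_t chunk = S5_BLOCK_SIZE - offset;
+        if (chunk > len - nread)
+            chunk = len - nread;
+
+        pframe_t *pf;
+        long ret = s5_get_file_block(sn, blocknum, 0, &pf);
+        if (ret < 0)
+            return ret; /* fail the whole read rather than return a partial */
+        memcpy(buf + nread, (char *)pf->pf_addr + offset, chunk);
+        s5_release_file_block(&pf);
+        nread += chunk;
+    }
+    return (ssize_t)nread;
+}
+#endif
+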
+/* Write to a file.
+ *
+ * sn - The s5_node representing the file to write to
+ * pos - The position to start writing to
+ * buf - The buffer to write from
+ * len - The number of bytes to write
+ *
+ * Return the number of bytes written, or:
+ * - EFBIG: pos was beyond S5_MAX_FILE_SIZE
+ * - Propagate errors from s5_get_file_block (that is, do not return a partial
+ * write)
+ *
+ * Hints:
+ * - You should return -EFBIG only if the provided pos was invalid. Otherwise,
+ * it is okay to make a partial write up to the maximum file size.
+ * - Use s5_get_file_block and s5_release_file_block to obtain pframes with
+ * the desired blocks.
+ * - Because s5_get_file_block calls s5fs_get_pframe, which checks the length
+ * of the vnode, you may have to update the vnode's length before you call
+ * s5_get_file_block. In this case, you should also update the inode's
+ * s5_size and mark the inode dirty.
+ * - If, midway through writing, you run into an error with s5_get_file_block,
+ * it is okay to merely undo your most recent changes while leaving behind
+ * writes you've already made to other blocks, before returning the error.
+ * That is, it is okay to make a partial write that the caller does not know
+ * about, as long as the file's length is consistent with what you've
+ * actually written so far.
+ * - Keep the vnode's vn_len and the inode's s5_un.s5_size field in sync at
+ *   all times.
+ */
+ssize_t s5_write_file(s5_node_t *sn, size_t pos, const char *buf, size_t len)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
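+/*
+ * Illustrative sketch (not compiled): mirrors the read loop above, but grows
+ * the file first so s5fs_get_pframe's length check passes. The clamp against
+ * S5_MAX_FILE_SIZE follows the comment above; the `dirtied_inode` flag is an
+ * assumption about s5_node_t.
+ */
+#if 0
+ssize_t s5_write_file(s5_node_t *sn, size_t pos, const char *buf, size_t len)
+{
+    vnode_t *vnode = &sn->vnode;
+    if (pos >= S5_MAX_FILE_SIZE)
+        return -EFBIG;
+    if (pos + len > S5_MAX_FILE_SIZE)
+        len = S5_MAX_FILE_SIZE - pos; /* partial write up to the max size */
+
+    size_t old_len = (size_t)vnode->vn_len;
+    if (pos + len > old_len)
+    {
+        vnode->vn_len = pos + len;
+        sn->inode.s5_un.s5_size = (uint32_t)(pos + len);
+        sn->dirtied_inode = 1;
+    }
+
+    size_t nwritten = 0;
+    while (nwritten < len)
+    {
+        size_t blocknum = (pos + nwritten) / S5_BLOCK_SIZE;
+        size_t offset = (pos + nwritten) % S5_BLOCK_SIZE;
+        size_t chunk = S5_BLOCK_SIZE - offset;
+        if (chunk > len - nwritten)
+            chunk = len - nwritten;
+
+        pframe_t *pf;
+        long ret = s5_get_file_block(sn, blocknum, 1, &pf);
+        if (ret < 0)
+        {
+            /* Shrink the length back to cover only what actually got written. */
+            size_t written_end = pos + nwritten;
+            if (written_end < old_len)
+                written_end = old_len;
+            vnode->vn_len = written_end;
+            sn->inode.s5_un.s5_size = (uint32_t)written_end;
+            return ret;
+        }
+        memcpy((char *)pf->pf_addr + offset, buf + nwritten, chunk);
+        pf->pf_dirty = 1;
+        s5_release_file_block(&pf);
+        nwritten += chunk;
+    }
+    return (ssize_t)nwritten;
+}
+#endif
+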
+#ifdef OLD
+/* Allocate one block from the filesystem.
+ *
+ * Return the block number of the newly allocated block, or:
+ * - ENOSPC: There are no more free blocks
+ *
+ * Hints:
+ * - Protect access to the super block using s5_lock_super and s5_unlock super.
+ * - Recall that the free block list is a linked list of blocks containing disk
+ * block numbers of free blocks. Each node contains S5_NBLKS_PER_FNODE block
+ * numbers, where the last entry is a pointer to the next node in the linked
+ * list, or -1 if there are no more free blocks remaining. The super block's
+ * s5s_free_blocks is the first node of this linked list.
+ * - The super block's s5s_nfree member is the number of blocks that are free
+ * within s5s_free_blocks. You could use it as an index into the
+ * s5s_free_blocks array. Be sure to update the field appropriately.
+ * - When s5s_free_blocks runs out (i.e. s5s_nfree == 0), refill it by
+ * collapsing the next node of the free list into the super block. Exactly
+ * when you do this is up to you.
+ * - You should initialize the block's contents to 0. This matters especially
+ *   when you use s5_alloc_block to allocate an indirect block, since your
+ *   implementation of s5_file_block_to_disk_block probably expects sparse
+ *   blocks to be represented by a 0.
+ * - You may find it helpful to take a look at the implementation of
+ * s5_free_block below.
+ * - You may assume/assert that any pframe calls succeed.
+ */
+static long s5_alloc_block(s5fs_t *s5fs)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+#endif
+
+static long s5_alloc_block(s5fs_t *s5fs)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
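+/*
+ * Illustrative sketch (not compiled): the inverse of s5_free_block below.
+ * When the in-super array is exhausted, the next free-list node is collapsed
+ * into the super block and that node's own block number is handed out.
+ * Treating (blocknum_t)-1 as the end-of-list marker and zeroing the block
+ * before returning it both follow the comment above.
+ */
+#if 0
+static long s5_alloc_block(s5fs_t *s5fs)
+{
+    s5_lock_super(s5fs);
+    s5_super_t *s = &s5fs->s5f_super;
+    pframe_t *pf;
+    blocknum_t blockno;
+
+    if (s->s5s_nfree == 0)
+    {
+        blocknum_t next = s->s5s_free_blocks[S5_NBLKS_PER_FNODE - 1];
+        if (next == (blocknum_t)-1)
+        {
+            s5_unlock_super(s5fs);
+            return -ENOSPC;
+        }
+        /* Refill the super block's array from the node, then hand out the
+         * node block itself. */
+        s5_get_meta_disk_block(s5fs, next, 0, &pf);
+        memcpy(s->s5s_free_blocks, pf->pf_addr, sizeof(s->s5s_free_blocks));
+        s5_release_disk_block(&pf);
+        s->s5s_nfree = S5_NBLKS_PER_FNODE - 1;
+        blockno = next;
+    }
+    else
+    {
+        blockno = s->s5s_free_blocks[--s->s5s_nfree];
+    }
+
+    /* Zero the block so indirect-block users see all-sparse entries. */
+    s5_get_meta_disk_block(s5fs, blockno, 1, &pf);
+    memset(pf->pf_addr, 0, S5_BLOCK_SIZE);
+    pf->pf_dirty = 1;
+    s5_release_disk_block(&pf);
+
+    s5_unlock_super(s5fs);
+    return blockno;
+}
+#endif
+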
+/*
+ * The exact opposite of s5_alloc_block: add blockno to the free list of the
+ * filesystem. This should never fail. You may assert that any pframe calls
+ * succeed.
+ *
+ * Don't forget to protect access to the super block, update s5s_nfree, and
+ * expand the linked list correctly if the super block can no longer hold any
+ * more free blocks in its s5s_free_blocks array according to s5s_nfree.
+ */
+static void s5_free_block(s5fs_t *s5fs, blocknum_t blockno)
+{
+ s5_lock_super(s5fs);
+ s5_super_t *s = &s5fs->s5f_super;
+ dbg(DBG_S5FS, "freeing disk block %d\n", blockno);
+ KASSERT(blockno);
+ KASSERT(s->s5s_nfree < S5_NBLKS_PER_FNODE);
+
+ if (s->s5s_nfree == S5_NBLKS_PER_FNODE - 1)
+ {
+ // FIX THIS! Don't need to read prior contents
+ pframe_t *pf;
+ s5_get_meta_disk_block(s5fs, blockno, 1, &pf);
+ memcpy(pf->pf_addr, s->s5s_free_blocks, sizeof(s->s5s_free_blocks));
+ s5_release_disk_block(&pf);
+
+ s->s5s_nfree = 0;
+ s->s5s_free_blocks[S5_NBLKS_PER_FNODE - 1] = blockno;
+ }
+ else
+ {
+ s->s5s_free_blocks[s->s5s_nfree++] = blockno;
+ }
+ s5_unlock_super(s5fs);
+}
+
+/*
+ * Allocate one inode from the filesystem. You will need to use the super block
+ * s5s_free_inode member. You must initialize the on-disk contents of the
+ * allocated inode according to the arguments type and devid.
+ *
+ * Recall that the free inode list is a linked list. Each free inode contains a
+ * link to the next free inode. The super block s5s_free_inode must always point
+ * to the next free inode, or contain -1 to indicate no more inodes are
+ * available.
+ *
+ * Don't forget to protect access to the super block and update s5s_free_inode.
+ *
+ * You should use s5_get_inode and s5_release_inode.
+ *
+ * On success, return the newly allocated inode number.
+ * On failure, return -ENOSPC.
+ */
+long s5_alloc_inode(s5fs_t *s5fs, uint16_t type, devid_t devid)
+{
+ KASSERT((S5_TYPE_DATA == type) || (S5_TYPE_DIR == type) ||
+ (S5_TYPE_CHR == type) || (S5_TYPE_BLK == type));
+
+ s5_lock_super(s5fs);
+ uint32_t new_ino = s5fs->s5f_super.s5s_free_inode;
+ if (new_ino == (uint32_t)-1)
+ {
+ s5_unlock_super(s5fs);
+ return -ENOSPC;
+ }
+
+ pframe_t *pf;
+ s5_inode_t *inode;
+ s5_get_inode(s5fs, new_ino, 1, &pf, &inode);
+
+ s5fs->s5f_super.s5s_free_inode = inode->s5_un.s5_next_free;
+ KASSERT(inode->s5_un.s5_next_free != inode->s5_number);
+
+ inode->s5_un.s5_size = 0;
+ inode->s5_type = type;
+ inode->s5_linkcount = 0;
+ memset(inode->s5_direct_blocks, 0, sizeof(inode->s5_direct_blocks));
+ inode->s5_indirect_block =
+ (S5_TYPE_CHR == type || S5_TYPE_BLK == type) ? devid : 0;
+
+ s5_release_inode(&pf, &inode);
+ s5_unlock_super(s5fs);
+
+ dbg(DBG_S5FS, "allocated inode %d\n", new_ino);
+ return new_ino;
+}
+
+/*
+ * Free the inode by:
+ * 1) adding the inode to the free inode linked list (opposite of
+ * s5_alloc_inode), and 2) freeing all blocks being used by the inode.
+ *
+ * The suggested order of operations to avoid deadlock, is:
+ * 1) lock the super block
+ * 2) get the inode to be freed
+ * 3) update the free inode linked list
+ * 4) copy the blocks to be freed from the inode onto the stack
+ * 5) release the inode
+ * 6) unlock the super block
+ * 7) free all direct blocks
+ * 8) get the indirect block
+ * 9) copy the indirect block array onto the stack
+ * 10) release the indirect block
+ * 11) free the indirect blocks
+ * 12) free the indirect block itself
+ */
+void s5_free_inode(s5fs_t *s5fs, ino_t ino)
+{
+ pframe_t *pf;
+ s5_inode_t *inode;
+ s5_lock_super(s5fs);
+ s5_get_inode(s5fs, ino, 1, &pf, &inode);
+
+ uint32_t direct_blocks_to_free[S5_NDIRECT_BLOCKS];
+ uint32_t indirect_block_to_free;
+ if (inode->s5_type == S5_TYPE_DATA || inode->s5_type == S5_TYPE_DIR)
+ {
+ indirect_block_to_free = inode->s5_indirect_block;
+ memcpy(direct_blocks_to_free, inode->s5_direct_blocks,
+ sizeof(direct_blocks_to_free));
+ }
+ else
+ {
+ KASSERT(inode->s5_type == S5_TYPE_BLK || inode->s5_type == S5_TYPE_CHR);
+ indirect_block_to_free = 0;
+ memset(direct_blocks_to_free, 0, sizeof(direct_blocks_to_free));
+ }
+
+ inode->s5_un.s5_next_free = s5fs->s5f_super.s5s_free_inode;
+ inode->s5_type = S5_TYPE_FREE;
+ s5fs->s5f_super.s5s_free_inode = inode->s5_number;
+
+ s5_release_inode(&pf, &inode);
+ s5_unlock_super(s5fs);
+
+ for (unsigned i = 0; i < S5_NDIRECT_BLOCKS; i++)
+ {
+ if (direct_blocks_to_free[i])
+ {
+ s5_free_block(s5fs, direct_blocks_to_free[i]);
+ }
+ }
+ if (indirect_block_to_free)
+ {
+ uint32_t indirect_blocks_to_free[S5_NIDIRECT_BLOCKS];
+
+ s5_get_meta_disk_block(s5fs, indirect_block_to_free, 0, &pf);
+ KASSERT(S5_BLOCK_SIZE == PAGE_SIZE);
+ memcpy(indirect_blocks_to_free, pf->pf_addr, S5_BLOCK_SIZE);
+ s5_release_disk_block(&pf);
+
+ for (unsigned i = 0; i < S5_NIDIRECT_BLOCKS; i++)
+ {
+ if (indirect_blocks_to_free[i])
+ {
+ s5_free_block(s5fs, indirect_blocks_to_free[i]);
+ }
+ }
+ s5_free_block(s5fs, indirect_block_to_free);
+ }
+ dbg(DBG_S5FS, "freed inode %d\n", ino);
+}
+
+/* Return the inode number corresponding to the directory entry specified by
+ * name and namelen within a given directory.
+ *
+ * sn - The directory to search in
+ * name - The name to search for
+ * namelen - Length of name
+ * filepos - If non-NULL, use filepos to return the starting position of the
+ * directory entry
+ *
+ * Return the desired inode number, or:
+ * - ENOENT: Could not find a directory entry with the specified name
+ *
+ * Hints:
+ * - Use s5_read_file in increments of sizeof(s5_dirent_t) to read successive
+ * directory entries and compare them against name and namelen.
+ * - To avoid reading beyond the end of the directory, check if the return
+ * value of s5_read_file is 0
+ * - You could optimize this function by using s5_get_file_block (rather than
+ * s5_read_file) to ensure you do not read beyond the length of the file,
+ * but doing so is optional.
+ */
+long s5_find_dirent(s5_node_t *sn, const char *name, size_t namelen,
+ size_t *filepos)
+{
+ KASSERT(S_ISDIR(sn->vnode.vn_mode) && "should be handled at the VFS level");
+ KASSERT(S5_BLOCK_SIZE == PAGE_SIZE && "be wary, thee");
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
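+/*
+ * Illustrative sketch (not compiled): a linear scan in sizeof(s5_dirent_t)
+ * strides via s5_read_file, as the hints suggest. The s5_dirent_t field
+ * names (s5d_inode, s5d_name) are assumptions taken from s5fs.h.
+ */
+#if 0
+long s5_find_dirent(s5_node_t *sn, const char *name, size_t namelen,
+                    size_t *filepos)
+{
+    s5_dirent_t dirent;
+    size_t pos = 0;
+    ssize_t ret;
+
+    while ((ret = s5_read_file(sn, pos, (char *)&dirent, sizeof(dirent))) ==
+           (ssize_t)sizeof(dirent))
+    {
+        if (strncmp(dirent.s5d_name, name, namelen) == 0 &&
+            dirent.s5d_name[namelen] == '\0')
+        {
+            if (filepos)
+                *filepos = pos;
+            return dirent.s5d_inode;
+        }
+        pos += sizeof(dirent);
+    }
+    return ret < 0 ? ret : -ENOENT; /* ret == 0 means end of directory */
+}
+#endif
+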
+/* Remove the directory entry specified by name and namelen from the directory
+ * sn.
+ *
+ * child - The found directory entry must correspond to the caller-provided
+ * child
+ *
+ * No return value. This function should never fail. You should assert that
+ * anything which could be incorrect is correct, and any function calls which
+ * could fail succeed.
+ *
+ * Hints:
+ * - Assert that the directory exists.
+ * - Assert that the found directory entry corresponds to child.
+ * - Ensure that the remaining directory entries in the file are contiguous. To
+ * do this, you should:
+ * - Overwrite the removed entry with the last directory entry.
+ * - Truncate the length of the directory by sizeof(s5_dirent_t).
+ * - Make sure you are only using s5_dirent_t, and not dirent_t structs.
+ * - Decrement the child's linkcount, because you have removed the directory's
+ * link to the child.
+ * - Mark the inodes as dirtied.
+ * - Use s5_find_dirent to find the position of the entry being removed.
+ */
+void s5_remove_dirent(s5_node_t *sn, const char *name, size_t namelen,
+ s5_node_t *child)
+{
+ vnode_t *dir = &sn->vnode;
+ s5_inode_t *inode = &sn->inode;
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+}
+
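+/*
+ * Illustrative sketch (not compiled): swap the last entry into the removed
+ * slot, shrink the directory by one dirent, and drop the child's link. The
+ * s5_dirent_t field names and the `dirtied_inode` flag are assumptions
+ * about the surrounding tree.
+ */
+#if 0
+void s5_remove_dirent(s5_node_t *sn, const char *name, size_t namelen,
+                      s5_node_t *child)
+{
+    vnode_t *dir = &sn->vnode;
+    s5_inode_t *inode = &sn->inode;
+
+    size_t filepos;
+    long ino = s5_find_dirent(sn, name, namelen, &filepos);
+    KASSERT(ino >= 0 && "directory entry must exist");
+    KASSERT((ino_t)ino == child->vnode.vn_vno);
+
+    /* Overwrite the removed slot with the last entry, unless it is the last. */
+    size_t last_pos = (size_t)dir->vn_len - sizeof(s5_dirent_t);
+    if (filepos != last_pos)
+    {
+        s5_dirent_t last;
+        ssize_t ret = s5_read_file(sn, last_pos, (char *)&last, sizeof(last));
+        KASSERT(ret == (ssize_t)sizeof(last));
+        ret = s5_write_file(sn, filepos, (const char *)&last, sizeof(last));
+        KASSERT(ret == (ssize_t)sizeof(last));
+    }
+
+    /* Truncate the directory by one entry. */
+    dir->vn_len = last_pos;
+    inode->s5_un.s5_size = (uint32_t)last_pos;
+    sn->dirtied_inode = 1;
+
+    /* The directory no longer links to the child. */
+    child->inode.s5_linkcount--;
+    child->dirtied_inode = 1;
+}
+#endif
+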
+/* Replace a directory entry.
+ *
+ * sn - The directory to search within
+ * name - The name of the old directory entry
+ * namelen - Length of the old directory entry name
+ * old - The s5_node corresponding to the old directory entry
+ * new - The s5_node corresponding to the new directory entry
+ *
+ * No return value. Similar to s5_remove_dirent, this function should never
+ * fail. You should assert that everything behaves correctly.
+ *
+ * Hints:
+ * - Assert that the directory exists, that the directory entry exists, and
+ * that it corresponds to the old s5_node.
+ * - When forming the new directory entry, use the same name and namelen from
+ * before, but use the inode number from the new s5_node.
+ * - Update linkcounts and dirty inodes appropriately.
+ *
+ * s5_replace_dirent is NOT necessary to implement. It's only useful if
+ * you're planning on implementing the renaming of directories (which you shouldn't
+ * attempt until after the rest of S5FS is done).
+ */
+void s5_replace_dirent(s5_node_t *sn, const char *name, size_t namelen,
+ s5_node_t *old, s5_node_t *new)
+{
+ vnode_t *dir = &sn->vnode;
+ s5_inode_t *inode = &sn->inode;
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+}
+
+/* Create a directory entry.
+ *
+ * dir - The directory within which to create a new entry
+ * name - The name of the new entry
+ * namelen - Length of the new entry name
+ * child - The s5_node holding the inode which the new entry should represent
+ *
+ * Return 0 on success, or:
+ * - EEXIST: The directory entry already exists
+ * - Propagate errors from s5_write_file
+ *
+ * Hints:
+ * - Update linkcounts and mark inodes dirty appropriately.
+ * - You may wish to assert at the end of s5_link that the directory entry
+ * exists and that its inode is, as expected, the inode of child.
+ */
+long s5_link(s5_node_t *dir, const char *name, size_t namelen,
+ s5_node_t *child)
+{
+ KASSERT(kmutex_owns_mutex(&dir->vnode.vn_mobj.mo_mutex));
+
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
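+/*
+ * Illustrative sketch (not compiled): append one dirent at the end of the
+ * directory and bump the child's link count. The s5_dirent_t field names
+ * and the `dirtied_inode` flag are assumptions about the surrounding tree.
+ */
+#if 0
+long s5_link(s5_node_t *dir, const char *name, size_t namelen,
+             s5_node_t *child)
+{
+    KASSERT(kmutex_owns_mutex(&dir->vnode.vn_mobj.mo_mutex));
+    KASSERT(namelen < sizeof(((s5_dirent_t *)0)->s5d_name));
+
+    if (s5_find_dirent(dir, name, namelen, NULL) >= 0)
+        return -EEXIST;
+
+    s5_dirent_t dirent;
+    memset(&dirent, 0, sizeof(dirent)); /* also null-terminates the name */
+    dirent.s5d_inode = child->inode.s5_number;
+    memcpy(dirent.s5d_name, name, namelen);
+
+    ssize_t ret = s5_write_file(dir, (size_t)dir->vnode.vn_len,
+                                (const char *)&dirent, sizeof(dirent));
+    if (ret < 0)
+        return ret;
+
+    child->inode.s5_linkcount++;
+    child->dirtied_inode = 1;
+
+    KASSERT(s5_find_dirent(dir, name, namelen, NULL) ==
+            (long)child->inode.s5_number);
+    return 0;
+}
+#endif
+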
+/* Return the number of file blocks allocated for sn, i.e. any direct or
+ * indirect file blocks that are not sparse. If the indirect block itself is
+ * allocated, count it as well. This function should not fail.
+ *
+ * Hint:
+ * - You may wish to assert that the special character / block files do not
+ * have any blocks allocated to them. Remember, the s5_indirect_block for
+ * these special files is actually the device id.
+ */
+long s5_inode_blocks(s5_node_t *sn)
+{
+ NOT_YET_IMPLEMENTED("S5FS: ***none***");
+ return -1;
+}
+
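+/*
+ * Illustrative sketch (not compiled): count non-sparse direct entries, and
+ * if the indirect block exists for a data/directory inode, count it plus its
+ * non-sparse entries.
+ */
+#if 0
+long s5_inode_blocks(s5_node_t *sn)
+{
+    s5_inode_t *inode = &sn->inode;
+    long count = 0;
+
+    if (inode->s5_type == S5_TYPE_CHR || inode->s5_type == S5_TYPE_BLK)
+        return 0; /* s5_indirect_block is really the device id here */
+
+    for (unsigned i = 0; i < S5_NDIRECT_BLOCKS; i++)
+    {
+        if (inode->s5_direct_blocks[i])
+            count++;
+    }
+
+    if (inode->s5_indirect_block)
+    {
+        count++; /* the indirect block itself */
+        pframe_t *pf;
+        s5_get_meta_disk_block(VNODE_TO_S5FS(&sn->vnode),
+                               inode->s5_indirect_block, 0, &pf);
+        uint32_t *entries = pf->pf_addr;
+        for (unsigned i = 0; i < S5_NIDIRECT_BLOCKS; i++)
+        {
+            if (entries[i])
+                count++;
+        }
+        s5_release_disk_block(&pf);
+    }
+    return count;
+}
+#endif
+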
+/**
+ * Given an s5_node_t, frees the associated direct blocks and
+ * the indirect blocks if they exist.
+ *
+ * Should only be called from the truncate_file routine.
+ */
+void s5_remove_blocks(s5_node_t *sn)
+{
+ // Free the blocks used by the node
+    // First, free the direct blocks
+ s5fs_t* s5fs = VNODE_TO_S5FS(&sn->vnode);
+ s5_inode_t* s5_inode = &sn->inode;
+ for (unsigned i = 0; i < S5_NDIRECT_BLOCKS; i++)
+ {
+ if (s5_inode->s5_direct_blocks[i])
+ {
+ s5_free_block(s5fs, s5_inode->s5_direct_blocks[i]);
+ }
+ }
+
+ memset(s5_inode->s5_direct_blocks, 0, sizeof(s5_inode->s5_direct_blocks));
+
+ // Get the indirect blocks and free them, if they exist
+ if (s5_inode->s5_indirect_block)
+ {
+ pframe_t *pf;
+ s5_get_meta_disk_block(s5fs, s5_inode->s5_indirect_block, 0, &pf);
+ uint32_t *blocknum_ptr = pf->pf_addr;
+
+ for (unsigned i = 0; i < S5_NIDIRECT_BLOCKS; i++)
+ {
+ if (blocknum_ptr[i])
+ {
+ s5_free_block(s5fs, blocknum_ptr[i]);
+ }
+ }
+
+ s5_release_disk_block(&pf);
+ // Free the indirect block itself
+ s5_free_block(s5fs, s5_inode->s5_indirect_block);
+ s5_inode->s5_indirect_block = 0;
+ }
+}
diff --git a/kernel/fs/vfs.c b/kernel/fs/vfs.c
new file mode 100644
index 0000000..3f5ed15
--- /dev/null
+++ b/kernel/fs/vfs.c
@@ -0,0 +1,222 @@
+#include "errno.h"
+#include "globals.h"
+#include "kernel.h"
+#include "util/string.h"
+#include <fs/s5fs/s5fs.h>
+#include <fs/vnode.h>
+
+#include "fs/file.h"
+#include "fs/ramfs/ramfs.h"
+
+#include "mm/kmalloc.h"
+#include "mm/slab.h"
+#include "util/debug.h"
+
+#ifdef __S5FS__
+#include "fs/s5fs/s5fs.h"
+#endif
+
+#ifdef __MOUNTING__
+/* The fs listed here are only the non-root file systems */
+list_t mounted_fs_list;
+
+/*
+ * Implementing this function is not required and strongly discouraged unless
+ * you are absolutely sure your Weenix is perfect.
+ *
+ * The purpose of this function is to set up the pointers between the file
+ * system struct and the vnode of the mount point. Remember to watch your
+ * reference counts. (The exception here is when the vnode's vn_mount field
+ * points to the mounted file system's root we do not increment the reference
+ * count on the file system's root vnode. The file system is already keeping
+ * a reference to the vnode which will not go away until the file system is
+ * unmounted. If we kept a second such reference it would conflict with the
+ * behavior of vfs_is_in_use(), make sure you understand why.)
+ *
+ * Once everything is set up add the file system to the list of mounted file
+ * systems.
+ *
+ * Remember proper error handling.
+ *
+ * This function is not meant to mount the root file system.
+ */
+int vfs_mount(struct vnode *mtpt, fs_t *fs)
+{
+ NOT_YET_IMPLEMENTED("MOUNTING: ***none***");
+ return -EINVAL;
+}
+
+/*
+ * Implementing this function is not required and strongly discouraged unless
+ * you are absolutely sure your Weenix is perfect.
+ *
+ * The purpose of this function is to undo the setup done in vfs_mount(). Also
+ * you should call the underlying file system's umount() function. Make sure
+ * to keep track of reference counts. You should also kfree the fs struct at
+ * the end of this method.
+ *
+ * Remember proper error handling. You might want to make sure that you do not
+ * try to call this function on the root file system (this function is not meant
+ * to unmount the root file system).
+ */
+int vfs_umount(fs_t *fs)
+{
+ NOT_YET_IMPLEMENTED("MOUNTING: ***none***");
+ return -EINVAL;
+}
+#endif /* __MOUNTING__ */
+
+fs_t vfs_root_fs = {
+ .fs_dev = VFS_ROOTFS_DEV,
+ .fs_type = VFS_ROOTFS_TYPE,
+ .vnode_list = LIST_INITIALIZER(vfs_root_fs.vnode_list),
+ .vnode_list_mutex = KMUTEX_INITIALIZER(vfs_root_fs.vnode_list_mutex),
+ .fs_vnode_allocator = NULL,
+ .fs_i = NULL,
+ .fs_ops = NULL,
+ .fs_root = NULL,
+};
+
+/*
+ * Call mountfunc on vfs_root_fs and set curproc->p_cwd (reference count!)
+ */
+void vfs_init()
+{
+ long err = mountfunc(&vfs_root_fs);
+ if (err)
+ {
+ panic(
+ "Failed to mount root fs of type \"%s\" on device "
+ "\"%s\" with errno of %ld\n",
+ vfs_root_fs.fs_type, vfs_root_fs.fs_dev, -err);
+ }
+
+ vlock(vfs_root_fs.fs_root);
+ vref(curproc->p_cwd = vfs_root_fs.fs_root);
+ vunlock(vfs_root_fs.fs_root);
+
+#ifdef __MOUNTING__
+ list_init(&mounted_fs_list);
+ fs->fs_mtpt = vfs_root_fs.fs_root;
+#endif
+}
+
+/*
+ * Wrapper around the sync() call to vfs_root_fs using fs_ops
+ */
+void do_sync()
+{
+ vfs_root_fs.fs_ops->sync(&vfs_root_fs);
+#ifdef __MOUNTING__
+ // if implementing mounting, just sync() all the mounted FS's as well
+#endif
+}
+
+/*
+ * Unmount the root filesystem, verifying that no vnodes remain in use.
+ */
+long vfs_shutdown()
+{
+ dbg(DBG_VFS, "shutting down vfs\n");
+ long ret = 0;
+
+#ifdef __MOUNTING__
+ list_iterate(&mounted_fs_list, mtfs, fs_t, fs_link)
+ {
+ ret = vfs_umount(mtfs);
+ KASSERT(!ret);
+ }
+#endif
+
+ if (vfs_is_in_use(&vfs_root_fs))
+ {
+ panic("vfs_shutdown: found active vnodes in root filesystem");
+ }
+
+ if (vfs_root_fs.fs_ops->umount)
+ {
+ ret = vfs_root_fs.fs_ops->umount(&vfs_root_fs);
+ }
+ else
+ {
+ // vlock(vfs_root_fs.fs_root);
+ vput(&vfs_root_fs.fs_root);
+ }
+
+ if (vfs_count_active_vnodes(&vfs_root_fs))
+ {
+ panic(
+ "vfs_shutdown: vnodes still in use after unmounting root "
+ "filesystem");
+ }
+ return ret;
+}
+
+long mountfunc(fs_t *fs)
+{
+ static const struct
+ {
+ char *fstype;
+
+ long (*mountfunc)(fs_t *);
+ } types[] = {
+#ifdef __S5FS__
+ {"s5fs", s5fs_mount},
+#endif
+ {"ramfs", ramfs_mount},
+ };
+
+ for (unsigned int i = 0; i < sizeof(types) / sizeof(types[0]); i++)
+ {
+ if (strcmp(fs->fs_type, types[i].fstype) == 0)
+ {
+ return types[i].mountfunc(fs);
+ }
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * A filesystem is in use if the total number of vnode refcounts for that
+ * filesystem > 1. The singular refcount in a fs NOT in use comes from fs_root.
+ *
+ * Error cases vfs_is_in_use is responsible for generating:
+ * - EBUSY: if the filesystem is in use
+ */
+long vfs_is_in_use(fs_t *fs)
+{
+ long ret = 0;
+ // kmutex_lock(&fs->vnode_list_mutex);
+ list_iterate(&fs->vnode_list, vn, vnode_t, vn_link)
+ {
+ vlock(vn);
+ size_t expected_refcount = vn->vn_fs->fs_root == vn ? 1 : 0;
+ size_t refcount = vn->vn_mobj.mo_refcount;
+ vunlock(vn);
+ if (refcount != expected_refcount)
+ {
+ dbg(DBG_VFS,
+ "vnode %d still in use with %d references and %lu mobj "
+ "references (expected %lu)\n",
+ vn->vn_vno, vn->vn_mobj.mo_refcount, refcount,
+ expected_refcount);
+ ret = -EBUSY;
+ // break;
+ }
+ }
+ // kmutex_unlock(&fs->vnode_list_mutex);
+ return ret;
+}
+
+/*
+ * Return the size of fs->vnode_list
+ */
+size_t vfs_count_active_vnodes(fs_t *fs)
+{
+ size_t count = 0;
+ kmutex_lock(&fs->vnode_list_mutex);
+ list_iterate(&fs->vnode_list, vn, vnode_t, vn_link) { count++; }
+ kmutex_unlock(&fs->vnode_list_mutex);
+ return count;
+}
diff --git a/kernel/fs/vfs_syscall.c b/kernel/fs/vfs_syscall.c
new file mode 100644
index 0000000..d2f018c
--- /dev/null
+++ b/kernel/fs/vfs_syscall.c
@@ -0,0 +1,356 @@
+#include "fs/vfs_syscall.h"
+#include "errno.h"
+#include "fs/fcntl.h"
+#include "fs/file.h"
+#include "fs/lseek.h"
+#include "fs/vfs.h"
+#include "fs/vnode.h"
+#include "globals.h"
+#include "kernel.h"
+#include "util/debug.h"
+#include "util/string.h"
+#include <limits.h>
+
+/*
+ * Read len bytes into buf from the fd's file using the file's vnode operation
+ * read.
+ *
+ * Return the number of bytes read on success, or:
+ * - EBADF: fd is invalid or is not open for reading
+ * - EISDIR: fd refers to a directory
+ * - Propagate errors from the vnode operation read
+ *
+ * Hints:
+ * - Be sure to update the file's position appropriately.
+ * - Lock/unlock the file's vnode when calling its read operation.
+ */
+ssize_t do_read(int fd, void *buf, size_t len)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
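+/*
+ * Illustrative sketch (not compiled) of the general shape of do_read. The
+ * fget()/fput() helpers, the file_t fields used here (f_vnode, f_pos,
+ * f_mode), and the FMODE_READ flag are assumptions about fs/file.h; check
+ * the actual declarations before borrowing any of this.
+ */
+#if 0
+ssize_t do_read(int fd, void *buf, size_t len)
+{
+    file_t *file = fget(fd);
+    if (!file)
+        return -EBADF;
+    if (!(file->f_mode & FMODE_READ))
+    {
+        fput(&file);
+        return -EBADF;
+    }
+    if (S_ISDIR(file->f_vnode->vn_mode))
+    {
+        fput(&file);
+        return -EISDIR;
+    }
+
+    vlock(file->f_vnode);
+    ssize_t ret = file->f_vnode->vn_ops->read(file->f_vnode, file->f_pos,
+                                              buf, len);
+    if (ret >= 0)
+        file->f_pos += ret; /* advance by the bytes actually read */
+    vunlock(file->f_vnode);
+
+    fput(&file);
+    return ret;
+}
+#endif
+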
+/*
+ * Write len bytes from buf into the fd's file using the file's vnode operation
+ * write.
+ *
+ * Return the number of bytes written on success, or:
+ * - EBADF: fd is invalid or is not open for writing
+ * - Propagate errors from the vnode operation write
+ *
+ * Hints:
+ * - Check out `man 2 write` for details about how to handle the FMODE_APPEND
+ * flag.
+ * - Be sure to update the file's position appropriately.
+ * - Lock/unlock the file's vnode when calling its write operation.
+ */
+ssize_t do_write(int fd, const void *buf, size_t len)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/*
+ * Close the file descriptor fd.
+ *
+ * Return 0 on success, or:
+ * - EBADF: fd is invalid or not open
+ *
+ * Hints:
+ * Check `proc.h` to see if there are any helpful fields in the
+ * proc_t struct for checking if the file associated with the fd is open.
+ * Consider what happens when we open a file and what counts as closing it
+ */
+long do_close(int fd)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/*
+ * Duplicate the file descriptor fd.
+ *
+ * Return the new file descriptor on success, or:
+ * - EBADF: fd is invalid or not open
+ * - Propagate errors from get_empty_fd()
+ *
+ * Hint: Use get_empty_fd() to obtain an available file descriptor.
+ */
+long do_dup(int fd)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/*
+ * Duplicate the file descriptor ofd using the new file descriptor nfd. If nfd
+ * was previously open, close it.
+ *
+ * Return nfd on success, or:
+ * - EBADF: ofd is invalid or not open, or nfd is invalid
+ *
+ * Hint: You don't need to do anything if ofd and nfd are the same.
+ * (If supporting MTP, this action must be atomic)
+ */
+long do_dup2(int ofd, int nfd)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/*
+ * Create a file specified by mode and devid at the location specified by path.
+ *
+ * Return 0 on success, or:
+ * - EINVAL: Mode is not S_IFCHR, S_IFBLK, or S_IFREG
+ * - Propagate errors from namev_open()
+ *
+ * Hints:
+ * - Create the file by calling namev_open() with the O_CREAT flag.
+ * - Be careful about refcounts after calling namev_open(). The newly created
+ * vnode should have no references when do_mknod returns. The underlying
+ * filesystem is responsible for maintaining references to the inode, which
+ * will prevent it from being destroyed, even if the corresponding vnode is
+ * cleaned up.
+ * - You don't need to handle EEXIST (this would be handled within namev_open,
+ * but doing so would likely cause problems elsewhere)
+ */
+long do_mknod(const char *path, int mode, devid_t devid)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/*
+ * Create a directory at the location specified by path.
+ *
+ * Return 0 on success, or:
+ * - ENAMETOOLONG: The last component of path is too long
+ * - ENOTDIR: The parent of the directory to be created is not a directory
+ * - EEXIST: A file located at path already exists
+ * - Propagate errors from namev_dir(), namev_lookup(), and the vnode
+ * operation mkdir
+ *
+ * Hints:
+ * 1) Use namev_dir() to find the parent of the directory to be created.
+ * 2) Use namev_lookup() to check that the directory does not already exist.
+ * 3) Use the vnode operation mkdir to create the directory.
+ * - Compare against NAME_LEN to determine if the basename is too long.
+ * Check out ramfs_mkdir() to confirm that the basename will be null-
+ * terminated.
+ * - Be careful about locking and refcounts after calling namev_dir() and
+ * namev_lookup().
+ */
+long do_mkdir(const char *path)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/*
+ * Delete a directory at path.
+ *
+ * Return 0 on success, or:
+ * - EINVAL: Attempting to rmdir with "." as the final component
+ * - ENOTEMPTY: Attempting to rmdir with ".." as the final component
+ * - ENOTDIR: The parent of the directory to be removed is not a directory
+ * - ENAMETOOLONG: the last component of path is too long
+ * - Propagate errors from namev_dir() and the vnode operation rmdir
+ *
+ * Hints:
+ * - Use namev_dir() to find the parent of the directory to be removed.
+ * - Be careful about refcounts from calling namev_dir().
+ * - Use the parent directory's rmdir operation to remove the directory.
+ * - Lock/unlock the vnode when calling its rmdir operation.
+ */
+long do_rmdir(const char *path)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/*
+ * Remove the link between path and the file it refers to.
+ *
+ * Return 0 on success, or:
+ * - ENOTDIR: the parent of the file to be unlinked is not a directory
+ * - ENAMETOOLONG: the last component of path is too long
+ * - Propagate errors from namev_dir() and the vnode operation unlink
+ *
+ * Hints:
+ * - Use namev_dir() and be careful about refcounts.
+ * - Lock/unlock the parent directory when calling its unlink operation.
+ */
+long do_unlink(const char *path)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/*
+ * Create a hard link newpath that refers to the same file as oldpath.
+ *
+ * Return 0 on success, or:
+ * - EPERM: oldpath refers to a directory
+ * - ENAMETOOLONG: The last component of newpath is too long
+ * - ENOTDIR: The parent of the file to be linked is not a directory
+ *
+ * Hints:
+ * 1) Use namev_resolve() on oldpath to get the target vnode.
+ * 2) Use namev_dir() on newpath to get the directory vnode.
+ * 3) Use vlock_in_order() to lock the directory and target vnodes.
+ * 4) Use the directory vnode's link operation to create a link to the target.
+ * 5) Use vunlock_in_order() to unlock the vnodes.
+ * 6) Make sure to clean up references added from calling namev_resolve() and
+ * namev_dir().
+ */
+long do_link(const char *oldpath, const char *newpath)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/* Rename a file or directory.
+ *
+ * Return 0 on success, or:
+ * - ENOTDIR: the parent of either path is not a directory
+ * - ENAMETOOLONG: the last component of either path is too long
+ * - Propagate errors from namev_dir() and the vnode operation rename
+ *
+ * You DO NOT need to support renaming of directories.
+ * Steps:
+ * 1. namev_dir oldpath --> olddir vnode
+ * 2. namev_dir newpath --> newdir vnode
+ * 4. Lock the olddir and newdir in ancestor-first order (see `vlock_in_order`)
+ * 5. Use the `rename` vnode operation
+ * 6. Unlock the olddir and newdir
+ * 8. vput the olddir and newdir vnodes
+ *
+ * Alternatively, you can allow do_rename() to rename directories if
+ * __RENAMEDIR__ is set in Config.mk. As with all extra credit
+ * projects this is harder and you will get no extra credit (but you
+ * will get our admiration). Please make sure the normal version works first.
+ * Steps:
+ * 1. namev_dir oldpath --> olddir vnode
+ * 2. namev_dir newpath --> newdir vnode
+ * 3. Lock the global filesystem `vnode_rename_mutex`
+ * 4. Lock the olddir and newdir in ancestor-first order (see `vlock_in_order`)
+ * 5. Use the `rename` vnode operation
+ * 6. Unlock the olddir and newdir
+ * 7. Unlock the global filesystem `vnode_rename_mutex`
+ * 8. vput the olddir and newdir vnodes
+ *
+ * P.S. This scheme /probably/ works, but we're not 100% sure.
+ */
+long do_rename(const char *oldpath, const char *newpath)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/* Set the current working directory to the directory represented by path.
+ *
+ * Returns 0 on success, or:
+ * - ENOTDIR: path does not refer to a directory
+ * - Propagate errors from namev_resolve()
+ *
+ * Hints:
+ * - Use namev_resolve() to get the vnode corresponding to path.
+ * - Pay attention to refcounts!
+ * - Remember that p_cwd should not be locked upon return from this function.
+ * - (If doing MTP, must protect access to p_cwd)
+ */
+long do_chdir(const char *path)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/*
+ * Read a directory entry from the file specified by fd into dirp.
+ *
+ * Return sizeof(dirent_t) on success, or:
+ * - EBADF: fd is invalid or is not open
+ * - ENOTDIR: fd does not refer to a directory
+ * - Propagate errors from the vnode operation readdir
+ *
+ * Hints:
+ * - Use the vnode operation readdir.
+ * - Be sure to update file position according to readdir's return value.
+ * - On success (readdir return value is strictly positive), return
+ * sizeof(dirent_t).
+ */
+ssize_t do_getdent(int fd, struct dirent *dirp)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/*
+ * Set the position of the file represented by fd according to offset and
+ * whence.
+ *
+ * Return the new file position, or:
+ * - EBADF: fd is invalid or is not open
+ * - EINVAL: whence is not one of SEEK_SET, SEEK_CUR, or SEEK_END;
+ * or, the resulting file offset would be negative
+ *
+ * Hints:
+ * - See `man 2 lseek` for details about whence.
+ * - Be sure to protect the vnode if you have to access its vn_len.
+ */
+off_t do_lseek(int fd, off_t offset, int whence)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+/* Use buf to return the status of the file represented by path.
+ *
+ * Return 0 on success, or:
+ * - Propagate errors from namev_resolve() and the vnode operation stat.
+ */
+long do_stat(const char *path, stat_t *buf)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return -1;
+}
+
+#ifdef __MOUNTING__
+/*
+ * Implementing this function is not required and strongly discouraged unless
+ * you are absolutely sure your Weenix is perfect.
+ *
+ * This is the syscall entry point into vfs for mounting. You will need to
+ * create the fs_t struct and populate its fs_dev and fs_type fields before
+ * calling vfs's mountfunc(). mountfunc() will use the fields you populated
+ * in order to determine which underlying filesystem's mount function should
+ * be run, then it will finish setting up the fs_t struct. At this point you
+ * have a fully functioning file system, however it is not mounted on the
+ * virtual file system, you will need to call vfs_mount to do this.
+ *
+ * There are lots of things which can go wrong here. Make sure you have good
+ * error handling. Remember the fs_dev and fs_type buffers have limited size
+ * so you should not write arbitrary length strings to them.
+ */
+int do_mount(const char *source, const char *target, const char *type)
+{
+ NOT_YET_IMPLEMENTED("MOUNTING: ***none***");
+ return -EINVAL;
+}
+
+/*
+ * Implementing this function is not required and strongly discouraged unless
+ * you are absolutely sure your Weenix is perfect.
+ *
+ * This function delegates all of the real work to vfs_umount. You should not
+ * worry about freeing the fs_t struct here, that is done in vfs_umount. All
+ * this function does is figure out which file system to pass to vfs_umount and
+ * do good error checking.
+ */
+int do_umount(const char *target)
+{
+ NOT_YET_IMPLEMENTED("MOUNTING: ***none***");
+ return -EINVAL;
+}
+#endif
diff --git a/kernel/fs/vnode.c b/kernel/fs/vnode.c
new file mode 100644
index 0000000..91fee09
--- /dev/null
+++ b/kernel/fs/vnode.c
@@ -0,0 +1,250 @@
+#include "fs/vnode.h"
+#include "errno.h"
+#include "fs/stat.h"
+#include "fs/vfs.h"
+#include "kernel.h"
+#include "mm/slab.h"
+#include "util/debug.h"
+#include "util/string.h"
+#include <fs/vnode_specials.h>
+
+#define MOBJ_TO_VNODE(o) CONTAINER_OF((o), vnode_t, vn_mobj)
+
+static long vnode_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite,
+ pframe_t **pfp);
+static long vnode_fill_pframe(mobj_t *o, pframe_t *pf);
+static long vnode_flush_pframe(mobj_t *o, pframe_t *pf);
+static void vnode_destructor(mobj_t *o);
+
+static mobj_ops_t vnode_mobj_ops = {.get_pframe = vnode_get_pframe,
+ .fill_pframe = vnode_fill_pframe,
+ .flush_pframe = vnode_flush_pframe,
+ .destructor = vnode_destructor};
+
+/**
+ * Locks two vnodes in a consistent order: directories are locked before
+ * non-directories, two directories are ordered by ancestry, and everything
+ * else falls back to inode number. If both arguments refer to the same
+ * vnode, only one lock is taken.
+ *
+ * This scheme prevents the A->B/B->A deadlock, but only if `vlock_in_order`
+ * is used in all cases where two vnodes must be locked.
+ */
+void vlock_in_order(vnode_t *a, vnode_t *b)
+{
+ /* these vnode's must be on the same filesystem */
+ KASSERT(a->vn_fs == b->vn_fs);
+
+ if (a->vn_vno == b->vn_vno)
+ {
+ vlock(a);
+ return;
+ }
+
+    /* two directories: order the locks by ancestry */
+ if (S_ISDIR(a->vn_mode) && S_ISDIR(b->vn_mode))
+ {
+ if (namev_is_descendant(a, b))
+ {
+ vlock(b);
+ vlock(a);
+ return;
+ }
+ else if (namev_is_descendant(b, a))
+ {
+ vlock(a);
+ vlock(b);
+ return;
+ }
+ }
+ else if (S_ISDIR(a->vn_mode))
+ {
+ vlock(a);
+ vlock(b);
+ }
+ else if (S_ISDIR(b->vn_mode))
+ {
+ vlock(b);
+ vlock(a);
+ }
+ else if (a->vn_vno < b->vn_vno)
+ {
+ vlock(a);
+ vlock(b);
+ }
+ else
+ {
+ vlock(b);
+ vlock(a);
+ }
+}
+
+void vunlock_in_order(vnode_t *a, vnode_t *b)
+{
+ if (a->vn_vno == b->vn_vno)
+ {
+ vunlock(a);
+ return;
+ }
+
+ vunlock(a);
+ vunlock(b);
+}
+
+void await_vnode_loaded(vnode_t *vnode)
+{
+ /* blocks until the vnode's vn_state is loaded */
+ while (vnode->vn_state != VNODE_LOADED)
+ {
+ sched_sleep_on(&vnode->vn_waitq);
+ }
+ KASSERT(vnode->vn_state == VNODE_LOADED);
+}
+
+void notify_vnode_loaded(vnode_t *vn)
+{
+ /* set the state to loaded and release all waiters */
+ vn->vn_state = VNODE_LOADED;
+ sched_broadcast_on(&vn->vn_waitq);
+}
+
+void vnode_init(vnode_t *vn, fs_t *fs, ino_t ino, int state)
+{
+ vn->vn_state = VNODE_LOADING;
+ vn->vn_fs = fs;
+ vn->vn_vno = ino;
+ sched_queue_init(&vn->vn_waitq);
+ mobj_init(&vn->vn_mobj, MOBJ_VNODE, &vnode_mobj_ops);
+ KASSERT(vn->vn_mobj.mo_refcount);
+}
+
+vnode_t *__vget(fs_t *fs, ino_t ino, int get_locked)
+{
+find:
+ kmutex_lock(&fs->vnode_list_mutex);
+ list_iterate(&fs->vnode_list, vn, vnode_t, vn_link)
+ {
+ if (vn->vn_vno == ino)
+ {
+ if (atomic_inc_not_zero(&vn->vn_mobj.mo_refcount))
+ {
+ /* reference acquired, we can release the per-FS list */
+ kmutex_unlock(&fs->vnode_list_mutex);
+ await_vnode_loaded(vn);
+ if (get_locked)
+ {
+ vlock(vn);
+ }
+ return vn;
+ }
+ else
+ {
+ /* count must be 0, wait and try again later */
+ kmutex_unlock(&fs->vnode_list_mutex);
+ sched_yield();
+ goto find;
+ }
+ }
+ }
+
+ /* vnode does not exist, must allocate one */
+ dbg(DBG_VFS, "creating vnode %d\n", ino);
+ vnode_t *vn = slab_obj_alloc(fs->fs_vnode_allocator);
+ KASSERT(vn);
+ memset(vn, 0, sizeof(vnode_t));
+
+ /* initialize the vnode state */
+ vnode_init(vn, fs, ino, VNODE_LOADING);
+
+ /* add the vnode to the per-FS list, lock the vnode, and release the list
+ * (unblocking other `vget` calls) */
+ list_insert_tail(&fs->vnode_list, &vn->vn_link);
+ vlock(vn);
+ kmutex_unlock(&fs->vnode_list_mutex);
+
+ /* load the vnode */
+ vn->vn_fs->fs_ops->read_vnode(vn->vn_fs, vn);
+ if (S_ISCHR(vn->vn_mode) || S_ISBLK(vn->vn_mode))
+ {
+ init_special_vnode(vn);
+ }
+
+ /* notify potential waiters that the vnode is ready for use and return */
+ notify_vnode_loaded(vn);
+ if (!get_locked)
+ {
+ vunlock(vn);
+ }
+ return vn;
+}
+
+inline vnode_t *vget(fs_t *fs, ino_t ino) { return __vget(fs, ino, 0); }
+
+inline vnode_t *vget_locked(fs_t *fs, ino_t ino) { return __vget(fs, ino, 1); }
+
+inline void vref(vnode_t *vn) { mobj_ref(&vn->vn_mobj); }
+
+inline void vlock(vnode_t *vn) { mobj_lock(&vn->vn_mobj); }
+
+inline void vunlock(vnode_t *vn) { mobj_unlock(&vn->vn_mobj); }
+
+inline void vput(struct vnode **vnp)
+{
+ vnode_t *vn = *vnp;
+ *vnp = NULL;
+ mobj_t *mobj = &vn->vn_mobj;
+ mobj_put(&mobj);
+}
+
+inline void vput_locked(struct vnode **vnp)
+{
+ vunlock(*vnp);
+ vput(vnp);
+}
+
+static long vnode_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite,
+ pframe_t **pfp)
+{
+ vnode_t *vnode = MOBJ_TO_VNODE(o);
+ KASSERT(vnode->vn_ops->get_pframe);
+ return vnode->vn_ops->get_pframe(vnode, pagenum, forwrite, pfp);
+}
+
+static long vnode_fill_pframe(mobj_t *o, pframe_t *pf)
+{
+ vnode_t *vnode = MOBJ_TO_VNODE(o);
+ KASSERT(vnode->vn_ops->fill_pframe);
+ return vnode->vn_ops->fill_pframe(vnode, pf);
+}
+
+static long vnode_flush_pframe(mobj_t *o, pframe_t *pf)
+{
+ vnode_t *vnode = MOBJ_TO_VNODE(o);
+ KASSERT(vnode->vn_ops->flush_pframe);
+ return vnode->vn_ops->flush_pframe(vnode, pf);
+}
+
+static void vnode_destructor(mobj_t *o)
+{
+ vnode_t *vn = MOBJ_TO_VNODE(o);
+ dbg(DBG_VFS, "destroying vnode %d\n", vn->vn_vno);
+
+ /* lock, flush, and delete the vnode */
+ KASSERT(!o->mo_refcount);
+ vlock(vn);
+ KASSERT(!o->mo_refcount);
+ KASSERT(!kmutex_has_waiters(&o->mo_mutex));
+ mobj_flush(o);
+ if (vn->vn_fs->fs_ops->delete_vnode)
+ {
+ vn->vn_fs->fs_ops->delete_vnode(vn->vn_fs, vn);
+ }
+ KASSERT(!kmutex_has_waiters(&o->mo_mutex));
+ vunlock(vn);
+
+ /* remove the vnode from the list and free it*/
+ kmutex_lock(&vn->vn_fs->vnode_list_mutex);
+ KASSERT(list_link_is_linked(&vn->vn_link));
+ list_remove(&vn->vn_link);
+ kmutex_unlock(&vn->vn_fs->vnode_list_mutex);
+ slab_obj_free(vn->vn_fs->fs_vnode_allocator, vn);
+}
diff --git a/kernel/fs/vnode_specials.c b/kernel/fs/vnode_specials.c
new file mode 100644
index 0000000..a6c38a3
--- /dev/null
+++ b/kernel/fs/vnode_specials.c
@@ -0,0 +1,176 @@
+#include <errno.h>
+#include <fs/stat.h>
+#include <fs/vfs.h>
+#include <fs/vnode.h>
+#include <util/debug.h>
+
+static long special_file_stat(vnode_t *file, stat_t *ss);
+
+static ssize_t chardev_file_read(vnode_t *file, size_t pos, void *buf,
+ size_t count);
+
+static ssize_t chardev_file_write(vnode_t *file, size_t pos, const void *buf,
+ size_t count);
+
+static long chardev_file_mmap(vnode_t *file, mobj_t **ret);
+
+static long chardev_file_fill_pframe(vnode_t *file, pframe_t *pf);
+
+static long chardev_file_flush_pframe(vnode_t *file, pframe_t *pf);
+
+static vnode_ops_t chardev_spec_vops = {
+ .read = chardev_file_read,
+ .write = chardev_file_write,
+ .mmap = chardev_file_mmap,
+ .mknod = NULL,
+ .lookup = NULL,
+ .link = NULL,
+ .unlink = NULL,
+ .mkdir = NULL,
+ .rmdir = NULL,
+ .readdir = NULL,
+ .stat = special_file_stat,
+ .get_pframe = NULL,
+ .fill_pframe = chardev_file_fill_pframe,
+ .flush_pframe = chardev_file_flush_pframe,
+};
+
+static ssize_t blockdev_file_read(vnode_t *file, size_t pos, void *buf,
+ size_t count);
+
+static ssize_t blockdev_file_write(vnode_t *file, size_t pos, const void *buf,
+ size_t count);
+
+static long blockdev_file_mmap(vnode_t *file, mobj_t **ret);
+
+static long blockdev_file_fill_pframe(vnode_t *file, pframe_t *pf);
+
+static long blockdev_file_flush_pframe(vnode_t *file, pframe_t *pf);
+
+static vnode_ops_t blockdev_spec_vops = {
+ .read = blockdev_file_read,
+ .write = blockdev_file_write,
+ .mmap = blockdev_file_mmap,
+ .mknod = NULL,
+ .lookup = NULL,
+ .link = NULL,
+ .unlink = NULL,
+ .mkdir = NULL,
+ .rmdir = NULL,
+ .readdir = NULL,
+ .stat = special_file_stat,
+ .get_pframe = NULL,
+ .fill_pframe = blockdev_file_fill_pframe,
+ .flush_pframe = blockdev_file_flush_pframe,
+};
+
+void init_special_vnode(vnode_t *vn)
+{
+ if (S_ISCHR(vn->vn_mode))
+ {
+ vn->vn_ops = &chardev_spec_vops;
+ vn->vn_dev.chardev = chardev_lookup(vn->vn_devid);
+ }
+ else
+ {
+ KASSERT(S_ISBLK(vn->vn_mode));
+ vn->vn_ops = &blockdev_spec_vops;
+ vn->vn_dev.blockdev = blockdev_lookup(vn->vn_devid);
+ }
+}
+
+static long special_file_stat(vnode_t *file, stat_t *ss)
+{
+ KASSERT(file->vn_fs->fs_root->vn_ops->stat != NULL);
+ // call the containing file system's stat routine
+ return file->vn_fs->fs_root->vn_ops->stat(file, ss);
+}
+
+/*
+ * Make a read by deferring to the underlying chardev and its read operation.
+ *
+ * Returns what the chardev's read returned.
+ *
+ * Hint: Watch out! chardev_file_read and chardev_file_write are indirectly
+ * called in do_read and do_write, respectively, as the read/write ops for
+ * chardev-type vnodes. This means that the vnode file should be locked
+ * upon entry to this function.
+ *
+ * However, tty_read and tty_write, the read/write ops for the tty chardev,
+ * are potentially blocking. To avoid deadlock, you should unlock the file
+ * before calling the chardev's read, and lock it again after. If you fail
+ * to do this, a shell reading from /dev/tty0 for instance, will block all
+ * access to the /dev/tty0 vnode. This means that if someone runs `ls /dev/`,
+ * while a shell is reading from `/dev/tty0`, the `ls` call will hang.
+ *
+ * Also, if a vnode represents a chardev, you can access the chardev using
+ * vnode->vn_dev.chardev.
+ *
+ */
+static ssize_t chardev_file_read(vnode_t *file, size_t pos, void *buf,
+ size_t count)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return 0;
+}
+
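+/*
+ * Illustrative sketch (not compiled): defer to the chardev, dropping the
+ * vnode lock around the potentially blocking device read as described
+ * above. The chardev_t op names (cd_ops->read) are assumptions taken from
+ * drivers/chardev.h.
+ */
+#if 0
+static ssize_t chardev_file_read(vnode_t *file, size_t pos, void *buf,
+                                 size_t count)
+{
+    chardev_t *dev = file->vn_dev.chardev;
+    KASSERT(dev && dev->cd_ops && dev->cd_ops->read);
+
+    vunlock(file); /* tty reads can block; don't hold the vnode lock */
+    ssize_t ret = dev->cd_ops->read(dev, pos, buf, count);
+    vlock(file);
+    return ret;
+}
+#endif
+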
+/*
+ * Make a write by deferring to the underlying chardev and its write operation.
+ *
+ * Return what the chardev's write returned.
+ *
+ * See the comments from chardev_file_read above for hints.
+ *
+ */
+static long chardev_file_write(vnode_t *file, size_t pos, const void *buf,
+ size_t count)
+{
+ NOT_YET_IMPLEMENTED("VFS: ***none***");
+ return 0;
+}
+
+/*
+ * For this and the following chardev functions, simply defer to the underlying
+ * chardev's corresponding operations.
+ */
+static long chardev_file_mmap(vnode_t *file, mobj_t **ret)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
+
+static long chardev_file_fill_pframe(vnode_t *file, pframe_t *pf)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
+
+static long chardev_file_flush_pframe(vnode_t *file, pframe_t *pf)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
+
+static ssize_t blockdev_file_read(vnode_t *file, size_t pos, void *buf,
+ size_t count)
+{
+ return -ENOTSUP;
+}
+
+static long blockdev_file_write(vnode_t *file, size_t pos, const void *buf,
+ size_t count)
+{
+ return -ENOTSUP;
+}
+
+static long blockdev_file_mmap(vnode_t *file, mobj_t **ret) { return -ENOTSUP; }
+
+static long blockdev_file_fill_pframe(vnode_t *file, pframe_t *pf)
+{
+ return -ENOTSUP;
+}
+
+static long blockdev_file_flush_pframe(vnode_t *file, pframe_t *pf)
+{
+ return -ENOTSUP;
+}
diff --git a/kernel/include/api/access.h b/kernel/include/api/access.h
new file mode 100644
index 0000000..77f5e63
--- /dev/null
+++ b/kernel/include/api/access.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include "types.h"
+
+struct proc;
+struct argstr;
+struct argvec;
+
+long copy_from_user(void *kaddr, const void *uaddr, size_t nbytes);
+
+long copy_to_user(void *uaddr, const void *kaddr, size_t nbytes);
+
+long user_strdup(struct argstr *ustr, char **kstrp);
+
+long user_vecdup(struct argvec *uvec, char ***kvecp);
+
+long range_perm(struct proc *p, const void *vaddr, size_t len, int perm);
+
+long addr_perm(struct proc *p, const void *vaddr, int perm);
diff --git a/kernel/include/api/binfmt.h b/kernel/include/api/binfmt.h
new file mode 100644
index 0000000..5063276
--- /dev/null
+++ b/kernel/include/api/binfmt.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "fs/vnode.h"
+
+typedef long (*binfmt_load_func_t)(const char *filename, int fd,
+ char *const *argv, char *const *envp,
+ uint64_t *rip, uint64_t *rsp);
+
+long binfmt_add(const char *id, binfmt_load_func_t loadfunc);
+
+long binfmt_load(const char *filename, char *const *argv, char *const *envp,
+ uint64_t *rip, uint64_t *rsp);
diff --git a/kernel/include/api/elf.h b/kernel/include/api/elf.h
new file mode 100644
index 0000000..5ccc109
--- /dev/null
+++ b/kernel/include/api/elf.h
@@ -0,0 +1,2595 @@
+/* This file defines standard ELF types, structures, and macros.
+ Copyright (C) 1995-2003, 2004 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#pragma once
+
+/* __BEGIN_DECLS */
+
+/* Standard ELF types. */
+
+#ifdef __KERNEL__
+#include "types.h"
+#else
+
+#include <sys/types.h>
+
+#endif
+
+/* Type for a 16-bit quantity. */
+typedef uint16_t Elf32_Half;
+typedef uint16_t Elf64_Half;
+
+/* Types for signed and unsigned 32-bit quantities. */
+typedef uint32_t Elf32_Word;
+typedef int32_t Elf32_Sword;
+typedef uint32_t Elf64_Word;
+typedef int32_t Elf64_Sword;
+
+/* Types for signed and unsigned 64-bit quantities. */
+typedef uint64_t Elf32_Xword;
+typedef int64_t Elf32_Sxword;
+typedef uint64_t Elf64_Xword;
+typedef int64_t Elf64_Sxword;
+
+/* Type of addresses. */
+typedef uint32_t Elf32_Addr;
+typedef uint64_t Elf64_Addr;
+
+/* Type of file offsets. */
+typedef uint32_t Elf32_Off;
+typedef uint64_t Elf64_Off;
+
+/* Type for section indices, which are 16-bit quantities. */
+typedef uint16_t Elf32_Section;
+typedef uint16_t Elf64_Section;
+
+/* Type for version symbol information. */
+typedef Elf32_Half Elf32_Versym;
+typedef Elf64_Half Elf64_Versym;
+
+/* The ELF file header. This appears at the start of every ELF file. */
+
+#define EI_NIDENT (16)
+
+typedef struct
+{
+ unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
+ Elf32_Half e_type; /* Object file type */
+ Elf32_Half e_machine; /* Architecture */
+ Elf32_Word e_version; /* Object file version */
+ Elf32_Addr e_entry; /* Entry point virtual address */
+ Elf32_Off e_phoff; /* Program header table file offset */
+ Elf32_Off e_shoff; /* Section header table file offset */
+ Elf32_Word e_flags; /* Processor-specific flags */
+ Elf32_Half e_ehsize; /* ELF header size in bytes */
+ Elf32_Half e_phentsize; /* Program header table entry size */
+ Elf32_Half e_phnum; /* Program header table entry count */
+ Elf32_Half e_shentsize; /* Section header table entry size */
+ Elf32_Half e_shnum; /* Section header table entry count */
+ Elf32_Half e_shstrndx; /* Section header string table index */
+} Elf32_Ehdr;
+
+typedef struct
+{
+ unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
+ Elf64_Half e_type; /* Object file type */
+ Elf64_Half e_machine; /* Architecture */
+ Elf64_Word e_version; /* Object file version */
+ Elf64_Addr e_entry; /* Entry point virtual address */
+ Elf64_Off e_phoff; /* Program header table file offset */
+ Elf64_Off e_shoff; /* Section header table file offset */
+ Elf64_Word e_flags; /* Processor-specific flags */
+ Elf64_Half e_ehsize; /* ELF header size in bytes */
+ Elf64_Half e_phentsize; /* Program header table entry size */
+ Elf64_Half e_phnum; /* Program header table entry count */
+ Elf64_Half e_shentsize; /* Section header table entry size */
+ Elf64_Half e_shnum; /* Section header table entry count */
+ Elf64_Half e_shstrndx; /* Section header string table index */
+} Elf64_Ehdr;
+
+/* Fields in the e_ident array. The EI_* macros are indices into the
+ array. The macros under each EI_* macro are the values the byte
+ may have. */
+
+#define EI_MAG0 0 /* File identification byte 0 index */
+#define ELFMAG0 0x7f /* Magic number byte 0 */
+
+#define EI_MAG1 1 /* File identification byte 1 index */
+#define ELFMAG1 'E' /* Magic number byte 1 */
+
+#define EI_MAG2 2 /* File identification byte 2 index */
+#define ELFMAG2 'L' /* Magic number byte 2 */
+
+#define EI_MAG3 3 /* File identification byte 3 index */
+#define ELFMAG3 'F' /* Magic number byte 3 */
+
+/* Conglomeration of the identification bytes, for easy testing as a word. */
+#define ELFMAG "\177ELF"
+#define SELFMAG 4
+
+#define EI_CLASS 4 /* File class byte index */
+#define ELFCLASSNONE 0 /* Invalid class */
+#define ELFCLASS32 1 /* 32-bit objects */
+#define ELFCLASS64 2 /* 64-bit objects */
+#define ELFCLASSNUM 3
+
+#define EI_DATA 5 /* Data encoding byte index */
+#define ELFDATANONE 0 /* Invalid data encoding */
+#define ELFDATA2LSB 1 /* 2's complement, little endian */
+#define ELFDATA2MSB 2 /* 2's complement, big endian */
+#define ELFDATANUM 3
+
+#define EI_VERSION 6 /* File version byte index */
+/* Value must be EV_CURRENT */
+
+#define EI_OSABI 7 /* OS ABI identification */
+#define ELFOSABI_NONE 0 /* UNIX System V ABI */
+#define ELFOSABI_SYSV 0 /* Alias. */
+#define ELFOSABI_HPUX 1 /* HP-UX */
+#define ELFOSABI_NETBSD 2 /* NetBSD. */
+#define ELFOSABI_LINUX 3 /* Linux. */
+#define ELFOSABI_SOLARIS 6 /* Sun Solaris. */
+#define ELFOSABI_AIX 7 /* IBM AIX. */
+#define ELFOSABI_IRIX 8 /* SGI Irix. */
+#define ELFOSABI_FREEBSD 9 /* FreeBSD. */
+#define ELFOSABI_TRU64 10 /* Compaq TRU64 UNIX. */
+#define ELFOSABI_MODESTO 11 /* Novell Modesto. */
+#define ELFOSABI_OPENBSD 12 /* OpenBSD. */
+#define ELFOSABI_ARM 97 /* ARM */
+#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */
+
+#define EI_ABIVERSION 8 /* ABI version */
+
+#define EI_PAD 9 /* Byte index of padding bytes */
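+
+/* Usage sketch (illustrative only): the e_ident checks above are typically
+   combined into a single validation step before any other header field is
+   trusted. The helper name below is hypothetical, memcmp() comes from
+   <string.h>, and EV_CURRENT is defined further down in this header.
+
+       static int elf_check_ident(const unsigned char *e_ident)
+       {
+           if (memcmp(e_ident, ELFMAG, SELFMAG) != 0)
+               return 0;                          // not an ELF file
+           if (e_ident[EI_CLASS] != ELFCLASS64)
+               return 0;                          // wrong word size
+           if (e_ident[EI_DATA] != ELFDATA2LSB)
+               return 0;                          // wrong byte order
+           if (e_ident[EI_VERSION] != EV_CURRENT)
+               return 0;                          // unknown ELF version
+           return 1;
+       }
+*/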
+
+/* Legal values for e_type (object file type). */
+
+#define ET_NONE 0 /* No file type */
+#define ET_REL 1 /* Relocatable file */
+#define ET_EXEC 2 /* Executable file */
+#define ET_DYN 3 /* Shared object file */
+#define ET_CORE 4 /* Core file */
+#define ET_NUM 5 /* Number of defined types */
+#define ET_LOOS 0xfe00 /* OS-specific range start */
+#define ET_HIOS 0xfeff /* OS-specific range end */
+#define ET_LOPROC 0xff00 /* Processor-specific range start */
+#define ET_HIPROC 0xffff /* Processor-specific range end */
+
+/* Legal values for e_machine (architecture). */
+
+#define EM_NONE 0 /* No machine */
+#define EM_M32 1 /* AT&T WE 32100 */
+#define EM_SPARC 2 /* SUN SPARC */
+#define EM_386 3 /* Intel 80386 */
+#define EM_68K 4 /* Motorola m68k family */
+#define EM_88K 5 /* Motorola m88k family */
+#define EM_860 7 /* Intel 80860 */
+#define EM_MIPS 8 /* MIPS R3000 big-endian */
+#define EM_S370 9 /* IBM System/370 */
+#define EM_MIPS_RS3_LE 10 /* MIPS R3000 little-endian */
+
+#define EM_PARISC 15 /* HPPA */
+#define EM_VPP500 17 /* Fujitsu VPP500 */
+#define EM_SPARC32PLUS 18 /* Sun's "v8plus" */
+#define EM_960 19 /* Intel 80960 */
+#define EM_PPC 20 /* PowerPC */
+#define EM_PPC64 21 /* PowerPC 64-bit */
+#define EM_S390 22 /* IBM S390 */
+
+#define EM_V800 36 /* NEC V800 series */
+#define EM_FR20 37 /* Fujitsu FR20 */
+#define EM_RH32 38 /* TRW RH-32 */
+#define EM_RCE 39 /* Motorola RCE */
+#define EM_ARM 40 /* ARM */
+#define EM_FAKE_ALPHA 41 /* Digital Alpha */
+#define EM_SH 42 /* Hitachi SH */
+#define EM_SPARCV9 43 /* SPARC v9 64-bit */
+#define EM_TRICORE 44 /* Siemens Tricore */
+#define EM_ARC 45 /* Argonaut RISC Core */
+#define EM_H8_300 46 /* Hitachi H8/300 */
+#define EM_H8_300H 47 /* Hitachi H8/300H */
+#define EM_H8S 48 /* Hitachi H8S */
+#define EM_H8_500 49 /* Hitachi H8/500 */
+#define EM_IA_64 50 /* Intel Merced */
+#define EM_MIPS_X 51 /* Stanford MIPS-X */
+#define EM_COLDFIRE 52 /* Motorola Coldfire */
+#define EM_68HC12 53 /* Motorola M68HC12 */
+#define EM_MMA 54 /* Fujitsu MMA Multimedia Accelerator*/
+#define EM_PCP 55 /* Siemens PCP */
+#define EM_NCPU 56 /* Sony nCPU embedded RISC */
+#define EM_NDR1 57 /* Denso NDR1 microprocessor */
+#define EM_STARCORE 58 /* Motorola Start*Core processor */
+#define EM_ME16 59 /* Toyota ME16 processor */
+#define EM_ST100 60 /* STMicroelectronic ST100 processor */
+#define EM_TINYJ 61 /* Advanced Logic Corp. Tinyj emb.fam*/
+#define EM_X86_64 62 /* AMD x86-64 architecture */
+#define EM_PDSP 63 /* Sony DSP Processor */
+
+#define EM_FX66 66 /* Siemens FX66 microcontroller */
+#define EM_ST9PLUS 67 /* STMicroelectronics ST9+ 8/16 mc */
+#define EM_ST7 68 /* STmicroelectronics ST7 8 bit mc */
+#define EM_68HC16 69 /* Motorola MC68HC16 microcontroller */
+#define EM_68HC11 70 /* Motorola MC68HC11 microcontroller */
+#define EM_68HC08 71 /* Motorola MC68HC08 microcontroller */
+#define EM_68HC05 72 /* Motorola MC68HC05 microcontroller */
+#define EM_SVX 73 /* Silicon Graphics SVx */
+#define EM_ST19 74 /* STMicroelectronics ST19 8 bit mc */
+#define EM_VAX 75 /* Digital VAX */
+#define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */
+#define EM_JAVELIN 77 /* Infineon Technologies 32-bit embedded processor */
+#define EM_FIREPATH 78 /* Element 14 64-bit DSP Processor */
+#define EM_ZSP 79 /* LSI Logic 16-bit DSP Processor */
+#define EM_MMIX 80 /* Donald Knuth's educational 64-bit processor */
+#define EM_HUANY 81 /* Harvard University machine-independent object files */
+#define EM_PRISM 82 /* SiTera Prism */
+#define EM_AVR 83 /* Atmel AVR 8-bit microcontroller */
+#define EM_FR30 84 /* Fujitsu FR30 */
+#define EM_D10V 85 /* Mitsubishi D10V */
+#define EM_D30V 86 /* Mitsubishi D30V */
+#define EM_V850 87 /* NEC v850 */
+#define EM_M32R 88 /* Mitsubishi M32R */
+#define EM_MN10300 89 /* Matsushita MN10300 */
+#define EM_MN10200 90 /* Matsushita MN10200 */
+#define EM_PJ 91 /* picoJava */
+#define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor */
+#define EM_ARC_A5 93 /* ARC Cores Tangent-A5 */
+#define EM_XTENSA 94 /* Tensilica Xtensa Architecture */
+#define EM_NUM 95
+
+/* If it is necessary to assign new unofficial EM_* values, please
+ pick large random numbers (0x8523, 0xa7f2, etc.) to minimize the
+ chances of collision with official or non-GNU unofficial values. */
+
+#define EM_ALPHA 0x9026
+
+/* Legal values for e_version (version). */
+
+#define EV_NONE 0 /* Invalid ELF version */
+#define EV_CURRENT 1 /* Current version */
+#define EV_NUM 2
+
+/* Section header. */
+
+typedef struct
+{
+ Elf32_Word sh_name; /* Section name (string tbl index) */
+ Elf32_Word sh_type; /* Section type */
+ Elf32_Word sh_flags; /* Section flags */
+ Elf32_Addr sh_addr; /* Section virtual addr at execution */
+ Elf32_Off sh_offset; /* Section file offset */
+ Elf32_Word sh_size; /* Section size in bytes */
+ Elf32_Word sh_link; /* Link to another section */
+ Elf32_Word sh_info; /* Additional section information */
+ Elf32_Word sh_addralign; /* Section alignment */
+ Elf32_Word sh_entsize; /* Entry size if section holds table */
+} Elf32_Shdr;
+
+typedef struct
+{
+ Elf64_Word sh_name; /* Section name (string tbl index) */
+ Elf64_Word sh_type; /* Section type */
+ Elf64_Xword sh_flags; /* Section flags */
+ Elf64_Addr sh_addr; /* Section virtual addr at execution */
+ Elf64_Off sh_offset; /* Section file offset */
+ Elf64_Xword sh_size; /* Section size in bytes */
+ Elf64_Word sh_link; /* Link to another section */
+ Elf64_Word sh_info; /* Additional section information */
+ Elf64_Xword sh_addralign; /* Section alignment */
+ Elf64_Xword sh_entsize; /* Entry size if section holds table */
+} Elf64_Shdr;
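+
+/* Usage sketch (illustrative only): walking the 64-bit section header table
+   of a file mapped read-only at `base`, resolving names through the section
+   header string table selected by e_shstrndx. The extended-index cases
+   (e_shnum == 0, e_shstrndx == SHN_XINDEX) are ignored here, and every name
+   other than the ELF types and fields is hypothetical.
+
+       static void for_each_section(const unsigned char *base)
+       {
+           const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *)base;
+           const Elf64_Shdr *shdr = (const Elf64_Shdr *)(base + ehdr->e_shoff);
+           const char *shstrtab =
+               (const char *)(base + shdr[ehdr->e_shstrndx].sh_offset);
+
+           for (Elf64_Half i = 0; i < ehdr->e_shnum; i++) {
+               const char *name = shstrtab + shdr[i].sh_name;
+               // shdr[i].sh_type, sh_addr, sh_offset, sh_size are now usable
+               (void)name;
+           }
+       }
+*/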
+
+/* Special section indices. */
+
+#define SHN_UNDEF 0 /* Undefined section */
+#define SHN_LORESERVE 0xff00 /* Start of reserved indices */
+#define SHN_LOPROC 0xff00 /* Start of processor-specific */
+#define SHN_BEFORE \
+ 0xff00 /* Order section before all others \
+(Solaris). */
+#define SHN_AFTER \
+ 0xff01 /* Order section after all others \
+(Solaris). */
+#define SHN_HIPROC 0xff1f /* End of processor-specific */
+#define SHN_LOOS 0xff20 /* Start of OS-specific */
+#define SHN_HIOS 0xff3f /* End of OS-specific */
+#define SHN_ABS 0xfff1 /* Associated symbol is absolute */
+#define SHN_COMMON 0xfff2 /* Associated symbol is common */
+#define SHN_XINDEX 0xffff /* Index is in extra table. */
+#define SHN_HIRESERVE 0xffff /* End of reserved indices */
+
+/* Legal values for sh_type (section type). */
+
+#define SHT_NULL 0 /* Section header table entry unused */
+#define SHT_PROGBITS 1 /* Program data */
+#define SHT_SYMTAB 2 /* Symbol table */
+#define SHT_STRTAB 3 /* String table */
+#define SHT_RELA 4 /* Relocation entries with addends */
+#define SHT_HASH 5 /* Symbol hash table */
+#define SHT_DYNAMIC 6 /* Dynamic linking information */
+#define SHT_NOTE 7 /* Notes */
+#define SHT_NOBITS 8 /* Program space with no data (bss) */
+#define SHT_REL 9 /* Relocation entries, no addends */
+#define SHT_SHLIB 10 /* Reserved */
+#define SHT_DYNSYM 11 /* Dynamic linker symbol table */
+#define SHT_INIT_ARRAY 14 /* Array of constructors */
+#define SHT_FINI_ARRAY 15 /* Array of destructors */
+#define SHT_PREINIT_ARRAY 16 /* Array of pre-constructors */
+#define SHT_GROUP 17 /* Section group */
+#define SHT_SYMTAB_SHNDX 18 /* Extended section indices */
+#define SHT_NUM 19 /* Number of defined types. */
+#define SHT_LOOS 0x60000000 /* Start OS-specific */
+#define SHT_GNU_LIBLIST 0x6ffffff7 /* Prelink library list */
+#define SHT_CHECKSUM 0x6ffffff8 /* Checksum for DSO content. */
+#define SHT_LOSUNW 0x6ffffffa /* Sun-specific low bound. */
+#define SHT_SUNW_move 0x6ffffffa
+#define SHT_SUNW_COMDAT 0x6ffffffb
+#define SHT_SUNW_syminfo 0x6ffffffc
+#define SHT_GNU_verdef 0x6ffffffd /* Version definition section. */
+#define SHT_GNU_verneed 0x6ffffffe /* Version needs section. */
+#define SHT_GNU_versym 0x6fffffff /* Version symbol table. */
+#define SHT_HISUNW 0x6fffffff /* Sun-specific high bound. */
+#define SHT_HIOS 0x6fffffff /* End OS-specific type */
+#define SHT_LOPROC 0x70000000 /* Start of processor-specific */
+#define SHT_HIPROC 0x7fffffff /* End of processor-specific */
+#define SHT_LOUSER 0x80000000 /* Start of application-specific */
+#define SHT_HIUSER 0x8fffffff /* End of application-specific */
+
+/* Legal values for sh_flags (section flags). */
+
+#define SHF_WRITE (1 << 0) /* Writable */
+#define SHF_ALLOC (1 << 1) /* Occupies memory during execution */
+#define SHF_EXECINSTR (1 << 2) /* Executable */
+#define SHF_MERGE (1 << 4) /* Might be merged */
+#define SHF_STRINGS (1 << 5) /* Contains nul-terminated strings */
+#define SHF_INFO_LINK (1 << 6) /* `sh_info' contains SHT index */
+#define SHF_LINK_ORDER (1 << 7) /* Preserve order after combining */
+#define SHF_OS_NONCONFORMING \
+ (1 << 8) /* Non-standard OS specific handling \
+required */
+#define SHF_GROUP (1 << 9) /* Section is member of a group. */
+#define SHF_TLS (1 << 10) /* Section holds thread-local data. */
+#define SHF_MASKOS 0x0ff00000 /* OS-specific. */
+#define SHF_MASKPROC 0xf0000000 /* Processor-specific */
+#define SHF_ORDERED \
+ (1 << 30) /* Special ordering requirement \
+(Solaris). */
+#define SHF_EXCLUDE \
+ (1 << 31) /* Section is excluded unless \
+referenced or allocated (Solaris).*/
+
+/* Section group handling. */
+#define GRP_COMDAT 0x1 /* Mark group as COMDAT. */
+
+/* Symbol table entry. */
+
+typedef struct
+{
+ Elf32_Word st_name; /* Symbol name (string tbl index) */
+ Elf32_Addr st_value; /* Symbol value */
+ Elf32_Word st_size; /* Symbol size */
+ unsigned char st_info; /* Symbol type and binding */
+ unsigned char st_other; /* Symbol visibility */
+ Elf32_Section st_shndx; /* Section index */
+} Elf32_Sym;
+
+typedef struct
+{
+ Elf64_Word st_name; /* Symbol name (string tbl index) */
+ unsigned char st_info; /* Symbol type and binding */
+ unsigned char st_other; /* Symbol visibility */
+ Elf64_Section st_shndx; /* Section index */
+ Elf64_Addr st_value; /* Symbol value */
+ Elf64_Xword st_size; /* Symbol size */
+} Elf64_Sym;
+
+/* The syminfo section, if available, contains additional information about
+ every dynamic symbol. */
+
+typedef struct
+{
+ Elf32_Half si_boundto; /* Direct bindings, symbol bound to */
+ Elf32_Half si_flags; /* Per symbol flags */
+} Elf32_Syminfo;
+
+typedef struct
+{
+ Elf64_Half si_boundto; /* Direct bindings, symbol bound to */
+ Elf64_Half si_flags; /* Per symbol flags */
+} Elf64_Syminfo;
+
+/* Possible values for si_boundto. */
+#define SYMINFO_BT_SELF 0xffff /* Symbol bound to self */
+#define SYMINFO_BT_PARENT 0xfffe /* Symbol bound to parent */
+#define SYMINFO_BT_LOWRESERVE 0xff00 /* Beginning of reserved entries */
+
+/* Possible bitmasks for si_flags. */
+#define SYMINFO_FLG_DIRECT 0x0001 /* Direct bound symbol */
+#define SYMINFO_FLG_PASSTHRU 0x0002 /* Pass-thru symbol for translator */
+#define SYMINFO_FLG_COPY 0x0004 /* Symbol is a copy-reloc */
+#define SYMINFO_FLG_LAZYLOAD \
+ 0x0008 /* Symbol bound to object to be lazy loaded */
+/* Syminfo version values. */
+#define SYMINFO_NONE 0
+#define SYMINFO_CURRENT 1
+#define SYMINFO_NUM 2
+
+/* How to extract and insert information held in the st_info field. */
+
+#define ELF32_ST_BIND(val) (((unsigned char)(val)) >> 4)
+#define ELF32_ST_TYPE(val) ((val)&0xf)
+#define ELF32_ST_INFO(bind, type) (((bind) << 4) + ((type)&0xf))
+
+/* Both Elf32_Sym and Elf64_Sym use the same one-byte st_info field. */
+#define ELF64_ST_BIND(val) ELF32_ST_BIND(val)
+#define ELF64_ST_TYPE(val) ELF32_ST_TYPE(val)
+#define ELF64_ST_INFO(bind, type) ELF32_ST_INFO((bind), (type))
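+
+/* Usage sketch (illustrative only): st_info packs the binding in the upper
+   four bits and the type in the lower four, so the macros above are plain
+   shifts and masks. STB_GLOBAL and STT_FUNC are defined just below.
+
+       unsigned char info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
+       // ELF64_ST_BIND(info) == STB_GLOBAL, ELF64_ST_TYPE(info) == STT_FUNC
+*/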
+
+/* Legal values for ST_BIND subfield of st_info (symbol binding). */
+
+#define STB_LOCAL 0 /* Local symbol */
+#define STB_GLOBAL 1 /* Global symbol */
+#define STB_WEAK 2 /* Weak symbol */
+#define STB_NUM 3 /* Number of defined types. */
+#define STB_LOOS 10 /* Start of OS-specific */
+#define STB_HIOS 12 /* End of OS-specific */
+#define STB_LOPROC 13 /* Start of processor-specific */
+#define STB_HIPROC 15 /* End of processor-specific */
+
+/* Legal values for ST_TYPE subfield of st_info (symbol type). */
+
+#define STT_NOTYPE 0 /* Symbol type is unspecified */
+#define STT_OBJECT 1 /* Symbol is a data object */
+#define STT_FUNC 2 /* Symbol is a code object */
+#define STT_SECTION 3 /* Symbol associated with a section */
+#define STT_FILE 4 /* Symbol's name is file name */
+#define STT_COMMON 5 /* Symbol is a common data object */
+#define STT_TLS 6 /* Symbol is thread-local data object*/
+#define STT_NUM 7 /* Number of defined types. */
+#define STT_LOOS 10 /* Start of OS-specific */
+#define STT_HIOS 12 /* End of OS-specific */
+#define STT_LOPROC 13 /* Start of processor-specific */
+#define STT_HIPROC 15 /* End of processor-specific */
+
+/* Symbol table indices are found in the hash buckets and chain table
+ of a symbol hash table section. This special index value indicates
+ the end of a chain, meaning no further symbols are found in that bucket. */
+
+#define STN_UNDEF 0 /* End of a chain. */
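+
+/* Usage sketch (illustrative only): looking a name up through a classic SysV
+   hash section, which starts with the nbucket and nchain words followed by
+   the bucket and chain arrays, with STN_UNDEF terminating each chain. The
+   hash function is the well-known SysV ELF hash, strcmp() comes from
+   <string.h>, and all helper names are hypothetical.
+
+       static unsigned long sysv_elf_hash(const unsigned char *name)
+       {
+           unsigned long h = 0, g;
+           while (*name) {
+               h = (h << 4) + *name++;
+               if ((g = h & 0xf0000000))
+                   h ^= g >> 24;
+               h &= ~g;
+           }
+           return h;
+       }
+
+       static const Elf64_Sym *hash_lookup(const Elf64_Word *hash,
+                                           const Elf64_Sym *symtab,
+                                           const char *strtab,
+                                           const char *name)
+       {
+           Elf64_Word nbucket = hash[0];
+           const Elf64_Word *bucket = hash + 2;
+           const Elf64_Word *chain = bucket + nbucket;
+           Elf64_Word i =
+               bucket[sysv_elf_hash((const unsigned char *)name) % nbucket];
+
+           for (; i != STN_UNDEF; i = chain[i])
+               if (strcmp(strtab + symtab[i].st_name, name) == 0)
+                   return &symtab[i];
+           return 0;                              // not found
+       }
+*/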
+
+/* How to extract and insert information held in the st_other field. */
+
+#define ELF32_ST_VISIBILITY(o) ((o)&0x03)
+
+/* For ELF64 the definitions are the same. */
+#define ELF64_ST_VISIBILITY(o) ELF32_ST_VISIBILITY(o)
+
+/* Symbol visibility specification encoded in the st_other field. */
+#define STV_DEFAULT 0 /* Default symbol visibility rules */
+#define STV_INTERNAL 1 /* Processor specific hidden class */
+#define STV_HIDDEN 2 /* Sym unavailable in other modules */
+#define STV_PROTECTED 3 /* Not preemptible, not exported */
+
+/* Relocation table entry without addend (in section of type SHT_REL). */
+
+typedef struct
+{
+ Elf32_Addr r_offset; /* Address */
+ Elf32_Word r_info; /* Relocation type and symbol index */
+} Elf32_Rel;
+
+/* I have seen two different definitions of the Elf64_Rel and
+ Elf64_Rela structures, so we'll leave them out until Novell (or
+ whoever) gets their act together. */
+/* The following, at least, is used on Sparc v9, MIPS, and Alpha. */
+
+typedef struct
+{
+ Elf64_Addr r_offset; /* Address */
+ Elf64_Xword r_info; /* Relocation type and symbol index */
+} Elf64_Rel;
+
+/* Relocation table entry with addend (in section of type SHT_RELA). */
+
+typedef struct
+{
+ Elf32_Addr r_offset; /* Address */
+ Elf32_Word r_info; /* Relocation type and symbol index */
+ Elf32_Sword r_addend; /* Addend */
+} Elf32_Rela;
+
+typedef struct
+{
+ Elf64_Addr r_offset; /* Address */
+ Elf64_Xword r_info; /* Relocation type and symbol index */
+ Elf64_Sxword r_addend; /* Addend */
+} Elf64_Rela;
+
+/* How to extract and insert information held in the r_info field. */
+
+#define ELF32_R_SYM(val) ((val) >> 8)
+#define ELF32_R_TYPE(val) ((val)&0xff)
+#define ELF32_R_INFO(sym, type) (((sym) << 8) + ((type)&0xff))
+
+#define ELF64_R_SYM(i) ((i) >> 32)
+#define ELF64_R_TYPE(i) ((i)&0xffffffff)
+#define ELF64_R_INFO(sym, type) ((((Elf64_Xword)(sym)) << 32) + (type))
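+
+/* Usage sketch (illustrative only): r_info is split into a symbol table
+   index and a target-specific relocation type with the macros above. The
+   helper name is hypothetical.
+
+       static void decode_rela(const Elf64_Rela *rela,
+                               Elf64_Xword *symndx, Elf64_Xword *type)
+       {
+           *symndx = ELF64_R_SYM(rela->r_info);
+           *type = ELF64_R_TYPE(rela->r_info);
+           // how the type and r_addend are applied at r_offset is defined
+           // by the target architecture's relocation tables (R_386_*, etc.)
+       }
+*/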
+
+/* Program segment header. */
+
+typedef struct
+{
+ Elf32_Word p_type; /* Segment type */
+ Elf32_Off p_offset; /* Segment file offset */
+ Elf32_Addr p_vaddr; /* Segment virtual address */
+ Elf32_Addr p_paddr; /* Segment physical address */
+ Elf32_Word p_filesz; /* Segment size in file */
+ Elf32_Word p_memsz; /* Segment size in memory */
+ Elf32_Word p_flags; /* Segment flags */
+ Elf32_Word p_align; /* Segment alignment */
+} Elf32_Phdr;
+
+typedef struct
+{
+ Elf64_Word p_type; /* Segment type */
+ Elf64_Word p_flags; /* Segment flags */
+ Elf64_Off p_offset; /* Segment file offset */
+ Elf64_Addr p_vaddr; /* Segment virtual address */
+ Elf64_Addr p_paddr; /* Segment physical address */
+ Elf64_Xword p_filesz; /* Segment size in file */
+ Elf64_Xword p_memsz; /* Segment size in memory */
+ Elf64_Xword p_align; /* Segment alignment */
+} Elf64_Phdr;
+
+/* Legal values for p_type (segment type). */
+
+#define PT_NULL 0 /* Program header table entry unused */
+#define PT_LOAD 1 /* Loadable program segment */
+#define PT_DYNAMIC 2 /* Dynamic linking information */
+#define PT_INTERP 3 /* Program interpreter */
+#define PT_NOTE 4 /* Auxiliary information */
+#define PT_SHLIB 5 /* Reserved */
+#define PT_PHDR 6 /* Entry for header table itself */
+#define PT_TLS 7 /* Thread-local storage segment */
+#define PT_NUM 8 /* Number of defined types */
+#define PT_LOOS 0x60000000 /* Start of OS-specific */
+#define PT_GNU_EH_FRAME 0x6474e550 /* GCC .eh_frame_hdr segment */
+#define PT_GNU_STACK 0x6474e551 /* Indicates stack executability */
+#define PT_GNU_RELRO 0x6474e552 /* Read-only after relocation */
+#define PT_LOSUNW 0x6ffffffa
+#define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment */
+#define PT_SUNWSTACK 0x6ffffffb /* Stack segment */
+#define PT_HISUNW 0x6fffffff
+#define PT_HIOS 0x6fffffff /* End of OS-specific */
+#define PT_LOPROC 0x70000000 /* Start of processor-specific */
+#define PT_HIPROC 0x7fffffff /* End of processor-specific */
+
+/* Legal values for p_flags (segment flags). */
+
+#define PF_X (1 << 0) /* Segment is executable */
+#define PF_W (1 << 1) /* Segment is writable */
+#define PF_R (1 << 2) /* Segment is readable */
+#define PF_MASKOS 0x0ff00000 /* OS-specific */
+#define PF_MASKPROC 0xf0000000 /* Processor-specific */
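+
+/* Usage sketch (illustrative only): a loader usually walks the program
+   header table once to find the address range covered by the PT_LOAD
+   segments before mapping each one with the access implied by PF_R, PF_W
+   and PF_X. `base` points at the start of the file image; the helper name
+   is hypothetical.
+
+       static void load_span(const unsigned char *base,
+                             Elf64_Addr *lo, Elf64_Addr *hi)
+       {
+           const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *)base;
+           const Elf64_Phdr *phdr = (const Elf64_Phdr *)(base + ehdr->e_phoff);
+
+           *lo = (Elf64_Addr)-1;
+           *hi = 0;
+           for (Elf64_Half i = 0; i < ehdr->e_phnum; i++) {
+               if (phdr[i].p_type != PT_LOAD)
+                   continue;
+               if (phdr[i].p_vaddr < *lo)
+                   *lo = phdr[i].p_vaddr;
+               if (phdr[i].p_vaddr + phdr[i].p_memsz > *hi)
+                   *hi = phdr[i].p_vaddr + phdr[i].p_memsz;
+           }
+       }
+*/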
+
+/* Legal values for note segment descriptor types for core files. */
+
+#define NT_PRSTATUS 1 /* Contains copy of prstatus struct */
+#define NT_FPREGSET 2 /* Contains copy of fpregset struct */
+#define NT_PRPSINFO 3 /* Contains copy of prpsinfo struct */
+#define NT_PRXREG 4 /* Contains copy of prxregset struct */
+#define NT_TASKSTRUCT 4 /* Contains copy of task structure */
+#define NT_PLATFORM 5 /* String from sysinfo(SI_PLATFORM) */
+#define NT_AUXV 6 /* Contains copy of auxv array */
+#define NT_GWINDOWS 7 /* Contains copy of gwindows struct */
+#define NT_ASRS 8 /* Contains copy of asrset struct */
+#define NT_PSTATUS 10 /* Contains copy of pstatus struct */
+#define NT_PSINFO 13 /* Contains copy of psinfo struct */
+#define NT_PRCRED 14 /* Contains copy of prcred struct */
+#define NT_UTSNAME 15 /* Contains copy of utsname struct */
+#define NT_LWPSTATUS 16 /* Contains copy of lwpstatus struct */
+#define NT_LWPSINFO 17 /* Contains copy of lwpinfo struct */
+#define NT_PRFPXREG 20 /* Contains copy of fprxregset struct*/
+
+/* Legal values for the note segment descriptor types for object files. */
+
+#define NT_VERSION 1 /* Contains a version string. */
+
+/* Dynamic section entry. */
+
+typedef struct
+{
+ Elf32_Sword d_tag; /* Dynamic entry type */
+ union {
+ Elf32_Word d_val; /* Integer value */
+ Elf32_Addr d_ptr; /* Address value */
+ } d_un;
+} Elf32_Dyn;
+
+typedef struct
+{
+ Elf64_Sxword d_tag; /* Dynamic entry type */
+ union {
+ Elf64_Xword d_val; /* Integer value */
+ Elf64_Addr d_ptr; /* Address value */
+ } d_un;
+} Elf64_Dyn;
+
+/* Legal values for d_tag (dynamic entry type). */
+
+#define DT_NULL 0 /* Marks end of dynamic section */
+#define DT_NEEDED 1 /* Name of needed library */
+#define DT_PLTRELSZ 2 /* Size in bytes of PLT relocs */
+#define DT_PLTGOT 3 /* Processor defined value */
+#define DT_HASH 4 /* Address of symbol hash table */
+#define DT_STRTAB 5 /* Address of string table */
+#define DT_SYMTAB 6 /* Address of symbol table */
+#define DT_RELA 7 /* Address of Rela relocs */
+#define DT_RELASZ 8 /* Total size of Rela relocs */
+#define DT_RELAENT 9 /* Size of one Rela reloc */
+#define DT_STRSZ 10 /* Size of string table */
+#define DT_SYMENT 11 /* Size of one symbol table entry */
+#define DT_INIT 12 /* Address of init function */
+#define DT_FINI 13 /* Address of termination function */
+#define DT_SONAME 14 /* Name of shared object */
+#define DT_RPATH 15 /* Library search path (deprecated) */
+#define DT_SYMBOLIC 16 /* Start symbol search here */
+#define DT_REL 17 /* Address of Rel relocs */
+#define DT_RELSZ 18 /* Total size of Rel relocs */
+#define DT_RELENT 19 /* Size of one Rel reloc */
+#define DT_PLTREL 20 /* Type of reloc in PLT */
+#define DT_DEBUG 21 /* For debugging; unspecified */
+#define DT_TEXTREL 22 /* Reloc might modify .text */
+#define DT_JMPREL 23 /* Address of PLT relocs */
+#define DT_BIND_NOW 24 /* Process relocations of object */
+#define DT_INIT_ARRAY 25 /* Array with addresses of init fct */
+#define DT_FINI_ARRAY 26 /* Array with addresses of fini fct */
+#define DT_INIT_ARRAYSZ 27 /* Size in bytes of DT_INIT_ARRAY */
+#define DT_FINI_ARRAYSZ 28 /* Size in bytes of DT_FINI_ARRAY */
+#define DT_RUNPATH 29 /* Library search path */
+#define DT_FLAGS 30 /* Flags for the object being loaded */
+#define DT_ENCODING 32 /* Start of encoded range */
+#define DT_PREINIT_ARRAY 32 /* Array with addresses of preinit fct*/
+#define DT_PREINIT_ARRAYSZ 33 /* size in bytes of DT_PREINIT_ARRAY */
+#define DT_NUM 34 /* Number used */
+#define DT_LOOS 0x6000000d /* Start of OS-specific */
+#define DT_HIOS 0x6ffff000 /* End of OS-specific */
+#define DT_LOPROC 0x70000000 /* Start of processor-specific */
+#define DT_HIPROC 0x7fffffff /* End of processor-specific */
+#define DT_PROCNUM DT_MIPS_NUM /* Most used by any processor */
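+
+/* Usage sketch (illustrative only): the dynamic array is a sequence of
+   Elf64_Dyn entries terminated by DT_NULL. A dynamic linker records
+   DT_STRTAB first and then resolves DT_NEEDED entries through it. In a
+   running image the d_ptr values are absolute addresses; an offline reader
+   has to translate them itself. The helper name is hypothetical.
+
+       static void list_needed(const Elf64_Dyn *dyn)
+       {
+           const char *strtab = 0;
+
+           for (const Elf64_Dyn *d = dyn; d->d_tag != DT_NULL; d++)
+               if (d->d_tag == DT_STRTAB)
+                   strtab = (const char *)d->d_un.d_ptr;
+
+           for (const Elf64_Dyn *d = dyn; d->d_tag != DT_NULL; d++)
+               if (strtab && d->d_tag == DT_NEEDED)
+                   // strtab + d->d_un.d_val is the name of a needed library
+                   (void)(strtab + d->d_un.d_val);
+       }
+*/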
+
+/* DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the
+ Dyn.d_un.d_val field of the Elf*_Dyn structure. This follows Sun's
+ approach. */
+#define DT_VALRNGLO 0x6ffffd00
+#define DT_GNU_PRELINKED 0x6ffffdf5 /* Prelinking timestamp */
+#define DT_GNU_CONFLICTSZ 0x6ffffdf6 /* Size of conflict section */
+#define DT_GNU_LIBLISTSZ 0x6ffffdf7 /* Size of library list */
+#define DT_CHECKSUM 0x6ffffdf8
+#define DT_PLTPADSZ 0x6ffffdf9
+#define DT_MOVEENT 0x6ffffdfa
+#define DT_MOVESZ 0x6ffffdfb
+#define DT_FEATURE_1 0x6ffffdfc /* Feature selection (DTF_*). */
+#define DT_POSFLAG_1 \
+ 0x6ffffdfd /* Flags for DT_* entries, affecting \
+the following DT_* entry. */
+#define DT_SYMINSZ 0x6ffffdfe /* Size of syminfo table (in bytes) */
+#define DT_SYMINENT 0x6ffffdff /* Entry size of syminfo */
+#define DT_VALRNGHI 0x6ffffdff
+#define DT_VALTAGIDX(tag) (DT_VALRNGHI - (tag)) /* Reverse order! */
+#define DT_VALNUM 12
+
+/* DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the
+ Dyn.d_un.d_ptr field of the Elf*_Dyn structure.
+
+ If any adjustment is made to the ELF object after it has been
+ built these entries will need to be adjusted. */
+#define DT_ADDRRNGLO 0x6ffffe00
+#define DT_GNU_CONFLICT 0x6ffffef8 /* Start of conflict section */
+#define DT_GNU_LIBLIST 0x6ffffef9 /* Library list */
+#define DT_CONFIG 0x6ffffefa /* Configuration information. */
+#define DT_DEPAUDIT 0x6ffffefb /* Dependency auditing. */
+#define DT_AUDIT 0x6ffffefc /* Object auditing. */
+#define DT_PLTPAD 0x6ffffefd /* PLT padding. */
+#define DT_MOVETAB 0x6ffffefe /* Move table. */
+#define DT_SYMINFO 0x6ffffeff /* Syminfo table. */
+#define DT_ADDRRNGHI 0x6ffffeff
+#define DT_ADDRTAGIDX(tag) (DT_ADDRRNGHI - (tag)) /* Reverse order! */
+#define DT_ADDRNUM 10
+
+/* The versioning entry types. The following are defined as part of the
+ GNU extension. */
+#define DT_VERSYM 0x6ffffff0
+
+#define DT_RELACOUNT 0x6ffffff9
+#define DT_RELCOUNT 0x6ffffffa
+
+/* These were chosen by Sun. */
+#define DT_FLAGS_1 0x6ffffffb /* State flags, see DF_1_* below. */
+#define DT_VERDEF \
+ 0x6ffffffc /* Address of version definition \
+table */
+#define DT_VERDEFNUM 0x6ffffffd /* Number of version definitions */
+#define DT_VERNEED \
+ 0x6ffffffe /* Address of table with needed \
+ versions */
+#define DT_VERNEEDNUM 0x6fffffff /* Number of needed versions */
+#define DT_VERSIONTAGIDX(tag) (DT_VERNEEDNUM - (tag)) /* Reverse order! */
+#define DT_VERSIONTAGNUM 16
+
+/* Sun added these machine-independent extensions in the "processor-specific"
+ range. Be compatible. */
+#define DT_AUXILIARY 0x7ffffffd /* Shared object to load before self */
+#define DT_FILTER 0x7fffffff /* Shared object to get values from */
+#define DT_EXTRATAGIDX(tag) ((Elf32_Word) - ((Elf32_Sword)(tag) << 1 >> 1) - 1)
+#define DT_EXTRANUM 3
+
+/* Values of `d_un.d_val' in the DT_FLAGS entry. */
+#define DF_ORIGIN 0x00000001 /* Object may use DF_ORIGIN */
+#define DF_SYMBOLIC 0x00000002 /* Symbol resolutions starts here */
+#define DF_TEXTREL 0x00000004 /* Object contains text relocations */
+#define DF_BIND_NOW 0x00000008 /* No lazy binding for this object */
+#define DF_STATIC_TLS 0x00000010 /* Module uses the static TLS model */
+
+/* State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1
+ entry in the dynamic section. */
+#define DF_1_NOW 0x00000001 /* Set RTLD_NOW for this object. */
+#define DF_1_GLOBAL 0x00000002 /* Set RTLD_GLOBAL for this object. */
+#define DF_1_GROUP 0x00000004 /* Set RTLD_GROUP for this object. */
+#define DF_1_NODELETE 0x00000008 /* Set RTLD_NODELETE for this object.*/
+#define DF_1_LOADFLTR 0x00000010 /* Trigger filtee loading at runtime.*/
+#define DF_1_INITFIRST 0x00000020 /* Set RTLD_INITFIRST for this object*/
+#define DF_1_NOOPEN 0x00000040 /* Set RTLD_NOOPEN for this object. */
+#define DF_1_ORIGIN 0x00000080 /* $ORIGIN must be handled. */
+#define DF_1_DIRECT 0x00000100 /* Direct binding enabled. */
+#define DF_1_TRANS 0x00000200
+#define DF_1_INTERPOSE 0x00000400 /* Object is used to interpose. */
+#define DF_1_NODEFLIB 0x00000800 /* Ignore default lib search path. */
+#define DF_1_NODUMP 0x00001000 /* Object can't be dldump'ed. */
+#define DF_1_CONFALT 0x00002000 /* Configuration alternative created.*/
+#define DF_1_ENDFILTEE 0x00004000 /* Filtee terminates filters search. */
+#define DF_1_DISPRELDNE 0x00008000 /* Disp reloc applied at build time. */
+#define DF_1_DISPRELPND 0x00010000 /* Disp reloc applied at run-time. */
+
+/* Flags for the feature selection in DT_FEATURE_1. */
+#define DTF_1_PARINIT 0x00000001
+#define DTF_1_CONFEXP 0x00000002
+
+/* Flags in the DT_POSFLAG_1 entry affecting only the next DT_* entry. */
+#define DF_P1_LAZYLOAD 0x00000001 /* Lazyload following object. */
+#define DF_P1_GROUPPERM \
+ 0x00000002 /* Symbols from next object are not \
+generally available. */
+
+/* Version definition sections. */
+
+typedef struct
+{
+ Elf32_Half vd_version; /* Version revision */
+ Elf32_Half vd_flags; /* Version information */
+ Elf32_Half vd_ndx; /* Version Index */
+ Elf32_Half vd_cnt; /* Number of associated aux entries */
+ Elf32_Word vd_hash; /* Version name hash value */
+ Elf32_Word vd_aux; /* Offset in bytes to verdaux array */
+ Elf32_Word vd_next; /* Offset in bytes to next verdef
+ entry */
+} Elf32_Verdef;
+
+typedef struct
+{
+ Elf64_Half vd_version; /* Version revision */
+ Elf64_Half vd_flags; /* Version information */
+ Elf64_Half vd_ndx; /* Version Index */
+ Elf64_Half vd_cnt; /* Number of associated aux entries */
+ Elf64_Word vd_hash; /* Version name hash value */
+ Elf64_Word vd_aux; /* Offset in bytes to verdaux array */
+ Elf64_Word vd_next; /* Offset in bytes to next verdef
+ entry */
+} Elf64_Verdef;
+
+/* Legal values for vd_version (version revision). */
+#define VER_DEF_NONE 0 /* No version */
+#define VER_DEF_CURRENT 1 /* Current version */
+#define VER_DEF_NUM 2 /* Given version number */
+
+/* Legal values for vd_flags (version information flags). */
+#define VER_FLG_BASE 0x1 /* Version definition of file itself */
+#define VER_FLG_WEAK 0x2 /* Weak version identifier */
+
+/* Versym symbol index values. */
+#define VER_NDX_LOCAL 0 /* Symbol is local. */
+#define VER_NDX_GLOBAL 1 /* Symbol is global. */
+#define VER_NDX_LORESERVE 0xff00 /* Beginning of reserved entries. */
+#define VER_NDX_ELIMINATE 0xff01 /* Symbol is to be eliminated. */
+
+/* Auxiliary version information. */
+
+typedef struct
+{
+ Elf32_Word vda_name; /* Version or dependency names */
+ Elf32_Word vda_next; /* Offset in bytes to next verdaux
+ entry */
+} Elf32_Verdaux;
+
+typedef struct
+{
+ Elf64_Word vda_name; /* Version or dependency names */
+ Elf64_Word vda_next; /* Offset in bytes to next verdaux
+ entry */
+} Elf64_Verdaux;
+
+/* Version dependency section. */
+
+typedef struct
+{
+ Elf32_Half vn_version; /* Version of structure */
+ Elf32_Half vn_cnt; /* Number of associated aux entries */
+ Elf32_Word vn_file; /* Offset of filename for this
+ dependency */
+ Elf32_Word vn_aux; /* Offset in bytes to vernaux array */
+ Elf32_Word vn_next; /* Offset in bytes to next verneed
+ entry */
+} Elf32_Verneed;
+
+typedef struct
+{
+ Elf64_Half vn_version; /* Version of structure */
+ Elf64_Half vn_cnt; /* Number of associated aux entries */
+ Elf64_Word vn_file; /* Offset of filename for this
+ dependency */
+ Elf64_Word vn_aux; /* Offset in bytes to vernaux array */
+ Elf64_Word vn_next; /* Offset in bytes to next verneed
+ entry */
+} Elf64_Verneed;
+
+/* Legal values for vn_version (version revision). */
+#define VER_NEED_NONE 0 /* No version */
+#define VER_NEED_CURRENT 1 /* Current version */
+#define VER_NEED_NUM 2 /* Given version number */
+
+/* Auxiliary needed version information. */
+
+typedef struct
+{
+ Elf32_Word vna_hash; /* Hash value of dependency name */
+ Elf32_Half vna_flags; /* Dependency specific information */
+ Elf32_Half vna_other; /* Unused */
+ Elf32_Word vna_name; /* Dependency name string offset */
+ Elf32_Word vna_next; /* Offset in bytes to next vernaux
+ entry */
+} Elf32_Vernaux;
+
+typedef struct
+{
+ Elf64_Word vna_hash; /* Hash value of dependency name */
+ Elf64_Half vna_flags; /* Dependency specific information */
+ Elf64_Half vna_other; /* Unused */
+ Elf64_Word vna_name; /* Dependency name string offset */
+ Elf64_Word vna_next; /* Offset in bytes to next vernaux
+ entry */
+} Elf64_Vernaux;
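+
+/* Usage sketch (illustrative only): verneed entries and their vernaux lists
+   are chained by byte offsets (vn_next and vna_next), with a zero vn_next
+   ending the outer chain and vn_cnt bounding the inner one. `verneed` points
+   at the SHT_GNU_verneed section contents and `strtab` at the associated
+   string table; the helper name is hypothetical.
+
+       static void walk_verneed(const unsigned char *verneed,
+                                const char *strtab)
+       {
+           const unsigned char *p = verneed;
+
+           for (;;) {
+               const Elf64_Verneed *vn = (const Elf64_Verneed *)p;
+               const unsigned char *q = p + vn->vn_aux;
+
+               for (Elf64_Half i = 0; i < vn->vn_cnt; i++) {
+                   const Elf64_Vernaux *vna = (const Elf64_Vernaux *)q;
+                   // strtab + vn->vn_file names the dependency file and
+                   // strtab + vna->vna_name names the required version
+                   q += vna->vna_next;
+               }
+               if (vn->vn_next == 0)
+                   break;
+               p += vn->vn_next;
+           }
+       }
+*/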
+
+/* Legal values for vna_flags. */
+#define VER_FLG_WEAK 0x2 /* Weak version identifier */
+
+/* Auxiliary vector. */
+
+/* This vector is normally only used by the program interpreter. The
+ usual definition in an ABI supplement uses the name auxv_t. The
+ vector is not usually defined in a standard <elf.h> file, but it
+ can't hurt. We rename it to avoid conflicts. The sizes of these
+ types are an arrangement between the exec server and the program
+ interpreter, so we don't fully specify them here. */
+
+typedef struct
+{
+ int a_type; /* Entry type */
+ union {
+ long int a_val; /* Integer value */
+ void *a_ptr; /* Pointer value */
+ void (*a_fcn)(void); /* Function pointer value */
+ } a_un;
+} Elf32_auxv_t;
+
+typedef struct
+{
+ long int a_type; /* Entry type */
+ union {
+ long int a_val; /* Integer value */
+ void *a_ptr; /* Pointer value */
+ void (*a_fcn)(void); /* Function pointer value */
+ } a_un;
+} Elf64_auxv_t;
+
+/* Legal values for a_type (entry type). */
+
+#define AT_NULL 0 /* End of vector */
+#define AT_IGNORE 1 /* Entry should be ignored */
+#define AT_EXECFD 2 /* File descriptor of program */
+#define AT_PHDR 3 /* Program headers for program */
+#define AT_PHENT 4 /* Size of program header entry */
+#define AT_PHNUM 5 /* Number of program headers */
+#define AT_PAGESZ 6 /* System page size */
+#define AT_BASE 7 /* Base address of interpreter */
+#define AT_FLAGS 8 /* Flags */
+#define AT_ENTRY 9 /* Entry point of program */
+#define AT_NOTELF 10 /* Program is not ELF */
+#define AT_UID 11 /* Real uid */
+#define AT_EUID 12 /* Effective uid */
+#define AT_GID 13 /* Real gid */
+#define AT_EGID 14 /* Effective gid */
+#define AT_CLKTCK 17 /* Frequency of times() */
+
+/* Some more special a_type values describing the hardware. */
+#define AT_PLATFORM 15 /* String identifying platform. */
+#define AT_HWCAP \
+ 16 /* Machine dependent hints about \
+processor capabilities. */
+
+/* This entry gives some information about the FPU initialization
+ performed by the kernel. */
+#define AT_FPUCW 18 /* Used FPU control word. */
+
+/* Cache block sizes. */
+#define AT_DCACHEBSIZE 19 /* Data cache block size. */
+#define AT_ICACHEBSIZE 20 /* Instruction cache block size. */
+#define AT_UCACHEBSIZE 21 /* Unified cache block size. */
+
+/* A special ignored value for PPC, used by the kernel to control the
+ interpretation of the AUXV. Must be > 16. */
+#define AT_IGNOREPPC 22 /* Entry should be ignored. */
+
+#define AT_SECURE 23 /* Boolean, was exec setuid-like? */
+
+/* Pointer to the global system page used for system calls and other
+ nice things. */
+#define AT_SYSINFO 32
+#define AT_SYSINFO_EHDR 33
+
+/* Shapes of the caches. Bits 0-3 contain associativity; bits 4-7 contain
+ log2 of line size; mask those to get cache size. */
+#define AT_L1I_CACHESHAPE 34
+#define AT_L1D_CACHESHAPE 35
+#define AT_L2_CACHESHAPE 36
+#define AT_L3_CACHESHAPE 37
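+
+/* Usage sketch (illustrative only): the auxiliary vector handed to a new
+   program is an array of these entries terminated by AT_NULL, so a runtime
+   can scan it once for values such as the page size or the program header
+   location. The helper name is hypothetical.
+
+       static long auxv_get(const Elf64_auxv_t *auxv, long type)
+       {
+           for (const Elf64_auxv_t *av = auxv; av->a_type != AT_NULL; av++)
+               if (av->a_type == type)
+                   return av->a_un.a_val;
+           return 0;                              // entry not present
+       }
+
+       // e.g. auxv_get(auxv, AT_PAGESZ) or auxv_get(auxv, AT_PHDR)
+*/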
+
+/* Note section contents. Each entry in the note section begins with
+ a header of a fixed form. */
+
+typedef struct
+{
+ Elf32_Word n_namesz; /* Length of the note's name. */
+ Elf32_Word n_descsz; /* Length of the note's descriptor. */
+ Elf32_Word n_type; /* Type of the note. */
+} Elf32_Nhdr;
+
+typedef struct
+{
+ Elf64_Word n_namesz; /* Length of the note's name. */
+ Elf64_Word n_descsz; /* Length of the note's descriptor. */
+ Elf64_Word n_type; /* Type of the note. */
+} Elf64_Nhdr;
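+
+/* Usage sketch (illustrative only): each note entry is an Nhdr immediately
+   followed by n_namesz name bytes and n_descsz descriptor bytes, each padded
+   to a 4-byte boundary (in practice even in 64-bit files). `end` marks the
+   end of the note segment or section; the names below are hypothetical.
+
+       #define NOTE_ALIGN(x) (((x) + 3u) & ~3u)
+
+       static void walk_notes(const unsigned char *p, const unsigned char *end)
+       {
+           while (p + sizeof(Elf64_Nhdr) <= end) {
+               const Elf64_Nhdr *nhdr = (const Elf64_Nhdr *)p;
+               const char *name = (const char *)(nhdr + 1);
+               const unsigned char *desc =
+                   (const unsigned char *)name + NOTE_ALIGN(nhdr->n_namesz);
+
+               // nhdr->n_type identifies the note; name and desc hold its
+               // payload
+               p = desc + NOTE_ALIGN(nhdr->n_descsz);
+           }
+       }
+*/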
+
+/* Known names of notes. */
+
+/* Solaris entries in the note section have this name. */
+#define ELF_NOTE_SOLARIS "SUNW Solaris"
+
+/* Note entries for GNU systems have this name. */
+#define ELF_NOTE_GNU "GNU"
+
+/* Defined types of notes for Solaris. */
+
+/* Value of descriptor (one word) is desired pagesize for the binary. */
+#define ELF_NOTE_PAGESIZE_HINT 1
+
+/* Defined note types for GNU systems. */
+
+/* ABI information. The descriptor consists of words:
+ word 0: OS descriptor
+ word 1: major version of the ABI
+ word 2: minor version of the ABI
+ word 3: subminor version of the ABI
+*/
+#define ELF_NOTE_ABI 1
+
+/* Known OSes. These values can appear in word 0 of an ELF_NOTE_ABI
+ note section entry. */
+#define ELF_NOTE_OS_LINUX 0
+#define ELF_NOTE_OS_GNU 1
+#define ELF_NOTE_OS_SOLARIS2 2
+#define ELF_NOTE_OS_FREEBSD 3
+
+/* Move records. */
+typedef struct
+{
+ Elf32_Xword m_value; /* Symbol value. */
+ Elf32_Word m_info; /* Size and index. */
+ Elf32_Word m_poffset; /* Symbol offset. */
+ Elf32_Half m_repeat; /* Repeat count. */
+ Elf32_Half m_stride; /* Stride info. */
+} Elf32_Move;
+
+typedef struct
+{
+ Elf64_Xword m_value; /* Symbol value. */
+ Elf64_Xword m_info; /* Size and index. */
+ Elf64_Xword m_poffset; /* Symbol offset. */
+ Elf64_Half m_repeat; /* Repeat count. */
+ Elf64_Half m_stride; /* Stride info. */
+} Elf64_Move;
+
+/* Macro to construct move records. */
+#define ELF32_M_SYM(info) ((info) >> 8)
+#define ELF32_M_SIZE(info) ((unsigned char)(info))
+#define ELF32_M_INFO(sym, size) (((sym) << 8) + (unsigned char)(size))
+
+#define ELF64_M_SYM(info) ELF32_M_SYM(info)
+#define ELF64_M_SIZE(info) ELF32_M_SIZE(info)
+#define ELF64_M_INFO(sym, size) ELF32_M_INFO(sym, size)
+
+/* Motorola 68k specific definitions. */
+
+/* Values for Elf32_Ehdr.e_flags. */
+#define EF_CPU32 0x00810000
+
+/* m68k relocs. */
+
+#define R_68K_NONE 0 /* No reloc */
+#define R_68K_32 1 /* Direct 32 bit */
+#define R_68K_16 2 /* Direct 16 bit */
+#define R_68K_8 3 /* Direct 8 bit */
+#define R_68K_PC32 4 /* PC relative 32 bit */
+#define R_68K_PC16 5 /* PC relative 16 bit */
+#define R_68K_PC8 6 /* PC relative 8 bit */
+#define R_68K_GOT32 7 /* 32 bit PC relative GOT entry */
+#define R_68K_GOT16 8 /* 16 bit PC relative GOT entry */
+#define R_68K_GOT8 9 /* 8 bit PC relative GOT entry */
+#define R_68K_GOT32O 10 /* 32 bit GOT offset */
+#define R_68K_GOT16O 11 /* 16 bit GOT offset */
+#define R_68K_GOT8O 12 /* 8 bit GOT offset */
+#define R_68K_PLT32 13 /* 32 bit PC relative PLT address */
+#define R_68K_PLT16 14 /* 16 bit PC relative PLT address */
+#define R_68K_PLT8 15 /* 8 bit PC relative PLT address */
+#define R_68K_PLT32O 16 /* 32 bit PLT offset */
+#define R_68K_PLT16O 17 /* 16 bit PLT offset */
+#define R_68K_PLT8O 18 /* 8 bit PLT offset */
+#define R_68K_COPY 19 /* Copy symbol at runtime */
+#define R_68K_GLOB_DAT 20 /* Create GOT entry */
+#define R_68K_JMP_SLOT 21 /* Create PLT entry */
+#define R_68K_RELATIVE 22 /* Adjust by program base */
+/* Keep this the last entry. */
+#define R_68K_NUM 23
+
+/* Intel 80386 specific definitions. */
+
+/* i386 relocs. */
+
+#define R_386_NONE 0 /* No reloc */
+#define R_386_32 1 /* Direct 32 bit */
+#define R_386_PC32 2 /* PC relative 32 bit */
+#define R_386_GOT32 3 /* 32 bit GOT entry */
+#define R_386_PLT32 4 /* 32 bit PLT address */
+#define R_386_COPY 5 /* Copy symbol at runtime */
+#define R_386_GLOB_DAT 6 /* Create GOT entry */
+#define R_386_JMP_SLOT 7 /* Create PLT entry */
+#define R_386_RELATIVE 8 /* Adjust by program base */
+#define R_386_GOTOFF 9 /* 32 bit offset to GOT */
+#define R_386_GOTPC 10 /* 32 bit PC relative offset to GOT */
+#define R_386_32PLT 11
+#define R_386_TLS_TPOFF 14 /* Offset in static TLS block */
+#define R_386_TLS_IE \
+ 15 /* Address of GOT entry for static TLS \
+block offset */
+#define R_386_TLS_GOTIE \
+ 16 /* GOT entry for static TLS block \
+offset */
+#define R_386_TLS_LE \
+ 17 /* Offset relative to static TLS \
+block */
+#define R_386_TLS_GD \
+ 18 /* Direct 32 bit for GNU version of \
+general dynamic thread local data */
+#define R_386_TLS_LDM \
+ 19 /* Direct 32 bit for GNU version of \
+local dynamic thread local data \
+in LE code */
+#define R_386_16 20
+#define R_386_PC16 21
+#define R_386_8 22
+#define R_386_PC8 23
+#define R_386_TLS_GD_32 \
+ 24 /* Direct 32 bit for general dynamic \
+thread local data */
+#define R_386_TLS_GD_PUSH 25 /* Tag for pushl in GD TLS code */
+#define R_386_TLS_GD_CALL \
+ 26 /* Relocation for call to \
+__tls_get_addr() */
+#define R_386_TLS_GD_POP 27 /* Tag for popl in GD TLS code */
+#define R_386_TLS_LDM_32 \
+ 28 /* Direct 32 bit for local dynamic \
+thread local data in LE code */
+#define R_386_TLS_LDM_PUSH 29 /* Tag for pushl in LDM TLS code */
+#define R_386_TLS_LDM_CALL \
+ 30 /* Relocation for call to \
+__tls_get_addr() in LDM code */
+#define R_386_TLS_LDM_POP 31 /* Tag for popl in LDM TLS code */
+#define R_386_TLS_LDO_32 32 /* Offset relative to TLS block */
+#define R_386_TLS_IE_32 \
+ 33 /* GOT entry for negated static TLS \
+block offset */
+#define R_386_TLS_LE_32 \
+ 34 /* Negated offset relative to static \
+TLS block */
+#define R_386_TLS_DTPMOD32 35 /* ID of module containing symbol */
+#define R_386_TLS_DTPOFF32 36 /* Offset in TLS block */
+#define R_386_TLS_TPOFF32 37 /* Negated offset in static TLS block */
+/* Keep this the last entry. */
+#define R_386_NUM 38
+
+/* SUN SPARC specific definitions. */
+
+/* Legal values for ST_TYPE subfield of st_info (symbol type). */
+
+#define STT_SPARC_REGISTER 13 /* Global register reserved to app. */
+
+/* Values for Elf64_Ehdr.e_flags. */
+
+#define EF_SPARCV9_MM 3
+#define EF_SPARCV9_TSO 0
+#define EF_SPARCV9_PSO 1
+#define EF_SPARCV9_RMO 2
+#define EF_SPARC_LEDATA 0x800000 /* little endian data */
+#define EF_SPARC_EXT_MASK 0xFFFF00
+#define EF_SPARC_32PLUS 0x000100 /* generic V8+ features */
+#define EF_SPARC_SUN_US1 0x000200 /* Sun UltraSPARC1 extensions */
+#define EF_SPARC_HAL_R1 0x000400 /* HAL R1 extensions */
+#define EF_SPARC_SUN_US3 0x000800 /* Sun UltraSPARCIII extensions */
+
+/* SPARC relocs. */
+
+#define R_SPARC_NONE 0 /* No reloc */
+#define R_SPARC_8 1 /* Direct 8 bit */
+#define R_SPARC_16 2 /* Direct 16 bit */
+#define R_SPARC_32 3 /* Direct 32 bit */
+#define R_SPARC_DISP8 4 /* PC relative 8 bit */
+#define R_SPARC_DISP16 5 /* PC relative 16 bit */
+#define R_SPARC_DISP32 6 /* PC relative 32 bit */
+#define R_SPARC_WDISP30 7 /* PC relative 30 bit shifted */
+#define R_SPARC_WDISP22 8 /* PC relative 22 bit shifted */
+#define R_SPARC_HI22 9 /* High 22 bit */
+#define R_SPARC_22 10 /* Direct 22 bit */
+#define R_SPARC_13 11 /* Direct 13 bit */
+#define R_SPARC_LO10 12 /* Truncated 10 bit */
+#define R_SPARC_GOT10 13 /* Truncated 10 bit GOT entry */
+#define R_SPARC_GOT13 14 /* 13 bit GOT entry */
+#define R_SPARC_GOT22 15 /* 22 bit GOT entry shifted */
+#define R_SPARC_PC10 16 /* PC relative 10 bit truncated */
+#define R_SPARC_PC22 17 /* PC relative 22 bit shifted */
+#define R_SPARC_WPLT30 18 /* 30 bit PC relative PLT address */
+#define R_SPARC_COPY 19 /* Copy symbol at runtime */
+#define R_SPARC_GLOB_DAT 20 /* Create GOT entry */
+#define R_SPARC_JMP_SLOT 21 /* Create PLT entry */
+#define R_SPARC_RELATIVE 22 /* Adjust by program base */
+#define R_SPARC_UA32 23 /* Direct 32 bit unaligned */
+
+/* Additional Sparc64 relocs. */
+
+#define R_SPARC_PLT32 24 /* Direct 32 bit ref to PLT entry */
+#define R_SPARC_HIPLT22 25 /* High 22 bit PLT entry */
+#define R_SPARC_LOPLT10 26 /* Truncated 10 bit PLT entry */
+#define R_SPARC_PCPLT32 27 /* PC rel 32 bit ref to PLT entry */
+#define R_SPARC_PCPLT22 28 /* PC rel high 22 bit PLT entry */
+#define R_SPARC_PCPLT10 29 /* PC rel trunc 10 bit PLT entry */
+#define R_SPARC_10 30 /* Direct 10 bit */
+#define R_SPARC_11 31 /* Direct 11 bit */
+#define R_SPARC_64 32 /* Direct 64 bit */
+#define R_SPARC_OLO10 33 /* 10bit with secondary 13bit addend */
+#define R_SPARC_HH22 34 /* Top 22 bits of direct 64 bit */
+#define R_SPARC_HM10 35 /* High middle 10 bits of ... */
+#define R_SPARC_LM22 36 /* Low middle 22 bits of ... */
+#define R_SPARC_PC_HH22 37 /* Top 22 bits of pc rel 64 bit */
+#define R_SPARC_PC_HM10 38 /* High middle 10 bit of ... */
+#define R_SPARC_PC_LM22 39 /* Low middle 22 bits of ... */
+#define R_SPARC_WDISP16 40 /* PC relative 16 bit shifted */
+#define R_SPARC_WDISP19 41 /* PC relative 19 bit shifted */
+#define R_SPARC_7 43 /* Direct 7 bit */
+#define R_SPARC_5 44 /* Direct 5 bit */
+#define R_SPARC_6 45 /* Direct 6 bit */
+#define R_SPARC_DISP64 46 /* PC relative 64 bit */
+#define R_SPARC_PLT64 47 /* Direct 64 bit ref to PLT entry */
+#define R_SPARC_HIX22 48 /* High 22 bit complemented */
+#define R_SPARC_LOX10 49 /* Truncated 11 bit complemented */
+#define R_SPARC_H44 50 /* Direct high 12 of 44 bit */
+#define R_SPARC_M44 51 /* Direct mid 22 of 44 bit */
+#define R_SPARC_L44 52 /* Direct low 10 of 44 bit */
+#define R_SPARC_REGISTER 53 /* Global register usage */
+#define R_SPARC_UA64 54 /* Direct 64 bit unaligned */
+#define R_SPARC_UA16 55 /* Direct 16 bit unaligned */
+#define R_SPARC_TLS_GD_HI22 56
+#define R_SPARC_TLS_GD_LO10 57
+#define R_SPARC_TLS_GD_ADD 58
+#define R_SPARC_TLS_GD_CALL 59
+#define R_SPARC_TLS_LDM_HI22 60
+#define R_SPARC_TLS_LDM_LO10 61
+#define R_SPARC_TLS_LDM_ADD 62
+#define R_SPARC_TLS_LDM_CALL 63
+#define R_SPARC_TLS_LDO_HIX22 64
+#define R_SPARC_TLS_LDO_LOX10 65
+#define R_SPARC_TLS_LDO_ADD 66
+#define R_SPARC_TLS_IE_HI22 67
+#define R_SPARC_TLS_IE_LO10 68
+#define R_SPARC_TLS_IE_LD 69
+#define R_SPARC_TLS_IE_LDX 70
+#define R_SPARC_TLS_IE_ADD 71
+#define R_SPARC_TLS_LE_HIX22 72
+#define R_SPARC_TLS_LE_LOX10 73
+#define R_SPARC_TLS_DTPMOD32 74
+#define R_SPARC_TLS_DTPMOD64 75
+#define R_SPARC_TLS_DTPOFF32 76
+#define R_SPARC_TLS_DTPOFF64 77
+#define R_SPARC_TLS_TPOFF32 78
+#define R_SPARC_TLS_TPOFF64 79
+/* Keep this the last entry. */
+#define R_SPARC_NUM 80
+
+/* For Sparc64, legal values for d_tag of Elf64_Dyn. */
+
+#define DT_SPARC_REGISTER 0x70000001
+#define DT_SPARC_NUM 2
+
+/* Bits present in AT_HWCAP, primarily for Sparc32. */
+
+#define HWCAP_SPARC_FLUSH 1 /* The cpu supports flush insn. */
+#define HWCAP_SPARC_STBAR 2
+#define HWCAP_SPARC_SWAP 4
+#define HWCAP_SPARC_MULDIV 8
+#define HWCAP_SPARC_V9 16 /* The cpu is v9, so v8plus is ok. */
+#define HWCAP_SPARC_ULTRA3 32
+
+/* MIPS R3000 specific definitions. */
+
+/* Legal values for e_flags field of Elf32_Ehdr. */
+
+#define EF_MIPS_NOREORDER 1 /* A .noreorder directive was used */
+#define EF_MIPS_PIC 2 /* Contains PIC code */
+#define EF_MIPS_CPIC 4 /* Uses PIC calling sequence */
+#define EF_MIPS_XGOT 8
+#define EF_MIPS_64BIT_WHIRL 16
+#define EF_MIPS_ABI2 32
+#define EF_MIPS_ABI_ON32 64
+#define EF_MIPS_ARCH 0xf0000000 /* MIPS architecture level */
+
+/* Legal values for MIPS architecture level. */
+
+#define EF_MIPS_ARCH_1 0x00000000 /* -mips1 code. */
+#define EF_MIPS_ARCH_2 0x10000000 /* -mips2 code. */
+#define EF_MIPS_ARCH_3 0x20000000 /* -mips3 code. */
+#define EF_MIPS_ARCH_4 0x30000000 /* -mips4 code. */
+#define EF_MIPS_ARCH_5 0x40000000 /* -mips5 code. */
+#define EF_MIPS_ARCH_32 0x60000000 /* MIPS32 code. */
+#define EF_MIPS_ARCH_64 0x70000000 /* MIPS64 code. */
+
+/* The following are non-official names and should not be used. */
+
+#define E_MIPS_ARCH_1 0x00000000 /* -mips1 code. */
+#define E_MIPS_ARCH_2 0x10000000 /* -mips2 code. */
+#define E_MIPS_ARCH_3 0x20000000 /* -mips3 code. */
+#define E_MIPS_ARCH_4 0x30000000 /* -mips4 code. */
+#define E_MIPS_ARCH_5 0x40000000 /* -mips5 code. */
+#define E_MIPS_ARCH_32 0x60000000 /* MIPS32 code. */
+#define E_MIPS_ARCH_64 0x70000000 /* MIPS64 code. */
+
+/* Special section indices. */
+
+#define SHN_MIPS_ACOMMON 0xff00 /* Allocated common symbols */
+#define SHN_MIPS_TEXT 0xff01 /* Allocated text symbols. */
+#define SHN_MIPS_DATA 0xff02 /* Allocated data symbols. */
+#define SHN_MIPS_SCOMMON 0xff03 /* Small common symbols */
+#define SHN_MIPS_SUNDEFINED 0xff04 /* Small undefined symbols */
+
+/* Legal values for sh_type field of Elf32_Shdr. */
+
+#define SHT_MIPS_LIBLIST 0x70000000 /* Shared objects used in link */
+#define SHT_MIPS_MSYM 0x70000001
+#define SHT_MIPS_CONFLICT 0x70000002 /* Conflicting symbols */
+#define SHT_MIPS_GPTAB 0x70000003 /* Global data area sizes */
+#define SHT_MIPS_UCODE 0x70000004 /* Reserved for SGI/MIPS compilers */
+#define SHT_MIPS_DEBUG 0x70000005 /* MIPS ECOFF debugging information */
+#define SHT_MIPS_REGINFO 0x70000006 /* Register usage information */
+#define SHT_MIPS_PACKAGE 0x70000007
+#define SHT_MIPS_PACKSYM 0x70000008
+#define SHT_MIPS_RELD 0x70000009
+#define SHT_MIPS_IFACE 0x7000000b
+#define SHT_MIPS_CONTENT 0x7000000c
+#define SHT_MIPS_OPTIONS 0x7000000d /* Miscellaneous options. */
+#define SHT_MIPS_SHDR 0x70000010
+#define SHT_MIPS_FDESC 0x70000011
+#define SHT_MIPS_EXTSYM 0x70000012
+#define SHT_MIPS_DENSE 0x70000013
+#define SHT_MIPS_PDESC 0x70000014
+#define SHT_MIPS_LOCSYM 0x70000015
+#define SHT_MIPS_AUXSYM 0x70000016
+#define SHT_MIPS_OPTSYM 0x70000017
+#define SHT_MIPS_LOCSTR 0x70000018
+#define SHT_MIPS_LINE 0x70000019
+#define SHT_MIPS_RFDESC 0x7000001a
+#define SHT_MIPS_DELTASYM 0x7000001b
+#define SHT_MIPS_DELTAINST 0x7000001c
+#define SHT_MIPS_DELTACLASS 0x7000001d
+#define SHT_MIPS_DWARF 0x7000001e /* DWARF debugging information. */
+#define SHT_MIPS_DELTADECL 0x7000001f
+#define SHT_MIPS_SYMBOL_LIB 0x70000020
+#define SHT_MIPS_EVENTS 0x70000021 /* Event section. */
+#define SHT_MIPS_TRANSLATE 0x70000022
+#define SHT_MIPS_PIXIE 0x70000023
+#define SHT_MIPS_XLATE 0x70000024
+#define SHT_MIPS_XLATE_DEBUG 0x70000025
+#define SHT_MIPS_WHIRL 0x70000026
+#define SHT_MIPS_EH_REGION 0x70000027
+#define SHT_MIPS_XLATE_OLD 0x70000028
+#define SHT_MIPS_PDR_EXCEPTION 0x70000029
+
+/* Legal values for sh_flags field of Elf32_Shdr. */
+
+#define SHF_MIPS_GPREL 0x10000000 /* Must be part of global data area */
+#define SHF_MIPS_MERGE 0x20000000
+#define SHF_MIPS_ADDR 0x40000000
+#define SHF_MIPS_STRINGS 0x80000000
+#define SHF_MIPS_NOSTRIP 0x08000000
+#define SHF_MIPS_LOCAL 0x04000000
+#define SHF_MIPS_NAMES 0x02000000
+#define SHF_MIPS_NODUPE 0x01000000
+
+/* Symbol tables. */
+
+/* MIPS specific values for `st_other'. */
+#define STO_MIPS_DEFAULT 0x0
+#define STO_MIPS_INTERNAL 0x1
+#define STO_MIPS_HIDDEN 0x2
+#define STO_MIPS_PROTECTED 0x3
+#define STO_MIPS_SC_ALIGN_UNUSED 0xff
+
+/* MIPS specific values for `st_info'. */
+#define STB_MIPS_SPLIT_COMMON 13
+
+/* Entries found in sections of type SHT_MIPS_GPTAB. */
+
+typedef union {
+ struct
+ {
+ Elf32_Word gt_current_g_value; /* -G value used for compilation */
+ Elf32_Word gt_unused; /* Not used */
+ } gt_header; /* First entry in section */
+ struct
+ {
+ Elf32_Word gt_g_value; /* If this value were used for -G */
+ Elf32_Word gt_bytes; /* This many bytes would be used */
+ } gt_entry; /* Subsequent entries in section */
+} Elf32_gptab;
+
+/* Entry found in sections of type SHT_MIPS_REGINFO. */
+
+typedef struct
+{
+ Elf32_Word ri_gprmask; /* General registers used */
+ Elf32_Word ri_cprmask[4]; /* Coprocessor registers used */
+ Elf32_Sword ri_gp_value; /* $gp register value */
+} Elf32_RegInfo;
+
+/* Entries found in sections of type SHT_MIPS_OPTIONS. */
+
+typedef struct
+{
+ unsigned char kind; /* Determines interpretation of the
+ variable part of descriptor. */
+ unsigned char size; /* Size of descriptor, including header. */
+ Elf32_Section section; /* Section header index of section affected,
+ 0 for global options. */
+ Elf32_Word info; /* Kind-specific information. */
+} Elf_Options;
+
+/* Values for `kind' field in Elf_Options. */
+
+#define ODK_NULL 0 /* Undefined. */
+#define ODK_REGINFO 1 /* Register usage information. */
+#define ODK_EXCEPTIONS 2 /* Exception processing options. */
+#define ODK_PAD 3 /* Section padding options. */
+#define ODK_HWPATCH 4 /* Hardware workarounds performed */
+#define ODK_FILL 5 /* record the fill value used by the linker. */
+#define ODK_TAGS 6 /* reserve space for desktop tools to write. */
+#define ODK_HWAND 7 /* HW workarounds. 'AND' bits when merging. */
+#define ODK_HWOR 8 /* HW workarounds. 'OR' bits when merging. */
+
+/* Values for `info' in Elf_Options for ODK_EXCEPTIONS entries. */
+
+#define OEX_FPU_MIN 0x1f /* FPE's which MUST be enabled. */
+#define OEX_FPU_MAX 0x1f00 /* FPE's which MAY be enabled. */
+#define OEX_PAGE0 0x10000 /* page zero must be mapped. */
+#define OEX_SMM 0x20000 /* Force sequential memory mode? */
+#define OEX_FPDBUG 0x40000 /* Force floating point debug mode? */
+#define OEX_PRECISEFP OEX_FPDBUG
+#define OEX_DISMISS 0x80000 /* Dismiss invalid address faults? */
+
+#define OEX_FPU_INVAL 0x10
+#define OEX_FPU_DIV0 0x08
+#define OEX_FPU_OFLO 0x04
+#define OEX_FPU_UFLO 0x02
+#define OEX_FPU_INEX 0x01
+
+/* Masks for `info' in Elf_Options for an ODK_HWPATCH entry. */
+
+#define OHW_R4KEOP 0x1 /* R4000 end-of-page patch. */
+#define OHW_R8KPFETCH 0x2 /* may need R8000 prefetch patch. */
+#define OHW_R5KEOP 0x4 /* R5000 end-of-page patch. */
+#define OHW_R5KCVTL 0x8 /* R5000 cvt.[ds].l bug. clean=1. */
+
+#define OPAD_PREFIX 0x1
+#define OPAD_POSTFIX 0x2
+#define OPAD_SYMBOL 0x4
+
+/* Entry found in `.options' section. */
+
+typedef struct
+{
+ Elf32_Word hwp_flags1; /* Extra flags. */
+ Elf32_Word hwp_flags2; /* Extra flags. */
+} Elf_Options_Hw;
+
+/* Masks for `info' in Elf_Options for ODK_HWAND and ODK_HWOR entries. */
+
+#define OHWA0_R4KEOP_CHECKED 0x00000001
+#define OHWA1_R4KEOP_CLEAN 0x00000002
+
+/* MIPS relocs. */
+
+#define R_MIPS_NONE 0 /* No reloc */
+#define R_MIPS_16 1 /* Direct 16 bit */
+#define R_MIPS_32 2 /* Direct 32 bit */
+#define R_MIPS_REL32 3 /* PC relative 32 bit */
+#define R_MIPS_26 4 /* Direct 26 bit shifted */
+#define R_MIPS_HI16 5 /* High 16 bit */
+#define R_MIPS_LO16 6 /* Low 16 bit */
+#define R_MIPS_GPREL16 7 /* GP relative 16 bit */
+#define R_MIPS_LITERAL 8 /* 16 bit literal entry */
+#define R_MIPS_GOT16 9 /* 16 bit GOT entry */
+#define R_MIPS_PC16 10 /* PC relative 16 bit */
+#define R_MIPS_CALL16 11 /* 16 bit GOT entry for function */
+#define R_MIPS_GPREL32 12 /* GP relative 32 bit */
+
+#define R_MIPS_SHIFT5 16
+#define R_MIPS_SHIFT6 17
+#define R_MIPS_64 18
+#define R_MIPS_GOT_DISP 19
+#define R_MIPS_GOT_PAGE 20
+#define R_MIPS_GOT_OFST 21
+#define R_MIPS_GOT_HI16 22
+#define R_MIPS_GOT_LO16 23
+#define R_MIPS_SUB 24
+#define R_MIPS_INSERT_A 25
+#define R_MIPS_INSERT_B 26
+#define R_MIPS_DELETE 27
+#define R_MIPS_HIGHER 28
+#define R_MIPS_HIGHEST 29
+#define R_MIPS_CALL_HI16 30
+#define R_MIPS_CALL_LO16 31
+#define R_MIPS_SCN_DISP 32
+#define R_MIPS_REL16 33
+#define R_MIPS_ADD_IMMEDIATE 34
+#define R_MIPS_PJUMP 35
+#define R_MIPS_RELGOT 36
+#define R_MIPS_JALR 37
+/* Keep this the last entry. */
+#define R_MIPS_NUM 38
+
+/* Legal values for p_type field of Elf32_Phdr. */
+
+#define PT_MIPS_REGINFO 0x70000000 /* Register usage information */
+#define PT_MIPS_RTPROC 0x70000001 /* Runtime procedure table. */
+#define PT_MIPS_OPTIONS 0x70000002
+
+/* Special program header types. */
+
+#define PF_MIPS_LOCAL 0x10000000
+
+/* Legal values for d_tag field of Elf32_Dyn. */
+
+#define DT_MIPS_RLD_VERSION 0x70000001 /* Runtime linker interface version */
+#define DT_MIPS_TIME_STAMP 0x70000002 /* Timestamp */
+#define DT_MIPS_ICHECKSUM 0x70000003 /* Checksum */
+#define DT_MIPS_IVERSION 0x70000004 /* Version string (string tbl index) */
+#define DT_MIPS_FLAGS 0x70000005 /* Flags */
+#define DT_MIPS_BASE_ADDRESS 0x70000006 /* Base address */
+#define DT_MIPS_MSYM 0x70000007
+#define DT_MIPS_CONFLICT 0x70000008 /* Address of CONFLICT section */
+#define DT_MIPS_LIBLIST 0x70000009 /* Address of LIBLIST section */
+#define DT_MIPS_LOCAL_GOTNO 0x7000000a /* Number of local GOT entries */
+#define DT_MIPS_CONFLICTNO 0x7000000b /* Number of CONFLICT entries */
+#define DT_MIPS_LIBLISTNO 0x70000010 /* Number of LIBLIST entries */
+#define DT_MIPS_SYMTABNO 0x70000011 /* Number of DYNSYM entries */
+#define DT_MIPS_UNREFEXTNO 0x70000012 /* First external DYNSYM */
+#define DT_MIPS_GOTSYM 0x70000013 /* First GOT entry in DYNSYM */
+#define DT_MIPS_HIPAGENO 0x70000014 /* Number of GOT page table entries */
+#define DT_MIPS_RLD_MAP 0x70000016 /* Address of run time loader map. */
+#define DT_MIPS_DELTA_CLASS 0x70000017 /* Delta C++ class definition. */
+#define DT_MIPS_DELTA_CLASS_NO \
+ 0x70000018 /* Number of entries in \
+DT_MIPS_DELTA_CLASS. */
+#define DT_MIPS_DELTA_INSTANCE 0x70000019 /* Delta C++ class instances. */
+#define DT_MIPS_DELTA_INSTANCE_NO \
+ 0x7000001a /* Number of entries in \
+DT_MIPS_DELTA_INSTANCE. */
+#define DT_MIPS_DELTA_RELOC 0x7000001b /* Delta relocations. */
+#define DT_MIPS_DELTA_RELOC_NO \
+ 0x7000001c /* Number of entries in \
+DT_MIPS_DELTA_RELOC. */
+#define DT_MIPS_DELTA_SYM \
+ 0x7000001d /* Delta symbols that Delta \
+relocations refer to. */
+#define DT_MIPS_DELTA_SYM_NO \
+ 0x7000001e /* Number of entries in \
+DT_MIPS_DELTA_SYM. */
+#define DT_MIPS_DELTA_CLASSSYM \
+ 0x70000020 /* Delta symbols that hold the \
+class declaration. */
+#define DT_MIPS_DELTA_CLASSSYM_NO \
+ 0x70000021 /* Number of entries in \
+DT_MIPS_DELTA_CLASSSYM. */
+#define DT_MIPS_CXX_FLAGS 0x70000022 /* Flags indicating for C++ flavor. */
+#define DT_MIPS_PIXIE_INIT 0x70000023
+#define DT_MIPS_SYMBOL_LIB 0x70000024
+#define DT_MIPS_LOCALPAGE_GOTIDX 0x70000025
+#define DT_MIPS_LOCAL_GOTIDX 0x70000026
+#define DT_MIPS_HIDDEN_GOTIDX 0x70000027
+#define DT_MIPS_PROTECTED_GOTIDX 0x70000028
+#define DT_MIPS_OPTIONS 0x70000029 /* Address of .options. */
+#define DT_MIPS_INTERFACE 0x7000002a /* Address of .interface. */
+#define DT_MIPS_DYNSTR_ALIGN 0x7000002b
+#define DT_MIPS_INTERFACE_SIZE 0x7000002c /* Size of the .interface section. \
+ */
+#define DT_MIPS_RLD_TEXT_RESOLVE_ADDR \
+ 0x7000002d /* Address of rld_text_resolve \
+function stored in GOT. */
+#define DT_MIPS_PERF_SUFFIX \
+ 0x7000002e /* Default suffix of dso to be added \
+ by rld on dlopen() calls. */
+#define DT_MIPS_COMPACT_SIZE 0x7000002f /* (O32)Size of compact rel section. \
+ */
+#define DT_MIPS_GP_VALUE 0x70000030 /* GP value for aux GOTs. */
+#define DT_MIPS_AUX_DYNAMIC 0x70000031 /* Address of aux .dynamic. */
+#define DT_MIPS_NUM 0x32
+
+/* Legal values for DT_MIPS_FLAGS Elf32_Dyn entry. */
+
+#define RHF_NONE 0 /* No flags */
+#define RHF_QUICKSTART (1 << 0) /* Use quickstart */
+#define RHF_NOTPOT (1 << 1) /* Hash size not power of 2 */
+#define RHF_NO_LIBRARY_REPLACEMENT (1 << 2) /* Ignore LD_LIBRARY_PATH */
+#define RHF_NO_MOVE (1 << 3)
+#define RHF_SGI_ONLY (1 << 4)
+#define RHF_GUARANTEE_INIT (1 << 5)
+#define RHF_DELTA_C_PLUS_PLUS (1 << 6)
+#define RHF_GUARANTEE_START_INIT (1 << 7)
+#define RHF_PIXIE (1 << 8)
+#define RHF_DEFAULT_DELAY_LOAD (1 << 9)
+#define RHF_REQUICKSTART (1 << 10)
+#define RHF_REQUICKSTARTED (1 << 11)
+#define RHF_CORD (1 << 12)
+#define RHF_NO_UNRES_UNDEF (1 << 13)
+#define RHF_RLD_ORDER_SAFE (1 << 14)
+
+/* Entries found in sections of type SHT_MIPS_LIBLIST. */
+
+typedef struct
+{
+ Elf32_Word l_name; /* Name (string table index) */
+ Elf32_Word l_time_stamp; /* Timestamp */
+ Elf32_Word l_checksum; /* Checksum */
+ Elf32_Word l_version; /* Interface version */
+ Elf32_Word l_flags; /* Flags */
+} Elf32_Lib;
+
+typedef struct
+{
+ Elf64_Word l_name; /* Name (string table index) */
+ Elf64_Word l_time_stamp; /* Timestamp */
+ Elf64_Word l_checksum; /* Checksum */
+ Elf64_Word l_version; /* Interface version */
+ Elf64_Word l_flags; /* Flags */
+} Elf64_Lib;
+
+/* Legal values for l_flags. */
+
+#define LL_NONE 0
+#define LL_EXACT_MATCH (1 << 0) /* Require exact match */
+#define LL_IGNORE_INT_VER (1 << 1) /* Ignore interface version */
+#define LL_REQUIRE_MINOR (1 << 2)
+#define LL_EXPORTS (1 << 3)
+#define LL_DELAY_LOAD (1 << 4)
+#define LL_DELTA (1 << 5)
+
+/* Entries found in sections of type SHT_MIPS_CONFLICT. */
+
+typedef Elf32_Addr Elf32_Conflict;
+
+/* HPPA specific definitions. */
+
+/* Legal values for e_flags field of Elf32_Ehdr. */
+
+#define EF_PARISC_TRAPNIL 0x00010000 /* Trap nil pointer dereference. */
+#define EF_PARISC_EXT 0x00020000 /* Program uses arch. extensions. */
+#define EF_PARISC_LSB 0x00040000 /* Program expects little endian. */
+#define EF_PARISC_WIDE 0x00080000 /* Program expects wide mode. */
+#define EF_PARISC_NO_KABP \
+ 0x00100000 /* No kernel assisted branch \
+prediction. */
+#define EF_PARISC_LAZYSWAP 0x00400000 /* Allow lazy swapping. */
+#define EF_PARISC_ARCH 0x0000ffff /* Architecture version. */
+
+/* Defined values for `e_flags & EF_PARISC_ARCH' are: */
+
+#define EFA_PARISC_1_0 0x020b /* PA-RISC 1.0 big-endian. */
+#define EFA_PARISC_1_1 0x0210 /* PA-RISC 1.1 big-endian. */
+#define EFA_PARISC_2_0 0x0214 /* PA-RISC 2.0 big-endian. */
+
+/* Additional section indices. */
+
+#define SHN_PARISC_ANSI_COMMON \
+    0xff00 /* Section for tentatively declared \
+symbols in ANSI C. */
+#define SHN_PARISC_HUGE_COMMON 0xff01 /* Common blocks in huge model. */
+
+/* Legal values for sh_type field of Elf32_Shdr. */
+
+#define SHT_PARISC_EXT 0x70000000 /* Contains product specific ext. */
+#define SHT_PARISC_UNWIND 0x70000001 /* Unwind information. */
+#define SHT_PARISC_DOC 0x70000002 /* Debug info for optimized code. */
+
+/* Legal values for sh_flags field of Elf32_Shdr. */
+
+#define SHF_PARISC_SHORT 0x20000000 /* Section with short addressing. */
+#define SHF_PARISC_HUGE 0x40000000 /* Section far from gp. */
+#define SHF_PARISC_SBP 0x80000000 /* Static branch prediction code. */
+
+/* Legal values for ST_TYPE subfield of st_info (symbol type). */
+
+#define STT_PARISC_MILLICODE 13 /* Millicode function entry point. */
+
+#define STT_HP_OPAQUE (STT_LOOS + 0x1)
+#define STT_HP_STUB (STT_LOOS + 0x2)
+
+/* HPPA relocs. */
+
+#define R_PARISC_NONE 0 /* No reloc. */
+#define R_PARISC_DIR32 1 /* Direct 32-bit reference. */
+#define R_PARISC_DIR21L 2 /* Left 21 bits of eff. address. */
+#define R_PARISC_DIR17R 3 /* Right 17 bits of eff. address. */
+#define R_PARISC_DIR17F 4 /* 17 bits of eff. address. */
+#define R_PARISC_DIR14R 6 /* Right 14 bits of eff. address. */
+#define R_PARISC_PCREL32 9 /* 32-bit rel. address. */
+#define R_PARISC_PCREL21L 10 /* Left 21 bits of rel. address. */
+#define R_PARISC_PCREL17R 11 /* Right 17 bits of rel. address. */
+#define R_PARISC_PCREL17F 12 /* 17 bits of rel. address. */
+#define R_PARISC_PCREL14R 14 /* Right 14 bits of rel. address. */
+#define R_PARISC_DPREL21L 18 /* Left 21 bits of rel. address. */
+#define R_PARISC_DPREL14R 22 /* Right 14 bits of rel. address. */
+#define R_PARISC_GPREL21L 26 /* GP-relative, left 21 bits. */
+#define R_PARISC_GPREL14R 30 /* GP-relative, right 14 bits. */
+#define R_PARISC_LTOFF21L 34 /* LT-relative, left 21 bits. */
+#define R_PARISC_LTOFF14R 38 /* LT-relative, right 14 bits. */
+#define R_PARISC_SECREL32 41 /* 32 bits section rel. address. */
+#define R_PARISC_SEGBASE 48 /* No relocation, set segment base. */
+#define R_PARISC_SEGREL32 49 /* 32 bits segment rel. address. */
+#define R_PARISC_PLTOFF21L 50 /* PLT rel. address, left 21 bits. */
+#define R_PARISC_PLTOFF14R 54 /* PLT rel. address, right 14 bits. */
+#define R_PARISC_LTOFF_FPTR32 57 /* 32 bits LT-rel. function pointer. */
+#define R_PARISC_LTOFF_FPTR21L 58 /* LT-rel. fct ptr, left 21 bits. */
+#define R_PARISC_LTOFF_FPTR14R 62 /* LT-rel. fct ptr, right 14 bits. */
+#define R_PARISC_FPTR64 64 /* 64 bits function address. */
+#define R_PARISC_PLABEL32 65 /* 32 bits function address. */
+#define R_PARISC_PLABEL21L 66 /* Left 21 bits of fct ptr. */
+#define R_PARISC_PLABEL14R 70 /* Right 14 bits of fct ptr. */
+#define R_PARISC_PCREL64 72 /* 64 bits PC-rel. address. */
+#define R_PARISC_PCREL22F 74 /* 22 bits PC-rel. address. */
+#define R_PARISC_PCREL14WR 75 /* PC-rel. address, right 14 bits. */
+#define R_PARISC_PCREL14DR 76 /* PC rel. address, right 14 bits. */
+#define R_PARISC_PCREL16F 77 /* 16 bits PC-rel. address. */
+#define R_PARISC_PCREL16WF 78 /* 16 bits PC-rel. address. */
+#define R_PARISC_PCREL16DF 79 /* 16 bits PC-rel. address. */
+#define R_PARISC_DIR64 80 /* 64 bits of eff. address. */
+#define R_PARISC_DIR14WR 83 /* 14 bits of eff. address. */
+#define R_PARISC_DIR14DR 84 /* 14 bits of eff. address. */
+#define R_PARISC_DIR16F 85 /* 16 bits of eff. address. */
+#define R_PARISC_DIR16WF 86 /* 16 bits of eff. address. */
+#define R_PARISC_DIR16DF 87 /* 16 bits of eff. address. */
+#define R_PARISC_GPREL64 88 /* 64 bits of GP-rel. address. */
+#define R_PARISC_GPREL14WR 91 /* GP-rel. address, right 14 bits. */
+#define R_PARISC_GPREL14DR 92 /* GP-rel. address, right 14 bits. */
+#define R_PARISC_GPREL16F 93 /* 16 bits GP-rel. address. */
+#define R_PARISC_GPREL16WF 94 /* 16 bits GP-rel. address. */
+#define R_PARISC_GPREL16DF 95 /* 16 bits GP-rel. address. */
+#define R_PARISC_LTOFF64 96 /* 64 bits LT-rel. address. */
+#define R_PARISC_LTOFF14WR 99 /* LT-rel. address, right 14 bits. */
+#define R_PARISC_LTOFF14DR 100 /* LT-rel. address, right 14 bits. */
+#define R_PARISC_LTOFF16F 101 /* 16 bits LT-rel. address. */
+#define R_PARISC_LTOFF16WF 102 /* 16 bits LT-rel. address. */
+#define R_PARISC_LTOFF16DF 103 /* 16 bits LT-rel. address. */
+#define R_PARISC_SECREL64 104 /* 64 bits section rel. address. */
+#define R_PARISC_SEGREL64 112 /* 64 bits segment rel. address. */
+#define R_PARISC_PLTOFF14WR 115 /* PLT-rel. address, right 14 bits. */
+#define R_PARISC_PLTOFF14DR 116 /* PLT-rel. address, right 14 bits. */
+#define R_PARISC_PLTOFF16F 117 /* 16 bits LT-rel. address. */
+#define R_PARISC_PLTOFF16WF 118 /* 16 bits PLT-rel. address. */
+#define R_PARISC_PLTOFF16DF 119 /* 16 bits PLT-rel. address. */
+#define R_PARISC_LTOFF_FPTR64 120 /* 64 bits LT-rel. function ptr. */
+#define R_PARISC_LTOFF_FPTR14WR 123 /* LT-rel. fct. ptr., right 14 bits. */
+#define R_PARISC_LTOFF_FPTR14DR 124 /* LT-rel. fct. ptr., right 14 bits. */
+#define R_PARISC_LTOFF_FPTR16F 125 /* 16 bits LT-rel. function ptr. */
+#define R_PARISC_LTOFF_FPTR16WF 126 /* 16 bits LT-rel. function ptr. */
+#define R_PARISC_LTOFF_FPTR16DF 127 /* 16 bits LT-rel. function ptr. */
+#define R_PARISC_LORESERVE 128
+#define R_PARISC_COPY 128 /* Copy relocation. */
+#define R_PARISC_IPLT 129 /* Dynamic reloc, imported PLT */
+#define R_PARISC_EPLT 130 /* Dynamic reloc, exported PLT */
+#define R_PARISC_TPREL32 153 /* 32 bits TP-rel. address. */
+#define R_PARISC_TPREL21L 154 /* TP-rel. address, left 21 bits. */
+#define R_PARISC_TPREL14R 158 /* TP-rel. address, right 14 bits. */
+#define R_PARISC_LTOFF_TP21L 162 /* LT-TP-rel. address, left 21 bits. */
+#define R_PARISC_LTOFF_TP14R 166 /* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP14F 167 /* 14 bits LT-TP-rel. address. */
+#define R_PARISC_TPREL64 216 /* 64 bits TP-rel. address. */
+#define R_PARISC_TPREL14WR 219 /* TP-rel. address, right 14 bits. */
+#define R_PARISC_TPREL14DR 220 /* TP-rel. address, right 14 bits. */
+#define R_PARISC_TPREL16F 221 /* 16 bits TP-rel. address. */
+#define R_PARISC_TPREL16WF 222 /* 16 bits TP-rel. address. */
+#define R_PARISC_TPREL16DF 223 /* 16 bits TP-rel. address. */
+#define R_PARISC_LTOFF_TP64 224 /* 64 bits LT-TP-rel. address. */
+#define R_PARISC_LTOFF_TP14WR 227 /* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP14DR 228 /* LT-TP-rel. address, right 14 bits.*/
+#define R_PARISC_LTOFF_TP16F 229 /* 16 bits LT-TP-rel. address. */
+#define R_PARISC_LTOFF_TP16WF 230 /* 16 bits LT-TP-rel. address. */
+#define R_PARISC_LTOFF_TP16DF 231 /* 16 bits LT-TP-rel. address. */
+#define R_PARISC_HIRESERVE 255
+
+/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */
+
+#define PT_HP_TLS (PT_LOOS + 0x0)
+#define PT_HP_CORE_NONE (PT_LOOS + 0x1)
+#define PT_HP_CORE_VERSION (PT_LOOS + 0x2)
+#define PT_HP_CORE_KERNEL (PT_LOOS + 0x3)
+#define PT_HP_CORE_COMM (PT_LOOS + 0x4)
+#define PT_HP_CORE_PROC (PT_LOOS + 0x5)
+#define PT_HP_CORE_LOADABLE (PT_LOOS + 0x6)
+#define PT_HP_CORE_STACK (PT_LOOS + 0x7)
+#define PT_HP_CORE_SHM (PT_LOOS + 0x8)
+#define PT_HP_CORE_MMF (PT_LOOS + 0x9)
+#define PT_HP_PARALLEL (PT_LOOS + 0x10)
+#define PT_HP_FASTBIND (PT_LOOS + 0x11)
+#define PT_HP_OPT_ANNOT (PT_LOOS + 0x12)
+#define PT_HP_HSL_ANNOT (PT_LOOS + 0x13)
+#define PT_HP_STACK (PT_LOOS + 0x14)
+
+#define PT_PARISC_ARCHEXT 0x70000000
+#define PT_PARISC_UNWIND 0x70000001
+
+/* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr. */
+
+#define PF_PARISC_SBP 0x08000000
+
+#define PF_HP_PAGE_SIZE 0x00100000
+#define PF_HP_FAR_SHARED 0x00200000
+#define PF_HP_NEAR_SHARED 0x00400000
+#define PF_HP_CODE 0x01000000
+#define PF_HP_MODIFY 0x02000000
+#define PF_HP_LAZYSWAP 0x04000000
+#define PF_HP_SBP 0x08000000
+
+/* Alpha specific definitions. */
+
+/* Legal values for e_flags field of Elf64_Ehdr. */
+
+#define EF_ALPHA_32BIT 1 /* All addresses must be < 2GB. */
+#define EF_ALPHA_CANRELAX 2 /* Relocations for relaxing exist. */
+
+/* Legal values for sh_type field of Elf64_Shdr. */
+
+/* These two are primarily concerned with ECOFF debugging info. */
+#define SHT_ALPHA_DEBUG 0x70000001
+#define SHT_ALPHA_REGINFO 0x70000002
+
+/* Legal values for sh_flags field of Elf64_Shdr. */
+
+#define SHF_ALPHA_GPREL 0x10000000
+
+/* Legal values for st_other field of Elf64_Sym. */
+#define STO_ALPHA_NOPV 0x80 /* No PV required. */
+#define STO_ALPHA_STD_GPLOAD 0x88 /* PV only used for initial ldgp. */
+
+/* Alpha relocs. */
+
+#define R_ALPHA_NONE 0 /* No reloc */
+#define R_ALPHA_REFLONG 1 /* Direct 32 bit */
+#define R_ALPHA_REFQUAD 2 /* Direct 64 bit */
+#define R_ALPHA_GPREL32 3 /* GP relative 32 bit */
+#define R_ALPHA_LITERAL 4 /* GP relative 16 bit w/optimization */
+#define R_ALPHA_LITUSE 5 /* Optimization hint for LITERAL */
+#define R_ALPHA_GPDISP 6 /* Add displacement to GP */
+#define R_ALPHA_BRADDR 7 /* PC+4 relative 23 bit shifted */
+#define R_ALPHA_HINT 8 /* PC+4 relative 16 bit shifted */
+#define R_ALPHA_SREL16 9 /* PC relative 16 bit */
+#define R_ALPHA_SREL32 10 /* PC relative 32 bit */
+#define R_ALPHA_SREL64 11 /* PC relative 64 bit */
+#define R_ALPHA_GPRELHIGH 17 /* GP relative 32 bit, high 16 bits */
+#define R_ALPHA_GPRELLOW 18 /* GP relative 32 bit, low 16 bits */
+#define R_ALPHA_GPREL16 19 /* GP relative 16 bit */
+#define R_ALPHA_COPY 24 /* Copy symbol at runtime */
+#define R_ALPHA_GLOB_DAT 25 /* Create GOT entry */
+#define R_ALPHA_JMP_SLOT 26 /* Create PLT entry */
+#define R_ALPHA_RELATIVE 27 /* Adjust by program base */
+#define R_ALPHA_TLS_GD_HI 28
+#define R_ALPHA_TLSGD 29
+#define R_ALPHA_TLS_LDM 30
+#define R_ALPHA_DTPMOD64 31
+#define R_ALPHA_GOTDTPREL 32
+#define R_ALPHA_DTPREL64 33
+#define R_ALPHA_DTPRELHI 34
+#define R_ALPHA_DTPRELLO 35
+#define R_ALPHA_DTPREL16 36
+#define R_ALPHA_GOTTPREL 37
+#define R_ALPHA_TPREL64 38
+#define R_ALPHA_TPRELHI 39
+#define R_ALPHA_TPRELLO 40
+#define R_ALPHA_TPREL16 41
+/* Keep this the last entry. */
+#define R_ALPHA_NUM 46
+
+/* Magic values of the LITUSE relocation addend. */
+#define LITUSE_ALPHA_ADDR 0
+#define LITUSE_ALPHA_BASE 1
+#define LITUSE_ALPHA_BYTOFF 2
+#define LITUSE_ALPHA_JSR 3
+#define LITUSE_ALPHA_TLS_GD 4
+#define LITUSE_ALPHA_TLS_LDM 5
+
+/* PowerPC specific declarations */
+
+/* Values for Elf32/64_Ehdr.e_flags. */
+#define EF_PPC_EMB 0x80000000 /* PowerPC embedded flag */
+
+/* Cygnus local bits below */
+#define EF_PPC_RELOCATABLE 0x00010000 /* PowerPC -mrelocatable flag*/
+#define EF_PPC_RELOCATABLE_LIB \
+ 0x00008000 /* PowerPC -mrelocatable-lib \
+flag */
+
+/* PowerPC relocations defined by the ABIs */
+#define R_PPC_NONE 0
+#define R_PPC_ADDR32 1 /* 32bit absolute address */
+#define R_PPC_ADDR24 2 /* 26bit address, 2 bits ignored. */
+#define R_PPC_ADDR16 3 /* 16bit absolute address */
+#define R_PPC_ADDR16_LO 4 /* lower 16bit of absolute address */
+#define R_PPC_ADDR16_HI 5 /* high 16bit of absolute address */
+#define R_PPC_ADDR16_HA 6 /* adjusted high 16bit */
+#define R_PPC_ADDR14 7 /* 16bit address, 2 bits ignored */
+#define R_PPC_ADDR14_BRTAKEN 8
+#define R_PPC_ADDR14_BRNTAKEN 9
+#define R_PPC_REL24 10 /* PC relative 26 bit */
+#define R_PPC_REL14 11 /* PC relative 16 bit */
+#define R_PPC_REL14_BRTAKEN 12
+#define R_PPC_REL14_BRNTAKEN 13
+#define R_PPC_GOT16 14
+#define R_PPC_GOT16_LO 15
+#define R_PPC_GOT16_HI 16
+#define R_PPC_GOT16_HA 17
+#define R_PPC_PLTREL24 18
+#define R_PPC_COPY 19
+#define R_PPC_GLOB_DAT 20
+#define R_PPC_JMP_SLOT 21
+#define R_PPC_RELATIVE 22
+#define R_PPC_LOCAL24PC 23
+#define R_PPC_UADDR32 24
+#define R_PPC_UADDR16 25
+#define R_PPC_REL32 26
+#define R_PPC_PLT32 27
+#define R_PPC_PLTREL32 28
+#define R_PPC_PLT16_LO 29
+#define R_PPC_PLT16_HI 30
+#define R_PPC_PLT16_HA 31
+#define R_PPC_SDAREL16 32
+#define R_PPC_SECTOFF 33
+#define R_PPC_SECTOFF_LO 34
+#define R_PPC_SECTOFF_HI 35
+#define R_PPC_SECTOFF_HA 36
+
+/* PowerPC relocations defined for the TLS access ABI. */
+#define R_PPC_TLS 67 /* none (sym+add)@tls */
+#define R_PPC_DTPMOD32 68 /* word32 (sym+add)@dtpmod */
+#define R_PPC_TPREL16 69 /* half16* (sym+add)@tprel */
+#define R_PPC_TPREL16_LO 70 /* half16 (sym+add)@tprel@l */
+#define R_PPC_TPREL16_HI 71 /* half16 (sym+add)@tprel@h */
+#define R_PPC_TPREL16_HA 72 /* half16 (sym+add)@tprel@ha */
+#define R_PPC_TPREL32 73 /* word32 (sym+add)@tprel */
+#define R_PPC_DTPREL16 74 /* half16* (sym+add)@dtprel */
+#define R_PPC_DTPREL16_LO 75 /* half16 (sym+add)@dtprel@l */
+#define R_PPC_DTPREL16_HI 76 /* half16 (sym+add)@dtprel@h */
+#define R_PPC_DTPREL16_HA 77 /* half16 (sym+add)@dtprel@ha */
+#define R_PPC_DTPREL32 78 /* word32 (sym+add)@dtprel */
+#define R_PPC_GOT_TLSGD16 79 /* half16* (sym+add)@got@tlsgd */
+#define R_PPC_GOT_TLSGD16_LO 80 /* half16 (sym+add)@got@tlsgd@l */
+#define R_PPC_GOT_TLSGD16_HI 81 /* half16 (sym+add)@got@tlsgd@h */
+#define R_PPC_GOT_TLSGD16_HA 82 /* half16 (sym+add)@got@tlsgd@ha */
+#define R_PPC_GOT_TLSLD16 83 /* half16* (sym+add)@got@tlsld */
+#define R_PPC_GOT_TLSLD16_LO 84 /* half16 (sym+add)@got@tlsld@l */
+#define R_PPC_GOT_TLSLD16_HI 85 /* half16 (sym+add)@got@tlsld@h */
+#define R_PPC_GOT_TLSLD16_HA 86 /* half16 (sym+add)@got@tlsld@ha */
+#define R_PPC_GOT_TPREL16 87 /* half16* (sym+add)@got@tprel */
+#define R_PPC_GOT_TPREL16_LO 88 /* half16 (sym+add)@got@tprel@l */
+#define R_PPC_GOT_TPREL16_HI 89 /* half16 (sym+add)@got@tprel@h */
+#define R_PPC_GOT_TPREL16_HA 90 /* half16 (sym+add)@got@tprel@ha */
+#define R_PPC_GOT_DTPREL16 91 /* half16* (sym+add)@got@dtprel */
+#define R_PPC_GOT_DTPREL16_LO 92 /* half16* (sym+add)@got@dtprel@l */
+#define R_PPC_GOT_DTPREL16_HI 93 /* half16* (sym+add)@got@dtprel@h */
+#define R_PPC_GOT_DTPREL16_HA 94 /* half16* (sym+add)@got@dtprel@ha */
+
+/* Keep this the last entry. */
+#define R_PPC_NUM 95
+
+/* The remaining relocs are from the Embedded ELF ABI, and are not
+ in the SVR4 ELF ABI. */
+#define R_PPC_EMB_NADDR32 101
+#define R_PPC_EMB_NADDR16 102
+#define R_PPC_EMB_NADDR16_LO 103
+#define R_PPC_EMB_NADDR16_HI 104
+#define R_PPC_EMB_NADDR16_HA 105
+#define R_PPC_EMB_SDAI16 106
+#define R_PPC_EMB_SDA2I16 107
+#define R_PPC_EMB_SDA2REL 108
+#define R_PPC_EMB_SDA21 109 /* 16 bit offset in SDA */
+#define R_PPC_EMB_MRKREF 110
+#define R_PPC_EMB_RELSEC16 111
+#define R_PPC_EMB_RELST_LO 112
+#define R_PPC_EMB_RELST_HI 113
+#define R_PPC_EMB_RELST_HA 114
+#define R_PPC_EMB_BIT_FLD 115
+#define R_PPC_EMB_RELSDA 116 /* 16 bit relative offset in SDA */
+
+/* Diab tool relocations. */
+#define R_PPC_DIAB_SDA21_LO 180 /* like EMB_SDA21, but lower 16 bit */
+#define R_PPC_DIAB_SDA21_HI 181 /* like EMB_SDA21, but high 16 bit */
+#define R_PPC_DIAB_SDA21_HA 182 /* like EMB_SDA21, adjusted high 16 */
+#define R_PPC_DIAB_RELSDA_LO 183 /* like EMB_RELSDA, but lower 16 bit */
+#define R_PPC_DIAB_RELSDA_HI 184 /* like EMB_RELSDA, but high 16 bit */
+#define R_PPC_DIAB_RELSDA_HA 185 /* like EMB_RELSDA, adjusted high 16 */
+
+/* This is a phony reloc to handle any old-fashioned TOC16 references
+ that may still be in object files. */
+#define R_PPC_TOC16 255
+
+/* PowerPC64 relocations defined by the ABIs */
+#define R_PPC64_NONE R_PPC_NONE
+#define R_PPC64_ADDR32 R_PPC_ADDR32 /* 32bit absolute address */
+#define R_PPC64_ADDR24 R_PPC_ADDR24 /* 26bit address, word aligned */
+#define R_PPC64_ADDR16 R_PPC_ADDR16 /* 16bit absolute address */
+#define R_PPC64_ADDR16_LO R_PPC_ADDR16_LO /* lower 16bits of address */
+#define R_PPC64_ADDR16_HI R_PPC_ADDR16_HI /* high 16bits of address. */
+#define R_PPC64_ADDR16_HA R_PPC_ADDR16_HA /* adjusted high 16bits. */
+#define R_PPC64_ADDR14 R_PPC_ADDR14 /* 16bit address, word aligned */
+#define R_PPC64_ADDR14_BRTAKEN R_PPC_ADDR14_BRTAKEN
+#define R_PPC64_ADDR14_BRNTAKEN R_PPC_ADDR14_BRNTAKEN
+#define R_PPC64_REL24 R_PPC_REL24 /* PC-rel. 26 bit, word aligned */
+#define R_PPC64_REL14 R_PPC_REL14 /* PC relative 16 bit */
+#define R_PPC64_REL14_BRTAKEN R_PPC_REL14_BRTAKEN
+#define R_PPC64_REL14_BRNTAKEN R_PPC_REL14_BRNTAKEN
+#define R_PPC64_GOT16 R_PPC_GOT16
+#define R_PPC64_GOT16_LO R_PPC_GOT16_LO
+#define R_PPC64_GOT16_HI R_PPC_GOT16_HI
+#define R_PPC64_GOT16_HA R_PPC_GOT16_HA
+
+#define R_PPC64_COPY R_PPC_COPY
+#define R_PPC64_GLOB_DAT R_PPC_GLOB_DAT
+#define R_PPC64_JMP_SLOT R_PPC_JMP_SLOT
+#define R_PPC64_RELATIVE R_PPC_RELATIVE
+
+#define R_PPC64_UADDR32 R_PPC_UADDR32
+#define R_PPC64_UADDR16 R_PPC_UADDR16
+#define R_PPC64_REL32 R_PPC_REL32
+#define R_PPC64_PLT32 R_PPC_PLT32
+#define R_PPC64_PLTREL32 R_PPC_PLTREL32
+#define R_PPC64_PLT16_LO R_PPC_PLT16_LO
+#define R_PPC64_PLT16_HI R_PPC_PLT16_HI
+#define R_PPC64_PLT16_HA R_PPC_PLT16_HA
+
+#define R_PPC64_SECTOFF R_PPC_SECTOFF
+#define R_PPC64_SECTOFF_LO R_PPC_SECTOFF_LO
+#define R_PPC64_SECTOFF_HI R_PPC_SECTOFF_HI
+#define R_PPC64_SECTOFF_HA R_PPC_SECTOFF_HA
+#define R_PPC64_ADDR30 37 /* word30 (S + A - P) >> 2 */
+#define R_PPC64_ADDR64 38 /* doubleword64 S + A */
+#define R_PPC64_ADDR16_HIGHER 39 /* half16 #higher(S + A) */
+#define R_PPC64_ADDR16_HIGHERA 40 /* half16 #highera(S + A) */
+#define R_PPC64_ADDR16_HIGHEST 41 /* half16 #highest(S + A) */
+#define R_PPC64_ADDR16_HIGHESTA 42 /* half16 #highesta(S + A) */
+#define R_PPC64_UADDR64 43 /* doubleword64 S + A */
+#define R_PPC64_REL64 44 /* doubleword64 S + A - P */
+#define R_PPC64_PLT64 45 /* doubleword64 L + A */
+#define R_PPC64_PLTREL64 46 /* doubleword64 L + A - P */
+#define R_PPC64_TOC16 47 /* half16* S + A - .TOC */
+#define R_PPC64_TOC16_LO 48 /* half16 #lo(S + A - .TOC.) */
+#define R_PPC64_TOC16_HI 49 /* half16 #hi(S + A - .TOC.) */
+#define R_PPC64_TOC16_HA 50 /* half16 #ha(S + A - .TOC.) */
+#define R_PPC64_TOC 51 /* doubleword64 .TOC */
+#define R_PPC64_PLTGOT16 52 /* half16* M + A */
+#define R_PPC64_PLTGOT16_LO 53 /* half16 #lo(M + A) */
+#define R_PPC64_PLTGOT16_HI 54 /* half16 #hi(M + A) */
+#define R_PPC64_PLTGOT16_HA 55 /* half16 #ha(M + A) */
+
+#define R_PPC64_ADDR16_DS 56 /* half16ds* (S + A) >> 2 */
+#define R_PPC64_ADDR16_LO_DS 57 /* half16ds #lo(S + A) >> 2 */
+#define R_PPC64_GOT16_DS 58 /* half16ds* (G + A) >> 2 */
+#define R_PPC64_GOT16_LO_DS 59 /* half16ds #lo(G + A) >> 2 */
+#define R_PPC64_PLT16_LO_DS 60 /* half16ds #lo(L + A) >> 2 */
+#define R_PPC64_SECTOFF_DS 61 /* half16ds* (R + A) >> 2 */
+#define R_PPC64_SECTOFF_LO_DS 62 /* half16ds #lo(R + A) >> 2 */
+#define R_PPC64_TOC16_DS 63 /* half16ds* (S + A - .TOC.) >> 2 */
+#define R_PPC64_TOC16_LO_DS 64 /* half16ds #lo(S + A - .TOC.) >> 2 */
+#define R_PPC64_PLTGOT16_DS 65 /* half16ds* (M + A) >> 2 */
+#define R_PPC64_PLTGOT16_LO_DS 66 /* half16ds #lo(M + A) >> 2 */
+
+/* PowerPC64 relocations defined for the TLS access ABI. */
+#define R_PPC64_TLS 67 /* none (sym+add)@tls */
+#define R_PPC64_DTPMOD64 68 /* doubleword64 (sym+add)@dtpmod */
+#define R_PPC64_TPREL16 69 /* half16* (sym+add)@tprel */
+#define R_PPC64_TPREL16_LO 70 /* half16 (sym+add)@tprel@l */
+#define R_PPC64_TPREL16_HI 71 /* half16 (sym+add)@tprel@h */
+#define R_PPC64_TPREL16_HA 72 /* half16 (sym+add)@tprel@ha */
+#define R_PPC64_TPREL64 73 /* doubleword64 (sym+add)@tprel */
+#define R_PPC64_DTPREL16 74 /* half16* (sym+add)@dtprel */
+#define R_PPC64_DTPREL16_LO 75 /* half16 (sym+add)@dtprel@l */
+#define R_PPC64_DTPREL16_HI 76 /* half16 (sym+add)@dtprel@h */
+#define R_PPC64_DTPREL16_HA 77 /* half16 (sym+add)@dtprel@ha */
+#define R_PPC64_DTPREL64 78 /* doubleword64 (sym+add)@dtprel */
+#define R_PPC64_GOT_TLSGD16 79 /* half16* (sym+add)@got@tlsgd */
+#define R_PPC64_GOT_TLSGD16_LO 80 /* half16 (sym+add)@got@tlsgd@l */
+#define R_PPC64_GOT_TLSGD16_HI 81 /* half16 (sym+add)@got@tlsgd@h */
+#define R_PPC64_GOT_TLSGD16_HA 82 /* half16 (sym+add)@got@tlsgd@ha */
+#define R_PPC64_GOT_TLSLD16 83 /* half16* (sym+add)@got@tlsld */
+#define R_PPC64_GOT_TLSLD16_LO 84 /* half16 (sym+add)@got@tlsld@l */
+#define R_PPC64_GOT_TLSLD16_HI 85 /* half16 (sym+add)@got@tlsld@h */
+#define R_PPC64_GOT_TLSLD16_HA 86 /* half16 (sym+add)@got@tlsld@ha */
+#define R_PPC64_GOT_TPREL16_DS 87 /* half16ds* (sym+add)@got@tprel */
+#define R_PPC64_GOT_TPREL16_LO_DS 88 /* half16ds (sym+add)@got@tprel@l */
+#define R_PPC64_GOT_TPREL16_HI 89 /* half16 (sym+add)@got@tprel@h */
+#define R_PPC64_GOT_TPREL16_HA 90 /* half16 (sym+add)@got@tprel@ha */
+#define R_PPC64_GOT_DTPREL16_DS 91 /* half16ds* (sym+add)@got@dtprel */
+#define R_PPC64_GOT_DTPREL16_LO_DS 92 /* half16ds (sym+add)@got@dtprel@l */
+#define R_PPC64_GOT_DTPREL16_HI 93 /* half16 (sym+add)@got@dtprel@h */
+#define R_PPC64_GOT_DTPREL16_HA 94 /* half16 (sym+add)@got@dtprel@ha */
+#define R_PPC64_TPREL16_DS 95 /* half16ds* (sym+add)@tprel */
+#define R_PPC64_TPREL16_LO_DS 96 /* half16ds (sym+add)@tprel@l */
+#define R_PPC64_TPREL16_HIGHER 97 /* half16 (sym+add)@tprel@higher */
+#define R_PPC64_TPREL16_HIGHERA 98 /* half16 (sym+add)@tprel@highera */
+#define R_PPC64_TPREL16_HIGHEST 99 /* half16 (sym+add)@tprel@highest */
+#define R_PPC64_TPREL16_HIGHESTA 100 /* half16 (sym+add)@tprel@highesta */
+#define R_PPC64_DTPREL16_DS 101 /* half16ds* (sym+add)@dtprel */
+#define R_PPC64_DTPREL16_LO_DS 102 /* half16ds (sym+add)@dtprel@l */
+#define R_PPC64_DTPREL16_HIGHER 103 /* half16 (sym+add)@dtprel@higher */
+#define R_PPC64_DTPREL16_HIGHERA 104 /* half16 (sym+add)@dtprel@highera */
+#define R_PPC64_DTPREL16_HIGHEST 105 /* half16 (sym+add)@dtprel@highest */
+#define R_PPC64_DTPREL16_HIGHESTA 106 /* half16 (sym+add)@dtprel@highesta */
+
+/* Keep this the last entry. */
+#define R_PPC64_NUM 107
+
+/* PowerPC64 specific values for the Dyn d_tag field. */
+#define DT_PPC64_GLINK (DT_LOPROC + 0)
+#define DT_PPC64_OPD (DT_LOPROC + 1)
+#define DT_PPC64_OPDSZ (DT_LOPROC + 2)
+#define DT_PPC64_NUM 3
+
+/* ARM specific declarations */
+
+/* Processor specific flags for the ELF header e_flags field. */
+#define EF_ARM_RELEXEC 0x01
+#define EF_ARM_HASENTRY 0x02
+#define EF_ARM_INTERWORK 0x04
+#define EF_ARM_APCS_26 0x08
+#define EF_ARM_APCS_FLOAT 0x10
+#define EF_ARM_PIC 0x20
+#define EF_ARM_ALIGN8 0x40 /* 8-bit structure alignment is in use */
+#define EF_ARM_NEW_ABI 0x80
+#define EF_ARM_OLD_ABI 0x100
+
+/* Other constants defined in the ARM ELF spec. version B-01. */
+/* NB. These conflict with values defined above. */
+#define EF_ARM_SYMSARESORTED 0x04
+#define EF_ARM_DYNSYMSUSESEGIDX 0x08
+#define EF_ARM_MAPSYMSFIRST 0x10
+#define EF_ARM_EABIMASK 0xFF000000
+
+#define EF_ARM_EABI_VERSION(flags) ((flags)&EF_ARM_EABIMASK)
+#define EF_ARM_EABI_UNKNOWN 0x00000000
+#define EF_ARM_EABI_VER1 0x01000000
+#define EF_ARM_EABI_VER2 0x02000000
+
+/* Additional symbol types for Thumb */
+#define STT_ARM_TFUNC 0xd
+
+/* ARM-specific values for sh_flags */
+#define SHF_ARM_ENTRYSECT 0x10000000 /* Section contains an entry point */
+#define SHF_ARM_COMDEF \
+ 0x80000000 /* Section may be multiply defined \
+in the input to a link step */
+
+/* ARM-specific program header flags */
+#define PF_ARM_SB \
+ 0x10000000 /* Segment contains the location \
+addressed by the static base */
+
+/* ARM relocs. */
+#define R_ARM_NONE 0 /* No reloc */
+#define R_ARM_PC24 1 /* PC relative 26 bit branch */
+#define R_ARM_ABS32 2 /* Direct 32 bit */
+#define R_ARM_REL32 3 /* PC relative 32 bit */
+#define R_ARM_PC13 4
+#define R_ARM_ABS16 5 /* Direct 16 bit */
+#define R_ARM_ABS12 6 /* Direct 12 bit */
+#define R_ARM_THM_ABS5 7
+#define R_ARM_ABS8 8 /* Direct 8 bit */
+#define R_ARM_SBREL32 9
+#define R_ARM_THM_PC22 10
+#define R_ARM_THM_PC8 11
+#define R_ARM_AMP_VCALL9 12
+#define R_ARM_SWI24 13
+#define R_ARM_THM_SWI8 14
+#define R_ARM_XPC25 15
+#define R_ARM_THM_XPC22 16
+#define R_ARM_COPY 20 /* Copy symbol at runtime */
+#define R_ARM_GLOB_DAT 21 /* Create GOT entry */
+#define R_ARM_JUMP_SLOT 22 /* Create PLT entry */
+#define R_ARM_RELATIVE 23 /* Adjust by program base */
+#define R_ARM_GOTOFF 24 /* 32 bit offset to GOT */
+#define R_ARM_GOTPC 25 /* 32 bit PC relative offset to GOT */
+#define R_ARM_GOT32 26 /* 32 bit GOT entry */
+#define R_ARM_PLT32 27 /* 32 bit PLT address */
+#define R_ARM_ALU_PCREL_7_0 32
+#define R_ARM_ALU_PCREL_15_8 33
+#define R_ARM_ALU_PCREL_23_15 34
+#define R_ARM_LDR_SBREL_11_0 35
+#define R_ARM_ALU_SBREL_19_12 36
+#define R_ARM_ALU_SBREL_27_20 37
+#define R_ARM_GNU_VTENTRY 100
+#define R_ARM_GNU_VTINHERIT 101
+#define R_ARM_THM_PC11 102 /* thumb unconditional branch */
+#define R_ARM_THM_PC9 103 /* thumb conditional branch */
+#define R_ARM_RXPC25 249
+#define R_ARM_RSBREL32 250
+#define R_ARM_THM_RPC22 251
+#define R_ARM_RREL32 252
+#define R_ARM_RABS22 253
+#define R_ARM_RPC24 254
+#define R_ARM_RBASE 255
+/* Keep this the last entry. */
+#define R_ARM_NUM 256
+
+/* IA-64 specific declarations. */
+
+/* Processor specific flags for the Ehdr e_flags field. */
+#define EF_IA_64_MASKOS 0x0000000f /* os-specific flags */
+#define EF_IA_64_ABI64 0x00000010 /* 64-bit ABI */
+#define EF_IA_64_ARCH 0xff000000 /* arch. version mask */
+
+/* Processor specific values for the Phdr p_type field. */
+#define PT_IA_64_ARCHEXT (PT_LOPROC + 0) /* arch extension bits */
+#define PT_IA_64_UNWIND (PT_LOPROC + 1) /* ia64 unwind bits */
+#define PT_IA_64_HP_OPT_ANOT (PT_LOOS + 0x12)
+#define PT_IA_64_HP_HSL_ANOT (PT_LOOS + 0x13)
+#define PT_IA_64_HP_STACK (PT_LOOS + 0x14)
+
+/* Processor specific flags for the Phdr p_flags field. */
+#define PF_IA_64_NORECOV 0x80000000 /* spec insns w/o recovery */
+
+/* Processor specific values for the Shdr sh_type field. */
+#define SHT_IA_64_EXT (SHT_LOPROC + 0) /* extension bits */
+#define SHT_IA_64_UNWIND (SHT_LOPROC + 1) /* unwind bits */
+
+/* Processor specific flags for the Shdr sh_flags field. */
+#define SHF_IA_64_SHORT 0x10000000 /* section near gp */
+#define SHF_IA_64_NORECOV 0x20000000 /* spec insns w/o recovery */
+
+/* Processor specific values for the Dyn d_tag field. */
+#define DT_IA_64_PLT_RESERVE (DT_LOPROC + 0)
+#define DT_IA_64_NUM 1
+
+/* IA-64 relocations. */
+#define R_IA64_NONE 0x00 /* none */
+#define R_IA64_IMM14 0x21 /* symbol + addend, add imm14 */
+#define R_IA64_IMM22 0x22 /* symbol + addend, add imm22 */
+#define R_IA64_IMM64 0x23 /* symbol + addend, mov imm64 */
+#define R_IA64_DIR32MSB 0x24 /* symbol + addend, data4 MSB */
+#define R_IA64_DIR32LSB 0x25 /* symbol + addend, data4 LSB */
+#define R_IA64_DIR64MSB 0x26 /* symbol + addend, data8 MSB */
+#define R_IA64_DIR64LSB 0x27 /* symbol + addend, data8 LSB */
+#define R_IA64_GPREL22 0x2a /* @gprel(sym + add), add imm22 */
+#define R_IA64_GPREL64I 0x2b /* @gprel(sym + add), mov imm64 */
+#define R_IA64_GPREL32MSB 0x2c /* @gprel(sym + add), data4 MSB */
+#define R_IA64_GPREL32LSB 0x2d /* @gprel(sym + add), data4 LSB */
+#define R_IA64_GPREL64MSB 0x2e /* @gprel(sym + add), data8 MSB */
+#define R_IA64_GPREL64LSB 0x2f /* @gprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF22 0x32 /* @ltoff(sym + add), add imm22 */
+#define R_IA64_LTOFF64I 0x33 /* @ltoff(sym + add), mov imm64 */
+#define R_IA64_PLTOFF22 0x3a /* @pltoff(sym + add), add imm22 */
+#define R_IA64_PLTOFF64I 0x3b /* @pltoff(sym + add), mov imm64 */
+#define R_IA64_PLTOFF64MSB 0x3e /* @pltoff(sym + add), data8 MSB */
+#define R_IA64_PLTOFF64LSB 0x3f /* @pltoff(sym + add), data8 LSB */
+#define R_IA64_FPTR64I 0x43 /* @fptr(sym + add), mov imm64 */
+#define R_IA64_FPTR32MSB 0x44 /* @fptr(sym + add), data4 MSB */
+#define R_IA64_FPTR32LSB 0x45 /* @fptr(sym + add), data4 LSB */
+#define R_IA64_FPTR64MSB 0x46 /* @fptr(sym + add), data8 MSB */
+#define R_IA64_FPTR64LSB 0x47 /* @fptr(sym + add), data8 LSB */
+#define R_IA64_PCREL60B 0x48 /* @pcrel(sym + add), brl */
+#define R_IA64_PCREL21B 0x49 /* @pcrel(sym + add), ptb, call */
+#define R_IA64_PCREL21M 0x4a /* @pcrel(sym + add), chk.s */
+#define R_IA64_PCREL21F 0x4b /* @pcrel(sym + add), fchkf */
+#define R_IA64_PCREL32MSB 0x4c /* @pcrel(sym + add), data4 MSB */
+#define R_IA64_PCREL32LSB 0x4d /* @pcrel(sym + add), data4 LSB */
+#define R_IA64_PCREL64MSB 0x4e /* @pcrel(sym + add), data8 MSB */
+#define R_IA64_PCREL64LSB 0x4f /* @pcrel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_FPTR22 0x52 /* @ltoff(@fptr(s+a)), imm22 */
+#define R_IA64_LTOFF_FPTR64I 0x53 /* @ltoff(@fptr(s+a)), imm64 */
+#define R_IA64_LTOFF_FPTR32MSB 0x54 /* @ltoff(@fptr(s+a)), data4 MSB */
+#define R_IA64_LTOFF_FPTR32LSB 0x55 /* @ltoff(@fptr(s+a)), data4 LSB */
+#define R_IA64_LTOFF_FPTR64MSB 0x56 /* @ltoff(@fptr(s+a)), data8 MSB */
+#define R_IA64_LTOFF_FPTR64LSB 0x57 /* @ltoff(@fptr(s+a)), data8 LSB */
+#define R_IA64_SEGREL32MSB 0x5c /* @segrel(sym + add), data4 MSB */
+#define R_IA64_SEGREL32LSB 0x5d /* @segrel(sym + add), data4 LSB */
+#define R_IA64_SEGREL64MSB 0x5e /* @segrel(sym + add), data8 MSB */
+#define R_IA64_SEGREL64LSB 0x5f /* @segrel(sym + add), data8 LSB */
+#define R_IA64_SECREL32MSB 0x64 /* @secrel(sym + add), data4 MSB */
+#define R_IA64_SECREL32LSB 0x65 /* @secrel(sym + add), data4 LSB */
+#define R_IA64_SECREL64MSB 0x66 /* @secrel(sym + add), data8 MSB */
+#define R_IA64_SECREL64LSB 0x67 /* @secrel(sym + add), data8 LSB */
+#define R_IA64_REL32MSB 0x6c /* data 4 + REL */
+#define R_IA64_REL32LSB 0x6d /* data 4 + REL */
+#define R_IA64_REL64MSB 0x6e /* data 8 + REL */
+#define R_IA64_REL64LSB 0x6f /* data 8 + REL */
+#define R_IA64_LTV32MSB 0x74 /* symbol + addend, data4 MSB */
+#define R_IA64_LTV32LSB 0x75 /* symbol + addend, data4 LSB */
+#define R_IA64_LTV64MSB 0x76 /* symbol + addend, data8 MSB */
+#define R_IA64_LTV64LSB 0x77 /* symbol + addend, data8 LSB */
+#define R_IA64_PCREL21BI 0x79 /* @pcrel(sym + add), 21bit inst */
+#define R_IA64_PCREL22 0x7a /* @pcrel(sym + add), 22bit inst */
+#define R_IA64_PCREL64I 0x7b /* @pcrel(sym + add), 64bit inst */
+#define R_IA64_IPLTMSB 0x80 /* dynamic reloc, imported PLT, MSB */
+#define R_IA64_IPLTLSB 0x81 /* dynamic reloc, imported PLT, LSB */
+#define R_IA64_COPY 0x84 /* copy relocation */
+#define R_IA64_SUB 0x85 /* Addend and symbol difference */
+#define R_IA64_LTOFF22X 0x86 /* LTOFF22, relaxable. */
+#define R_IA64_LDXMOV 0x87 /* Use of LTOFF22X. */
+#define R_IA64_TPREL14 0x91 /* @tprel(sym + add), imm14 */
+#define R_IA64_TPREL22 0x92 /* @tprel(sym + add), imm22 */
+#define R_IA64_TPREL64I 0x93 /* @tprel(sym + add), imm64 */
+#define R_IA64_TPREL64MSB 0x96 /* @tprel(sym + add), data8 MSB */
+#define R_IA64_TPREL64LSB 0x97 /* @tprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_TPREL22 0x9a /* @ltoff(@tprel(s+a)), imm22 */
+#define R_IA64_DTPMOD64MSB 0xa6 /* @dtpmod(sym + add), data8 MSB */
+#define R_IA64_DTPMOD64LSB 0xa7 /* @dtpmod(sym + add), data8 LSB */
+#define R_IA64_LTOFF_DTPMOD22 0xaa /* @ltoff(@dtpmod(sym + add)), imm22 */
+#define R_IA64_DTPREL14 0xb1 /* @dtprel(sym + add), imm14 */
+#define R_IA64_DTPREL22 0xb2 /* @dtprel(sym + add), imm22 */
+#define R_IA64_DTPREL64I 0xb3 /* @dtprel(sym + add), imm64 */
+#define R_IA64_DTPREL32MSB 0xb4 /* @dtprel(sym + add), data4 MSB */
+#define R_IA64_DTPREL32LSB 0xb5 /* @dtprel(sym + add), data4 LSB */
+#define R_IA64_DTPREL64MSB 0xb6 /* @dtprel(sym + add), data8 MSB */
+#define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym + add), data8 LSB */
+#define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */
+
+/* SH specific declarations */
+
+/* SH relocs. */
+#define R_SH_NONE 0
+#define R_SH_DIR32 1
+#define R_SH_REL32 2
+#define R_SH_DIR8WPN 3
+#define R_SH_IND12W 4
+#define R_SH_DIR8WPL 5
+#define R_SH_DIR8WPZ 6
+#define R_SH_DIR8BP 7
+#define R_SH_DIR8W 8
+#define R_SH_DIR8L 9
+#define R_SH_SWITCH16 25
+#define R_SH_SWITCH32 26
+#define R_SH_USES 27
+#define R_SH_COUNT 28
+#define R_SH_ALIGN 29
+#define R_SH_CODE 30
+#define R_SH_DATA 31
+#define R_SH_LABEL 32
+#define R_SH_SWITCH8 33
+#define R_SH_GNU_VTINHERIT 34
+#define R_SH_GNU_VTENTRY 35
+#define R_SH_TLS_GD_32 144
+#define R_SH_TLS_LD_32 145
+#define R_SH_TLS_LDO_32 146
+#define R_SH_TLS_IE_32 147
+#define R_SH_TLS_LE_32 148
+#define R_SH_TLS_DTPMOD32 149
+#define R_SH_TLS_DTPOFF32 150
+#define R_SH_TLS_TPOFF32 151
+#define R_SH_GOT32 160
+#define R_SH_PLT32 161
+#define R_SH_COPY 162
+#define R_SH_GLOB_DAT 163
+#define R_SH_JMP_SLOT 164
+#define R_SH_RELATIVE 165
+#define R_SH_GOTOFF 166
+#define R_SH_GOTPC 167
+/* Keep this the last entry. */
+#define R_SH_NUM 256
+
+/* Additional s390 relocs */
+
+#define R_390_NONE 0 /* No reloc. */
+#define R_390_8 1 /* Direct 8 bit. */
+#define R_390_12 2 /* Direct 12 bit. */
+#define R_390_16 3 /* Direct 16 bit. */
+#define R_390_32 4 /* Direct 32 bit. */
+#define R_390_PC32 5 /* PC relative 32 bit. */
+#define R_390_GOT12 6 /* 12 bit GOT offset. */
+#define R_390_GOT32 7 /* 32 bit GOT offset. */
+#define R_390_PLT32 8 /* 32 bit PC relative PLT address. */
+#define R_390_COPY 9 /* Copy symbol at runtime. */
+#define R_390_GLOB_DAT 10 /* Create GOT entry. */
+#define R_390_JMP_SLOT 11 /* Create PLT entry. */
+#define R_390_RELATIVE 12 /* Adjust by program base. */
+#define R_390_GOTOFF32 13 /* 32 bit offset to GOT. */
+#define R_390_GOTPC 14 /* 32 bit PC relative offset to GOT. */
+#define R_390_GOT16 15 /* 16 bit GOT offset. */
+#define R_390_PC16 16 /* PC relative 16 bit. */
+#define R_390_PC16DBL 17 /* PC relative 16 bit shifted by 1. */
+#define R_390_PLT16DBL 18 /* 16 bit PC rel. PLT shifted by 1. */
+#define R_390_PC32DBL 19 /* PC relative 32 bit shifted by 1. */
+#define R_390_PLT32DBL 20 /* 32 bit PC rel. PLT shifted by 1. */
+#define R_390_GOTPCDBL 21 /* 32 bit PC rel. GOT shifted by 1. */
+#define R_390_64 22 /* Direct 64 bit. */
+#define R_390_PC64 23 /* PC relative 64 bit. */
+#define R_390_GOT64 24 /* 64 bit GOT offset. */
+#define R_390_PLT64 25 /* 64 bit PC relative PLT address. */
+#define R_390_GOTENT 26 /* 32 bit PC rel. to GOT entry >> 1. */
+#define R_390_GOTOFF16 27 /* 16 bit offset to GOT. */
+#define R_390_GOTOFF64 28 /* 64 bit offset to GOT. */
+#define R_390_GOTPLT12 29 /* 12 bit offset to jump slot. */
+#define R_390_GOTPLT16 30 /* 16 bit offset to jump slot. */
+#define R_390_GOTPLT32 31 /* 32 bit offset to jump slot. */
+#define R_390_GOTPLT64 32 /* 64 bit offset to jump slot. */
+#define R_390_GOTPLTENT 33 /* 32 bit rel. offset to jump slot. */
+#define R_390_PLTOFF16 34 /* 16 bit offset from GOT to PLT. */
+#define R_390_PLTOFF32 35 /* 32 bit offset from GOT to PLT. */
+#define R_390_PLTOFF64 36 /* 64 bit offset from GOT to PLT. */
+#define R_390_TLS_LOAD 37 /* Tag for load insn in TLS code. */
+#define R_390_TLS_GDCALL \
+ 38 /* Tag for function call in general \
+dynamic TLS code. */
+#define R_390_TLS_LDCALL \
+ 39 /* Tag for function call in local \
+dynamic TLS code. */
+#define R_390_TLS_GD32 \
+ 40 /* Direct 32 bit for general dynamic \
+thread local data. */
+#define R_390_TLS_GD64 \
+ 41 /* Direct 64 bit for general dynamic \
+thread local data. */
+#define R_390_TLS_GOTIE12 \
+ 42 /* 12 bit GOT offset for static TLS \
+block offset. */
+#define R_390_TLS_GOTIE32 \
+ 43 /* 32 bit GOT offset for static TLS \
+block offset. */
+#define R_390_TLS_GOTIE64 \
+ 44 /* 64 bit GOT offset for static TLS \
+block offset. */
+#define R_390_TLS_LDM32 \
+ 45 /* Direct 32 bit for local dynamic \
+thread local data in LE code. */
+#define R_390_TLS_LDM64 \
+ 46 /* Direct 64 bit for local dynamic \
+thread local data in LE code. */
+#define R_390_TLS_IE32 \
+ 47 /* 32 bit address of GOT entry for \
+negated static TLS block offset. */
+#define R_390_TLS_IE64 \
+ 48 /* 64 bit address of GOT entry for \
+negated static TLS block offset. */
+#define R_390_TLS_IEENT \
+ 49 /* 32 bit rel. offset to GOT entry for \
+negated static TLS block offset. */
+#define R_390_TLS_LE32 \
+ 50 /* 32 bit negated offset relative to \
+static TLS block. */
+#define R_390_TLS_LE64 \
+ 51 /* 64 bit negated offset relative to \
+static TLS block. */
+#define R_390_TLS_LDO32 \
+ 52 /* 32 bit offset relative to TLS \
+block. */
+#define R_390_TLS_LDO64 \
+ 53 /* 64 bit offset relative to TLS \
+block. */
+#define R_390_TLS_DTPMOD 54 /* ID of module containing symbol. */
+#define R_390_TLS_DTPOFF 55 /* Offset in TLS block. */
+#define R_390_TLS_TPOFF \
+ 56 /* Negated offset in static TLS \
+block. */
+#define R_390_20 57 /* Direct 20 bit. */
+#define R_390_GOT20 58 /* 20 bit GOT offset. */
+#define R_390_GOTPLT20 59 /* 20 bit offset to jump slot. */
+#define R_390_TLS_GOTIE20 \
+ 60 /* 20 bit GOT offset for static TLS \
+block offset. */
+/* Keep this the last entry. */
+#define R_390_NUM 61
+
+/* CRIS relocations. */
+#define R_CRIS_NONE 0
+#define R_CRIS_8 1
+#define R_CRIS_16 2
+#define R_CRIS_32 3
+#define R_CRIS_8_PCREL 4
+#define R_CRIS_16_PCREL 5
+#define R_CRIS_32_PCREL 6
+#define R_CRIS_GNU_VTINHERIT 7
+#define R_CRIS_GNU_VTENTRY 8
+#define R_CRIS_COPY 9
+#define R_CRIS_GLOB_DAT 10
+#define R_CRIS_JUMP_SLOT 11
+#define R_CRIS_RELATIVE 12
+#define R_CRIS_16_GOT 13
+#define R_CRIS_32_GOT 14
+#define R_CRIS_16_GOTPLT 15
+#define R_CRIS_32_GOTPLT 16
+#define R_CRIS_32_GOTREL 17
+#define R_CRIS_32_PLT_GOTREL 18
+#define R_CRIS_32_PLT_PCREL 19
+
+#define R_CRIS_NUM 20
+
+/* AMD x86-64 relocations. */
+#define R_X86_64_NONE 0 /* No reloc */
+#define R_X86_64_64 1 /* Direct 64 bit */
+#define R_X86_64_PC32 2 /* PC relative 32 bit signed */
+#define R_X86_64_GOT32 3 /* 32 bit GOT entry */
+#define R_X86_64_PLT32 4 /* 32 bit PLT address */
+#define R_X86_64_COPY 5 /* Copy symbol at runtime */
+#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */
+#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */
+#define R_X86_64_RELATIVE 8 /* Adjust by program base */
+#define R_X86_64_GOTPCREL \
+ 9 /* 32 bit signed PC relative \
+offset to GOT */
+#define R_X86_64_32 10 /* Direct 32 bit zero extended */
+#define R_X86_64_32S 11 /* Direct 32 bit sign extended */
+#define R_X86_64_16 12 /* Direct 16 bit zero extended */
+#define R_X86_64_PC16 13 /* 16 bit sign extended pc relative */
+#define R_X86_64_8 14 /* Direct 8 bit sign extended */
+#define R_X86_64_PC8 15 /* 8 bit sign extended pc relative */
+#define R_X86_64_DTPMOD64 16 /* ID of module containing symbol */
+#define R_X86_64_DTPOFF64 17 /* Offset in module's TLS block */
+#define R_X86_64_TPOFF64 18 /* Offset in initial TLS block */
+#define R_X86_64_TLSGD \
+ 19 /* 32 bit signed PC relative offset \
+to two GOT entries for GD symbol */
+#define R_X86_64_TLSLD \
+ 20 /* 32 bit signed PC relative offset \
+to two GOT entries for LD symbol */
+#define R_X86_64_DTPOFF32 21 /* Offset in TLS block */
+#define R_X86_64_GOTTPOFF \
+ 22 /* 32 bit signed PC relative offset \
+to GOT entry for IE symbol */
+#define R_X86_64_TPOFF32 23 /* Offset in initial TLS block */
+
+#define R_X86_64_NUM 24
+
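For orientation, each R_X86_64_* value above is stored in the low 32 bits of an Elf64_Rela entry's r_info field, with the symbol index in the upper 32 bits. The sketch below shows how a loader might walk a .rela section and apply the base-relative entries; it assumes the standard Elf64_Rela type and ELF64_R_TYPE accessor macro defined earlier in this header, and it is only an illustration, not the ELF loading code this kernel actually uses.

#include <stddef.h>
#include <stdint.h>

/* Apply the position-independent subset of x86-64 relocations.
 * `base` is the load bias of the object; entries this sketch does not
 * understand are counted and left untouched. */
static size_t apply_relative_relas(const Elf64_Rela *rela, size_t count,
                                   uint8_t *base)
{
    size_t unhandled = 0;
    for (size_t i = 0; i < count; i++) {
        uint64_t *where = (uint64_t *)(base + rela[i].r_offset);
        switch (ELF64_R_TYPE(rela[i].r_info)) {
        case R_X86_64_RELATIVE:
            /* B + A: only the load base and the addend are needed. */
            *where = (uint64_t)(uintptr_t)base + (uint64_t)rela[i].r_addend;
            break;
        case R_X86_64_NONE:
            break;
        default:
            /* e.g. R_X86_64_GLOB_DAT or R_X86_64_JUMP_SLOT would need a
             * symbol lookup, which is out of scope here. */
            unhandled++;
            break;
        }
    }
    return unhandled;
}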
+/* AM33 relocations. */
+#define R_MN10300_NONE 0 /* No reloc. */
+#define R_MN10300_32 1 /* Direct 32 bit. */
+#define R_MN10300_16 2 /* Direct 16 bit. */
+#define R_MN10300_8 3 /* Direct 8 bit. */
+#define R_MN10300_PCREL32 4 /* PC-relative 32-bit. */
+#define R_MN10300_PCREL16 5 /* PC-relative 16-bit signed. */
+#define R_MN10300_PCREL8 6 /* PC-relative 8-bit signed. */
+#define R_MN10300_GNU_VTINHERIT 7 /* Ancient C++ vtable garbage... */
+#define R_MN10300_GNU_VTENTRY 8 /* ... collection annotation. */
+#define R_MN10300_24 9 /* Direct 24 bit. */
+#define R_MN10300_GOTPC32 10 /* 32-bit PCrel offset to GOT. */
+#define R_MN10300_GOTPC16 11 /* 16-bit PCrel offset to GOT. */
+#define R_MN10300_GOTOFF32 12 /* 32-bit offset from GOT. */
+#define R_MN10300_GOTOFF24 13 /* 24-bit offset from GOT. */
+#define R_MN10300_GOTOFF16 14 /* 16-bit offset from GOT. */
+#define R_MN10300_PLT32 15 /* 32-bit PCrel to PLT entry. */
+#define R_MN10300_PLT16 16 /* 16-bit PCrel to PLT entry. */
+#define R_MN10300_GOT32 17 /* 32-bit offset to GOT entry. */
+#define R_MN10300_GOT24 18 /* 24-bit offset to GOT entry. */
+#define R_MN10300_GOT16 19 /* 16-bit offset to GOT entry. */
+#define R_MN10300_COPY 20 /* Copy symbol at runtime. */
+#define R_MN10300_GLOB_DAT 21 /* Create GOT entry. */
+#define R_MN10300_JMP_SLOT 22 /* Create PLT entry. */
+#define R_MN10300_RELATIVE 23 /* Adjust by program base. */
+
+#define R_MN10300_NUM 24
+
+/* M32R relocs. */
+#define R_M32R_NONE 0 /* No reloc. */
+#define R_M32R_16 1 /* Direct 16 bit. */
+#define R_M32R_32 2 /* Direct 32 bit. */
+#define R_M32R_24 3 /* Direct 24 bit. */
+#define R_M32R_10_PCREL 4 /* PC relative 10 bit shifted. */
+#define R_M32R_18_PCREL 5 /* PC relative 18 bit shifted. */
+#define R_M32R_26_PCREL 6 /* PC relative 26 bit shifted. */
+#define R_M32R_HI16_ULO 7 /* High 16 bit with unsigned low. */
+#define R_M32R_HI16_SLO 8 /* High 16 bit with signed low. */
+#define R_M32R_LO16 9 /* Low 16 bit. */
+#define R_M32R_SDA16 10 /* 16 bit offset in SDA. */
+#define R_M32R_GNU_VTINHERIT 11
+#define R_M32R_GNU_VTENTRY 12
+/* M32R relocs use SHT_RELA. */
+#define R_M32R_16_RELA 33 /* Direct 16 bit. */
+#define R_M32R_32_RELA 34 /* Direct 32 bit. */
+#define R_M32R_24_RELA 35 /* Direct 24 bit. */
+#define R_M32R_10_PCREL_RELA 36 /* PC relative 10 bit shifted. */
+#define R_M32R_18_PCREL_RELA 37 /* PC relative 18 bit shifted. */
+#define R_M32R_26_PCREL_RELA 38 /* PC relative 26 bit shifted. */
+#define R_M32R_HI16_ULO_RELA 39 /* High 16 bit with unsigned low */
+#define R_M32R_HI16_SLO_RELA 40 /* High 16 bit with signed low */
+#define R_M32R_LO16_RELA 41 /* Low 16 bit */
+#define R_M32R_SDA16_RELA 42 /* 16 bit offset in SDA */
+#define R_M32R_RELA_GNU_VTINHERIT 43
+#define R_M32R_RELA_GNU_VTENTRY 44
+
+#define R_M32R_GOT24 48 /* 24 bit GOT entry */
+#define R_M32R_26_PLTREL 49 /* 26 bit PC relative to PLT shifted */
+#define R_M32R_COPY 50 /* Copy symbol at runtime */
+#define R_M32R_GLOB_DAT 51 /* Create GOT entry */
+#define R_M32R_JMP_SLOT 52 /* Create PLT entry */
+#define R_M32R_RELATIVE 53 /* Adjust by program base */
+#define R_M32R_GOTOFF 54 /* 24 bit offset to GOT */
+#define R_M32R_GOTPC24 55 /* 24 bit PC relative offset to GOT */
+#define R_M32R_GOT16_HI_ULO \
+ 56 /* High 16 bit GOT entry with unsigned \
+low */
+#define R_M32R_GOT16_HI_SLO \
+ 57 /* High 16 bit GOT entry with signed \
+ low */
+#define R_M32R_GOT16_LO 58 /* Low 16 bit GOT entry */
+#define R_M32R_GOTPC_HI_ULO \
+ 59 /* High 16 bit PC relative offset to \
+GOT with unsigned low */
+#define R_M32R_GOTPC_HI_SLO \
+ 60 /* High 16 bit PC relative offset to \
+GOT with signed low */
+#define R_M32R_GOTPC_LO \
+ 61 /* Low 16 bit PC relative offset to \
+GOT */
+#define R_M32R_GOTOFF_HI_ULO \
+ 62 /* High 16 bit offset to GOT \
+with unsigned low */
+#define R_M32R_GOTOFF_HI_SLO \
+ 63 /* High 16 bit offset to GOT \
+ with signed low */
+#define R_M32R_GOTOFF_LO 64 /* Low 16 bit offset to GOT */
+#define R_M32R_NUM 256 /* Keep this the last entry. */
+
+/* __END_DECLS */
diff --git a/kernel/include/api/exec.h b/kernel/include/api/exec.h
new file mode 100644
index 0000000..854ce9e
--- /dev/null
+++ b/kernel/include/api/exec.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "types.h"
+
+struct regs;
+
+long do_execve(const char *filename, char *const *argv, char *const *envp,
+ struct regs *regs);
+
+void kernel_execve(const char *filename, char *const *argv, char *const *envp);
+
+void userland_entry(struct regs regs);
diff --git a/kernel/include/api/syscall.h b/kernel/include/api/syscall.h
new file mode 100644
index 0000000..ea924c3
--- /dev/null
+++ b/kernel/include/api/syscall.h
@@ -0,0 +1,196 @@
+#pragma once
+
+/* Kernel and user header (via symlink) */
+
+#ifdef __KERNEL__
+#include "types.h"
+#else
+
+#include "sys/types.h"
+
+#endif
+
+/* Trap number for syscalls */
+#define INTR_SYSCALL 0x2e
+
+/* Keep all lists IN ORDER! */
+
+#define SYS_syscall 0
+#define SYS_exit 1
+#define SYS_fork 2
+#define SYS_read 3
+#define SYS_write 4
+#define SYS_open 5
+#define SYS_close 6
+#define SYS_waitpid 7
+#define SYS_link 8
+#define SYS_unlink 9
+#define SYS_execve 10
+#define SYS_chdir 11
+#define SYS_sleep 12 /* NYI */
+#define SYS_lseek 14
+#define SYS_sync 15
+#define SYS_nuke 16 /* NYI */
+#define SYS_dup 17
+#define SYS_pipe 18
+#define SYS_ioctl 19 /* NYI */
+#define SYS_rmdir 21
+#define SYS_mkdir 22
+#define SYS_getdents 23
+#define SYS_mmap 24
+#define SYS_mprotect 25 /* NYI */
+#define SYS_munmap 26
+#define SYS_rename 27 /* NYI */
+#define SYS_uname 28
+#define SYS_thr_create 29 /* NYI */
+#define SYS_thr_cancel 30
+#define SYS_thr_exit 31
+#define SYS_sched_yield 32
+#define SYS_thr_join 33 /* NYI */
+#define SYS_gettid 34 /* NYI */
+#define SYS_getpid 35
+#define SYS_errno 39
+#define SYS_halt 40
+#define SYS_get_free_mem 41 /* NYI */
+#define SYS_set_errno 42
+#define SYS_dup2 43
+#define SYS_brk 44
+#define SYS_mount 45
+#define SYS_umount 46
+#define SYS_stat 47
+#define SYS_time 48
+#define SYS_usleep 49
+
+/*
+ * ... what does the scouter say about his syscall?
+ * IT'S OVER 9000!
+ * WHAT?! 9000?!
+ */
+#define SYS_debug 9001
+#define SYS_kshell 9002
+
+struct regs;
+struct stat;
+
+typedef struct argstr
+{
+ const char *as_str;
+ size_t as_len; /* Not including null character */
+} argstr_t;
+
+typedef struct argvec
+{
+ argstr_t *av_vec;
+ size_t av_len; /* Not including null entry */
+} argvec_t;
+
+typedef struct waitpid_args
+{
+ pid_t wpa_pid;
+ int *wpa_status;
+ int wpa_options;
+} waitpid_args_t;
+
+typedef struct mmap_args
+{
+ void *mma_addr;
+ size_t mma_len;
+ int mma_prot;
+ int mma_flags;
+ int mma_fd;
+ off_t mma_off;
+} mmap_args_t;
+
+typedef struct munmap_args
+{
+ void *addr;
+ size_t len;
+} munmap_args_t;
+
+typedef struct open_args
+{
+ argstr_t filename;
+ int flags;
+ int mode;
+} open_args_t;
+
+typedef struct read_args
+{
+ int fd;
+ void *buf;
+ size_t nbytes;
+} read_args_t;
+
+typedef struct write_args
+{
+ int fd;
+ void *buf;
+ size_t nbytes;
+} write_args_t;
+
+typedef struct mkdir_args
+{
+ argstr_t path;
+ int mode;
+} mkdir_args_t;
+
+typedef struct link_args
+{
+ argstr_t to;
+ argstr_t from;
+} link_args_t;
+
+typedef struct execve_args
+{
+ argstr_t filename;
+ argvec_t argv;
+ argvec_t envp;
+} execve_args_t;
+
+typedef struct rename_args
+{
+ argstr_t oldpath;
+ argstr_t newpath;
+} rename_args_t;
+
+typedef struct getdents_args
+{
+ int fd;
+ struct dirent *dirp;
+ size_t count;
+} getdents_args_t;
+
+typedef struct lseek_args
+{
+ int fd;
+ off_t offset;
+ int whence;
+} lseek_args_t;
+
+typedef struct dup2_args
+{
+ int ofd;
+ int nfd;
+} dup2_args_t;
+
+#ifdef __MOUNTING__
+typedef struct mount_args
+{
+ argstr_t spec;
+ argstr_t dir;
+ argstr_t fstype;
+} mount_args_t;
+#endif
+
+typedef struct stat_args
+{
+ argstr_t path;
+ struct stat *buf;
+} stat_args_t;
+
+typedef struct usleep_args
+{
+ useconds_t usec;
+} usleep_args_t;
+
+struct utsname;
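The argstr_t/argvec_t wrappers above carry an explicit length alongside each user pointer so the kernel can validate and copy string and vector arguments without trusting NUL termination alone. As a hedged sketch of the user-space side, this is roughly how an open() stub might package its arguments before trapping into the kernel; the __syscall_trap helper is hypothetical, and the actual trap sequence through INTR_SYSCALL is architecture-specific and not shown here.

#include <string.h>

/* Hypothetical user-side helper that loads `num` and `args` into the
 * registers the kernel expects and executes the INTR_SYSCALL trap. */
extern long __syscall_trap(long num, void *args);

long weenix_open(const char *filename, int flags, int mode)
{
    open_args_t args;

    /* as_len excludes the terminating NUL, per the argstr_t comment. */
    args.filename.as_str = filename;
    args.filename.as_len = strlen(filename);
    args.flags = flags;
    args.mode = mode;

    return __syscall_trap(SYS_open, &args);
}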
diff --git a/kernel/include/api/utsname.h b/kernel/include/api/utsname.h
new file mode 100644
index 0000000..c60ae81
--- /dev/null
+++ b/kernel/include/api/utsname.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#define _UTSNAME_LENGTH 128
+
+struct utsname
+{
+ char sysname[_UTSNAME_LENGTH];
+ char nodename[_UTSNAME_LENGTH];
+ char release[_UTSNAME_LENGTH];
+ char version[_UTSNAME_LENGTH];
+ char machine[_UTSNAME_LENGTH];
+};
+
+int uname(struct utsname *buf);
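A minimal usage sketch: each field is a fixed-size, NUL-terminated character array, so a caller can print them directly after a successful call (the 0-on-success return convention is assumed here, since the header only declares the prototype).

#include <stdio.h>

static void print_uname_banner(void)
{
    struct utsname info;

    if (uname(&info) != 0) {
        return; /* assumed: non-zero indicates failure */
    }
    printf("%s %s %s on %s\n", info.sysname, info.release, info.version,
           info.machine);
}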
diff --git a/kernel/include/boot/config.h b/kernel/include/boot/config.h
new file mode 100644
index 0000000..74e0d42
--- /dev/null
+++ b/kernel/include/boot/config.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#define IDENTITY_MAPPED_RAM_SIZE (1 << 16)
+
+#define KERNEL_PHYS_BASE ((uintptr_t)(&kernel_phys_base))
+#define KERNEL_PHYS_END ((uintptr_t)(&kernel_phys_end))
+#define KERNEL_VMA 0xffff800000000000
+
+// https://www.usenix.org/sites/default/files/conference/protected-files/sec14_slides_kemerlis.pdf
+#define PHYS_OFFSET 0xffff880000000000
+
+#define MEMORY_MAP_BASE 0x9000
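PHYS_OFFSET is the base of a direct mapping of physical memory into the kernel's address space, so a physical address and its kernel-virtual alias differ by a constant. Below is a sketch of the conversion helpers such a layout implies; the names are hypothetical and not taken from this kernel.

#include <stdint.h>

/* Hypothetical physmap helpers: with all of RAM mapped at PHYS_OFFSET,
 * converting between a physical address and its kernel-virtual alias is
 * a single constant offset in each direction. */
static inline void *physmap_virt(uintptr_t paddr)
{
    return (void *)(paddr + PHYS_OFFSET);
}

static inline uintptr_t physmap_phys(const void *vaddr)
{
    return (uintptr_t)vaddr - PHYS_OFFSET;
}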
diff --git a/kernel/include/boot/multiboot_macros.h b/kernel/include/boot/multiboot_macros.h
new file mode 100644
index 0000000..1ca6383
--- /dev/null
+++ b/kernel/include/boot/multiboot_macros.h
@@ -0,0 +1,6 @@
+#pragma once
+
+// Rounds a byte count up to whole MULTIBOOT_TAG_ALIGN units (for the multiboot header).
+#define TAG_SIZE(x) (((x)-1) / MULTIBOOT_TAG_ALIGN + 1)
+
+extern struct multiboot_tag *mb_tag;
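TAG_SIZE(x) rounds a byte count up to a whole number of MULTIBOOT_TAG_ALIGN-sized units, which is exactly what is needed when stepping through multiboot2 tags: each tag reports its unpadded size, but the next tag starts on an aligned boundary. A sketch of that traversal, assuming the standard multiboot2.h tag layout (32-bit type and size fields, a MULTIBOOT_TAG_TYPE_END terminator, MULTIBOOT_TAG_ALIGN of 8); in this kernel the starting pointer would presumably be mb_tag, but that is an assumption.

/* Visit every tag in a multiboot2 tag list, advancing by the padded size. */
static void walk_multiboot_tags(struct multiboot_tag *tag,
                                void (*visit)(struct multiboot_tag *))
{
    while (tag->type != MULTIBOOT_TAG_TYPE_END) {
        visit(tag);
        /* `size` excludes padding; TAG_SIZE() rounds it up to whole
         * MULTIBOOT_TAG_ALIGN units. */
        tag = (struct multiboot_tag *)((char *)tag +
                                       TAG_SIZE(tag->size) *
                                           MULTIBOOT_TAG_ALIGN);
    }
}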
diff --git a/kernel/include/config.h b/kernel/include/config.h
new file mode 100644
index 0000000..a57edd4
--- /dev/null
+++ b/kernel/include/config.h
@@ -0,0 +1,50 @@
+/*
+ * FILE: config.h
+ * AUTHOR: kma
+ * DESCR: tunable kernel parameters
+ */
+
+#pragma once
+
+/* Kernel and user header (via symlink) */
+
+/*
+ * kernel configuration parameters
+ */
+#define DEFAULT_STACK_SIZE_PAGES 16
+#define DEFAULT_STACK_SIZE (DEFAULT_STACK_SIZE_PAGES << PAGE_SHIFT)
+#define TICK_MSECS 10 /* msecs between clock interrupts */
+
+/*
+ * Memory-management-related:
+ */
+
+/*
+ * Fraction of available page frames dedicated to kmem; the rest are
+ * given to the VM system. This is currently unused.
+ */
+#define KMEM_FRAC(x) (((x) >> 2) + ((x) >> 3)) /* 37.5%-ish */
+
+/* pframe/mobj-system-related: */
+#define PF_HASH_SIZE 17 /* Number of buckets in pn/mobj->pframe hash. This is currently unused. */
+
+/*
+ * filesystem/vfs configuration parameters
+ */
+
+#define MAXPATHLEN 1024 /* maximum size of a pathname */
+#define MAX_FILES 1024 /* max number of files */
+#define MAX_VFS 8 /* max # of vfses */
+#define MAX_VNODES 1024 /* max number of in-core vnodes */
+#define NAME_LEN 28 /* maximum directory entry length */
+#define NFILES 32 /* maximum number of open files */
+
+/* Note: if rootfs is ramfs, this is completely ignored */
+#define VFS_ROOTFS_DEV "disk0" /* device containing root filesystem */
+
+/* root filesystem type - either "ramfs" or "s5fs" */
+#ifdef __S5FS__
+#define VFS_ROOTFS_TYPE "s5fs"
+#else
+#define VFS_ROOTFS_TYPE "ramfs"
+#endif
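For concreteness, assuming the conventional PAGE_SHIFT of 12 (4 KiB pages; PAGE_SHIFT itself is defined elsewhere), DEFAULT_STACK_SIZE works out to 16 << 12 = 65536 bytes, i.e. a 64 KiB kernel stack per thread, and KMEM_FRAC(x) = x/4 + x/8 = 3x/8, which is the "37.5%-ish" noted in its comment.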
diff --git a/kernel/include/ctype.h b/kernel/include/ctype.h
new file mode 100644
index 0000000..95e5496
--- /dev/null
+++ b/kernel/include/ctype.h
@@ -0,0 +1,124 @@
+#pragma once
+
+#ifdef __KERNEL__
+#include "kernel.h"
+#include "types.h"
+#else
+
+#include "stddef.h"
+#include "sys/types.h"
+
+#endif
+
+/* The original implementations of the following functions were ported
+ * from the old Weenix-on-Xen architecture, which used the mini-os
+ * functions. */
+/*
+ * NOTE! This ctype does not handle EOF like the standard C
+ * library is required to.
+ */
+
+#define _U 0x01 /* upper */
+#define _L 0x02 /* lower */
+#define _D 0x04 /* digit */
+#define _C 0x08 /* cntrl */
+#define _P 0x10 /* punct */
+#define _S 0x20 /* white space (space/lf/tab) */
+#define _X 0x40 /* hex digit */
+#define _SP 0x80 /* hard space (0x20) */
+
+static unsigned char
+ _ctype[] = {_C, _C, _C, _C, _C, _C,
+ _C, _C, /* 0-7 */
+ _C, _C | _S, _C | _S, _C | _S, _C | _S, _C | _S,
+ _C, _C, /* 8-15 */
+ _C, _C, _C, _C, _C, _C,
+ _C, _C, /* 16-23 */
+ _C, _C, _C, _C, _C, _C,
+ _C, _C, /* 24-31 */
+ _S | _SP, _P, _P, _P, _P, _P,
+ _P, _P, /* 32-39 */
+ _P, _P, _P, _P, _P, _P,
+ _P, _P, /* 40-47 */
+ _D, _D, _D, _D, _D, _D,
+ _D, _D, /* 48-55 */
+ _D, _D, _P, _P, _P, _P,
+ _P, _P, /* 56-63 */
+ _P, _U | _X, _U | _X, _U | _X, _U | _X, _U | _X,
+ _U | _X, _U, /* 64-71 */
+ _U, _U, _U, _U, _U, _U,
+ _U, _U, /* 72-79 */
+ _U, _U, _U, _U, _U, _U,
+ _U, _U, /* 80-87 */
+ _U, _U, _U, _P, _P, _P,
+ _P, _P, /* 88-95 */
+ _P, _L | _X, _L | _X, _L | _X, _L | _X, _L | _X,
+ _L | _X, _L, /* 96-103 */
+ _L, _L, _L, _L, _L, _L,
+ _L, _L, /* 104-111 */
+ _L, _L, _L, _L, _L, _L,
+ _L, _L, /* 112-119 */
+ _L, _L, _L, _P, _P, _P,
+ _P, _C, /* 120-127 */
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, /* 128-143 */
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, /* 144-159 */
+ _S | _SP, _P, _P, _P, _P, _P,
+ _P, _P, _P, _P, _P, _P,
+ _P, _P, _P, _P, /* 160-175 */
+ _P, _P, _P, _P, _P, _P,
+ _P, _P, _P, _P, _P, _P,
+ _P, _P, _P, _P, /* 176-191 */
+ _U, _U, _U, _U, _U, _U,
+ _U, _U, _U, _U, _U, _U,
+ _U, _U, _U, _U, /* 192-207 */
+ _U, _U, _U, _U, _U, _U,
+ _U, _P, _U, _U, _U, _U,
+ _U, _U, _U, _L, /* 208-223 */
+ _L, _L, _L, _L, _L, _L,
+ _L, _L, _L, _L, _L, _L,
+ _L, _L, _L, _L, /* 224-239 */
+ _L, _L, _L, _L, _L, _L,
+ _L, _P, _L, _L, _L, _L,
+ _L, _L, _L, _L}; /* 240-255 */
+
+#define __ismask(x) (_ctype[(int)(unsigned char)(x)])
+
+#define isalnum(c) ((__ismask(c) & (_U | _L | _D)) != 0)
+#define isalpha(c) ((__ismask(c) & (_U | _L)) != 0)
+#define iscntrl(c) ((__ismask(c) & (_C)) != 0)
+#define isdigit(c) ((__ismask(c) & (_D)) != 0)
+#define isgraph(c) ((__ismask(c) & (_P | _U | _L | _D)) != 0)
+#define islower(c) ((__ismask(c) & (_L)) != 0)
+#define isprint(c) ((__ismask(c) & (_P | _U | _L | _D | _SP)) != 0)
+#define ispunct(c) ((__ismask(c) & (_P)) != 0)
+#define isspace(c) ((__ismask(c) & (_S)) != 0)
+#define isupper(c) ((__ismask(c) & (_U)) != 0)
+#define isxdigit(c) ((__ismask(c) & (_D | _X)) != 0)
+
+#define isascii(c) (((unsigned char)(c)) <= 0x7f)
+#define toascii(c) (((unsigned char)(c)) & 0x7f)
+
+static inline unsigned char __tolower(unsigned char c)
+{
+ if (isupper(c))
+ {
+ c -= 'A' - 'a';
+ }
+ return c;
+}
+
+static inline unsigned char __toupper(unsigned char c)
+{
+ if (islower(c))
+ {
+ c -= 'a' - 'A';
+ }
+ return c;
+}
+
+#define tolower(c) __tolower(c)
+#define toupper(c) __toupper(c)
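A short usage sketch of the table-driven classifiers above. Per the note at the top of the file, these macros do not handle EOF the way the standard C library does, so callers should hand them plain character values.

/* Parse an unsigned decimal prefix of `s`, skipping leading whitespace,
 * using only the classifiers defined above. */
static unsigned long parse_decimal(const char *s)
{
    unsigned long value = 0;

    while (isspace(*s)) {
        s++;
    }
    while (isdigit(*s)) {
        value = value * 10 + (unsigned long)(*s - '0');
        s++;
    }
    return value;
}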
diff --git a/kernel/include/drivers/blockdev.h b/kernel/include/drivers/blockdev.h
new file mode 100644
index 0000000..d1b3062
--- /dev/null
+++ b/kernel/include/drivers/blockdev.h
@@ -0,0 +1,99 @@
+/*
+ * FILE: blockdev.h
+ * DESCR: device management: block-oriented devices
+ */
+
+#pragma once
+
+#include "types.h"
+
+#include "drivers/dev.h"
+#include "util/list.h"
+
+#include "mm/mobj.h"
+#include "mm/page.h"
+
+#define BLOCK_SIZE PAGE_SIZE
+
+struct blockdev_ops;
+
+/*
+ * Represents a Weenix block device.
+ */
+typedef struct blockdev
+{
+ /* Fields that should be initialized by drivers: */
+ devid_t bd_id;
+
+ struct blockdev_ops *bd_ops;
+
+#ifdef NO
+ /* Fields that should be ignored by drivers: */
+ mobj_t bd_mobj;
+#endif
+
+ /* Link on the list of block-oriented devices */
+ list_link_t bd_link;
+} blockdev_t;
+
+typedef struct blockdev_ops
+{
+ /**
+ * Reads a block from the block device. This call will block.
+ *
+ * @param bdev the block device
+ * @param buf the memory into which to read the block (must be
+ * page-aligned)
+ * @param loc the number of the block to start reading from
+     * @param block_count the number of blocks to read
+ * @return 0 on success, -errno on failure
+ */
+ long (*read_block)(blockdev_t *bdev, char *buf, blocknum_t loc,
+ size_t block_count);
+
+ /**
+ * Writes a block to the block device. This call will block.
+ *
+ * @param bdev the block device
+ * @param buf the memory from which to write the block (must be
+ * page-aligned)
+ * @param loc the number of the block to start writing at
+     * @param block_count the number of blocks to write
+ * @return 0 on success, -errno on failure
+ */
+ long (*write_block)(blockdev_t *bdev, const char *buf, blocknum_t loc,
+ size_t block_count);
+} blockdev_ops_t;
+
+/**
+ * Initializes the block device subsystem.
+ */
+void blockdev_init(void);
+
+/**
+ * Registers a given block device.
+ *
+ * @param dev the block device to register
+ */
+long blockdev_register(blockdev_t *dev);
+
+/**
+ * Finds a block device with a given device id.
+ *
+ * @param id the device id of the block device to find
+ * @return the block device with the given id if it exists, or NULL if
+ * it cannot be found
+ */
+blockdev_t *blockdev_lookup(devid_t id);
+
+/**
+ * Cleans and frees all resident pages belonging to a given block
+ * device.
+ *
+ * @param dev the block device to flush
+ */
+void blockdev_flush_all(blockdev_t *dev);
+
+// restructure, perhaps, so that these don't have to be exported
+long blockdev_fill_pframe(mobj_t *mobj, pframe_t *pf);
+long blockdev_flush_pframe(mobj_t *mobj, pframe_t *pf); \ No newline at end of file
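Usage sketch (not part of this commit): a hypothetical driver fills in a blockdev_ops_t, picks a device id with MKDEVID from drivers/dev.h, and registers itself. The example_* names and the minor number 7 are invented for illustration.

#include "drivers/blockdev.h"
#include "drivers/dev.h"

/* A toy device that reads back zeroes and discards writes. */
static long example_read(blockdev_t *bdev, char *buf, blocknum_t loc,
                         size_t block_count)
{
    for (size_t i = 0; i < block_count * BLOCK_SIZE; i++)
    {
        buf[i] = 0;
    }
    return 0;
}

static long example_write(blockdev_t *bdev, const char *buf, blocknum_t loc,
                          size_t block_count)
{
    return 0; /* pretend the write succeeded */
}

static blockdev_ops_t example_ops = {.read_block = example_read,
                                     .write_block = example_write};
static blockdev_t example_bdev = {.bd_id = MKDEVID(DISK_MAJOR, 7),
                                  .bd_ops = &example_ops};

void example_blockdev_init(void)
{
    blockdev_register(&example_bdev);
}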
diff --git a/kernel/include/drivers/chardev.h b/kernel/include/drivers/chardev.h
new file mode 100644
index 0000000..f6083d8
--- /dev/null
+++ b/kernel/include/drivers/chardev.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include "drivers/dev.h"
+#include "util/list.h"
+
+struct vnode;
+struct pframe;
+
+struct chardev_ops;
+struct mobj;
+
+typedef struct chardev
+{
+ devid_t cd_id;
+ struct chardev_ops *cd_ops;
+ list_link_t cd_link;
+} chardev_t;
+
+typedef struct chardev_ops
+{
+ ssize_t (*read)(chardev_t *dev, size_t pos, void *buf, size_t count);
+
+ ssize_t (*write)(chardev_t *dev, size_t pos, const void *buf, size_t count);
+
+ long (*mmap)(struct vnode *file, struct mobj **ret);
+
+ long (*fill_pframe)(struct vnode *file, struct pframe *pf);
+
+ long (*flush_pframe)(struct vnode *file, struct pframe *pf);
+} chardev_ops_t;
+
+/**
+ * Initializes the byte device subsystem.
+ */
+void chardev_init(void);
+
+/**
+ * Registers the given byte device.
+ *
+ * @param dev the byte device to register
+ */
+long chardev_register(chardev_t *dev);
+
+/**
+ * Finds a byte device with a given device id.
+ *
+ * @param id the device id of the byte device to find
+ * @return the byte device with the given id if it exists, or NULL if
+ * it cannot be found
+ */
+chardev_t *chardev_lookup(devid_t id);
diff --git a/kernel/include/drivers/cmos.h b/kernel/include/drivers/cmos.h
new file mode 100644
index 0000000..bbbc282
--- /dev/null
+++ b/kernel/include/drivers/cmos.h
@@ -0,0 +1,40 @@
+#ifndef CMOS_H
+#define CMOS_H
+
+#include "main/io.h"
+
+// See: https://wiki.osdev.org/CMOS
+#define CMOS_ADDR 0x70
+#define CMOS_DATA 0x71
+
+#define CMOS_REG_SECOND 0x00
+#define CMOS_REG_MINUTE 0x02
+#define CMOS_REG_HOUR 0x04
+#define CMOS_REG_DAY 0x07
+#define CMOS_REG_MONTH 0x08
+#define CMOS_REG_YEAR 0x09
+
+// We're on a modern computer. It'll have a century register.
+#define CMOS_REG_CENTURY 0x32
+#define CMOS_REG_STAT_A 0x0A
+#define CMOS_REG_STAT_B 0x0B
+
+typedef struct rtc_time_t
+{
+ unsigned char second;
+ unsigned char minute;
+ unsigned char hour;
+ unsigned char day;
+ unsigned char month;
+ unsigned int year;
+
+ // Internal use ONLY
+ unsigned int __century;
+} rtc_time_t;
+
+unsigned char cmos_read_register(int reg);
+
+/* Get the time from the CMOS RTC */
+rtc_time_t rtc_get_time();
+
+#endif \ No newline at end of file
diff --git a/kernel/include/drivers/dev.h b/kernel/include/drivers/dev.h
new file mode 100644
index 0000000..883dcba
--- /dev/null
+++ b/kernel/include/drivers/dev.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include "types.h"
+
+/*
+ * A Weenix "device identifier" is the concatenation of:
+ * - a "driver number" or "device type" (major number)
+ * - a "device number" (minor number)
+ *
+ * The device identifiers for block devices and character devices are
+ * independent. That is, you could have both a block device and a char device
+ * with major 3, minor 5 (for example). They would be distinct.
+ *
+ * Weenix's device number allocation/assignment scheme is as follows:
+ *
+ * - major 0 (byte or block), minor 0: reserved as an analogue of NULL
+ * for device id's
+ *
+ * - char major 1: Memory devices (mem)
+ * - minor 0: /dev/null The null device
+ * - minor 1: /dev/zero The zero device
+ *
+ * - char major 2: TTY devices (tty)
+ * - minor 0: /dev/tty0 First TTY device
+ * - minor 1: /dev/tty1 Second TTY device
+ * - and so on...
+ *
+ * - block major 1: Disk devices
+ * - minor 0: first disk device
+ * - minor 1: second disk device
+ * - and so on...
+ */
+
+#define MINOR_BITS 8
+#define MINOR_MASK ((1U << MINOR_BITS) - 1)
+#define MAJOR(devid) ((unsigned)((devid) >> MINOR_BITS))
+#define MINOR(devid) ((unsigned)((devid)&MINOR_MASK))
+#define MKDEVID(major, minor) ((devid_t)(((major) << MINOR_BITS) | (minor)))
+
+/* convenience definition: the NULL device id: */
+#define NULL_DEVID (MKDEVID(0, 0))
+#define MEM_NULL_DEVID (MKDEVID(1, 0))
+#define MEM_ZERO_DEVID (MKDEVID(1, 1))
+
+#define DISK_MAJOR 1
+
+#define MEM_MAJOR 1
+#define MEM_NULL_MINOR 0
+#define MEM_ZERO_MINOR 1
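A quick illustration of the id-packing macros above; the values follow directly from the definitions.

devid_t id = MKDEVID(2, 1); /* TTY major 2, minor 1, i.e. /dev/tty1 */
/* With MINOR_BITS == 8: id == 0x0201, MAJOR(id) == 2, MINOR(id) == 1,
 * and id != NULL_DEVID. */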
diff --git a/kernel/include/drivers/disk/ahci.h b/kernel/include/drivers/disk/ahci.h
new file mode 100644
index 0000000..1c7acf6
--- /dev/null
+++ b/kernel/include/drivers/disk/ahci.h
@@ -0,0 +1,325 @@
+#pragma once
+
+#include <types.h>
+
+/* Documents referenced:
+ * ATA Command Set 4:
+ * http://www.t13.org/Documents/UploadedDocuments/docs2016/di529r14-ATAATAPI_Command_Set_-_4.pdf
+ * AHCI SATA 1.3.1:
+ * https://www.intel.com/content/www/us/en/io/serial-ata/serial-ata-ahci-spec-rev1-3-1.html
+ * Serial ATA Revision 2.6:
+ * http://read.pudn.com/downloads157/doc/project/697017/SerialATA_Revision_2_6_Gold.pdf
+ */
+
+/* Macros for working with physical region descriptors. */
+#define AHCI_PRDT_DBC_WIDTH 22
+#define AHCI_MAX_PRDT_SIZE (1 << AHCI_PRDT_DBC_WIDTH)
+#define ATA_SECTOR_SIZE 512
+#define AHCI_SECTORS_PER_PRDT (AHCI_MAX_PRDT_SIZE / ATA_SECTOR_SIZE)
+#define AHCI_MAX_SECTORS_PER_COMMAND \
+ (1 << 16) /* FLAG: Where does this come from? */
+#define ACHI_NUM_PRDTS_PER_COMMAND_TABLE \
+ (AHCI_MAX_SECTORS_PER_COMMAND / AHCI_SECTORS_PER_PRDT)
+
+#define AHCI_MAX_NUM_PORTS 32
+#define AHCI_COMMAND_HEADERS_PER_LIST 32
+
+#define AHCI_COMMAND_LIST_ARRAY_BASE(ahci_base) (ahci_base)
+#define AHCI_COMMAND_LIST_ARRAY_SIZE \
+ (AHCI_MAX_NUM_PORTS * sizeof(command_list_t))
+
+#define AHCI_RECEIVED_FIS_ARRAY_BASE(ahci_base) \
+ ((ahci_base) + AHCI_COMMAND_LIST_ARRAY_SIZE)
+#define AHCI_RECEIVED_FIS_ARRAY_SIZE \
+ (AHCI_MAX_NUM_PORTS * sizeof(received_fis_t))
+
+#define AHCI_COMMAND_TABLE_ARRAY_BASE(ahci_base) \
+ (AHCI_RECEIVED_FIS_ARRAY_BASE(ahci_base) + AHCI_RECEIVED_FIS_ARRAY_SIZE)
+#define AHCI_COMMAND_TABLE_ARRAY_SIZE \
+ (AHCI_MAX_NUM_PORTS * AHCI_COMMAND_HEADERS_PER_LIST * \
+ sizeof(command_table_t))
+
+#define AHCI_SIZE \
+ (AHCI_COMMAND_LIST_ARRAY_SIZE + AHCI_RECEIVED_FIS_ARRAY_SIZE + \
+ AHCI_COMMAND_TABLE_ARRAY_SIZE)
+#define AHCI_SIZE_PAGES ((uintptr_t)PAGE_ALIGN_UP(AHCI_SIZE) / PAGE_SIZE)
+
+#define ALIGN_DOWN_POW_2(x, align) ((x) & -(align))
+#define ALIGN_UP_POW_2(x, align) (ALIGN_DOWN_POW_2((x)-1, align) + (align))
+
+/*=============================
+ * Frame Information Structures
+ *============================*/
+
+/* fis_type_t - FIS types are recognized by an ID.
+ * For more info, see section 10.3 (FIS Types) of Serial ATA Revision 2.6. */
+typedef enum fis_type
+{
+ fis_type_h2d_register = 0x27
+} packed fis_type_t;
+
+/* Command codes used when forming the host-to-device FIS (see: ATA Command Set
+ * 4). The first two are standard commands. The second two are for NCQ commands.
+ */
+#define ATA_READ_DMA_EXT_COMMAND 0x25
+#define ATA_WRITE_DMA_EXT_COMMAND 0x35
+#define ATA_READ_FPDMA_QUEUED_COMMAND 0x60
+#define ATA_WRITE_FPDMA_QUEUED_COMMAND 0x61
+
+/* 8-bit device setting for host-to-device FIS.
+ * Bit 6 is specified as either obsolete or "shall be set to one" for all
+ * commands used in Weenix. So, we can safely just default to this value for all
+ * commands. More info in sections 7.20, 7.21, 7.55, and 7.57 of ATA Command
+ * Set 4. */
+#define ATA_DEVICE_LBA_MODE 0x40
+
+/* h2d_register_fis - Register Host to Device FIS.
+ * This is the only FIS used in Weenix.
+ */
+typedef struct h2d_register_fis
+{
+ uint8_t fis_type; /* Must be set to fis_type_h2d_register. */
+ uint8_t : 7;
+ uint8_t c : 1; /* When set, indicates that this is an FIS for a command.
+ * This is always the case in Weenix. */
+ uint8_t command; /* See command codes further up. */
+ uint8_t
+ features; /* For regular read/write, no use.
+ * For NCQ commands, features and features_exp form the lower
+ * and upper 8 bits of sector count, respectively. */
+ uint32_t lba : 24; /* lba and lba_exp form the lower and upper 24 bits of
+ the first logical block address, respectively. */
+ uint8_t device; /* Device register.
+ * For Weenix's purposes, this should always be set to
+ * ATA_DEVICE_LBA_MODE. */
+ uint32_t lba_exp : 24;
+ uint8_t features_exp;
+ uint16_t sector_count; /* For regular read/write, specifies number of
+ * sectors to read/write.
+ * For NCQ commands, bits 7:3 specify NCQ tag. */
+ uint16_t : 16;
+ uint32_t : 32;
+} packed h2d_register_fis_t;
+
+/*========================
+ * Command List Structures
+ *=======================*/
+
+/* command_fis_t - Represents a software-constructed FIS stored in a
+ * command_table_t. */
+typedef union command_fis {
+ h2d_register_fis_t h2d_register_fis;
+ /* Must occupy 64 bytes in its corresponding command_table_t.
+ * Recall that unions conform to the size of the largest member. */
+ struct
+ {
+ uint8_t size[64];
+ };
+} packed command_fis_t;
+
+/* received_fis_t - Per-port structure that contains information on received
+ * FISes. More info in section 4.2.1 of the 1.3.1 spec. */
+typedef struct received_fis
+{
+ uint8_t _omit[256]; /* Weenix does not make use of any received FIS from the
+ device. */
+} packed received_fis_t;
+
+/* prd_t - Physical Region Descriptor.
+ * Represents an entry in the PRD table in a command table
+ * (command_table_t->prdt). Points to a chunk of system memory for the device to
+ * use according to whatever command it is executing.
+ */
+typedef struct prd
+{
+ uint64_t dba; /* Data Base Address. */
+ uint32_t : 32;
+ uint32_t
+ dbc : 22; /* Data Byte Count: Indicates length of data block in bytes,
+ * but starts counting from 0, i.e. stores (length - 1).
+ * Ex: Length 2 is 0x1. Length 4 is 0x3. And so on... The
+ * length must be even. Due to
+ * counting from 0, this means least-significant bit MUST
+ * be 1. Max length is 4MB (all bits set). */
+ uint16_t : 9;
+ uint8_t i : 1; /* Interrupt on Completion: When set, then upon processing
+ * all PRDs in the current FIS, the port will try to generate
+ * an interrupt by setting PxIS.DPS.
+ *
+ * Whether or not this actually behaves as expected, or ever
+ * is even used, is unclear.
+ */
+} packed prd_t;
+
+/* command_table_t - Structure detailing a command and associated data / memory.
+ * More info in section 4.2.3 of SATA AHCI 1.3.1.
+ */
+typedef struct command_table
+{
+ command_fis_t
+ cfis; /* Command FIS: The actual software constructed command. */
+ uint8_t _omit[64];
+ prd_t prdt[ACHI_NUM_PRDTS_PER_COMMAND_TABLE]; /* Physical Region Descriptor
+ * Table: A list of,
+ * theoretically, up to 2^16
+ * entries of PRDs.
+ * Number of actual usable
+ * entries is indicated by
+ * command_header_t->prdtl. */
+} packed command_table_t;
+
+/* command_header_t - Structure detailing command details. Stored in a
+ * command_list_t. More info in section 4.2.2 of the SATA AHCI 1.3.1 spec. */
+typedef struct command_header
+{
+ uint8_t cfl : 5; /* Command FIS length in DW (4 bytes). Max value is 0x10
+ (16). */
+ uint8_t : 1;
+ uint8_t write : 1; /* Write: Set indicates write, clear indicates read. */
+ uint16_t : 9;
+ uint16_t prdtl; /* Physical Region Descriptor Table Length: Number of PRD
+ entries. */
+ uint32_t : 32;
+ uint64_t ctba; /* Command Table Descriptor Base Address: Pointer to the
+ command table. */
+ uint64_t : 64;
+ uint64_t : 64;
+} packed command_header_t;
+
+/* command_list_t - Per-port command list.
+ * More info in section 4.2.2 of the SATA AHCI 1.3.1 spec.
+ * See also: Figure 5: Port System Memory Structures. */
+typedef struct command_list
+{
+ command_header_t command_headers[AHCI_COMMAND_HEADERS_PER_LIST];
+} packed command_list_t;
+
+/*=================
+ * Host Bus Adapter
+ *================*/
+
+/* px_interrupt_status - Per-port bitmap indicating that a corresponding
+ * interrupt has occurred on the port. Observe that this is a union, making
+ * initialization a little easier. */
+typedef union px_interrupt_status {
+ struct
+ {
+ uint8_t dhrs : 1; /* Interrupt requested by a device-to-host FIS.
+ * Used by normal read/write commands, see 5.6.2
+ * in 1.3.1. */
+ uint8_t : 2;
+ uint8_t
+ sdbs : 1; /* Interrupt requested by a set device bits FIS.
+ * Used by NCQ read/write commands, see 5.6.4 in 1.3.1. */
+ uint8_t : 1;
+ uint8_t dps : 1; /* Interrupt set upon completing an FIS that requested
+ * an interrupt upon completion.
+ * Currently doesn't seem to be working... */
+ uint32_t : 26;
+ } bits;
+ uint32_t value;
+} packed px_interrupt_status_t;
+
+/* Observe that, to clear interrupt status, must set to 1. */
+static px_interrupt_status_t px_interrupt_status_clear = {.value =
+ (uint32_t)-1};
+
+/* Port x Interrupt Enable - Bitwise register controlling generation of various
+ * interrupts. */
+typedef union px_interrupt_enable {
+ uint32_t value;
+} packed px_interrupt_enable_t;
+
+/* Weenix uses this to initialize all ports to enable all interrupts by default.
+ */
+static px_interrupt_enable_t px_interrupt_enable_all_enabled = {
+ .value = (uint32_t)-1};
+
+/* hba_ghc_t - Generic Host Control: Information and control registers
+ * pertaining to the entire HBA. More info in section 3.1 of 1.3.1.
+ */
+typedef struct hba_ghc
+{
+ struct
+ {
+ uint32_t : 30;
+ uint8_t sncq : 1; /* Supports Native Command Queueing. */
+ uint8_t : 1;
+ } packed cap;
+ struct
+ {
+ uint8_t : 1;
+ uint8_t ie : 1; /* Interrupt Enable: Enables/disables interrupts from
+ HBA. */
+ uint32_t : 29;
+ uint8_t ae : 1; /* AHCI Enable: Indicates software adheres to AHCI
+ specification. */
+ } packed ghc;
+ uint32_t is; /* Interrupt Status: If bit x is set, then port x has a pending
+ interrupt. */
+ uint32_t pi; /* Ports Implemented: If bit x is set, then port x is available
+ for use. */
+ uint32_t _omit[7];
+} packed hba_ghc_t;
+
+/* Signature for SATA devices. Compare this against hba_port_t->px_sig to
+ * determine if a SATA device is sitting behind a given port. */
+#define SATA_SIG_ATA 0x00000101
+
+/* hba_port - A per-port structure storing port information.
+ * Each port represents a device that the HBA is communicating with (e.g. a
+ * SATA device!). Details not relevant to Weenix have been omitted. More info in
+ * section 3.3 of the SATA AHCI 1.3.1 spec.
+ */
+typedef struct hba_port
+{
+ uint64_t px_clb; /* 1K-byte aligned base physical address of this port's
+ * command list. This is a pointer to a command_list_t. */
+ uint64_t px_fb; /* Base physical address for received FISes.
+ * Weenix never uses received FIS, but we allocate and set
+ * up memory to make the HBA happy. */
+ px_interrupt_status_t px_is; /* Interrupt Status. */
+ px_interrupt_enable_t px_ie; /* Interrupt Enable. */
+ struct
+ {
+ uint8_t st : 1; /* Start: Allows the HBA to process the command list. */
+ uint8_t : 3;
+ uint8_t fre : 1; /* FIS Receive Enable: Allows HBA to post received
+ FISes in px_fb. */
+ uint16_t : 9;
+ uint8_t fr : 1; /* FIS Receive Running: Read-only indicating if FIS
+ Receive DMA is running. */
+ uint8_t cr : 1; /* Command List Running: Read-only indicating if command
+ list DMA is running. */
+ uint16_t : 16;
+ } packed px_cmd; /* Port Command and Status. */
+ uint64_t : 64;
+ uint32_t px_sig; /* Signature: Contains attached device's signature.
+ * SATA devices should have signature SATA_SIG_ATA, defined
+ * above. */
+ uint64_t : 64;
+ uint32_t px_serr; /* SATA Error: Unclear how Weenix is actually making use
+ of this register. */
+ uint32_t px_sact; /* SATA Active: Used for NCQ.
+ * Each bit corresponds to TAG and command slot of an NCQ
+ * command. Must be set by software before issuing a NCQ
+ * for a command slot.
+ */
+ uint32_t px_ci; /* Commands Issued: Software sets bit x if a command x is
+ * ready to be sent. Each bit corresponds to a command slot.
+ * HBA clears bit upon completing a command.
+ */
+ uint32_t _omit[17];
+} packed hba_port_t;
+
+/* Host Bus Adapter - Control block for the device that actually interfaces
+ * between the OS and the SATA disk device. For more info, see section 3 of
+ * the 1.3.1 spec.
+ */
+typedef struct hba
+{
+ hba_ghc_t ghc; /* Generic Host Control. */
+ uint32_t _omit[53];
+ hba_port_t ports[32]; /* Static array of port descriptors. */
+} packed hba_t;
+
+#define PORT_INDEX(hba, port) ((port) - (hba)->ports)
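A worked reading of the sizing macros near the top of this file, derived directly from the definitions above.

/* AHCI_MAX_PRDT_SIZE               = 1 << 22        = 4 MB per PRD entry
 * AHCI_SECTORS_PER_PRDT            = 4 MB / 512     = 8192 sectors per PRD
 * AHCI_MAX_SECTORS_PER_COMMAND     = 1 << 16        = 65536 sectors
 * ACHI_NUM_PRDTS_PER_COMMAND_TABLE = 65536 / 8192   = 8 PRD entries per table
 * and, since prd_t->dbc counts from zero, a full 4 MB entry stores
 * dbc == AHCI_MAX_PRDT_SIZE - 1 (all 22 bits set). */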
diff --git a/kernel/include/drivers/disk/sata.h b/kernel/include/drivers/disk/sata.h
new file mode 100644
index 0000000..6bdb573
--- /dev/null
+++ b/kernel/include/drivers/disk/sata.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#define SATA_BLOCK_SIZE 4096
+
+#include <drivers/blockdev.h>
+#include <drivers/disk/ahci.h>
+
+void sata_init();
+
+typedef struct ata_disk
+{
+ hba_port_t *port;
+ blockdev_t bdev;
+} ata_disk_t;
diff --git a/kernel/include/drivers/keyboard.h b/kernel/include/drivers/keyboard.h
new file mode 100644
index 0000000..8ac3762
--- /dev/null
+++ b/kernel/include/drivers/keyboard.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <types.h>
+
+#define BS 0x08
+#define DEL 0x7F
+#define ESC 0x1B
+#define LF 0x0A
+#define CR 0x0D
+#define SPACE 0x20
+
+// CTRL-D
+#define EOT 0x04
+
+// CTRL-C
+#define ETX 0x03
+
+/* Special stuff for scrolling (note that these only work when ctrl is held) */
+#define SCROLL_UP 0x0e
+#define SCROLL_DOWN 0x1c
+#define SCROLL_UP_PAGE 0x0f
+#define SCROLL_DOWN_PAGE 0x1d
+
+// pretty arbitrarily chosen, just the first extended ASCII code point and on...
+#define F1 ((uint8_t)128)
+#define F2 ((uint8_t)(F1 + 1))
+#define F3 ((uint8_t)(F1 + 2))
+#define F4 ((uint8_t)(F1 + 3))
+#define F5 ((uint8_t)(F1 + 4))
+#define F6 ((uint8_t)(F1 + 5))
+#define F7 ((uint8_t)(F1 + 6))
+#define F8 ((uint8_t)(F1 + 7))
+#define F9 ((uint8_t)(F1 + 8))
+#define F10 ((uint8_t)(F1 + 9))
+#define F11 ((uint8_t)(F1 + 10))
+#define F12 ((uint8_t)(F1 + 11))
+
+typedef void (*keyboard_char_handler_t)(uint8_t);
+
+/**
+ * Initializes the keyboard subsystem.
+ */
+void keyboard_init(keyboard_char_handler_t handler);
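Usage sketch (names invented for illustration): a caller passes keyboard_init a function that receives each decoded character.

static void example_handle_char(uint8_t c)
{
    if (c == ETX)
    {
        /* CTRL-C arrived; a real handler would forward this to the tty layer */
    }
}

void example_keyboard_setup(void)
{
    keyboard_init(example_handle_char);
}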
diff --git a/kernel/include/drivers/memdevs.h b/kernel/include/drivers/memdevs.h
new file mode 100644
index 0000000..420c5d0
--- /dev/null
+++ b/kernel/include/drivers/memdevs.h
@@ -0,0 +1,6 @@
+#pragma once
+
+/**
+ * Initializes the memdevs subsystem.
+ */
+void memdevs_init(void);
diff --git a/kernel/include/drivers/pcie.h b/kernel/include/drivers/pcie.h
new file mode 100644
index 0000000..83d182f
--- /dev/null
+++ b/kernel/include/drivers/pcie.h
@@ -0,0 +1,112 @@
+#pragma once
+
+#include <util/list.h>
+
+#define PCI_NUM_BUSES 256
+#define PCI_NUM_DEVICES_PER_BUS 32
+#define PCI_NUM_FUNCTIONS_PER_DEVICE 8
+#define PCI_DEVICE_FUNCTION_SIZE 4096
+#define PCI_CAPABILITY_PTR_MASK (0b11111100)
+#define PCI_MSI_CAPABILITY_ID 0x5
+
+// Intel Vol 3A 10.11.1
+//#define MSI_BASE_ADDRESS 0x0FEE0000
+#define MSI_ADDRESS_FOR(destination) \
+ ((uint32_t)((0x0FEE << 20) | ((destination) << 12) | (0b1100)))
+#define MSI_DATA_FOR(vector) ((uint16_t)(0b00000001 << 8) | (vector))
+
+typedef struct pci_capability
+{
+ uint8_t id;
+ uint8_t next_cap;
+ uint16_t control;
+} packed pci_capability_t;
+
+typedef struct msi_capability
+{
+ uint8_t id;
+ uint8_t next_cap;
+ struct
+ {
+ uint8_t msie : 1; // MSI Enable
+ uint8_t mmc : 3; // Multiple Message Capable
+ uint8_t mme : 3; // Multiple Message Enable
+ uint8_t c64 : 1; // 64 Bit Address Capable
+ uint8_t _reserved;
+ } control;
+ union {
+ struct
+ {
+ uint32_t addr;
+ uint16_t data;
+ } ad32;
+ struct
+ {
+ uint64_t addr;
+ uint16_t data;
+ } ad64;
+ } address_data;
+} packed msi_capability_t;
+
+typedef union pcie_device {
+ struct
+ {
+ char data[PCI_DEVICE_FUNCTION_SIZE];
+ } raw;
+ struct
+ {
+ uint16_t vendor_id;
+ uint16_t device_id;
+ uint16_t command;
+ uint16_t status;
+ uint8_t revision_id;
+ uint8_t prog_if;
+ uint8_t subclass;
+ uint8_t class;
+ uint8_t cache_line_size;
+ uint8_t latency_type;
+ uint8_t header_type;
+ uint8_t bist;
+ uint32_t bar[6];
+ uint32_t cardbus_cis_pointer;
+ uint16_t subsystem_vendor_id;
+ uint16_t subsystem_id;
+ uint32_t expansion_rom_base_addr;
+ uint8_t capabilities_ptr;
+ uint8_t _reserved1[7];
+ uint8_t interrupt_line;
+ uint8_t interrupt_pin;
+ uint8_t min_grant;
+ uint8_t max_latency;
+ pci_capability_t pm_capability;
+ uint16_t pmcsr;
+ uint8_t bse;
+ uint8_t data;
+ pci_capability_t msi_capability;
+ uint64_t message_address;
+ uint16_t message_data;
+ uint8_t _reserved2[2];
+ pci_capability_t pe_capability;
+ uint32_t pcie_device_capabilities;
+ uint16_t device_control;
+ uint16_t device_status;
+ uint32_t pcie_link_capabilities;
+ uint16_t link_control;
+ uint16_t link_status;
+ } standard;
+} packed pcie_device_t;
+
+#define PCI_LOOKUP_WILDCARD 0xff
+
+typedef struct pcie_device_wrapper
+{
+ uint8_t class;
+ uint8_t subclass;
+ uint8_t interface;
+ pcie_device_t *dev;
+ list_link_t link;
+} pcie_device_wrapper_t;
+
+void pci_init(void);
+
+pcie_device_t *pcie_lookup(uint8_t class, uint8_t subclass, uint8_t interface);
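Plugging numbers into the MSI macros above, for APIC destination 0 and interrupt vector 0x21:

/* MSI_ADDRESS_FOR(0)  == 0xFEE0000C  (0xFEE window, destination 0, low control bits 0b1100)
 * MSI_DATA_FOR(0x21)  == 0x121       (bit 8 set as in the macro, vector in the low byte) */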
diff --git a/kernel/include/drivers/screen.h b/kernel/include/drivers/screen.h
new file mode 100644
index 0000000..97f7e2a
--- /dev/null
+++ b/kernel/include/drivers/screen.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include "types.h"
+
+#ifdef __VGABUF___
+
+#define SCREEN_CHARACTER_WIDTH 9
+#define SCREEN_CHARACTER_HEIGHT 15
+
+typedef union color {
+ struct
+ {
+ uint8_t blue;
+ uint8_t green;
+ uint8_t red;
+ uint8_t alpha;
+ } channels;
+ uint32_t value;
+} packed color_t;
+
+void screen_init();
+
+size_t screen_get_width();
+
+size_t screen_get_height();
+
+size_t screen_get_character_width();
+
+size_t screen_get_character_height();
+
+void screen_draw_string(size_t x, size_t y, const char *s, size_t len,
+ color_t color);
+
+void screen_fill(color_t color);
+
+void screen_fill_rect(size_t x, size_t y, size_t width, size_t height,
+ color_t color);
+
+void screen_draw_rect(size_t x, size_t y, size_t width, size_t height,
+ color_t color);
+
+void screen_copy_rect(size_t fromx, size_t fromy, size_t width, size_t height,
+ size_t tox, size_t toy);
+
+void screen_flush();
+
+void screen_print_shutdown();
+
+#else
+
+#define VGA_WIDTH ((uint16_t)80)
+#define VGA_HEIGHT ((uint16_t)25)
+#define VGA_LINE_SIZE ((size_t)(VGA_WIDTH * sizeof(uint16_t)))
+#define VGA_AREA ((uint16_t)(VGA_WIDTH * VGA_HEIGHT))
+#define VGA_BUFFER_SIZE ((uint16_t)(VGA_WIDTH * VGA_HEIGHT))
+#define VGA_DEFAULT_ATTRIB 0xF
+
+void vga_init();
+
+void vga_write_char_at(size_t row, size_t col, uint16_t v);
+
+void vga_set_cursor(size_t row, size_t col);
+
+void vga_clear_screen();
+
+void screen_print_shutdown();
+
+void vga_enable_cursor();
+
+void vga_disable_cursor();
+
+#endif \ No newline at end of file
diff --git a/kernel/include/drivers/tty/ldisc.h b/kernel/include/drivers/tty/ldisc.h
new file mode 100644
index 0000000..920c816
--- /dev/null
+++ b/kernel/include/drivers/tty/ldisc.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include "types.h"
+#include <proc/kmutex.h>
+
+#define LDISC_BUFFER_SIZE 128
+
+/**
+ * The line discipline is implemented as a circular buffer containing two
+ * sections: cooked and raw. These sections are tracked by three indices:
+ * ldisc_cooked, ldisc_tail, and ldisc_head.
+ *
+ * New characters (via ldisc_key_pressed) are put at the head position (and the
+ * head is incremented). If a newline is received, cooked is moved up to the head.
+ * Characters are read from tail up until cooked, and the tail is updated
+ * to equal cooked.
+ *
+ * The cooked portion (ready for reading) runs from ldisc_tail (inclusive) to
+ * ldisc_cooked (exclusive). The raw portion (subject to editing) runs from
+ * ldisc_cooked (inclusive) to ldisc_head (exclusive).
+ *
+ * e.g.
+ * [..........t........c...h.......]
+ * (cooked) ^^^^^^^^^
+ * ^^^^ (raw)
+ *
+ * Bear in mind that the buffer is circular, so another possible configuration
+ * might be
+ *
+ * [....h............t......c......]
+ * (cooked) ^^^^^^^
+ * ^^^^ ^^^^^^^ (raw)
+ *
+ * When incrementing the indices, make sure that you take the circularity of
+ * the buffer into account! (Hint: using the LDISC_BUFFER_SIZE macro will be helpful.)
+ *
+ * The field ldisc_full is used to indicate when the circular buffer has been
+ * completely filled. This is necessary because there are two possible states
+ * in which cooked == tail == head:
+ *
+ * 1) The buffer is empty, or
+ *
+ * 2) The buffer is full: head has wrapped around and is equal to tail.
+ *
+ * ldisc_full is used to differentiate between these two states.
+ */
+typedef struct ldisc
+{
+ size_t ldisc_cooked; // Cooked is the index one past the most recent '\n' in the buffer
+ size_t ldisc_tail; // Tail is the index from which characters are read by processes
+ size_t ldisc_head; // Head is the index at which new characters are placed
+ char ldisc_full; // Full identifies if the buffer is full
+ // 1 -> full
+ // 0 -> not full
+
+ ktqueue_t ldisc_read_queue; // Queue for threads waiting for data to be read
+ char ldisc_buffer[LDISC_BUFFER_SIZE];
+} ldisc_t;
+
+void ldisc_init(ldisc_t *ldisc);
+
+long ldisc_wait_read(ldisc_t *ldisc);
+
+size_t ldisc_read(ldisc_t *ldisc, char *buf, size_t count);
+
+void ldisc_key_pressed(ldisc_t *ldisc, char c);
+
+size_t ldisc_get_current_line_raw(ldisc_t *ldisc, char *s); \ No newline at end of file
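A sketch of the circular-index arithmetic the comment above describes. This is illustrative only and is not the line-discipline implementation itself; the example_put_char name is invented.

static void example_put_char(ldisc_t *ldisc, char c)
{
    if (ldisc->ldisc_full)
    {
        return; /* no room left in the buffer */
    }
    ldisc->ldisc_buffer[ldisc->ldisc_head] = c;
    ldisc->ldisc_head = (ldisc->ldisc_head + 1) % LDISC_BUFFER_SIZE;
    if (ldisc->ldisc_head == ldisc->ldisc_tail)
    {
        ldisc->ldisc_full = 1; /* head has wrapped around onto tail */
    }
    if (c == '\n')
    {
        ldisc->ldisc_cooked = ldisc->ldisc_head; /* line is ready for readers */
    }
}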
diff --git a/kernel/include/drivers/tty/tty.h b/kernel/include/drivers/tty/tty.h
new file mode 100644
index 0000000..ec22b68
--- /dev/null
+++ b/kernel/include/drivers/tty/tty.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "drivers/chardev.h"
+#include "ldisc.h"
+#include "vterminal.h"
+
+#define TTY_MAJOR 2
+#define cd_to_tty(bd) \
+ CONTAINER_OF((bd), tty_t, tty_cdev) // Should this be cd, for chardev?
+
+typedef struct tty
+{
+ vterminal_t tty_vterminal; // the virtual terminal, where the characters will be displayed
+ ldisc_t tty_ldisc; // the line discipline for the tty
+ chardev_t tty_cdev; // the super struct for the tty
+ kmutex_t tty_read_mutex;
+ kmutex_t tty_write_mutex;
+} tty_t;
+
+void tty_init(void);
+
diff --git a/kernel/include/drivers/tty/vterminal.h b/kernel/include/drivers/tty/vterminal.h
new file mode 100644
index 0000000..99123a7
--- /dev/null
+++ b/kernel/include/drivers/tty/vterminal.h
@@ -0,0 +1,249 @@
+#pragma once
+
+#include <drivers/screen.h>
+#include <mm/page.h>
+#include <types.h>
+#include <util/list.h>
+//
+//
+//#define VGA_WIDTH ((uint16_t) 80)
+//#define VGA_HEIGHT ((uint16_t) 25)
+//#define VGA_AREA ((uint16_t) (VGA_WIDTH * VGA_HEIGHT))
+//#define VGA_BUFFER_COUNT ((uint16_t) (1024 * 16))
+//#define VGA_BUFFER_SIZE ((uint16_t) (VGA_BUFFER_COUNT * sizeof(short)))
+//
+//
+//#define SCREEN_GET_FOREGROUND(x) ((uint8_t) (x & 0b00001111))
+//#define SCREEN_GET_BACKGROUND(x) ((uint8_t) (x & 0b01110000))
+//#define SCREEN_MAKE_COLOR(b, f) ((uint8_t) (b << 4) | f)
+//
+//#define SCREEN_DEFAULT_FOREGROUND ((uint8_t) 0xF)
+//#define SCREEN_DEFAULT_BACKGROUND ((uint8_t) 0x0)
+//#define SCREEN_DEFAULT_COLOR SCREEN_MAKE_COLOR(SCREEN_DEFAULT_BACKGROUND,
+//SCREEN_DEFAULT_FOREGROUND)
+
+// typedef struct screen {
+// uint16_t screen_cursor_pos;
+// uint16_t screen_buffer_pos;
+// uint16_t screen_visible_pos;
+// uint8_t screen_current_color;
+//
+// uint16_t *screen_buffer;
+// uint16_t screen_inactive_buffer[VGA_BUFFER_COUNT];
+//} screen_t;
+
+// typedef struct vterminal_char {
+// char c;
+//// color_t foreground;
+//// color_t background;
+//} vterminal_char_t;
+
+#ifdef __VGABUF___
+
+#define VT_PAGES_PER_HISTORY_CHUNK 1
+#define VT_CHARS_PER_HISTORY_CHUNK \
+ (VT_PAGES_PER_HISTORY_CHUNK * PAGE_SIZE - sizeof(list_link_t))
+
+typedef struct vterminal_history_chunk
+{
+ char chars[VT_CHARS_PER_HISTORY_CHUNK];
+ list_link_t link;
+} vterminal_history_chunk_t;
+
+typedef struct vterminal
+{
+ size_t vt_width;
+ size_t vt_height;
+
+ size_t vt_len;
+ list_t vt_history_chunks;
+
+ size_t *vt_line_positions;
+
+ off_t vt_line_offset;
+
+ size_t *vt_line_widths;
+
+ size_t vt_input_pos;
+ size_t vt_cursor_pos;
+} vterminal_t;
+
+void vterminal_init(vterminal_t *vt);
+
+void vterminal_make_active(vterminal_t *vt);
+
+void vterminal_scroll(vterminal_t *vt, long count);
+
+void vterminal_scroll_to_bottom(vterminal_t *t);
+
+void vterminal_clear(vterminal_t *vt);
+
+size_t vterminal_write(vterminal_t *vt, const char *buf, size_t len);
+
+void vterminal_key_pressed(vterminal_t *vt);
+
+#elif 0
+
+struct vt_cursor
+{
+ int y;
+ int x;
+};
+
+struct vt_attributes
+{
+ int underline : 1;
+ int bold : 1;
+ int blink : 1;
+ uint16_t fg;
+ uint16_t bg;
+};
+
+struct vt_char
+{
+ int codepoint;
+ struct vt_attributes attribs;
+};
+
+struct vt_buffer
+{
+ struct vt_char screen[VGA_HEIGHT][VGA_WIDTH];
+ size_t input_position;
+};
+
+typedef struct vterminal
+{
+ size_t height;
+ size_t width;
+ struct vt_cursor cursor;
+ struct vt_cursor saved_cursor;
+ struct vt_attributes current_attribs;
+ struct vt_buffer *active_buffer;
+ struct vt_buffer pri_buffer;
+ struct vt_buffer alt_buffer;
+} vterminal_t;
+
+void vterminal_init(vterminal_t *vt);
+
+void vterminal_make_active(vterminal_t *vt);
+
+void vterminal_scroll(vterminal_t *vt, long count);
+
+void vterminal_clear(vterminal_t *vt);
+
+size_t vterminal_write(vterminal_t *vt, const char *buf, size_t len);
+
+size_t vterminal_echo_input(vterminal_t *vt, const char *buf, size_t len);
+
+void vterminal_key_pressed(vterminal_t *vt);
+
+void vterminal_scroll_to_bottom(vterminal_t *vt);
+
+#endif
+
+#define VTC_DEFAULT_FOREGROUND VTCOLOR_GREY
+#define VTC_DEFAULT_BACKGROUND VTCOLOR_BLACK
+#define VTC_DEFAULT_ATTR \
+ (vtattr_t) { 0, VTC_DEFAULT_FOREGROUND, VTC_DEFAULT_BACKGROUND }
+#define VTC_ANSI_PARSER_STACK_SIZE 8
+
+struct vtconsole;
+
+typedef enum
+{
+ VTCOLOR_BLACK,
+ VTCOLOR_RED,
+ VTCOLOR_GREEN,
+ VTCOLOR_YELLOW,
+ VTCOLOR_BLUE,
+ VTCOLOR_MAGENTA,
+ VTCOLOR_CYAN,
+ VTCOLOR_GREY,
+} vtcolor_t;
+
+typedef enum
+{
+ VTSTATE_ESC,
+ VTSTATE_BRACKET,
+ VTSTATE_ATTR,
+ VTSTATE_ENDVAL,
+} vtansi_parser_state_t;
+
+typedef struct
+{
+ int value;
+ int empty;
+} vtansi_arg_t;
+
+typedef struct
+{
+ vtansi_parser_state_t state;
+ vtansi_arg_t stack[VTC_ANSI_PARSER_STACK_SIZE];
+ int index;
+} vtansi_parser_t;
+
+typedef struct
+{
+ int bright;
+ vtcolor_t fg;
+ vtcolor_t bg;
+} vtattr_t;
+
+typedef struct
+{
+ char c;
+ vtattr_t attr;
+} vtcell_t;
+
+typedef struct
+{
+ int x;
+ int y;
+} vtcursor_t;
+
+typedef void (*vtc_paint_handler_t)(struct vtconsole *vtc, vtcell_t *cell,
+ int x, int y);
+typedef void (*vtc_cursor_handler_t)(struct vtconsole *vtc, vtcursor_t *cur);
+
+typedef struct vtconsole
+{
+ int width;
+ int height;
+
+ vtattr_t attr;
+ vtansi_parser_t ansiparser;
+
+ vtcell_t *buffer;
+ int *tabs;
+ int tab_index;
+ vtcursor_t cursor;
+
+ vtc_paint_handler_t on_paint;
+ vtc_cursor_handler_t on_move;
+} vtconsole_t;
+
+typedef vtconsole_t vterminal_t;
+
+vtconsole_t *vtconsole(vtconsole_t *vtc, int width, int height,
+ vtc_paint_handler_t on_paint,
+ vtc_cursor_handler_t on_move);
+void vtconsole_delete(vtconsole_t *c);
+
+void vtconsole_clear(vtconsole_t *vtc, int fromx, int fromy, int tox, int toy);
+void vtconsole_scroll(vtconsole_t *vtc, int lines);
+void vtconsole_newline(vtconsole_t *vtc);
+
+void vtconsole_putchar(vtconsole_t *vtc, char c);
+void vtconsole_write(vtconsole_t *vtc, const char *buffer, uint32_t size);
+
+size_t vterminal_write(vterminal_t *vt, const char *buf, size_t len);
+
+size_t vterminal_echo_input(vterminal_t *vt, const char *buf, size_t len);
+
+void vterminal_key_pressed(vterminal_t *vt);
+
+void vterminal_scroll_to_bottom(vterminal_t *vt);
+
+void vterminal_init(vterminal_t *vt);
+
+void vterminal_make_active(vterminal_t *vt);
diff --git a/kernel/include/errno.h b/kernel/include/errno.h
new file mode 100644
index 0000000..c9e82c8
--- /dev/null
+++ b/kernel/include/errno.h
@@ -0,0 +1,151 @@
+#pragma once
+
+/* Kernel and user header (via symlink) */
+
+#ifndef __KERNEL__
+#ifndef errno
+#define errno _libc_errno
+#endif
+extern int _libc_errno;
+#endif
+
+#define EPERM 1 /* Operation not permitted */
+#define ENOENT 2 /* No such file or directory */
+#define ESRCH 3 /* No such process */
+#define EINTR 4 /* Interrupted system call */
+#define EIO 5 /* I/O error */
+#define ENXIO 6 /* No such device or address */
+#define E2BIG 7 /* Argument list too long */
+#define ENOEXEC 8 /* Exec format error */
+#define EBADF 9 /* Bad file number */
+#define ECHILD 10 /* No child processes */
+#define EAGAIN 11 /* Try again */
+#define ENOMEM 12 /* Out of memory */
+#define EACCES 13 /* Permission denied */
+#define EFAULT 14 /* Bad address */
+#define ENOTBLK 15 /* Block device required */
+#define EBUSY 16 /* Device or resource busy */
+#define EEXIST 17 /* File exists */
+#define EXDEV 18 /* Cross-device link */
+#define ENODEV 19 /* No such device */
+#define ENOTDIR 20 /* Not a directory */
+#define EISDIR 21 /* Is a directory */
+#define EINVAL 22 /* Invalid argument */
+#define ENFILE 23 /* File table overflow */
+#define EMFILE 24 /* Too many open files */
+#define ENOTTY 25 /* Not a typewriter */
+#define ETXTBSY 26 /* Text file busy */
+#define EFBIG 27 /* File too large */
+#define ENOSPC 28 /* No space left on device */
+#define ESPIPE 29 /* Illegal seek */
+#define EROFS 30 /* Read-only file system */
+#define EMLINK 31 /* Too many links */
+#define EPIPE 32 /* Broken pipe */
+#define EDOM 33 /* Math argument out of domain of func */
+#define ERANGE 34 /* Math result not representable */
+
+#define EDEADLK 35 /* Resource deadlock would occur */
+#define ENAMETOOLONG 36 /* File name too long */
+#define ENOLCK 37 /* No record locks available */
+#define ENOSYS 38 /* Function not implemented */
+#define ENOTEMPTY 39 /* Directory not empty */
+#define ELOOP 40 /* Too many symbolic links encountered */
+#define EWOULDBLOCK EAGAIN /* Operation would block */
+#define ENOMSG 42 /* No message of desired type */
+#define EIDRM 43 /* Identifier removed */
+#define ECHRNG 44 /* Channel number out of range */
+#define EL2NSYNC 45 /* Level 2 not synchronized */
+#define EL3HLT 46 /* Level 3 halted */
+#define EL3RST 47 /* Level 3 reset */
+#define ELNRNG 48 /* Link number out of range */
+#define EUNATCH 49 /* Protocol driver not attached */
+#define ENOCSI 50 /* No CSI structure available */
+#define EL2HLT 51 /* Level 2 halted */
+#define EBADE 52 /* Invalid exchange */
+#define EBADR 53 /* Invalid request descriptor */
+#define EXFULL 54 /* Exchange full */
+#define ENOANO 55 /* No anode */
+#define EBADRQC 56 /* Invalid request code */
+#define EBADSLT 57 /* Invalid slot */
+
+#define EDEADLOCK EDEADLK
+
+#define EBFONT 59 /* Bad font file format */
+#define ENOSTR 60 /* Device not a stream */
+#define ENODATA 61 /* No data available */
+#define ETIME 62 /* Timer expired */
+#define ENOSR 63 /* Out of streams resources */
+#define ENONET 64 /* Machine is not on the network */
+#define ENOPKG 65 /* Package not installed */
+#define EREMOTE 66 /* Object is remote */
+#define ENOLINK 67 /* Link has been severed */
+#define EADV 68 /* Advertise error */
+#define ESRMNT 69 /* Srmount error */
+#define ECOMM 70 /* Communication error on send */
+#define EPROTO 71 /* Protocol error */
+#define EMULTIHOP 72 /* Multihop attempted */
+#define EDOTDOT 73 /* RFS specific error */
+#define EBADMSG 74 /* Not a data message */
+#define EOVERFLOW 75 /* Value too large for defined data type */
+#define ENOTUNIQ 76 /* Name not unique on network */
+#define EBADFD 77 /* File descriptor in bad state */
+#define EREMCHG 78 /* Remote address changed */
+#define ELIBACC 79 /* Can not access a needed shared library */
+#define ELIBBAD 80 /* Accessing a corrupted shared library */
+#define ELIBSCN 81 /* .lib section in a.out corrupted */
+#define ELIBMAX 82 /* Attempting to link in too many shared libraries */
+#define ELIBEXEC 83 /* Cannot exec a shared library directly */
+#define EILSEQ 84 /* Illegal byte sequence */
+#define ERESTART 85 /* Interrupted system call should be restarted */
+#define ESTRPIPE 86 /* Streams pipe error */
+#define EUSERS 87 /* Too many users */
+#define ENOTSOCK 88 /* Socket operation on non-socket */
+#define EDESTADDRREQ 89 /* Destination address required */
+#define EMSGSIZE 90 /* Message too long */
+#define EPROTOTYPE 91 /* Protocol wrong type for socket */
+#define ENOPROTOOPT 92 /* Protocol not available */
+#define EPROTONOSUPPORT 93 /* Protocol not supported */
+#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
+#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
+#define EPFNOSUPPORT 96 /* Protocol family not supported */
+#define EAFNOSUPPORT 97 /* Address family not supported by protocol */
+#define EADDRINUSE 98 /* Address already in use */
+#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
+#define ENETDOWN 100 /* Network is down */
+#define ENETUNREACH 101 /* Network is unreachable */
+#define ENETRESET 102 /* Network dropped connection because of reset */
+#define ECONNABORTED 103 /* Software caused connection abort */
+#define ECONNRESET 104 /* Connection reset by peer */
+#define ENOBUFS 105 /* No buffer space available */
+#define EISCONN 106 /* Transport endpoint is already connected */
+#define ENOTCONN 107 /* Transport endpoint is not connected */
+#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
+#define ETOOMANYREFS 109 /* Too many references: cannot splice */
+#define ETIMEDOUT 110 /* Connection timed out */
+#define ECONNREFUSED 111 /* Connection refused */
+#define EHOSTDOWN 112 /* Host is down */
+#define EHOSTUNREACH 113 /* No route to host */
+#define EALREADY 114 /* Operation already in progress */
+#define EINPROGRESS 115 /* Operation now in progress */
+#define ESTALE 116 /* Stale NFS file handle */
+#define EUCLEAN 117 /* Structure needs cleaning */
+#define ENOTNAM 118 /* Not a XENIX named type file */
+#define ENAVAIL 119 /* No XENIX semaphores available */
+#define EISNAM 120 /* Is a named type file */
+#define EREMOTEIO 121 /* Remote I/O error */
+#define EDQUOT 122 /* Quota exceeded */
+
+#define ENOMEDIUM 123 /* No medium found */
+#define EMEDIUMTYPE 124 /* Wrong medium type */
+#define ECANCELED 125 /* Operation Canceled */
+#define ENOKEY 126 /* Required key not available */
+#define EKEYEXPIRED 127 /* Key has expired */
+#define EKEYREVOKED 128 /* Key has been revoked */
+#define EKEYREJECTED 129 /* Key was rejected by service */
+
+/* for robust mutexes */
+#define EOWNERDEAD 130 /* Owner died */
+#define ENOTRECOVERABLE 131 /* State not recoverable */
+
+/* added by dap from Linux */
+#define ENOTSUP EOPNOTSUPP
diff --git a/kernel/include/fs/dirent.h b/kernel/include/fs/dirent.h
new file mode 100644
index 0000000..10fa845
--- /dev/null
+++ b/kernel/include/fs/dirent.h
@@ -0,0 +1,25 @@
+/* dirent.h - filesystem-independent directory entry
+ * mcc, kma, jal
+ */
+#pragma once
+
+/* Kernel and user header (via symlink) */
+
+#ifdef __KERNEL__
+#include "config.h"
+#include "types.h"
+#else
+
+#include "sys/types.h"
+#include "weenix/config.h"
+
+#endif
+
+typedef struct dirent
+{
+ ino_t d_ino; /* entry inode number */
+ off_t d_off; /* seek pointer of next entry */
+ char d_name[NAME_LEN]; /* filename */
+} dirent_t;
+
+#define d_fileno d_ino
diff --git a/kernel/include/fs/fcntl.h b/kernel/include/fs/fcntl.h
new file mode 100644
index 0000000..fd719f2
--- /dev/null
+++ b/kernel/include/fs/fcntl.h
@@ -0,0 +1,18 @@
+/* fcntl.h - File access bits
+ * mcc, jal
+ */
+
+#pragma once
+
+/* Kernel and user header (via symlink) */
+
+/* File access modes for open(). */
+#define O_RDONLY 0
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_ACCESSMODE_MASK (O_RDONLY | O_WRONLY | O_RDWR)
+
+/* File status flags for open(). */
+#define O_CREAT 0x100 /* Create file if non-existent. */
+#define O_TRUNC 0x200 /* Truncate to zero length. */
+#define O_APPEND 0x400 /* Append to file. */
diff --git a/kernel/include/fs/file.h b/kernel/include/fs/file.h
new file mode 100644
index 0000000..72caee4
--- /dev/null
+++ b/kernel/include/fs/file.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "types.h"
+
+#define FMODE_READ 1
+#define FMODE_WRITE 2
+#define FMODE_APPEND 4
+#define FMODE_MAX_VALUE (FMODE_READ | FMODE_WRITE | FMODE_APPEND)
+
+struct vnode;
+
+typedef struct file
+{
+ /*
+ * The current position in the file. Can be modified by system calls
+ * like lseek(2), read(2), and write(2) (and possibly others) as
+ * described in the man pages of those calls.
+ */
+ size_t f_pos;
+
+ /*
+ * The mode in which this file was opened. This is a mask of the flags
+ * FMODE_READ, FMODE_WRITE, and FMODE_APPEND. It is set when the file
+ * is first opened, and use to restrict the operations that can be
+ * performed on the underlying vnode.
+ */
+ unsigned int f_mode;
+
+ /*
+ * The number of references to this struct.
+ */
+ size_t f_refcount;
+
+ /*
+ * The vnode which corresponds to this file.
+ */
+ struct vnode *f_vnode;
+} file_t;
+
+struct file *fcreate(int fd, struct vnode *vnode, unsigned int mode);
+
+/*
+ * Returns the file_t associated with the given file descriptor for the
+ * current process. If there is no associated file_t, returns NULL.
+ */
+struct file *fget(int fd);
+
+/*
+ * fref() increments the reference count on the given file.
+ */
+void fref(file_t *file);
+
+/*
+ * fput() decrements the reference count on the given file.
+ *
+ * If the refcount reaches 0, the storage for the given file_t will be
+ * released (f won't point to a valid memory address anymore), and the
+ * refcount on the associated vnode (if any) will be decremented.
+ *
+ * The vnode release operation will also be called if it exists.
+ */
+void fput(file_t **filep);
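A sketch of the usual borrow/release pattern around fget() and fput(); the function is hypothetical and only shows the reference-counting discipline.

#include "errno.h"
#include "fs/file.h"

long example_file_pos(int fd)
{
    file_t *f = fget(fd); /* takes a reference on success */
    if (!f)
    {
        return -EBADF;
    }
    long pos = (long)f->f_pos; /* ... use f->f_mode, f->f_vnode, etc. ... */
    fput(&f);                  /* drop the reference taken by fget() */
    return pos;
}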
diff --git a/kernel/include/fs/lseek.h b/kernel/include/fs/lseek.h
new file mode 100644
index 0000000..3520e77
--- /dev/null
+++ b/kernel/include/fs/lseek.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#define SEEK_SET 0
+#define SEEK_CUR 1
+#define SEEK_END 2
diff --git a/kernel/include/fs/open.h b/kernel/include/fs/open.h
new file mode 100644
index 0000000..fd10234
--- /dev/null
+++ b/kernel/include/fs/open.h
@@ -0,0 +1,5 @@
+#pragma once
+
+long do_open(const char *filename, int flags);
+
+long get_empty_fd(int *fd);
diff --git a/kernel/include/fs/pipe.h b/kernel/include/fs/pipe.h
new file mode 100644
index 0000000..61b9cc9
--- /dev/null
+++ b/kernel/include/fs/pipe.h
@@ -0,0 +1,10 @@
+/*
+ * FILE: pipe.h
+ * AUTH: eric
+ * DESC:
+ * DATE: Thu Dec 26 17:07:10 2013
+ */
+
+#pragma once
+
+int do_pipe(int pipefd[2]);
diff --git a/kernel/include/fs/ramfs/ramfs.h b/kernel/include/fs/ramfs/ramfs.h
new file mode 100644
index 0000000..b43f4eb
--- /dev/null
+++ b/kernel/include/fs/ramfs/ramfs.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include "fs/vfs.h"
+
+long ramfs_mount(struct fs *fs);
diff --git a/kernel/include/fs/s5fs/s5fs.h b/kernel/include/fs/s5fs/s5fs.h
new file mode 100644
index 0000000..7bde185
--- /dev/null
+++ b/kernel/include/fs/s5fs/s5fs.h
@@ -0,0 +1,145 @@
+/*
+ * FILE: s5fs.h
+ * AUTHOR: kma
+ * DESCR: shared structures for the System V file system...
+ */
+
+#pragma once
+
+#ifdef __FSMAKER__
+#include <stdint.h>
+#else
+
+#include "config.h"
+
+#include "drivers/blockdev.h"
+#include "fs/vfs.h"
+#include "fs/vnode.h"
+#include "mm/page.h"
+#include "proc/kmutex.h"
+
+#endif
+
+#define S5_SUPER_BLOCK 0 /* the blockno of the superblock */
+#define S5_IS_SUPER(blkno) ((blkno) == S5_SUPER_BLOCK)
+
+#define S5_NBLKS_PER_FNODE 30
+
+#define S5_BLOCK_SIZE 4096
+#define S5_NDIRECT_BLOCKS 28
+#define S5_INODES_PER_BLOCK (S5_BLOCK_SIZE / sizeof(s5_inode_t))
+#define S5_DIRENTS_PER_BLOCK (S5_BLOCK_SIZE / sizeof(s5_dirent_t))
+#define S5_MAX_FILE_BLOCKS (S5_NDIRECT_BLOCKS + S5_NIDIRECT_BLOCKS)
+#define S5_MAX_FILE_SIZE (S5_MAX_FILE_BLOCKS * S5_BLOCK_SIZE)
+#define S5_NAME_LEN 28
+
+#define S5_TYPE_FREE 0x0
+#define S5_TYPE_DATA 0x1
+#define S5_TYPE_DIR 0x2
+#define S5_TYPE_CHR 0x4
+#define S5_TYPE_BLK 0x8
+
+#define S5_MAGIC 071177
+#define S5_CURRENT_VERSION 3
+
+/* Number of blocks stored in the indirect block */
+#define S5_NIDIRECT_BLOCKS (S5_BLOCK_SIZE / sizeof(uint32_t))
+
+/* Given a file offset, returns the block number that it is in */
+#define S5_DATA_BLOCK(seekptr) ((seekptr) / S5_BLOCK_SIZE)
+
+/* Given a file offset, returns the offset into the pointer's block */
+#define S5_DATA_OFFSET(seekptr) ((seekptr) % S5_BLOCK_SIZE)
+
+/* Given an inode number, tells the block that inode is stored in. */
+#define S5_INODE_BLOCK(inum) ((inum) / S5_INODES_PER_BLOCK + 1)
+
+/*
+ * Given an inode number, tells the offset (in units of s5_inode_t) of
+ * that inode within the block returned by S5_INODE_BLOCK.
+ */
+#define S5_INODE_OFFSET(inum) ((inum) % S5_INODES_PER_BLOCK)
+
+/* Given an FS struct, get the S5FS (private data) struct. */
+#define FS_TO_S5FS(fs) ((s5fs_t *)(fs)->fs_i)
+
+/* each node of the free block list looks like this: */
+/*
+typedef struct s5_fbl_node {
+ int free_blocks[S5_NBLKS_PER_FNODE-1];
+ int more;
+} s5_fbl_node_t;
+*/
+
+/* Note that all on-disk types need to have hard-coded sizes (to ensure
+ * inter-machine compatibility of s5 disks) */
+
+/* The contents of the superblock, as stored on disk. */
+typedef struct s5_super
+{
+ uint32_t s5s_magic; /* the magic number */
+ uint32_t s5s_free_inode; /* the free inode pointer */
+ uint32_t s5s_nfree; /* number of blocks currently in
+ * s5s_free_blocks */
+ /* First "node" of free block list */
+ uint32_t s5s_free_blocks[S5_NBLKS_PER_FNODE];
+
+ uint32_t s5s_root_inode; /* root inode */
+ uint32_t s5s_num_inodes; /* number of inodes */
+ uint32_t s5s_version; /* version of this disk format */
+} s5_super_t;
+
+/* The contents of an inode, as stored on disk. */
+typedef struct s5_inode
+{
+ union {
+ uint32_t s5_next_free; /* inode free list ptr */
+ uint32_t s5_size; /* file size */
+ } s5_un;
+ uint32_t s5_number; /* this inode's number */
+ uint16_t s5_type; /* one of S5_TYPE_{FREE,DATA,DIR,CHR,BLK} */
+ int16_t s5_linkcount; /* link count of this inode */
+ uint32_t s5_direct_blocks[S5_NDIRECT_BLOCKS];
+ uint32_t s5_indirect_block;
+} s5_inode_t;
+
+typedef struct s5_node
+{
+ vnode_t vnode;
+ s5_inode_t inode;
+ long dirtied_inode;
+} s5_node_t;
+
+#define VNODE_TO_S5NODE(vn) CONTAINER_OF(vn, s5_node_t, vnode)
+
+/* The contents of a directory entry, as stored on disk. */
+typedef struct s5_dirent
+{
+ uint32_t s5d_inode;
+ char s5d_name[S5_NAME_LEN];
+} s5_dirent_t;
+
+#ifndef __FSMAKER__
+/* Our in-memory representation of an s5fs filesystem (fs_i points to this) */
+typedef struct s5fs
+{
+ blockdev_t *s5f_bdev;
+ s5_super_t s5f_super;
+ kmutex_t s5f_mutex;
+ fs_t *s5f_fs;
+#ifndef OLD
+ mobj_t s5f_mobj;
+#endif
+} s5fs_t;
+
+long s5fs_mount(struct fs *fs);
+
+void s5_get_meta_disk_block(s5fs_t *s5fs, uint64_t blocknum, long forwrite,
+ pframe_t **pfp);
+
+//void s5_get_file_disk_block(vnode_t *vnode, blocknum_t blocknum, long forwrite,
+// pframe_t **pfp);
+
+void s5_release_disk_block(pframe_t **pfp);
+
+#endif
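Working the layout macros above through with concrete numbers, derived from the constants and on-disk structs in this header (no padding is needed, since every field is 4-byte aligned or smaller):

/* sizeof(s5_inode_t) == 128 and sizeof(s5_dirent_t) == 32, so
 *   S5_INODES_PER_BLOCK  == 4096 / 128  == 32
 *   S5_DIRENTS_PER_BLOCK == 4096 / 32   == 128
 *   S5_INODE_BLOCK(70)   == 70 / 32 + 1 == 3   (inodes start after the superblock)
 *   S5_INODE_OFFSET(70)  == 70 % 32     == 6
 * and for a file offset of 10000 bytes:
 *   S5_DATA_BLOCK(10000) == 2,  S5_DATA_OFFSET(10000) == 1808.
 * The maximum file size is (28 + 1024) * 4096 == 4,308,992 bytes. */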
diff --git a/kernel/include/fs/s5fs/s5fs_privtest.h b/kernel/include/fs/s5fs/s5fs_privtest.h
new file mode 100644
index 0000000..38278ef
--- /dev/null
+++ b/kernel/include/fs/s5fs/s5fs_privtest.h
@@ -0,0 +1,6 @@
+#ifndef __S5FS_PRIVTEST_H
+#define __S5FS_PRIVTEST_H
+
+int s5fs_start(const char *testroot);
+
+#endif
diff --git a/kernel/include/fs/s5fs/s5fs_subr.h b/kernel/include/fs/s5fs/s5fs_subr.h
new file mode 100644
index 0000000..ff4c570
--- /dev/null
+++ b/kernel/include/fs/s5fs/s5fs_subr.h
@@ -0,0 +1,53 @@
+/*
+ * FILE: s5fs_subr.h
+ * AUTHOR: afenn
+ * DESCR: S5 low-level subroutines
+ */
+
+#pragma once
+
+#include "types.h"
+#include "mm/pframe.h"
+#include "fs/s5fs/s5fs.h"
+
+struct s5fs;
+struct s5_node;
+
+long s5_alloc_inode(struct s5fs *s5fs, uint16_t type, devid_t devid);
+
+void s5_free_inode(struct s5fs *s5fs, ino_t ino);
+
+ssize_t s5_read_file(struct s5_node *sn, size_t pos, char *buf, size_t len);
+
+ssize_t s5_write_file(struct s5_node *sn, size_t pos, const char *buf,
+ size_t len);
+
+long s5_link(struct s5_node *dir, const char *name, size_t namelen,
+ struct s5_node *child);
+
+long s5_find_dirent(struct s5_node *dir, const char *name, size_t namelen,
+ size_t *filepos);
+
+void s5_remove_dirent(struct s5_node *dir, const char *name, size_t namelen,
+ struct s5_node *ent);
+
+void s5_replace_dirent(struct s5_node *sn, const char *name, size_t namelen,
+ struct s5_node *old, struct s5_node *new);
+
+long s5_file_block_to_disk_block(struct s5_node *sn, size_t file_blocknum,
+ int alloc, int *new);
+
+long s5_inode_blocks(struct s5_node *vnode);
+
+void s5_remove_blocks(struct s5_node *vnode);
+
+/* Converts a vnode_t* to the s5fs_t* (s5fs file system) struct */
+#define VNODE_TO_S5FS(vn) ((s5fs_t *)((vn)->vn_fs->fs_i))
+
+#ifdef OLD
+/* Converts an s5fs_t* to its memory object (the memory object of the block device) */
+#define S5FS_TO_VMOBJ(s5fs) (&(s5fs)->s5f_bdev->bd_mobj)
+#endif
+
+
+pframe_t *s5_cache_and_clear_block(mobj_t *mo, long block, long loc);
diff --git a/kernel/include/fs/stat.h b/kernel/include/fs/stat.h
new file mode 100644
index 0000000..08e477d
--- /dev/null
+++ b/kernel/include/fs/stat.h
@@ -0,0 +1,44 @@
+/*
+ * FILE: stat.h
+ * AUTH: mcc
+ * DESC:
+ * DATE: Fri Mar 13 23:10:46 1998
+ */
+
+#pragma once
+
+/* Kernel and user header (via symlink) */
+
+typedef struct stat
+{
+ int st_mode;
+ int st_ino;
+ int st_dev;
+ int st_rdev;
+ int st_nlink;
+ int st_uid;
+ int st_gid;
+ int st_size;
+ int st_atime;
+ int st_mtime;
+ int st_ctime;
+ int st_blksize;
+ int st_blocks;
+} stat_t;
+
+/* vnode vn_mode masks */
+
+#define S_IFCHR 0x0100 /* character special */
+#define S_IFDIR 0x0200 /* directory */
+#define S_IFBLK 0x0400 /* block special */
+#define S_IFREG 0x0800 /* regular */
+#define S_IFLNK 0x1000 /* symlink */
+#define S_IFIFO 0x2000 /* fifo/pipe */
+
+#define _S_TYPE(m) ((m)&0xFF00)
+#define S_ISCHR(m) (_S_TYPE(m) == S_IFCHR)
+#define S_ISDIR(m) (_S_TYPE(m) == S_IFDIR)
+#define S_ISBLK(m) (_S_TYPE(m) == S_IFBLK)
+#define S_ISREG(m) (_S_TYPE(m) == S_IFREG)
+#define S_ISLNK(m) (_S_TYPE(m) == S_IFLNK)
+#define S_ISFIFO(m) (_S_TYPE(m) == S_IFIFO)
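Usage sketch of the type-test macros, paired with do_stat() from fs/vfs_syscall.h; the helper name is invented and the path is arbitrary.

#include "fs/stat.h"
#include "fs/vfs_syscall.h"

static long example_is_directory(const char *path)
{
    stat_t sb;
    if (do_stat(path, &sb) < 0)
    {
        return 0;
    }
    return S_ISDIR(sb.st_mode);
}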
diff --git a/kernel/include/fs/vfs.h b/kernel/include/fs/vfs.h
new file mode 100644
index 0000000..23f418a
--- /dev/null
+++ b/kernel/include/fs/vfs.h
@@ -0,0 +1,162 @@
+#pragma once
+
+#include "types.h"
+
+#include "fs/open.h"
+#include "proc/kmutex.h"
+#include "util/list.h"
+
+struct vnode;
+struct file;
+struct vfs;
+struct fs;
+struct slab_allocator;
+
+/* name_match: fname should be null-terminated, name is namelen long */
+#define name_match(fname, name, namelen) \
+ (strlen(fname) == namelen && !strncmp((fname), (name), (namelen)))
+
+typedef struct fs_ops
+{
+ /*
+ * Initialize vn_ops, vn_mode, vn_devid and vn_len.
+ * If the filesystem wishes, it may initialize and use vn_i.
+ */
+ void (*read_vnode)(struct fs *fs, struct vnode *vn);
+
+ /*
+ * Called when the vnode's reference count drops to 0.
+ * Perform any necessary cleanup for the corresponding inode.
+ */
+ void (*delete_vnode)(struct fs *fs, struct vnode *vn);
+
+ /*
+ * Optional. Default behavior is to vput() fs_root.
+ * Unmount the filesystem, performing any desired sanity checks
+ * and/or necessary cleanup.
+ * Return 0 on success; negative number on error.
+ */
+ long (*umount)(struct fs *fs);
+
+ void (*sync)(struct fs *fs);
+} fs_ops_t;
+
+#ifndef STR_MAX
+#define STR_MAX 32
+#endif
+
+/* similar to Linux's super_block. */
+typedef struct fs
+{
+ /*
+ * The string name of the device from which this file system should
+ * be mounted. This may be used by the mount function of some file
+ * systems which need to know which device they are mounting.
+ */
+ char fs_dev[STR_MAX];
+ /*
+ * The type of file system this structure represents (given as a
+ * well-defined string). This is used by the generic VFS mount
+ * function to decide which filesystem-specific mount function to
+ * call. Valid values are hard-coded in vfs.c.
+ */
+ char fs_type[STR_MAX];
+
+#ifdef __MOUNTING__
+ /*
+ * If mounting is implemented then this should point to the vnode
+ * of the file that this file system is mounted on. For the root file
+ * system this will just point to the root of that file system.
+ */
+ struct vnode *fs_mtpt;
+
+ /*
+ * An identifier for the mounted file system. This should be used by
+ * the kernel to keep track of all mounted file systems.
+ */
+ list_link_t fs_link;
+#endif
+
+ /*
+ * The following members are initialized by the filesystem
+ * implementation's mount routine:
+ */
+
+ /*
+ * The struct of operations that define which filesystem-specific
+ * functions to call to perform filesystem manipulation.
+ */
+ fs_ops_t *fs_ops;
+
+ /*
+ * The root vnode for this filesystem (not to be confused with
+ * either / (the root of VFS) or the vnode where the filesystem is
+ * mounted, which is on a different file system).
+ */
+ struct vnode *fs_root;
+
+ /* Filesystem-specific data. */
+ void *fs_i;
+
+ struct slab_allocator *fs_vnode_allocator;
+ list_t vnode_list;
+ kmutex_t vnode_list_mutex;
+ kmutex_t vnode_rename_mutex;
+
+} fs_t;
+
+/* - this is the vnode on which we will mount the vfsroot fs.
+ */
+extern fs_t vfs_root_fs;
+
+void do_sync();
+
+/* VFS {{{ */
+/*
+ * - called by the init process at system shutdown
+ * - at this point, init process is the only process running
+ * => so, there should be no "live" vnodes
+ *
+ * unmount the root filesystem (and first unmount any filesystems mounted
+ * on the root filesystem in the proper order (bottom up)).
+ *
+ */
+
+/* VFS }}} */
+/* VFS Shutdown: */
+/*
+ * Called by the init process at system shutdown.
+ *
+ * At this point, the init process is the only process running
+ * => so, there should be no "live" vnodes
+ */
+long vfs_shutdown();
+
+/* Pathname resolution: */
+/* (the corresponding definitions live in namev.c) */
+long namev_lookup(struct vnode *dir, const char *name, size_t namelen,
+ struct vnode **out);
+
+long namev_dir(struct vnode *base, const char *path, struct vnode **res_vnode,
+ const char **name, size_t *namelen);
+
+long namev_open(struct vnode *base, const char *path, int oflags, int mode,
+ devid_t devid, struct vnode **res_vnode);
+
+long namev_resolve(struct vnode *base, const char *path,
+ struct vnode **res_vnode);
+
+long namev_get_child(struct vnode *dir, char *name, size_t namelen,
+ struct vnode **out);
+
+long namev_get_parent(struct vnode *dir, struct vnode **out);
+
+long namev_is_descendant(struct vnode *a, struct vnode *b);
+
+#ifdef __GETCWD__
+long lookup_name(struct vnode *dir, struct vnode *entry, char *buf,
+ size_t size);
+long lookup_dirpath(struct vnode *dir, char *buf, size_t size);
+#endif /* __GETCWD__ */
+
+long mountfunc(fs_t *fs);
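A small illustration of the name_match() helper above, which exists because path components handed to the lookup routines are counted rather than NUL-terminated. The helper name is invented, and the kernel's strlen/strncmp are assumed to be available.

static long example_component_matches(void)
{
    const char *entry = "usr";         /* NUL-terminated name from a dirent */
    const char *path = "usr/bin/ls";   /* the first component is 3 chars long */
    return name_match(entry, path, 3); /* nonzero: "usr" matches */
}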
diff --git a/kernel/include/fs/vfs_privtest.h b/kernel/include/fs/vfs_privtest.h
new file mode 100644
index 0000000..1b5fb0b
--- /dev/null
+++ b/kernel/include/fs/vfs_privtest.h
@@ -0,0 +1,3 @@
+#pragma once
+
+void vfs_privtest(void);
diff --git a/kernel/include/fs/vfs_syscall.h b/kernel/include/fs/vfs_syscall.h
new file mode 100644
index 0000000..c5be65d
--- /dev/null
+++ b/kernel/include/fs/vfs_syscall.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include "dirent.h"
+
+#include "types.h"
+
+#include "fs/open.h"
+#include "fs/pipe.h"
+#include "fs/stat.h"
+
+long do_close(int fd);
+
+ssize_t do_read(int fd, void *buf, size_t len);
+
+ssize_t do_write(int fd, const void *buf, size_t len);
+
+long do_dup(int fd);
+
+long do_dup2(int ofd, int nfd);
+
+long do_mknod(const char *path, int mode, devid_t devid);
+
+long do_mkdir(const char *path);
+
+long do_rmdir(const char *path);
+
+long do_unlink(const char *path);
+
+long do_link(const char *oldpath, const char *newpath);
+
+long do_rename(const char *oldpath, const char *newpath);
+
+long do_chdir(const char *path);
+
+ssize_t do_getdent(int fd, struct dirent *dirp);
+
+off_t do_lseek(int fd, off_t offset, int whence);
+
+long do_stat(const char *path, struct stat *uf);
diff --git a/kernel/include/fs/vnode.h b/kernel/include/fs/vnode.h
new file mode 100644
index 0000000..ff4b9be
--- /dev/null
+++ b/kernel/include/fs/vnode.h
@@ -0,0 +1,358 @@
+/*
+ * FILE: vnode.h
+ * AUTH: mcc
+ * DESC:
+ * DATE: Fri Mar 13 18:54:11 1998
+ * $Id: vnode.h,v 1.2.2.2 2006/06/04 01:02:32 afenn Exp $
+ */
+
+#pragma once
+
+#include "drivers/blockdev.h"
+#include "drivers/chardev.h"
+#include "drivers/dev.h"
+#include "mm/mobj.h"
+#include "mm/pframe.h"
+#include "proc/kmutex.h"
+#include "util/list.h"
+
+struct fs;
+struct dirent;
+struct stat;
+struct file;
+struct vnode;
+struct kmutex;
+
+#define VNODE_LOADING 0
+#define VNODE_LOADED 1
+
+typedef struct vnode_ops
+{
+ /* The following functions map directly to their corresponding
+ * system calls. Unless otherwise noted, they return 0 on
+ * success, and -errno on failure.
+ */
+
+ /* Operations that can be performed on non-directory files: */
+ /*
+ * read transfers at most count bytes from file into buf. It
+ * begins reading from the file at pos bytes into the file. On
+ * success, it returns the number of bytes transferred, or 0 if the
+ * end of the file has been reached (pos >= file->vn_len).
+ */
+ ssize_t (*read)(struct vnode *file, size_t pos, void *buf, size_t count);
+
+ /*
+ * write transfers count bytes from buf into file. It begins
+ * writing at pos bytes into the file. If pos + count extends
+ * past the end of the file, the file's length will be increased.
+ * If pos is before the end of the file, the existing data is
+ * overwritten. On success, it returns the number of bytes
+ * transferred.
+ */
+ ssize_t (*write)(struct vnode *file, size_t pos, const void *buf,
+ size_t count);
+
+ /*
+ * Implementations should supply an mobj through the "ret"
+ * argument (not by setting vma->vma_obj). If for any reason
+ * this cannot be done an appropriate error code should be
+ * returned instead.
+ */
+ long (*mmap)(struct vnode *file, struct mobj **ret);
+
+ /* Operations that can be performed on directory files: */
+
+ /*
+ * mknod creates a special file specified by name and namelen in the
+ * directory pointed to by dir with the specified mode and devid.
+ *
+ * Upon success, ret must point to the newly created file.
+ */
+ long (*mknod)(struct vnode *dir, const char *name, size_t namelen, int mode,
+ devid_t devid, struct vnode **ret);
+
+ /*
+ * lookup attempts to find the file specified by name and namelen in the
+ * directory pointed to by dir.
+ *
+ * Upon success, out must point to the child vnode.
+ */
+ long (*lookup)(struct vnode *dir, const char *name, size_t namelen,
+ struct vnode **out);
+
+ /*
+ * Creates a directory entry in dir specified by name and namelen pointing
+ * to the inode of target.
+ */
+ long (*link)(struct vnode *dir, const char *name, size_t namelen,
+ struct vnode *target);
+
+ /*
+ * unlink removes the directory entry in dir corresponding to the file
+ * specified by name and namelen.
+ */
+ long (*unlink)(struct vnode *dir, const char *name, size_t namelen);
+
+ /*
+ * rename moves the directory entry specified by oldname and oldnamelen
+ * in olddir to newname and newnamelen in newdir.
+ */
+ long (*rename)(struct vnode *olddir, const char *oldname, size_t oldnamelen,
+ struct vnode *newdir, const char *newname,
+ size_t newnamelen);
+
+ /*
+ * mkdir creates a directory specified by name and namelen in the
+ * directory pointed to by dir.
+ *
+ * Upon success, out must point to the newly created directory.
+ * Upon failure, out must be unchanged.
+ */
+ long (*mkdir)(struct vnode *dir, const char *name, size_t namelen,
+ struct vnode **out);
+
+ /*
+ * rmdir removes the directory specified by name and namelen from dir.
+ * The directory to be removed must be empty: the only directory entries
+ * must be "." and "..".
+ */
+ long (*rmdir)(struct vnode *dir, const char *name, size_t namelen);
+
+ /*
+ * readdir reads one directory entry from dir into the struct
+ * dirent. On success, it returns the amount that pos should be
+ * increased by to obtain the next directory entry with a
+ * subsequent call to readdir. If the end of the directory has been
+ * reached (pos == dir->vn_len), no directory entry will be
+ * read and 0 will be returned.
+ */
+ ssize_t (*readdir)(struct vnode *dir, size_t pos, struct dirent *d);
+
+ /* Operations that can be performed on any type of "file"
+ * (including regular files, directories, and block/byte devices): */
+ /*
+ * stat sets the fields in the given buf, filling it with
+ * information about file.
+ */
+ long (*stat)(struct vnode *vnode, struct stat *buf);
+
+ /*
+ * acquire is called on a vnode when a file takes its first
+ * reference to the vnode. The file is passed in.
+ */
+ long (*acquire)(struct vnode *vnode, struct file *file);
+
+ /*
+ * release is called on a vnode when the refcount of a file
+ * descriptor that has it open comes down to 0. Each call to
+ * acquire has exactly one matching call to release with the
+ * same file that was passed to acquire.
+ */
+ long (*release)(struct vnode *vnode, struct file *file);
+
+ long (*get_pframe)(struct vnode *vnode, size_t pagenum, long forwrite,
+ pframe_t **pfp);
+
+ /*
+ * Read the page of 'vnode' corresponding to 'pf' into pf's
+ * page-aligned, page-sized buffer.
+ */
+ long (*fill_pframe)(struct vnode *vnode, pframe_t *pf);
+
+ /*
+ * Write the contents of pf's page-aligned, page-sized buffer
+ * back to the page of 'vnode' corresponding to 'pf'.
+ */
+ long (*flush_pframe)(struct vnode *vnode, pframe_t *pf);
+
+ /*
+ * Truncates the file to a length of zero.
+ * Should only be used on regular files, not directories.
+ */
+ void (*truncate_file)(struct vnode *vnode);
+} vnode_ops_t;
+
+typedef struct vnode
+{
+ /*
+ * Function pointers to the implementations of file operations (the
+ * functions are provided by the filesystem implementation).
+ */
+ struct vnode_ops *vn_ops;
+
+ /*
+ * The filesystem to which this vnode belongs. This is initialized by
+ * the VFS subsystem when the vnode is first created and should never
+ * change.
+ */
+ struct fs *vn_fs;
+
+#ifdef __MOUNTING__
+ /* This field is used only for implementing mount points (not required) */
+ /* This field points to the root of the file system mounted at
+ * this vnode. If no file system is mounted at this point, this is a
+ * self pointer (i.e. vn->vn_mount = vn). See vget for why this
+ * makes things easier for us. */
+ struct vnode *vn_mount;
+#endif
+
+ /*
+ * The object responsible for managing the memory where pages read
+ * from this file reside. The VFS subsystem may use this field, but it
+ * does not need to create it.
+ */
+ struct mobj vn_mobj;
+
+ /*
+ * A number which uniquely identifies this vnode within its filesystem.
+ * (Similar and usually identical to what you might know as the inode
+ * number of a file).
+ */
+ ino_t vn_vno;
+
+ /*
+ * File type. See stat.h.
+ */
+ int vn_mode;
+
+ /*
+ * Length of file. Initialized at the fs-implementation-level (in the
+ * 'read_vnode' fs_t entry point). Maintained at the filesystem
+ * implementation level (within the implementations of relevant vnode
+ * entry points).
+ */
+ size_t vn_len;
+
+ /*
+ * A generic pointer which the file system can use to store any extra
+ * data it needs.
+ */
+ void *vn_i;
+
+ /*
+ * The device identifier.
+ * Only relevant to vnodes representing device files.
+ */
+ devid_t vn_devid;
+
+ /*
+ * The state of the vnode. Can either be loading or loaded. The vnode
+ * cannot be used until the vnode is in the loaded state. Potential
+ * users should wait on `vn_waitq` if the vnode is being loaded.
+ * This field is protected by the 'vn_state_lock'.
+ */
+ int vn_state;
+
+ /*
+ * Allows vnode users to wait on the vnode, until the vnode is ready.
+ */
+ ktqueue_t vn_waitq;
+
+ union {
+ chardev_t *chardev;
+ blockdev_t *blockdev;
+ } vn_dev;
+
+ /* Used (only) by the v{get,ref,put} facilities (vfs/vnode.c): */
+ list_link_t vn_link; /* link on system vnode list */
+} vnode_t;
+
+void init_special_vnode(vnode_t *vn);
+
+/* Core vnode management routines: */
+/*
+ * Obtain a vnode representing the file that filesystem 'fs' identifies
+ * by inode number 'vnum'; returns the vnode_t corresponding to the
+ * given filesystem and vnode number. If a vnode for the given file
+ * already exists (it already has an entry in the system inode table) then
+ * the reference count of that vnode is incremented and it is returned.
+ * Otherwise a new vnode is created in the system inode table with a
+ * reference count of 1.
+ * This function has no unsuccessful return.
+ *
+ * MAY BLOCK.
+ */
+struct vnode *vget(struct fs *fs, ino_t vnum);
+
+/*
+ * Lock a vnode (locks vn_mobj).
+ */
+void vlock(vnode_t *vn);
+
+/*
+ * Lock two vnodes in a canonical order. This prevents the A/B (deadlock)
+ * locking problem when locking two directories or two files.
+ */
+void vlock_in_order(vnode_t *a, vnode_t *b);
+
+/*
+ * Acquires a vnode locked (see vget above)
+ */
+vnode_t *vget_locked(struct fs *fs, ino_t ino);
+
+/**
+ * Unlock and put a vnode (see vput)
+ */
+void vput_locked(struct vnode **vnp);
+
+/**
+ * Unlocks a vnode
+ */
+void vunlock(vnode_t *vn);
+
+/**
+ * Unlocks two vnodes (effectively just 2 unlocks)
+ */
+void vunlock_in_order(vnode_t *a, vnode_t *b);
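+
+/*
+ * Usage sketch: the locking pattern these helpers are meant for, e.g. when
+ * operating on two directories at once (the variable names olddir/newdir
+ * below are placeholders).
+ *
+ *     vlock_in_order(olddir, newdir);
+ *     // ... manipulate both directories ...
+ *     vunlock_in_order(olddir, newdir);
+ */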
+
+/*
+ * Increments the reference count of the provided vnode
+ * (i.e. the refcount of vn_mobj).
+ */
+void vref(vnode_t *vn);
+
+/*
+ * This function decrements the reference count on this vnode
+ * (i.e. the refcount of vn_mobj).
+ *
+ * If, as a result of this, refcount reaches zero, the underlying
+ * fs's 'delete_vnode' entry point will be called and the vnode will be
+ * freed.
+ *
+ * If the linkcount of the corresponding inode on the filesystem is zero,
+ * then the inode will be freed.
+ *
+ */
+void vput(vnode_t **vnp);
+
+/* Auxiliary: */
+
+/* Unmounting (shutting down the VFS) is the primary reason for the
+ * existence of the following three routines (when unmounting an s5 fs,
+ * they are used in the order that they are listed here): */
+/*
+ * Checks to see if there are any actively-referenced vnodes
+ * belonging to the specified filesystem.
+ * Returns -EBUSY if there is at least one such actively-referenced
+ * vnode, and 0 otherwise.
+ *
+ */
+long vfs_is_in_use(struct fs *fs);
+
+/*
+ * Returns the number of vnodes from this filesystem that are in
+ * use.
+ */
+size_t vfs_count_active_vnodes(struct fs *fs);
+
+/* Diagnostic: */
+/*
+ * Prints the vnodes that are in use. Specifying a fs_t will restrict
+ * the vnodes to just that fs. Specifying NULL will print all vnodes
+ * in the entire system.
+ *
+ * Note that this is currently unimplemented.
+ */
+void vnode_print(struct fs *fs);
diff --git a/kernel/include/fs/vnode_specials.h b/kernel/include/fs/vnode_specials.h
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/kernel/include/fs/vnode_specials.h
diff --git a/kernel/include/globals.h b/kernel/include/globals.h
new file mode 100644
index 0000000..42d4275
--- /dev/null
+++ b/kernel/include/globals.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "main/smp.h"
+#include "proc/kthread.h"
+#include "proc/proc.h"
+
+#define CORE_SPECIFIC_DATA __attribute__((section(".csd"))) = {0}
+
+extern core_t curcore;
+extern proc_t *curproc;
+extern kthread_t *curthr;
diff --git a/kernel/include/kernel.h b/kernel/include/kernel.h
new file mode 100644
index 0000000..c3f7827
--- /dev/null
+++ b/kernel/include/kernel.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include "types.h"
+
+/* The linker script will initialize these symbols. Note
+ * that the linker does not actually allocate any space
+ * for these variables (thus the void type); it only sets
+ * the address that the symbol points to. So, for example,
+ * the address where the kernel ends is &kernel_end,
+ * NOT kernel_end.
+ */
+extern void *setup_end;
+extern void *kernel_start;
+extern void *kernel_start_text;
+extern void *kernel_start_data;
+extern void *kernel_start_bss;
+extern void *kernel_end;
+extern void *kernel_end_text;
+extern void *kernel_end_data;
+extern void *kernel_end_bss;
+extern void *kernel_start_init;
+extern void *kernel_end_init;
+
+extern void *kernel_phys_base;
+extern void *kernel_phys_end;
+
+#define inline __attribute__((always_inline, used))
+#define noreturn __attribute__((noreturn))
+
+#define offsetof(type, member) \
+ ((uintptr_t)((char *)&((type *)(0))->member - (char *)0))
+
+#define NOT_YET_IMPLEMENTED(f) \
+ dbg(DBG_PRINT, "Not yet implemented: %s, file %s, line %d\n", f, __FILE__, \
+ __LINE__)
+
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+#define CONTAINER_OF(obj, type, member) \
+ ((type *)((char *)(obj)-offsetof(type, member)))
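+
+/*
+ * Usage sketch for CONTAINER_OF: recovering the enclosing structure from a
+ * pointer to one of its members. The struct and variable names below are
+ * hypothetical.
+ *
+ *     typedef struct foo {
+ *         int value;
+ *         list_link_t link;
+ *     } foo_t;
+ *
+ *     // given a list_link_t *lnk known to be embedded in a foo_t:
+ *     foo_t *f = CONTAINER_OF(lnk, foo_t, link);
+ */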
+
+/* This truly atrocious macro hack, taken from the Wikipedia article on the C
+ * preprocessor, is used to "quote" the value (or name) of another macro:
+ * QUOTE_BY_NAME(NTERMS) -> "NTERMS"
+ * QUOTE(NTERMS) -> "3"
+ *
+ * These macros are made even more atrocious by searching for "stringizing
+ * operator comma". The variadic versions account for comma-separated symbols.
+ */
+#define QUOTE_BY_NAME(...) #__VA_ARGS__
+#define QUOTE_BY_VALUE(x) QUOTE_BY_NAME(x)
+/* By default, we quote by value */
+#define QUOTE(...) QUOTE_BY_NAME(__VA_ARGS__)
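+
+/*
+ * Illustrative expansions (assuming "#define NTERMS 3" is in scope; foo and
+ * bar are arbitrary identifiers):
+ *
+ *     QUOTE_BY_NAME(NTERMS)  // -> "NTERMS"   (argument is not expanded)
+ *     QUOTE_BY_VALUE(NTERMS) // -> "3"        (argument is expanded first)
+ *     QUOTE(foo, bar)        // -> "foo, bar" (commas pass through)
+ */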
+
+#if 0
+#ifndef __DRIVERS__
+#define __DRIVERS__
+#endif
+#ifndef __VFS__
+#define __VFS__
+#endif
+#ifndef __S5FS__
+#define __S5FS__
+#endif
+#ifndef __VM__
+#define __VM__
+#endif
+#ifndef __NTERMS__
+#define __NTERMS__ 3
+#endif
+#ifndef __NDISKS__
+#define __NDISKS__ 1
+#endif
+#endif \ No newline at end of file
diff --git a/kernel/include/limits.h b/kernel/include/limits.h
new file mode 100644
index 0000000..6db9a7e
--- /dev/null
+++ b/kernel/include/limits.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#define CHAR_BIT 8
+#define CHAR_MAX UCHAR_MAX
+#define UCHAR_MAX ((unsigned char)(~0U))
+#define SCHAR_MAX ((signed char)(UCHAR_MAX >> 1))
+#define SCHAR_MIN (-SCHAR_MAX - 1)
+#define USHRT_MAX ((unsigned short)(~0U))
+#define SHRT_MAX ((signed short)(USHRT_MAX >> 1))
+#define SHRT_MIN (-SHRT_MAX - 1)
+#define UINT_MAX ((unsigned int)(~0U))
+#define INT_MAX ((signed int)(UINT_MAX >> 1))
+#define INT_MIN (-INT_MAX - 1)
+#define ULONG_MAX ((unsigned long)(~0UL))
+#define LONG_MAX ((signed long)(ULONG_MAX >> 1))
+#define LONG_MIN (-LONG_MAX - 1)
+
+#define UPTR_MAX ULONG_MAX
diff --git a/kernel/include/main/acpi.h b/kernel/include/main/acpi.h
new file mode 100644
index 0000000..dc49805
--- /dev/null
+++ b/kernel/include/main/acpi.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <types.h>
+
+typedef struct acpi_header
+{
+ uint32_t ah_sign;
+ uint32_t ah_size;
+ uint8_t ah_rev;
+ uint8_t ah_checksum;
+ uint8_t ah_oemid[6];
+ uint8_t ah_tableid[8];
+ uint32_t ah_oemrev;
+ uint32_t ah_creatorid;
+ uint32_t ah_creatorrev;
+} packed acpi_header_t;
+
+void acpi_init();
+
+void *acpi_table(uint32_t signature, int index);
diff --git a/kernel/include/main/apic.h b/kernel/include/main/apic.h
new file mode 100644
index 0000000..ca9c8f5
--- /dev/null
+++ b/kernel/include/main/apic.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include "main/interrupt.h"
+#include "types.h"
+
+typedef enum
+{
+ DESTINATION_MODE_FIXED = 0,
+ DESTINATION_MODE_LOWEST_PRIORITY = 1,
+ DESTINATION_MODE_SMI = 2,
+ DESTINATION_MODE_NMI = 4,
+ DESTINATION_MODE_INIT = 5,
+ DESTINATION_MODE_SIPI = 6
+} ipi_destination_mode;
+
+#define MAX_LAPICS 8
+
+/* Initializes the APIC using data from the ACPI tables.
+ * ACPI handlers must be initialized before calling this
+ * function. */
+void apic_init();
+
+/* Returns the APIC ID of the current processor */
+long apic_current_id();
+
+/* Returns the largest known APIC ID */
+long apic_max_id();
+
+/* Maps the given IRQ to the given interrupt number. */
+void apic_setredir(uint32_t irq, uint8_t intr);
+
+void apic_enable();
+
+/* Starts the APIC timer. Timer interrupts arrive at a rate of
+ * (freq / 16) interrupts per millisecond. */
+void apic_enable_periodic_timer(uint32_t freq);
+
+/* Stops the APIC timer */
+void apic_disable_periodic_timer();
+
+/* Sets the interrupt to raise when a spurious
+ * interrupt occurs. */
+void apic_setspur(uint8_t intr);
+
+/* Sets the interrupt priority level. This function should
+ * be accessed via wrappers in the interrupt subsystem. */
+void apic_setipl(uint8_t ipl);
+
+/* Gets the interrupt priority level. This function should
+ * be accessed via wrappers in the interrupt subsystem. */
+uint8_t apic_getipl();
+
+long apic_initialized();
+
+/* Writes to the APIC's memory mapped end-of-interrupt
+ * register to indicate that the handling of an interrupt
+ * originating from the APIC has been finished. This function
+ * should only be called from the interrupt subsystem. */
+void apic_eoi();
+
+void apic_start_processor(uint8_t target, uint8_t execution_page);
+
+void apic_send_ipi(uint8_t target, ipi_destination_mode destination_mode,
+ uint8_t vector);
+
+void apic_broadcast_ipi(ipi_destination_mode mode, uint8_t vector,
+ long include_self);
+
+/**
+ * Wait for the last IPI sent to be acknowledged by the target processor.
+ */
+void apic_wait_ipi(); \ No newline at end of file
diff --git a/kernel/include/main/cpuid.h b/kernel/include/main/cpuid.h
new file mode 100644
index 0000000..5d4b5fa
--- /dev/null
+++ b/kernel/include/main/cpuid.h
@@ -0,0 +1,118 @@
+#pragma once
+
+#include <types.h>
+
+/* Vendor-strings. */
+#define CPUID_VENDOR_AMD "AuthenticAMD"
+#define CPUID_VENDOR_INTEL "GenuineIntel"
+#define CPUID_VENDOR_VIA "CentaurHauls"
+#define CPUID_VENDOR_OLDTRANSMETA "TransmetaCPU"
+#define CPUID_VENDOR_TRANSMETA "GenuineTMx86"
+#define CPUID_VENDOR_CYRIX "CyrixInstead"
+#define CPUID_VENDOR_CENTAUR "CentaurHauls"
+#define CPUID_VENDOR_NEXGEN "NexGenDriven"
+#define CPUID_VENDOR_UMC "UMC UMC UMC "
+#define CPUID_VENDOR_SIS "SiS SiS SiS "
+#define CPUID_VENDOR_NSC "Geode by NSC"
+#define CPUID_VENDOR_RISE "RiseRiseRise"
+
+enum
+{
+ CPUID_FEAT_ECX_SSE3 = 1 << 0,
+ CPUID_FEAT_ECX_PCLMUL = 1 << 1,
+ CPUID_FEAT_ECX_DTES64 = 1 << 2,
+ CPUID_FEAT_ECX_MONITOR = 1 << 3,
+ CPUID_FEAT_ECX_DS_CPL = 1 << 4,
+ CPUID_FEAT_ECX_VMX = 1 << 5,
+ CPUID_FEAT_ECX_SMX = 1 << 6,
+ CPUID_FEAT_ECX_EST = 1 << 7,
+ CPUID_FEAT_ECX_TM2 = 1 << 8,
+ CPUID_FEAT_ECX_SSSE3 = 1 << 9,
+ CPUID_FEAT_ECX_CID = 1 << 10,
+ CPUID_FEAT_ECX_FMA = 1 << 12,
+ CPUID_FEAT_ECX_CX16 = 1 << 13,
+ CPUID_FEAT_ECX_ETPRD = 1 << 14,
+ CPUID_FEAT_ECX_PDCM = 1 << 15,
+ CPUID_FEAT_ECX_DCA = 1 << 18,
+ CPUID_FEAT_ECX_SSE4_1 = 1 << 19,
+ CPUID_FEAT_ECX_SSE4_2 = 1 << 20,
+ CPUID_FEAT_ECX_x2APIC = 1 << 21,
+ CPUID_FEAT_ECX_MOVBE = 1 << 22,
+ CPUID_FEAT_ECX_POPCNT = 1 << 23,
+ CPUID_FEAT_ECX_XSAVE = 1 << 26,
+ CPUID_FEAT_ECX_OSXSAVE = 1 << 27,
+ CPUID_FEAT_ECX_AVX = 1 << 28,
+
+ CPUID_FEAT_EDX_FPU = 1 << 0,
+ CPUID_FEAT_EDX_VME = 1 << 1,
+ CPUID_FEAT_EDX_DE = 1 << 2,
+ CPUID_FEAT_EDX_PSE = 1 << 3,
+ CPUID_FEAT_EDX_TSC = 1 << 4,
+ CPUID_FEAT_EDX_MSR = 1 << 5,
+ CPUID_FEAT_EDX_PAE = 1 << 6,
+ CPUID_FEAT_EDX_MCE = 1 << 7,
+ CPUID_FEAT_EDX_CX8 = 1 << 8,
+ CPUID_FEAT_EDX_APIC = 1 << 9,
+ CPUID_FEAT_EDX_SEP = 1 << 11,
+ CPUID_FEAT_EDX_MTRR = 1 << 12,
+ CPUID_FEAT_EDX_PGE = 1 << 13,
+ CPUID_FEAT_EDX_MCA = 1 << 14,
+ CPUID_FEAT_EDX_CMOV = 1 << 15,
+ CPUID_FEAT_EDX_PAT = 1 << 16,
+ CPUID_FEAT_EDX_PSE36 = 1 << 17,
+ CPUID_FEAT_EDX_PSN = 1 << 18,
+ CPUID_FEAT_EDX_CLF = 1 << 19,
+ CPUID_FEAT_EDX_DTES = 1 << 21,
+ CPUID_FEAT_EDX_ACPI = 1 << 22,
+ CPUID_FEAT_EDX_MMX = 1 << 23,
+ CPUID_FEAT_EDX_FXSR = 1 << 24,
+ CPUID_FEAT_EDX_SSE = 1 << 25,
+ CPUID_FEAT_EDX_SSE2 = 1 << 26,
+ CPUID_FEAT_EDX_SS = 1 << 27,
+ CPUID_FEAT_EDX_HTT = 1 << 28,
+ CPUID_FEAT_EDX_TM1 = 1 << 29,
+ CPUID_FEAT_EDX_IA64 = 1 << 30,
+ CPUID_FEAT_EDX_PBE = 1 << 31
+};
+
+enum cpuid_requests
+{
+ CPUID_GETVENDORSTRING,
+ CPUID_GETFEATURES,
+ CPUID_GETTLB,
+ CPUID_GETSERIAL,
+
+ CPUID_INTELEXTENDED = 0x80000000,
+ CPUID_INTELFEATURES,
+ CPUID_INTELBRANDSTRING,
+ CPUID_INTELBRANDSTRINGMORE,
+ CPUID_INTELBRANDSTRINGEND,
+};
+
+static inline void cpuid(int request, uint32_t *a, uint32_t *b, uint32_t *c,
+ uint32_t *d)
+{
+ __asm__ volatile("cpuid"
+ : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
+ : "0"(request));
+}
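+
+/*
+ * Usage sketch for cpuid(): reading the CPU vendor string. For leaf 0 the
+ * 12-byte vendor string is returned in EBX, EDX, ECX (in that order); the
+ * buffer name below is a placeholder.
+ *
+ *     uint32_t a, b, c, d;
+ *     char vendor[13];
+ *     cpuid(CPUID_GETVENDORSTRING, &a, &b, &c, &d);
+ *     memcpy(vendor + 0, &b, 4);
+ *     memcpy(vendor + 4, &d, 4);
+ *     memcpy(vendor + 8, &c, 4);
+ *     vendor[12] = '\0';
+ *     // vendor can now be compared against e.g. CPUID_VENDOR_INTEL
+ */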
+
+static inline void cpuid_get_msr(uint32_t msr, uint32_t *lo, uint32_t *hi)
+{
+ __asm__ volatile("rdmsr"
+ : "=a"(*lo), "=d"(*hi)
+ : "c"(msr));
+}
+
+static inline void cpuid_set_msr(uint32_t msr, uint32_t lo, uint32_t hi)
+{
+ __asm__ volatile("wrmsr" ::"a"(lo), "d"(hi), "c"(msr));
+}
+
+static inline void io_wait(void)
+{
+ __asm__ volatile(
+ "jmp 1f\n\t"
+ "1:jmp 2f\n\t"
+ "2:");
+}
diff --git a/kernel/include/main/entry.h b/kernel/include/main/entry.h
new file mode 100644
index 0000000..64c0e96
--- /dev/null
+++ b/kernel/include/main/entry.h
@@ -0,0 +1,3 @@
+/* entry.h */
+
+void kmain(void);
diff --git a/kernel/include/main/gdt.h b/kernel/include/main/gdt.h
new file mode 100644
index 0000000..a991cbf
--- /dev/null
+++ b/kernel/include/main/gdt.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "types.h"
+
+#define GDT_COUNT 16
+
+#define GDT_ZERO 0x00
+#define GDT_KERNEL_TEXT 0x08
+#define GDT_KERNEL_DATA 0x10
+#define GDT_USER_TEXT 0x18
+#define GDT_USER_DATA 0x20
+#define GDT_TSS 0x28
+
+void gdt_init(void);
+
+void gdt_set_kernel_stack(void *addr);
+
+void gdt_set_entry(uint32_t segment, uint32_t base, uint32_t limit,
+ uint8_t ring, int exec, int dir, int rw);
+
+void gdt_clear(uint32_t segment); \ No newline at end of file
diff --git a/kernel/include/main/inits.h b/kernel/include/main/inits.h
new file mode 100644
index 0000000..5013b07
--- /dev/null
+++ b/kernel/include/main/inits.h
@@ -0,0 +1,15 @@
+#pragma once
+
+extern void sched_init();
+
+extern void kshell_init();
+
+extern void file_init();
+
+extern void pipe_init();
+
+extern void vfs_init();
+
+extern void syscall_init();
+
+extern void elf64_init(void);
diff --git a/kernel/include/main/interrupt.h b/kernel/include/main/interrupt.h
new file mode 100644
index 0000000..6a9ae00
--- /dev/null
+++ b/kernel/include/main/interrupt.h
@@ -0,0 +1,117 @@
+#pragma once
+
+#include "kernel.h"
+#include "types.h"
+#include "util/debug.h"
+
+// intr_disk_primary/secondary chosen so that they are in different task priority classes
+#define INTR_DIVIDE_BY_ZERO 0x00
+#define INTR_INVALID_OPCODE 0x06
+#define INTR_GPF 0x0d
+#define INTR_PAGE_FAULT 0x0e
+
+#define INTR_APICTIMER 0xf0
+#define INTR_KEYBOARD 0xe0
+
+#define INTR_DISK_PRIMARY 0xd0
+#define INTR_SPURIOUS 0xfe
+#define INTR_APICERR 0xff
+#define INTR_SHUTDOWN 0xfd
+
+/* NOTE: INTR_SYSCALL is not defined here, but is in syscall.h (it must be
+ * in a userland-accessible header) */
+
+// Intel Volume 3-A, 10.8.3.1 (10-29)
+#define IPL_LOW 0
+// we want to keep timer interrupts happening all the time to keep track of time
+// :)
+#define IPL_HIGH 0xe0
+#define IPL_HIGHEST 0xff
+
+typedef struct regs
+{
+ // all the regs
+ uint64_t r_r15;
+ uint64_t r_r14;
+ uint64_t r_r13;
+ uint64_t r_r12;
+ uint64_t r_rbp;
+ uint64_t r_rbx;
+ uint64_t r_r11;
+ uint64_t r_r10;
+ uint64_t r_r9;
+ uint64_t r_r8;
+ uint64_t r_rax;
+ uint64_t r_rcx;
+ uint64_t r_rdx;
+ uint64_t r_rsi;
+ uint64_t r_rdi;
+
+ // interrupt number
+ uint64_t r_intr;
+
+ // pushed by processor
+ uint64_t r_err;
+ uint64_t r_rip;
+ uint64_t r_cs;
+ uint64_t r_rflags;
+ uint64_t r_rsp;
+ uint64_t r_ss;
+} packed regs_t;
+
+void intr_init();
+
+/* The function pointer which should be implemented by functions
+ * which will handle interrupts. These handlers should be registered
+ * with the interrupt subsystem via the intr_register function.
+ * The regs structure contains the state of the registers saved when
+ * the interrupt occurred. Return whether or not the handler has itself
+ * acknowledged the interrupt with a call to apic_eoi(). */
+typedef long (*intr_handler_t)(regs_t *regs);
+
+/* Registers an interrupt handler for the given interrupt number.
+ * If another handler had been previously registered for this interrupt
+ * number it is returned, otherwise this function returns NULL. It
+ * is good practice to assert that this function returns NULL unless
+ * it is known that this will not be the case. */
+intr_handler_t intr_register(uint8_t intr, intr_handler_t handler);
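+
+/*
+ * Registration sketch (the handler name is hypothetical): a driver registers
+ * its handler once at init time and asserts that no handler was already
+ * present for that interrupt number.
+ *
+ *     static long my_keyboard_handler(regs_t *regs)
+ *     {
+ *         // ... service the device ...
+ *         return 0; // 0 => this handler did not call apic_eoi() itself
+ *     }
+ *
+ *     intr_handler_t prev = intr_register(INTR_KEYBOARD, my_keyboard_handler);
+ *     KASSERT(prev == NULL); // assuming the KASSERT macro from util/debug.h
+ */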
+
+int32_t intr_map(uint16_t irq, uint8_t intr);
+
+static inline uint64_t intr_enabled()
+{
+ uint64_t flags;
+ __asm__ volatile("pushf; pop %0; and $0x200, %0;"
+ : "=r"(flags)::);
+ return flags;
+}
+
+static inline void intr_enable() { __asm__ volatile("sti"); }
+
+static inline void intr_disable() { __asm__ volatile("cli"); }
+
+/* Atomically enables interrupts using the sti
+ * instruction and puts the processor into a halted
+ * state, this function returns once an interrupt
+ * occurs. */
+static inline void intr_wait()
+{
+ /* the sti instruction enables interrupts, however
+ * interrupts are not checked for until the next
+ * instruction is executed, this means that the following
+ * code will not be susceptible to a bug where an
+ * interrupt occurs between the sti and hlt commands
+ * and does not wake us up from the hlt. */
+ __asm__ volatile("sti; hlt");
+}
+
+/* Sets the interrupt priority level for hardware interrupts.
+ * At initialization time devices should detect their individual
+ * IPLs and save them for use with this function. IPL_LOW allows
+ * all hardware interrupts; IPL_HIGH blocks all hardware interrupts. */
+uint8_t intr_setipl(uint8_t ipl);
+
+/* Retrieves the current interrupt priority level. */
+uint8_t intr_getipl();
+
+void dump_registers(regs_t *regs);
diff --git a/kernel/include/main/io.h b/kernel/include/main/io.h
new file mode 100644
index 0000000..19c6f86
--- /dev/null
+++ b/kernel/include/main/io.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "kernel.h"
+#include "types.h"
+
+static inline void outb(uint16_t port, uint8_t val)
+{
+ __asm__ volatile("outb %0,%1" ::"a"(val), "Nd"(port));
+}
+
+static inline uint8_t inb(uint16_t port)
+{
+ uint8_t ret;
+ __asm__ volatile("inb %1,%0"
+ : "=a"(ret)
+ : "Nd"(port));
+ return ret;
+}
+
+static inline void outw(uint16_t port, uint16_t val)
+{
+ __asm__ volatile("outw %0,%1" ::"a"(val), "Nd"(port));
+}
+
+static inline uint16_t inw(uint16_t port)
+{
+ uint16_t ret;
+ __asm__ volatile("inw %1,%0"
+ : "=a"(ret)
+ : "Nd"(port));
+ return ret;
+}
+
+static inline void outl(uint16_t port, uint32_t val)
+{
+ __asm__ volatile("outl %0,%1" ::"a"(val), "Nd"(port));
+}
+
+static inline uint32_t inl(uint16_t port)
+{
+ uint32_t ret;
+ __asm__ volatile("inl %1,%0"
+ : "=a"(ret)
+ : "Nd"(port));
+ return ret;
+}
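+
+/*
+ * Usage sketch: CMOS-style index/data access with the helpers above. Ports
+ * 0x70/0x71 are the standard CMOS index/data ports; 'reg' is a placeholder
+ * register index.
+ *
+ *     outb(0x70, reg);         // select a CMOS register
+ *     uint8_t val = inb(0x71); // read its current value
+ */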
diff --git a/kernel/include/main/smp.h b/kernel/include/main/smp.h
new file mode 100644
index 0000000..bf05fff
--- /dev/null
+++ b/kernel/include/main/smp.h
@@ -0,0 +1,22 @@
+#include "boot/config.h"
+#include "mm/page.h"
+#include "proc/core.h"
+
+// For any given piece of global data, there are 4 cases we must protect
+// against:
+//   (SMP.1) our core's other threads        (mutex or mask interrupts)
+//   (SMP.2) our core's interrupt handlers   (mask interrupts)
+//   (SMP.3) other cores' threads            (mutex or spinlock)
+//   (SMP.4) other cores' interrupt handlers (spinlock)
+// Masking interrupts + taking a spinlock covers all 4 cases!
+
+#define GET_CSD(core, type, name) \
+ ((type *)(csd_vaddr_table[(core)] + PAGE_OFFSET(&(name))))
+
+extern uintptr_t csd_vaddr_table[];
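+
+/*
+ * Usage sketch for GET_CSD (the core id below is a placeholder): reading
+ * another core's copy of a variable placed in the .csd section, e.g.
+ * 'curcore' from globals.h.
+ *
+ *     core_t *other = GET_CSD(1, core_t, curcore);
+ *     long id = other->kc_id;
+ */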
+
+void map_in_core_specific_data(pml4_t *pml4);
+
+void smp_init();
+
+void core_init();
+
+long is_core_specific_data(void *addr);
diff --git a/kernel/include/mm/kmalloc.h b/kernel/include/mm/kmalloc.h
new file mode 100644
index 0000000..f99e9df
--- /dev/null
+++ b/kernel/include/mm/kmalloc.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include "types.h"
+
+void *kmalloc(size_t size);
+
+void kfree(void *addr);
diff --git a/kernel/include/mm/mm.h b/kernel/include/mm/mm.h
new file mode 100644
index 0000000..c2989b4
--- /dev/null
+++ b/kernel/include/mm/mm.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#define MM_POISON 1
+#define MM_POISON_ALLOC 0xBB
+#define MM_POISON_FREE 0xDD
+
+#define USER_MEM_LOW 0x00400000 /* inclusive */
+#define USER_MEM_HIGH (1UL << 47) /* exclusive */
diff --git a/kernel/include/mm/mman.h b/kernel/include/mm/mman.h
new file mode 100644
index 0000000..27f4d57
--- /dev/null
+++ b/kernel/include/mm/mman.h
@@ -0,0 +1,25 @@
+#pragma once
+
+/* Kernel and user header (via symlink) */
+
+/* Page protection flags.
+ */
+#define PROT_NONE 0x0 /* No access. */
+#define PROT_READ 0x1 /* Pages can be read. */
+#define PROT_WRITE 0x2 /* Pages can be written. */
+#define PROT_EXEC 0x4 /* Pages can be executed. */
+
+/* Return value for mmap() on failure.
+ */
+#define MAP_FAILED ((void *)-1)
+
+/* Mapping type - shared or private.
+ */
+#define MAP_SHARED 1
+#define MAP_PRIVATE 2
+#define MAP_TYPE 3 /* mask for above types */
+
+/* Mapping flags.
+ */
+#define MAP_FIXED 4
+#define MAP_ANON 8
diff --git a/kernel/include/mm/mobj.h b/kernel/include/mm/mobj.h
new file mode 100644
index 0000000..bca1b38
--- /dev/null
+++ b/kernel/include/mm/mobj.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include "proc/kmutex.h"
+#include "util/atomic.h"
+#include "util/list.h"
+#include "mm/pframe.h"
+
+struct pframe;
+
+struct mobj;
+
+typedef enum
+{
+ MOBJ_VNODE = 1,
+ MOBJ_SHADOW,
+ MOBJ_ANON,
+#ifdef OLD
+ MOBJ_BLOCKDEV,
+#else
+ MOBJ_FS,
+#endif
+} mobj_type_t;
+
+typedef struct mobj_ops
+{
+ long (*get_pframe)(struct mobj *o, uint64_t pagenum, long forwrite,
+ struct pframe **pfp);
+
+ long (*fill_pframe)(struct mobj *o, struct pframe *pf);
+
+ long (*flush_pframe)(struct mobj *o, struct pframe *pf);
+
+ void (*destructor)(struct mobj *o);
+} mobj_ops_t;
+
+typedef struct mobj
+{
+ long mo_type;
+ struct mobj_ops mo_ops;
+ atomic_t mo_refcount;
+ list_t mo_pframes;
+ kmutex_t mo_mutex;
+} mobj_t;
+
+void mobj_init(mobj_t *o, long type, mobj_ops_t *ops);
+
+void mobj_lock(mobj_t *o);
+
+void mobj_unlock(mobj_t *o);
+
+void mobj_ref(mobj_t *o);
+
+void mobj_put(mobj_t **op);
+
+void mobj_put_locked(mobj_t **op);
+
+long mobj_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite,
+ struct pframe **pfp);
+
+void mobj_find_pframe(mobj_t *o, uint64_t pagenum, struct pframe **pfp);
+
+long mobj_flush_pframe(mobj_t *o, struct pframe *pf);
+
+long mobj_flush(mobj_t *o);
+
+long mobj_free_pframe(mobj_t *o, struct pframe **pfp);
+
+long mobj_default_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite,
+ struct pframe **pfp);
+
+void mobj_default_destructor(mobj_t *o);
+
+#ifndef OLD
+void mobj_create_pframe(mobj_t *o, uint64_t pagenum, uint64_t loc, pframe_t **pfp);
+#endif \ No newline at end of file
diff --git a/kernel/include/mm/page.h b/kernel/include/mm/page.h
new file mode 100644
index 0000000..5230a85
--- /dev/null
+++ b/kernel/include/mm/page.h
@@ -0,0 +1,124 @@
+#pragma once
+
+#ifdef __KERNEL__
+#include "types.h"
+#else
+#include "sys/types.h"
+#endif
+
+/* This header file contains the functions for allocating
+ * and freeing page-aligned chunks of data which are a
+ * multiple of a page in size. These are the lowest level
+ * memory allocation functions. In general code should
+ * use the slab allocator functions in mm/slab.h unless
+ * they require page-aligned buffers. */
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE ((uintptr_t)(1UL << PAGE_SHIFT))
+#define PAGE_MASK (0xffffffffffffffff << PAGE_SHIFT)
+
+#define PAGE_ALIGN_DOWN(x) ((void *)(((uintptr_t)(x)&PAGE_MASK)))
+#define PAGE_ALIGN_UP(x) \
+ ((void *)((((uintptr_t)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)))
+
+#define PAGE_OFFSET(x) (((uintptr_t)(x)) & ~PAGE_MASK)
+#define PAGE_ALIGNED(x) (!PAGE_OFFSET(x))
+
+#define PN_TO_ADDR(x) ((void *)(((uintptr_t)(x)) << PAGE_SHIFT))
+#define ADDR_TO_PN(x) (((uintptr_t)(x)) >> PAGE_SHIFT)
+
+#define PAGE_SAME(x, y) (PAGE_ALIGN_DOWN(x) == PAGE_ALIGN_DOWN(y))
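+
+/*
+ * Illustrative values for the macros above, using the sample address 0x1234
+ * and the 4KB PAGE_SIZE:
+ *
+ *     PAGE_ALIGN_DOWN(0x1234) // -> (void *)0x1000
+ *     PAGE_ALIGN_UP(0x1234)   // -> (void *)0x2000
+ *     PAGE_OFFSET(0x1234)     // -> 0x234
+ *     ADDR_TO_PN(0x1234)      // -> 1
+ */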
+
+#define PAGE_NSIZES 8
+
+#define USE_2MB_PAGES 1
+#define USE_1GB_PAGES 1
+
+#define PAGE_SHIFT_2MB 21
+#define PAGE_SIZE_2MB ((uintptr_t)(1UL << PAGE_SHIFT_2MB))
+#define PAGE_MASK_2MB (0xffffffffffffffff << PAGE_SHIFT_2MB)
+#define PAGE_ALIGN_DOWN_2MB(x) (((uintptr_t)(x)) & PAGE_MASK_2MB)
+#define PAGE_ALIGN_UP_2MB(x) (PAGE_ALIGN_DOWN_2MB((x)-1) + PAGE_SIZE_2MB)
+#define PAGE_OFFSET_2MB(x) (((uintptr_t)(x)) & ~PAGE_MASK_2MB)
+#define PAGE_ALIGNED_2MB(x) ((x) == PAGE_ALIGN_DOWN_2MB(x))
+#define PAGE_SAME_2MB(x, y) (PAGE_ALIGN_DOWN_2MB(x) == PAGE_ALIGN_DOWN_2MB(y))
+
+#define PAGE_SHIFT_1GB 30
+#define PAGE_MASK_1GB (0xffffffffffffffff << PAGE_SHIFT_1GB)
+#define PAGE_SIZE_1GB ((uintptr_t)(1UL << PAGE_SHIFT_1GB))
+#define PAGE_ALIGN_DOWN_1GB(x) (((uintptr_t)(x)) & PAGE_MASK_1GB)
+#define PAGE_ALIGN_UP_1GB(x) (PAGE_ALIGN_DOWN_1GB((x)-1) + PAGE_SIZE_1GB)
+#define PAGE_OFFSET_1GB(x) (((uintptr_t)(x)) & ~PAGE_MASK_1GB)
+#define PAGE_ALIGNED_1GB(x) ((x) == PAGE_ALIGN_DOWN_1GB(x))
+#define PAGE_SAME_1GB(x, y) (PAGE_ALIGN_DOWN_1GB(x) == PAGE_ALIGN_DOWN_1GB(y))
+
+#define PAGE_SHIFT_512GB 39
+#define PAGE_SIZE_512GB ((uintptr_t)(1UL << PAGE_SHIFT_512GB))
+#define PAGE_MASK_512GB (0xffffffffffffffff << PAGE_SHIFT_512GB)
+#define PAGE_ALIGN_DOWN_512GB(x) (((uintptr_t)(x)) & PAGE_MASK_512GB)
+#define PAGE_ALIGN_UP_512GB(x) (PAGE_ALIGN_DOWN_512GB((x)-1) + PAGE_SIZE_512GB)
+
+#define PAGE_CONTROL_FLAGS(x) \
+ ((x) & (PT_PRESENT | PT_WRITE | PT_USER | PT_WRITE_THROUGH | \
+ PT_CACHE_DISABLED | PT_SIZE | PT_GLOBAL))
+#define PAGE_FLAGS(x) ((x) & (~PAGE_MASK))
+
+typedef enum page_size
+{
+ ps_4kb,
+ ps_2mb,
+ ps_1gb,
+ ps_512gb,
+} page_size_t;
+
+typedef struct page_status
+{
+ page_size_t size;
+ int mapped;
+} page_status_t;
+
+/* Performs all initialization necessary for the
+ * page allocation system. This should be called
+ * only once at boot time before any other functions
+ * in this header are called. */
+void page_init();
+
+void *physmap_start();
+
+void *physmap_end();
+
+/* These functions allocate and free one page-aligned,
+ * page-sized block of memory. Values passed to
+ * page_free MUST have been returned by page_alloc
+ * at some previous point. There should be only one
+ * call to page_free for each value returned by
+ * page_alloc. If the system is out of memory page_alloc
+ * will return NULL. */
+void *page_alloc(void);
+
+void *page_alloc_bounded(void *max_paddr);
+
+void page_free(void *addr);
+
+/* These functions allocate and free a page-aligned
+ * block of memory which are npages pages in length.
+ * A call to page_alloc_n will allocate a block, to free
+ * that block a call should be made to page_free_n with
+ * npages set to the same as it was when the block was
+ * allocated */
+void *page_alloc_n(size_t npages);
+
+void *page_alloc_n_bounded(size_t npages, void *max_paddr);
+
+void page_free_n(void *start, size_t npages);
+
+void page_add_range(void *start, void *end);
+
+void page_mark_reserved(void *paddr);
+
+void page_init_finish();
+
+/* Returns the number of free pages remaining in the
+ * system. Note that calls to page_alloc_n(npages) may
+ * fail even if page_free_count() >= npages. */
+size_t page_free_count();
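+
+/*
+ * Usage sketch: allocating and freeing a multi-page, page-aligned buffer.
+ * The page count is arbitrary; the NULL check matters because allocation can
+ * fail when physical memory is exhausted.
+ *
+ *     void *buf = page_alloc_n(4);
+ *     if (!buf)
+ *         return -ENOMEM;
+ *     // ... use the 4 pages starting at buf ...
+ *     page_free_n(buf, 4);
+ */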
diff --git a/kernel/include/mm/pagecache.h b/kernel/include/mm/pagecache.h
new file mode 100644
index 0000000..442e7b1
--- /dev/null
+++ b/kernel/include/mm/pagecache.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include "drivers/blockdev.h"
+#include "mm/pframe.h"
+
+long pagecache_get_page(pframe_t *pf);
+#ifdef NO
+void pagecache_newsource(pframe_t *pf, blockdev_t *dev, long loc);
+#endif \ No newline at end of file
diff --git a/kernel/include/mm/pagetable.h b/kernel/include/mm/pagetable.h
new file mode 100644
index 0000000..cc1fa3e
--- /dev/null
+++ b/kernel/include/mm/pagetable.h
@@ -0,0 +1,94 @@
+#pragma once
+
+#include "mm/page.h"
+#include "vm/vmmap.h"
+
+#define PT_PRESENT 0x001
+#define PT_WRITE 0x002
+#define PT_USER 0x004
+#define PT_WRITE_THROUGH 0x008
+#define PT_CACHE_DISABLED 0x010
+#define PT_ACCESSED 0x020
+#define PT_DIRTY 0x040
+#define PT_SIZE 0x080
+#define PT_GLOBAL 0x100
+
+#define PT_ENTRY_COUNT (PAGE_SIZE / sizeof(uintptr_t))
+
+typedef struct page
+{
+ uint8_t data[PAGE_SIZE];
+} page_t;
+
+// Generalized structure for all directory like entries
+typedef struct pt
+{
+ uintptr_t phys[PT_ENTRY_COUNT];
+} pt_t, pd_t, pdp_t, pml4_t;
+
+#define INDEX_MASK 0b111111111
+#define PML4E(x) ((((uintptr_t)(x)) >> 39) & INDEX_MASK)
+#define PDPE(x) ((((uintptr_t)(x)) >> 30) & INDEX_MASK)
+#define PDE(x) ((((uintptr_t)(x)) >> 21) & INDEX_MASK)
+#define PTE(x) ((((uintptr_t)(x)) >> 12) & INDEX_MASK)
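+
+/*
+ * Usage sketch: the four 9-bit indices extracted from a virtual address,
+ * one per level of the 4-level walk (PML4 -> PDP -> PD -> PT), with
+ * PAGE_OFFSET(v) giving the byte offset within the final page. 'v' is a
+ * placeholder address.
+ *
+ *     size_t i4 = PML4E(v); // index into the PML4
+ *     size_t i3 = PDPE(v);  // index into the page-directory-pointer table
+ *     size_t i2 = PDE(v);   // index into the page directory
+ *     size_t i1 = PTE(v);   // index into the page table
+ */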
+
+#define PT_ENTRY_COUNT (PAGE_SIZE / sizeof(uintptr_t))
+#define PT_VADDR_SIZE (PAGE_SIZE * PT_ENTRY_COUNT)
+#define PD_VADDR_SIZE (PAGE_SIZE * PT_ENTRY_COUNT * PT_ENTRY_COUNT)
+#define PDP_VADDR_SIZE \
+ (PAGE_SIZE * PT_ENTRY_COUNT * PT_ENTRY_COUNT * PT_ENTRY_COUNT)
+#define PML4_VADDR_SIZE \
+ (PAGE_SIZE * PT_ENTRY_COUNT * PT_ENTRY_COUNT * PT_ENTRY_COUNT * \
+ PT_ENTRY_COUNT)
+
+#define IS_PRESENT(n) ((n)&PT_PRESENT)
+#define IS_2MB_PAGE(n) ((n)&PT_SIZE)
+#define IS_1GB_PAGE IS_2MB_PAGE
+
+#define GDB_PT_PHYSADDR(pt, v) (pt->phys[PTE(v)] & PAGE_MASK)
+#define GDB_PD_PHYSADDR(pd, v) (pd->phys[PDE(v)] & PAGE_MASK)
+#define GDB_PDP_PHYSADDR(pdp, v) (pdp->phys[PDPE(v)] & PAGE_MASK)
+#define GDB_PML4_PHYSADDR(pml4, v) (pml4->phys[PML4E(v)] & PAGE_MASK)
+
+#define GDB_PHYSADDR(pml4, v) \
+ (GDB_PT_PHYSADDR( \
+ GDB_PD_PHYSADDR( \
+ GDB_PDP_PHYSADDR(GDB_PML4_PHYSADDR(pml4, (v)) + PHYS_OFFSET, \
+ (v)) + \
+ PHYS_OFFSET, \
+ (v)) + \
+ PHYS_OFFSET, \
+ (v)) + \
+ PHYS_OFFSET)
+#define GDB_CUR_PHYSADDR(v) GDB_PHYSADDR(curproc->p_pml4, (v))
+
+uintptr_t pt_virt_to_phys_helper(pml4_t *pml4, uintptr_t vaddr);
+
+uintptr_t pt_virt_to_phys(uintptr_t vaddr);
+
+void pt_init(void);
+
+/* Currently unused. */
+void pt_template_init(void);
+
+pml4_t *pt_get();
+
+void pt_set(pml4_t *pml4);
+
+pml4_t *clone_pml4(pml4_t *pml4, long include_user_mappings);
+
+pml4_t *pt_create();
+
+void pt_destroy(pml4_t *pml4);
+
+long pt_map(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, uint32_t pdflags,
+ uint32_t ptflags);
+
+long pt_map_range(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr,
+ uintptr_t vmax, uint32_t pdflags, uint32_t ptflags);
+
+void pt_unmap(pml4_t *pml4, uintptr_t vaddr);
+
+void pt_unmap_range(pml4_t *pml4, uintptr_t vaddr, uintptr_t vmax);
+
+void check_invalid_mappings(pml4_t *pml4, vmmap_t *vmmap, char *prompt);
diff --git a/kernel/include/mm/pframe.h b/kernel/include/mm/pframe.h
new file mode 100644
index 0000000..bd2c3f7
--- /dev/null
+++ b/kernel/include/mm/pframe.h
@@ -0,0 +1,23 @@
+#pragma once
+
+//#include "mm/mobj.h"
+#include "proc/kmutex.h"
+#include "types.h"
+
+typedef struct pframe
+{
+ size_t pf_pagenum;
+ size_t pf_loc;
+ void *pf_addr;
+ long pf_dirty;
+ kmutex_t pf_mutex;
+ list_link_t pf_link;
+} pframe_t;
+
+void pframe_init();
+
+pframe_t *pframe_create();
+
+void pframe_release(pframe_t **pfp);
+
+void pframe_free(pframe_t **pfp);
diff --git a/kernel/include/mm/slab.h b/kernel/include/mm/slab.h
new file mode 100644
index 0000000..6ead5ae
--- /dev/null
+++ b/kernel/include/mm/slab.h
@@ -0,0 +1,96 @@
+#pragma once
+
+#include <types.h>
+
+/* Define SLAB_REDZONE to add top and bottom redzones to every object. */
+#define SLAB_REDZONE 0xdeadbeefdeadbeef
+
+/* Define SLAB_CHECK_FREE to add extra bookkeeping to make sure there
+ * are no double frees. */
+#define SLAB_CHECK_FREE
+
+/*
+ * The slab allocator. A "cache" is a store of objects; you create one by
+ * specifying a constructor, destructor, and the size of an object. The
+ * "alloc" function allocates one object, and the "free" function returns
+ * it to the free list *without calling the destructor*. This lets you save
+ * on destruction/construction calls; the idea is that every free object in
+ * the cache is in a known state.
+ */
+typedef struct slab_allocator slab_allocator_t;
+
+/* Initializes the slab allocator subsystem. This should be done
+ * only after the page subsystem has been initialized. Slab allocators
+ * and kmalloc will not work until this function has been called. */
+void slab_init();
+
+/*
+ * Example Usage
+ * See the below example for how to use a slab allocator to allocate objects
+ * of a given size. Note that you usually don't need to destroy most allocators,
+ * as they should last as long as the system is running (e.g. the process allocator).
+ *
+ * ```
+ * typedef struct {
+ * int x;
+ * int y;
+ * } point_t;
+ *
+ * // Create a new allocator for objects of type point_t. This only needs to
+ * // happen once, usually in an initialization routine.
+ * slab_allocator_t *point_allocator = slab_allocator_create("point", sizeof(point_t));
+ *
+ * // Allocate a new point_t from the slab allocator
+ * point_t *p = (point_t *)slab_obj_alloc(point_allocator);
+ *
+ * // ... Use p here ...
+ *
+ * // Deallocate the point_t
+ * slab_obj_free(point_allocator, p);
+ * ```
+ */
+
+/**
+ * Creates a slab allocator for allocating objects of a given size.
+ *
+ * @param name The name of the allocator (for debugging)
+ * @param size The size (bytes) of objects that will be allocated from this allocator
+ * @return slab_allocator_t* An allocator, or NULL on failure
+ */
+slab_allocator_t *slab_allocator_create(const char *name, size_t size);
+
+/**
+ * Destroys a slab allocator.
+ *
+ * @param allocator The allocator to destroy
+ */
+void slab_allocator_destroy(struct slab_allocator *allocator);
+
+/**
+ * Allocates an object from the given slab allocator. The object is a chunk of
+ * memory as big as the size that slab allocator was created with.
+ *
+ * @param allocator The allocator to allocate from
+ * @return void* A chunk of memory of the appropriate object size, or NULL
+ * on failure
+ */
+void *slab_obj_alloc(slab_allocator_t *allocator);
+
+/**
+ * Frees a given object that was allocated by a given slab allocator.
+ *
+ * @param allocator The allocator that allocated this object
+ * @param obj The object to be freed
+ */
+void slab_obj_free(slab_allocator_t *allocator, void *obj);
+
+/**
+ * Reclaims memory from unused slabs.
+ *
+ * NOTE: This is not currently implemented.
+ *
+ * @param target Target number of pages to reclaim. If negative, reclaim as many
+ * as possible
+ * @return long Number of pages freed
+ */
+long slab_allocators_reclaim(long target); \ No newline at end of file
diff --git a/kernel/include/mm/tlb.h b/kernel/include/mm/tlb.h
new file mode 100644
index 0000000..836be4e
--- /dev/null
+++ b/kernel/include/mm/tlb.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "kernel.h"
+#include "types.h"
+
+#include "mm/page.h"
+
+/* Invalidates any entries from the TLB which contain
+ * mappings for the given virtual address. */
+static inline void tlb_flush(uintptr_t vaddr)
+{
+ __asm__ volatile("invlpg (%0)" ::"r"(vaddr));
+}
+
+/* Invalidates any entries for count pages starting at
+ * vaddr from the TLB. If this range is very large it may
+ * be more efficient to call tlb_flush_all to invalidate
+ * the entire TLB. */
+static inline void tlb_flush_range(uintptr_t vaddr, size_t count)
+{
+ for (size_t i = 0; i < count; i++, vaddr += PAGE_SIZE)
+ {
+ tlb_flush(vaddr);
+ }
+}
+
+/* Invalidates the entire TLB. */
+static inline void tlb_flush_all()
+{
+ uintptr_t pdir;
+ __asm__ volatile("movq %%cr3, %0"
+ : "=r"(pdir));
+ __asm__ volatile("movq %0, %%cr3" ::"r"(pdir)
+ : "memory");
+}
diff --git a/kernel/include/multiboot.h b/kernel/include/multiboot.h
new file mode 100644
index 0000000..55bb8a8
--- /dev/null
+++ b/kernel/include/multiboot.h
@@ -0,0 +1,417 @@
+/* multiboot.h - Multiboot header file. */
+/* Copyright (C) 1999,2003,2007,2008,2009,2010 Free Software Foundation, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ANY
+ * DEVELOPER OR DISTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MULTIBOOT_HEADER
+#define MULTIBOOT_HEADER 1
+
+/* How many bytes from the start of the file we search for the header. */
+#define MULTIBOOT_SEARCH 32768
+#define MULTIBOOT_HEADER_ALIGN 8
+
+/* The magic field should contain this. */
+#define MULTIBOOT2_HEADER_MAGIC 0xe85250d6
+
+/* This should be in %eax. */
+#define MULTIBOOT2_BOOTLOADER_MAGIC 0x36d76289
+
+/* Alignment of multiboot modules. */
+#define MULTIBOOT_MOD_ALIGN 0x00001000
+
+/* Alignment of the multiboot info structure. */
+#define MULTIBOOT_INFO_ALIGN 0x00000008
+
+/* Flags set in the 'flags' member of the multiboot header. */
+
+#define MULTIBOOT_TAG_ALIGN 8
+#define MULTIBOOT_TAG_TYPE_END 0
+#define MULTIBOOT_TAG_TYPE_CMDLINE 1
+#define MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME 2
+#define MULTIBOOT_TAG_TYPE_MODULE 3
+#define MULTIBOOT_TAG_TYPE_BASIC_MEMINFO 4
+#define MULTIBOOT_TAG_TYPE_BOOTDEV 5
+#define MULTIBOOT_TAG_TYPE_MMAP 6
+#define MULTIBOOT_TAG_TYPE_VBE 7
+#define MULTIBOOT_TAG_TYPE_FRAMEBUFFER 8
+#define MULTIBOOT_TAG_TYPE_ELF_SECTIONS 9
+#define MULTIBOOT_TAG_TYPE_APM 10
+#define MULTIBOOT_TAG_TYPE_EFI32 11
+#define MULTIBOOT_TAG_TYPE_EFI64 12
+#define MULTIBOOT_TAG_TYPE_SMBIOS 13
+#define MULTIBOOT_TAG_TYPE_ACPI_OLD 14
+#define MULTIBOOT_TAG_TYPE_ACPI_NEW 15
+#define MULTIBOOT_TAG_TYPE_NETWORK 16
+#define MULTIBOOT_TAG_TYPE_EFI_MMAP 17
+#define MULTIBOOT_TAG_TYPE_EFI_BS 18
+#define MULTIBOOT_TAG_TYPE_EFI32_IH 19
+#define MULTIBOOT_TAG_TYPE_EFI64_IH 20
+#define MULTIBOOT_TAG_TYPE_LOAD_BASE_ADDR 21
+
+#define MULTIBOOT_HEADER_TAG_END 0
+#define MULTIBOOT_HEADER_TAG_INFORMATION_REQUEST 1
+#define MULTIBOOT_HEADER_TAG_ADDRESS 2
+#define MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS 3
+#define MULTIBOOT_HEADER_TAG_CONSOLE_FLAGS 4
+#define MULTIBOOT_HEADER_TAG_FRAMEBUFFER 5
+#define MULTIBOOT_HEADER_TAG_MODULE_ALIGN 6
+#define MULTIBOOT_HEADER_TAG_EFI_BS 7
+#define MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS_EFI32 8
+#define MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS_EFI64 9
+#define MULTIBOOT_HEADER_TAG_RELOCATABLE 10
+
+#define MULTIBOOT_ARCHITECTURE_I386 0
+#define MULTIBOOT_ARCHITECTURE_MIPS32 4
+#define MULTIBOOT_HEADER_TAG_OPTIONAL 1
+
+#define MULTIBOOT_LOAD_PREFERENCE_NONE 0
+#define MULTIBOOT_LOAD_PREFERENCE_LOW 1
+#define MULTIBOOT_LOAD_PREFERENCE_HIGH 2
+
+#define MULTIBOOT_CONSOLE_FLAGS_CONSOLE_REQUIRED 1
+#define MULTIBOOT_CONSOLE_FLAGS_EGA_TEXT_SUPPORTED 2
+
+#ifndef ASM_FILE
+
+typedef unsigned char multiboot_uint8_t;
+typedef unsigned short multiboot_uint16_t;
+typedef unsigned int multiboot_uint32_t;
+typedef unsigned long long multiboot_uint64_t;
+
+struct multiboot_header
+{
+ /* Must be MULTIBOOT_MAGIC - see above. */
+ multiboot_uint32_t magic;
+
+ /* ISA */
+ multiboot_uint32_t architecture;
+
+ /* Total header length. */
+ multiboot_uint32_t header_length;
+
+ /* The above fields plus this one must equal 0 mod 2^32. */
+ multiboot_uint32_t checksum;
+};
+
+struct multiboot_header_tag
+{
+ multiboot_uint16_t type;
+ multiboot_uint16_t flags;
+ multiboot_uint32_t size;
+};
+
+struct multiboot_header_tag_information_request
+{
+ multiboot_uint16_t type;
+ multiboot_uint16_t flags;
+ multiboot_uint32_t size;
+ multiboot_uint32_t requests[0];
+};
+
+struct multiboot_header_tag_address
+{
+ multiboot_uint16_t type;
+ multiboot_uint16_t flags;
+ multiboot_uint32_t size;
+ multiboot_uint32_t header_addr;
+ multiboot_uint32_t load_addr;
+ multiboot_uint32_t load_end_addr;
+ multiboot_uint32_t bss_end_addr;
+};
+
+struct multiboot_header_tag_entry_address
+{
+ multiboot_uint16_t type;
+ multiboot_uint16_t flags;
+ multiboot_uint32_t size;
+ multiboot_uint32_t entry_addr;
+};
+
+struct multiboot_header_tag_console_flags
+{
+ multiboot_uint16_t type;
+ multiboot_uint16_t flags;
+ multiboot_uint32_t size;
+ multiboot_uint32_t console_flags;
+};
+
+struct multiboot_header_tag_framebuffer
+{
+ multiboot_uint16_t type;
+ multiboot_uint16_t flags;
+ multiboot_uint32_t size;
+ multiboot_uint32_t width;
+ multiboot_uint32_t height;
+ multiboot_uint32_t depth;
+};
+
+struct multiboot_header_tag_module_align
+{
+ multiboot_uint16_t type;
+ multiboot_uint16_t flags;
+ multiboot_uint32_t size;
+};
+
+struct multiboot_header_tag_relocatable
+{
+ multiboot_uint16_t type;
+ multiboot_uint16_t flags;
+ multiboot_uint32_t size;
+ multiboot_uint32_t min_addr;
+ multiboot_uint32_t max_addr;
+ multiboot_uint32_t align;
+ multiboot_uint32_t preference;
+};
+
+struct multiboot_color
+{
+ multiboot_uint8_t red;
+ multiboot_uint8_t green;
+ multiboot_uint8_t blue;
+};
+
+struct multiboot_mmap_entry
+{
+ multiboot_uint64_t addr;
+ multiboot_uint64_t len;
+#define MULTIBOOT_MEMORY_AVAILABLE 1
+#define MULTIBOOT_MEMORY_RESERVED 2
+#define MULTIBOOT_MEMORY_ACPI_RECLAIMABLE 3
+#define MULTIBOOT_MEMORY_NVS 4
+#define MULTIBOOT_MEMORY_BADRAM 5
+ multiboot_uint32_t type;
+ multiboot_uint32_t zero;
+};
+typedef struct multiboot_mmap_entry multiboot_memory_map_t;
+
+struct multiboot_tag
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+};
+
+struct multiboot_tag_string
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ char string[0];
+};
+
+struct multiboot_tag_module
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint32_t mod_start;
+ multiboot_uint32_t mod_end;
+ char cmdline[0];
+};
+
+struct multiboot_tag_basic_meminfo
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint32_t mem_lower;
+ multiboot_uint32_t mem_upper;
+};
+
+struct multiboot_tag_bootdev
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint32_t biosdev;
+ multiboot_uint32_t slice;
+ multiboot_uint32_t part;
+};
+
+struct multiboot_tag_mmap
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint32_t entry_size;
+ multiboot_uint32_t entry_version;
+ struct multiboot_mmap_entry entries[0];
+};
+
+struct multiboot_vbe_info_block
+{
+ multiboot_uint8_t external_specification[512];
+};
+
+struct multiboot_vbe_mode_info_block
+{
+ multiboot_uint8_t external_specification[256];
+};
+
+struct multiboot_tag_vbe
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+
+ multiboot_uint16_t vbe_mode;
+ multiboot_uint16_t vbe_interface_seg;
+ multiboot_uint16_t vbe_interface_off;
+ multiboot_uint16_t vbe_interface_len;
+
+ struct multiboot_vbe_info_block vbe_control_info;
+ struct multiboot_vbe_mode_info_block vbe_mode_info;
+};
+
+struct multiboot_tag_framebuffer_common
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+
+ multiboot_uint64_t framebuffer_addr;
+ multiboot_uint32_t framebuffer_pitch;
+ multiboot_uint32_t framebuffer_width;
+ multiboot_uint32_t framebuffer_height;
+ multiboot_uint8_t framebuffer_bpp;
+#define MULTIBOOT_FRAMEBUFFER_TYPE_INDEXED 0
+#define MULTIBOOT_FRAMEBUFFER_TYPE_RGB 1
+#define MULTIBOOT_FRAMEBUFFER_TYPE_EGA_TEXT 2
+ multiboot_uint8_t framebuffer_type;
+ multiboot_uint16_t reserved;
+};
+
+struct multiboot_tag_framebuffer
+{
+ struct multiboot_tag_framebuffer_common common;
+
+ union {
+ struct
+ {
+ multiboot_uint16_t framebuffer_palette_num_colors;
+ struct multiboot_color framebuffer_palette[0];
+ };
+ struct
+ {
+ multiboot_uint8_t framebuffer_red_field_position;
+ multiboot_uint8_t framebuffer_red_mask_size;
+ multiboot_uint8_t framebuffer_green_field_position;
+ multiboot_uint8_t framebuffer_green_mask_size;
+ multiboot_uint8_t framebuffer_blue_field_position;
+ multiboot_uint8_t framebuffer_blue_mask_size;
+ };
+ };
+};
+
+struct multiboot_tag_elf_sections
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint32_t num;
+ multiboot_uint32_t entsize;
+ multiboot_uint32_t shndx;
+ char sections[0];
+};
+
+struct multiboot_tag_apm
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint16_t version;
+ multiboot_uint16_t cseg;
+ multiboot_uint32_t offset;
+ multiboot_uint16_t cseg_16;
+ multiboot_uint16_t dseg;
+ multiboot_uint16_t flags;
+ multiboot_uint16_t cseg_len;
+ multiboot_uint16_t cseg_16_len;
+ multiboot_uint16_t dseg_len;
+};
+
+struct multiboot_tag_efi32
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint32_t pointer;
+};
+
+struct multiboot_tag_efi64
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint64_t pointer;
+};
+
+struct multiboot_tag_smbios
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint8_t major;
+ multiboot_uint8_t minor;
+ multiboot_uint8_t reserved[6];
+ multiboot_uint8_t tables[0];
+};
+
+struct multiboot_tag_old_acpi
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint8_t rsdp[0];
+};
+
+struct multiboot_tag_new_acpi
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint8_t rsdp[0];
+};
+
+struct multiboot_tag_network
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint8_t dhcpack[0];
+};
+
+struct multiboot_tag_efi_mmap
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint32_t descr_size;
+ multiboot_uint32_t descr_vers;
+ multiboot_uint8_t efi_mmap[0];
+};
+
+struct multiboot_tag_efi32_ih
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint32_t pointer;
+};
+
+struct multiboot_tag_efi64_ih
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint64_t pointer;
+};
+
+struct multiboot_tag_load_base_addr
+{
+ multiboot_uint32_t type;
+ multiboot_uint32_t size;
+ multiboot_uint32_t load_base_addr;
+};
+
+#endif /* ! ASM_FILE */
+
+#endif /* ! MULTIBOOT_HEADER */ \ No newline at end of file
diff --git a/kernel/include/proc/context.h b/kernel/include/proc/context.h
new file mode 100644
index 0000000..63c692e
--- /dev/null
+++ b/kernel/include/proc/context.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "types.h"
+
+#include "mm/pagetable.h"
+
+/*
+ * The function pointer to be implemented by functions which are entry
+ * points for new threads.
+ */
+typedef void *(*context_func_t)(long, void *);
+
+typedef struct context
+{
+ uintptr_t c_rip; /* instruction pointer (RIP) */
+ uintptr_t c_rsp; /* stack pointer (RSP) */
+ uintptr_t c_rbp; /* frame pointer (RBP) */
+
+ /* Pointer to the top-level page table (PML4) for this process. It is
+ * the 'root' of the page-table hierarchy where virtual -> physical
+ * address lookup starts. */
+ pml4_t *c_pml4;
+
+ uintptr_t c_kstack;
+ size_t c_kstacksz;
+} context_t;
+
+/**
+ * Initialize the given context such that when it begins execution it
+ * will execute func(arg1,arg2). A kernel stack and page directory
+ * exclusive to this context must also be provided.
+ *
+ * @param c the context to initialize
+ * @param func the function which will begin executing when this
+ * context is first made active
+ * @param arg1 the first argument to func
+ * @param arg2 the second argument to func
+ * @param kstack a pointer to the kernel stack this context will use
+ * @param kstacksz the size of the kernel stack
+ * @param pml4 the page table this context will use
+ */
+void context_setup(context_t *c, context_func_t func, long arg1, void *arg2,
+ void *kstack, size_t kstacksz, pml4_t *pml4);
+
+void context_setup_raw(context_t *c, void (*func)(), void *kstack,
+ size_t kstacksz, pml4_t *pml4);
+/**
+ * Makes the given context the one currently running on the CPU. Use
+ * this mainly for the initial context.
+ *
+ * @param c the context to make active
+ */
+void context_make_active(context_t *c);
+
+/**
+ * Save the current state of the machine into the old context, and begin
+ * executing the new context. Used primarily by the scheduler.
+ *
+ * @param oldc the context to switch from
+ * @param newc the context to switch to
+ */
+void context_switch(context_t *oldc, context_t *newc);
diff --git a/kernel/include/proc/core.h b/kernel/include/proc/core.h
new file mode 100644
index 0000000..9d6eb16
--- /dev/null
+++ b/kernel/include/proc/core.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "proc/context.h"
+#include "proc/sched.h"
+#include "proc/spinlock.h"
+
+typedef struct core
+{
+ long kc_id;
+ context_t kc_ctx;
+
+ ktqueue_t *kc_queue;
+
+ uintptr_t kc_csdpaddr;
+} core_t;
diff --git a/kernel/include/proc/kmutex.h b/kernel/include/proc/kmutex.h
new file mode 100644
index 0000000..37d8ece
--- /dev/null
+++ b/kernel/include/proc/kmutex.h
@@ -0,0 +1,60 @@
+#pragma once
+
+#include "proc/sched.h"
+#include "proc/spinlock.h"
+
+/*===========
+ * Structures
+ *==========*/
+
+typedef struct kmutex
+{
+ ktqueue_t km_waitq; /* wait queue */
+ struct kthread *km_holder; /* current holder */
+ list_link_t km_link;
+} kmutex_t;
+
+#define KMUTEX_INITIALIZER(mtx) \
+ { \
+ .km_waitq = KTQUEUE_INITIALIZER((mtx).km_waitq), .km_holder = NULL, \
+ .km_link = LIST_LINK_INITIALIZER((mtx).km_link), \
+ }
+
+/*==========
+ * Functions
+ *=========*/
+
+/**
+ * Initializes a mutex.
+ *
+ * @param mtx the mutex
+ */
+void kmutex_init(kmutex_t *mtx);
+
+/**
+ * Locks the specified mutex.
+ *
+ * Note: This function may block.
+ *
+ * Note: These locks are not re-entrant
+ *
+ * @param mtx the mutex to lock
+ */
+void kmutex_lock(kmutex_t *mtx);
+
+/**
+ * Unlocks the specified mutex.
+ *
+ * @param mtx the mutex to unlock
+ */
+void kmutex_unlock(kmutex_t *mtx);
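+
+/*
+ * Usage sketch (the mutex name is hypothetical): initialize once, then
+ * bracket each critical section with lock/unlock from the same thread.
+ *
+ *     static kmutex_t counter_mutex;
+ *     kmutex_init(&counter_mutex);      // once, at init time
+ *
+ *     kmutex_lock(&counter_mutex);      // may block
+ *     // ... critical section ...
+ *     kmutex_unlock(&counter_mutex);
+ */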
+
+/**
+ * Indicates if a mutex has waiters.
+ */
+long kmutex_has_waiters(kmutex_t *mtx);
+
+/**
+ * Indicates if curthr owns a mutex.
+ */
+long kmutex_owns_mutex(kmutex_t *mtx);
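A minimal usage sketch, assuming a hypothetical shared counter; the initializer and lock/unlock calls are the ones declared above:

#include "proc/kmutex.h"

/* Hypothetical shared counter guarded by a kmutex. */
static kmutex_t counter_lock = KMUTEX_INITIALIZER(counter_lock);
static long counter;

static void counter_increment(void)
{
    kmutex_lock(&counter_lock);   /* may block; not re-entrant */
    counter++;
    kmutex_unlock(&counter_lock); /* wakes a waiter, if any */
}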
diff --git a/kernel/include/proc/kthread.h b/kernel/include/proc/kthread.h
new file mode 100644
index 0000000..6bc66be
--- /dev/null
+++ b/kernel/include/proc/kthread.h
@@ -0,0 +1,106 @@
+#pragma once
+
+#include <proc/context.h>
+#include <proc/sched.h>
+#include <proc/spinlock.h>
+#include <util/list.h>
+
+/*=====================
+ * Types and structures
+ *====================*/
+
+/*
+ * Alias for an entry point function of a new thread.
+ */
+typedef context_func_t kthread_func_t;
+
+/*
+ * Thread states.
+ */
+typedef enum
+{
+ KT_NO_STATE, /* Illegal state */
+ KT_ON_CPU, /* Currently running */
+ KT_RUNNABLE, /* On the run queue */
+ KT_SLEEP, /* Blocked indefinitely */
+ KT_SLEEP_CANCELLABLE, /* Blocked, but can be interrupted */
+ KT_EXITED /* Exited, waiting to be joined */
+} kthread_state_t;
+
+/*
+ * Thread descriptor.
+ */
+typedef struct kthread
+{
+ context_t kt_ctx; /* Thread context */
+ char *kt_kstack; /* Kernel stack */
+ void *kt_retval; /* Return value */
+ long kt_errno; /* Errno of most recent syscall */
+ struct proc *kt_proc; /* Corresponding process */
+
+ long kt_cancelled; /* Set if the thread has been cancelled */
+ ktqueue_t *kt_wchan; /* If blocking, the queue this thread is blocked on */
+ kthread_state_t kt_state;
+
+ list_link_t kt_plink; /* Link on the process's thread list, p_threads */
+    list_link_t kt_qlink; /* Link on some ktqueue if the thread is not running */
+
+ list_t kt_mutexes; /* List of owned mutexes, for use in debugging */
+ long kt_recent_core; /* For SMP */
+
+ uint64_t kt_preemption_count;
+} kthread_t;
+
+/*==========
+ * Functions
+ *=========*/
+
+/**
+ * Initializes the kthread subsystem at system startup.
+ */
+void kthread_init(void);
+
+/**
+ * Allocates and initializes a kernel thread.
+ *
+ * @param proc the process in which the thread will run
+ * @param func the function that will be called when the newly created
+ * thread starts executing
+ * @param arg1 the first argument to func
+ * @param arg2 the second argument to func
+ * @return the newly created thread
+ *
+ */
+kthread_t *kthread_create(struct proc *proc, kthread_func_t func, long arg1,
+ void *arg2);
+
+/**
+ * Creates a clone of the specified thread
+ *
+ * @param thr the thread to clone
+ * @return a clone of thr
+ */
+kthread_t *kthread_clone(kthread_t *thr);
+
+/**
+ * Frees resources associated with a thread.
+ *
+ * @param thr the thread to free
+ */
+void kthread_destroy(kthread_t *thr);
+
+/**
+ * Cancels a thread.
+ *
+ * @param kthr the thread to be cancelled
+ * @param retval the return value for the thread
+ */
+void kthread_cancel(kthread_t *kthr, void *retval);
+
+/**
+ * Exits the current thread.
+ *
+ * @param retval the return value for the thread
+ */
+void kthread_exit(void *retval);
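A hypothetical sketch of creating and starting a worker thread with this API, using sched_make_runnable from sched.h; worker_main and spawn_worker are illustrative names:

#include "proc/kthread.h"
#include "proc/sched.h"

/* Hypothetical entry point for a worker thread. */
static void *worker_main(long arg1, void *arg2)
{
    (void)arg1;
    (void)arg2;
    /* ... do work ... */
    kthread_exit(NULL); /* terminates the current thread */
    return NULL;        /* not reached */
}

/* Sketch: create a thread inside an existing process and let the
 * scheduler pick it up. */
static kthread_t *spawn_worker(struct proc *p)
{
    kthread_t *thr = kthread_create(p, worker_main, 0, NULL);
    sched_make_runnable(thr);
    return thr;
}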
diff --git a/kernel/include/proc/proc.h b/kernel/include/proc/proc.h
new file mode 100644
index 0000000..bc608a0
--- /dev/null
+++ b/kernel/include/proc/proc.h
@@ -0,0 +1,200 @@
+#pragma once
+
+#include "config.h"
+#include "mm/pagetable.h"
+#include "proc/kthread.h"
+#include "types.h"
+#include "vm/vmmap.h"
+
+/*===========
+ * Structures
+ *==========*/
+
+/*
+ * Process resource information
+ */
+#define PROC_MAX_COUNT 65536
+#define PROC_NAME_LEN 256
+
+/* Process states */
+typedef enum
+{
+ PROC_RUNNING, /* Has running threads */
+ PROC_DEAD /* Exited, but not yet wait'ed */
+} proc_state_t;
+
+/* Process descriptor */
+typedef struct proc
+{
+ pid_t p_pid; /* Process ID */
+ char p_name[PROC_NAME_LEN]; /* Process name */
+
+ list_t p_threads; /* Threads list */
+ list_t p_children; /* Children list */
+ struct proc *p_pproc; /* Parent process */
+
+ list_link_t p_list_link; /* Link of list of all processes */
+ list_link_t p_child_link; /* Link on parent's list of children */
+
+ long p_status; /* Exit status */
+ proc_state_t p_state; /* Process state */
+
+ pml4_t *p_pml4; /* Page table. */
+
+ /*
+ * If a parent is waiting on a child, the parent puts itself on its own
+ * p_wait queue. When a child terminates, it broadcasts on its parent's
+ * p_wait to wake it up.
+ */
+ ktqueue_t p_wait;
+
+ /* VFS related */
+ struct file *p_files[NFILES]; /* Open files */
+ struct vnode *p_cwd; /* Current working directory */
+
+ /* VM related */
+ /*
+ * The current value of a process's break is maintained in the 'p_brk'.
+ *
+ * The 'p_brk' and 'p_start_brk' members of a proc_t struct are initialized
+ * by the loader. 'p_start_brk' is subsequently never modified; it always
+ * holds the initial value of the break.
+ *
+ * The loader sets 'p_start_brk' to be the end of the bss section (search
+ * online for memory layout diagrams of a running process for more
+ * details).
+ *
+ * These are both addresses.
+ */
+ void *p_brk; /* Process break; see brk(2) */
+ void *p_start_brk; /* Initial value of process break */
+ struct vmmap *p_vmmap; /* List of areas mapped into process's
+ user address space. */
+} proc_t;
+
+/*==========
+ * Functions
+ *=========*/
+
+/**
+ * Initializes the proc subsystem at system startup.
+ */
+void proc_init(void);
+
+/**
+ * Initializes the special idleproc at system startup.
+ */
+void proc_idleproc_init();
+
+/**
+ * Shuts down certain subsystems at system shutdown.
+ */
+void initproc_finish();
+
+/**
+ * Allocates and initializes a new process.
+ *
+ * @param name the name to give the newly created process
+ * @return the newly created process
+ */
+proc_t *proc_create(const char *name);
+
+/**
+ * Frees all the resources associated with a process.
+ *
+ * @param proc process to destroy
+ */
+void proc_destroy(proc_t *proc);
+
+/**
+ * Handles exiting the current process.
+ *
+ * @param retval exit code for the thread and process
+ */
+void proc_thread_exiting(void *retval);
+
+/**
+ * Stops another process from running again by cancelling all its
+ * threads.
+ *
+ * @param proc the process to kill
+ * @param status the status the process should exit with
+ */
+void proc_kill(proc_t *proc, long status);
+
+/**
+ * Kills every process except for the idle process and direct children
+ * of the idle process.
+ */
+void proc_kill_all(void);
+
+/*========================
+ * Functions: System calls
+ *=======================*/
+
+/**
+ * Implements the _exit(2) system call.
+ *
+ * @param status the exit status of the process
+ */
+void do_exit(long status);
+
+/**
+ * Implements the waitpid(2) system call.
+ *
+ * @param pid the pid to wait on, or -1 to wait on any child
+ * @param status used to return the exit status of the child
+ * @param options only 0 is supported (no options)
+ *
+ * @return the pid of the child process that was cleaned up, or an error:
+ *  - ENOTSUP if the input is invalid
+ *  - ECHILD if no valid child could be found
+ */
+pid_t do_waitpid(pid_t pid, int *status, int options);
+
+/**
+ * This function implements the fork(2) system call.
+ *
+ * @param regs the register state at the time of the system call
+ */
+struct regs;
+long do_fork(struct regs *regs);
+
+/*===========
+ * Miscellany
+ *==========*/
+
+/*
+ * Special PIDs reserved for specific processes
+ */
+#define PID_IDLE 0
+#define PID_INIT 1
+
+/*
+ * Enable global use of idleproc
+ */
+extern proc_t idleproc;
+
+/*=====================
+ * Functions: Debugging
+ *====================*/
+
+/**
+ * Provides detailed debug information about a given process.
+ *
+ * @param arg a pointer to the process
+ * @param buf buffer to write to
+ * @param osize size of the buffer
+ * @return the remaining size of the buffer
+ */
+size_t proc_info(const void *arg, char *buf, size_t osize);
+
+/**
+ * Provides debug information overview of all processes.
+ *
+ * @param arg must be NULL
+ * @param buf buffer to write to
+ * @param osize size of the buffer
+ * @return the remaining size of the buffer
+ */
+size_t proc_list_info(const void *arg, char *buf, size_t osize); \ No newline at end of file
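A small sketch of the reaping loop the do_waitpid contract above suggests, assuming the usual convention that errors come back negative; reap_children is a hypothetical helper:

#include "proc/proc.h"

/* Sketch: a parent cleaning up all of its terminated children. */
static void reap_children(void)
{
    int status;
    for (;;)
    {
        pid_t pid = do_waitpid(-1, &status, 0);
        if (pid < 0)
        {
            break; /* no more children to wait for (e.g. ECHILD) */
        }
        /* pid was cleaned up; status holds its exit status */
    }
}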
diff --git a/kernel/include/proc/sched.h b/kernel/include/proc/sched.h
new file mode 100644
index 0000000..343e8d5
--- /dev/null
+++ b/kernel/include/proc/sched.h
@@ -0,0 +1,126 @@
+#pragma once
+
+#include "proc/spinlock.h"
+#include "util/list.h"
+
+/*===========
+ * Structures
+ *==========*/
+
+/*
+ * Queue structure for kthreads
+ * Note that ktqueue functions are private - managing the queue
+ * should be done within sched.c, or using public functions
+ */
+typedef struct ktqueue
+{
+ list_t tq_list;
+ size_t tq_size;
+} ktqueue_t;
+
+/*
+ * Macro to initialize a ktqueue. See sched_queue_init for how the
+ * queue should be initialized in your code.
+ */
+#define KTQUEUE_INITIALIZER(ktqueue) \
+ { \
+ .tq_list = LIST_INITIALIZER((ktqueue).tq_list), \
+ }
+
+/*
+ * kthread declaration to make function signatures happy
+ */
+struct kthread;
+
+/*==========
+ * Functions
+ *=========*/
+
+/**
+ * Runs a new thread from the run queue.
+ *
+ * @param queue the queue to place curthr on
+ */
+void sched_switch(ktqueue_t *queue);
+
+/**
+ * Helps with context switching.
+ */
+void core_switch();
+
+/**
+ * Yields the CPU to another runnable thread.
+ */
+void sched_yield();
+
+/**
+ * Enables a thread to be selected by the scheduler to run.
+ *
+ * @param thr the thread to make runnable
+ */
+void sched_make_runnable(struct kthread *thr);
+
+/**
+ * Causes the current thread to enter into an uncancellable sleep on
+ * the given queue.
+ *
+ * @param q the queue to sleep on
+ */
+void sched_sleep_on(ktqueue_t *q);
+
+/**
+ * Causes the current thread to enter into a cancellable sleep on the
+ * given queue.
+ *
+ * @param queue the queue to sleep on
+ * @return -EINTR if the thread was cancelled and 0 otherwise
+ */
+long sched_cancellable_sleep_on(ktqueue_t *queue);
+
+/**
+ * Wakes up a thread from q.
+ *
+ * @param q queue
+ * @param thrp if an address is provided, *thrp is set to the woken up thread
+ *
+ */
+void sched_wakeup_on(ktqueue_t *q, struct kthread **thrp);
+
+/**
+ * Wakes up all threads sleeping on the queue.
+ *
+ * @param q the queue to wake up threads from
+ */
+void sched_broadcast_on(ktqueue_t *q);
+
+/**
+ * Cancel the given thread from the queue it sleeps on.
+ *
+ * @param thr the thread whose sleep should be cancelled
+ */
+void sched_cancel(struct kthread *thr);
+
+/**
+ * Initializes a queue.
+ *
+ * @param queue the queue
+ */
+void sched_queue_init(ktqueue_t *queue);
+
+/**
+ * Returns true if the queue is empty.
+ *
+ * @param queue the queue
+ * @return true if the queue is empty
+ */
+long sched_queue_empty(ktqueue_t *queue);
+
+/**
+ * Functions for managing the current thread's preemption status.
+ */
+void preemption_disable();
+void preemption_enable();
+void preemption_reset();
+long preemption_enabled(); \ No newline at end of file
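An illustrative single-slot mailbox built on these queue primitives; it assumes the non-preemptive, single-core setting, uses hypothetical mbox_* names, and elides any locking:

#include "proc/sched.h"

/* Hypothetical one-slot mailbox: consumers sleep on the queue until a
 * producer deposits a value and wakes them. */
static ktqueue_t mbox_waitq;
static long mbox_full;
static long mbox_value;

static void mbox_init(void)
{
    sched_queue_init(&mbox_waitq);
}

static void mbox_put(long v)
{
    mbox_value = v;
    mbox_full = 1;
    sched_broadcast_on(&mbox_waitq); /* wake every sleeping consumer */
}

static long mbox_get(void)
{
    while (!mbox_full)
    {
        sched_sleep_on(&mbox_waitq); /* uncancellable sleep until woken */
    }
    mbox_full = 0;
    return mbox_value;
}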
diff --git a/kernel/include/proc/spinlock.h b/kernel/include/proc/spinlock.h
new file mode 100644
index 0000000..4ce57c8
--- /dev/null
+++ b/kernel/include/proc/spinlock.h
@@ -0,0 +1,37 @@
+#pragma once
+
+typedef struct spinlock
+{
+ volatile char s_locked;
+} spinlock_t;
+
+#define SPINLOCK_INITIALIZER(lock) \
+ { \
+ .s_locked = 0 \
+ }
+
+/**
+ * Initializes the fields of the specified spinlock_t
+ * @param lock the spinlock to initialize
+ */
+void spinlock_init(spinlock_t *lock);
+
+/**
+ * Locks the specified spinlock.
+ *
+ * Note: this function may spin on the current core.
+ *
+ * Note: these locks are not re-entrant
+ *
+ * @param lock the spinlock to lock
+ */
+void spinlock_lock(spinlock_t *lock);
+
+/**
+ * Unlocks the specified spinlock.
+ *
+ * @param lock the spinlock to unlock
+ */
+void spinlock_unlock(spinlock_t *lock);
+
+long spinlock_ownslock(spinlock_t *lock);
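A minimal sketch of guarding a shared counter with this spinlock API; stats_bump and stats_count are hypothetical:

#include "proc/spinlock.h"

/* Hypothetical statistics counter guarded by a spinlock. */
static spinlock_t stats_lock = SPINLOCK_INITIALIZER(stats_lock);
static unsigned long stats_count;

static void stats_bump(void)
{
    spinlock_lock(&stats_lock);   /* spins; hold only for short sections */
    stats_count++;
    spinlock_unlock(&stats_lock);
}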
diff --git a/kernel/include/stdarg.h b/kernel/include/stdarg.h
new file mode 100644
index 0000000..ea7b872
--- /dev/null
+++ b/kernel/include/stdarg.h
@@ -0,0 +1,7 @@
+#pragma once
+
+typedef __builtin_va_list va_list;
+
+#define va_start(v, l) __builtin_va_start(v, l)
+#define va_end(v) __builtin_va_end(v)
+#define va_arg(v, l) __builtin_va_arg(v, l)
diff --git a/kernel/include/test/driverstest.h b/kernel/include/test/driverstest.h
new file mode 100644
index 0000000..16e0bc5
--- /dev/null
+++ b/kernel/include/test/driverstest.h
@@ -0,0 +1,3 @@
+#pragma once
+
+long driverstest_main(long, void*); \ No newline at end of file
diff --git a/kernel/include/test/kshell/io.h b/kernel/include/test/kshell/io.h
new file mode 100644
index 0000000..72ac92a
--- /dev/null
+++ b/kernel/include/test/kshell/io.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include "test/kshell/kshell.h"
+
+/*
+ * When writing a kernel shell command, make sure to use the following
+ * I/O functions.
+ *
+ * Before VFS is enabled, the kernel shell will use functions from
+ * chardev.h to get a pointer to the chardev_t struct for the TTY.
+ *
+ * When VFS is enabled, the kernel shell will use the functions from
+ * vfs_syscall.h to open and close the TTY and perform I/O operations
+ * on the TTY.
+ *
+ * If you use the functions below, this process will be completely
+ * transparent.
+ */
+
+/**
+ * Replacement for do_write.
+ *
+ * @param ksh the kshell to write to
+ * @param buf the buffer to write out to the kshell
+ * @param nbytes the maximum number of bytes to write
+ * @return number of bytes written on success and <0 on error
+ */
+long kshell_write(kshell_t *ksh, const void *buf, size_t nbytes);
+
+/**
+ * Replacement for do_read.
+ *
+ * @param ksh the kshell to read from
+ * @param buf the buffer to store data read from the kshell
+ * @param nbytes the maximum number of bytes to read
+ * @return number of bytes read on success and <0 on error
+ */
+long kshell_read(kshell_t *ksh, void *buf, size_t nbytes);
+
+/**
+ * Writes a specified number of bytes from a buffer to the
+ * kshell. Unlike kshell_write, this function guarantees it will write
+ * out the desired number of bytes.
+ *
+ * @param ksh the kshell to write to
+ * @param buf the buffer to write out to the kshell
+ * @param nbytes the number of bytes to write
+ * @return number of bytes written on success and <0 on error
+ */
+long kshell_write_all(kshell_t *ksh, void *buf, size_t nbytes);
+
+/* Replacement for printf */
+/**
+ * Write output to a kshell according to a format string.
+ *
+ * @param ksh the kshell to write to
+ * @param fmt the format string
+ */
+void kprintf(kshell_t *ksh, const char *fmt, ...);
diff --git a/kernel/include/test/kshell/kshell.h b/kernel/include/test/kshell/kshell.h
new file mode 100644
index 0000000..9baf4f5
--- /dev/null
+++ b/kernel/include/test/kshell/kshell.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include "types.h"
+
+typedef struct kshell kshell_t;
+
+typedef long (*kshell_cmd_func_t)(kshell_t *, size_t argc, char **argv);
+
+/**
+ * Process init function for a new kshell.
+ */
+void *kshell_proc_run(long tty, void *arg2);
+
+/**
+ * Adds a command to the global command table for kernel shells.
+ *
+ * Note: When writing commands for the kernel shell, you _MUST_ use
+ * the I/O functions from kshell_io.h instead of normal I/O
+ * functions. See comment in kshell_io.h for more information.
+ *
+ * @param name the name of the command. Typing this name into the
+ * shell will execute the command.
+ * @param command the command to add to the shell
+ * @param desc a description of the command. This is what will be
+ * printed by the command 'help <command>'
+ */
+void kshell_add_command(const char *name, kshell_cmd_func_t command,
+ const char *desc);
+
+/**
+ * Allocates and initializes a kshell.
+ *
+ * @param bd the byte device the kshell will read from and write to
+ * @return a kshell
+ */
+kshell_t *kshell_create(uint8_t ttyid);
+
+/**
+ * Destroys a kshell.
+ *
+ * @param ksh the kshell to destroy
+ */
+void kshell_destroy(kshell_t *ksh);
+
+/**
+ * Reads from the kshell's byte device and attempts to execute a
+ * command.
+ *
+ * @param ksh the kshell to execute commands with
+ * @return the number of bytes read
+ */
+long kshell_execute_next(kshell_t *ksh);
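A hypothetical example of registering a shell command with this API, using kprintf from test/kshell/io.h as the comments there require; echo_cmd and register_echo are illustrative names:

#include "test/kshell/kshell.h"
#include "test/kshell/io.h"

/* Hypothetical command: prints its arguments back to the shell. */
static long echo_cmd(kshell_t *ksh, size_t argc, char **argv)
{
    for (size_t i = 1; i < argc; i++)
    {
        kprintf(ksh, "%s ", argv[i]);
    }
    kprintf(ksh, "\n");
    return 0;
}

/* Registration, e.g. during test setup: */
static void register_echo(void)
{
    kshell_add_command("echo", echo_cmd, "echo its arguments");
}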
diff --git a/kernel/include/test/proctest.h b/kernel/include/test/proctest.h
new file mode 100644
index 0000000..94b3d9c
--- /dev/null
+++ b/kernel/include/test/proctest.h
@@ -0,0 +1,3 @@
+#pragma once
+
+long proctest_main(long, void *); \ No newline at end of file
diff --git a/kernel/include/test/s5fstest.h b/kernel/include/test/s5fstest.h
new file mode 100644
index 0000000..b6b5279
--- /dev/null
+++ b/kernel/include/test/s5fstest.h
@@ -0,0 +1,3 @@
+#pragma once
+
+long s5fstest_main(int, void *);
diff --git a/kernel/include/test/usertest.h b/kernel/include/test/usertest.h
new file mode 100644
index 0000000..3d2296f
--- /dev/null
+++ b/kernel/include/test/usertest.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#ifndef __KERNEL__
+
+#include "sys/types.h"
+#include "unistd.h"
+
+#else
+#include "types.h"
+#endif
+
+#include <stdarg.h>
+
+#define test_assert(expr, fmt, args...) \
+ _test_assert(expr, __FILE__, __LINE__, #expr, fmt, ##args)
+
+#ifndef __KERNEL__
+#define test_fork_begin() \
+ do \
+ { \
+ pid_t __test_pid = fork(); \
+ if (0 == __test_pid) \
+ { \
+ do
+
+#define test_fork_end(status) \
+ while (0) \
+ ; \
+ exit(0); \
+ } /* if */ \
+ waitpid(__test_pid, status, 0); \
+ } \
+ while (0) \
+ ;
+#endif
+
+void test_init(void);
+
+void test_fini(void);
+
+const char *test_errstr(int err);
+
+typedef void (*test_pass_func_t)(int val, const char *file, int line,
+ const char *name, const char *fmt,
+ va_list args);
+
+typedef void (*test_fail_func_t)(const char *file, int line, const char *name,
+ const char *fmt, va_list args);
+
+int _test_assert(int val, const char *file, int line, const char *name,
+ const char *fmt, ...);
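A brief sketch of how these assertion helpers might be used; example_tests is hypothetical and the second assertion's expectation is only illustrative:

#include "test/usertest.h"

static void example_tests(void)
{
    test_init();
    test_assert(1 + 1 == 2, "basic arithmetic should hold");
    test_assert(sizeof(long) == 8, "expected 8-byte longs, got %d",
                (int)sizeof(long));
    test_fini();
}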
diff --git a/kernel/include/test/vfstest/vfstest.h b/kernel/include/test/vfstest/vfstest.h
new file mode 100644
index 0000000..4f86563
--- /dev/null
+++ b/kernel/include/test/vfstest/vfstest.h
@@ -0,0 +1,156 @@
+#pragma once
+
+/* "kernel" utility things */
+
+/* fprintf */
+#define fprintf(fd, fmt, args...) dbg(DBG_TEST, fmt, ##args)
+#define printf(fmt, args...) dbg(DBG_TEST, fmt, ##args)
+
+/* errno */
+#define errno (curthr->kt_errno)
+
+/* malloc/free */
+#define malloc kmalloc
+#define free kfree
+
+/* The "kernel" system calls */
+#define ksyscall(name, formal, actual) \
+ static long ksys_##name formal \
+ { \
+ long ret = do_##name actual; \
+ if (ret < 0) \
+ { \
+ errno = -ret; \
+ return -1; \
+ } \
+ return ret; \
+ }
+
+ksyscall(close, (int fd), (fd))
+
+ksyscall(read, (int fd, void *buf, size_t nbytes), (fd, buf, nbytes))
+
+ksyscall(write, (int fd, const void *buf, size_t nbytes), (fd, buf, nbytes))
+
+ksyscall(dup, (int fd), (fd))
+
+ksyscall(dup2, (int ofd, int nfd), (ofd, nfd))
+
+ksyscall(mkdir, (const char *path), (path))
+
+ksyscall(rmdir, (const char *path), (path))
+
+ksyscall(link, (const char *old, const char *new), (old, new))
+
+ksyscall(unlink, (const char *path), (path))
+
+ksyscall(rename, (const char *oldpath, const char *newpath), (oldpath, newpath))
+
+ksyscall(chdir, (const char *path), (path))
+
+ksyscall(lseek, (int fd, int offset, int whence), (fd, offset, whence))
+
+ksyscall(getdent, (int fd, struct dirent *dirp), (fd, dirp))
+
+ksyscall(stat, (const char *path, struct stat *uf), (path, uf))
+
+ksyscall(open, (const char *filename, int flags), (filename, flags))
+
+#define ksys_exit do_exit
+
+long ksys_getdents(int fd, struct dirent *dirp, unsigned int count)
+{
+ size_t numbytesread = 0;
+ int nbr = 0;
+ dirent_t tempdirent;
+
+ if (count < sizeof(dirent_t))
+ {
+ curthr->kt_errno = EINVAL;
+ return -1;
+ }
+
+ while (numbytesread < count)
+ {
+ if ((nbr = do_getdent(fd, &tempdirent)) < 0)
+ {
+ curthr->kt_errno = -nbr;
+ return -1;
+ }
+ if (nbr == 0)
+ {
+ return numbytesread;
+ }
+ memcpy(dirp, &tempdirent, sizeof(dirent_t));
+
+ KASSERT(nbr == sizeof(dirent_t));
+
+ dirp++;
+ numbytesread += nbr;
+ }
+ return numbytesread;
+}
+
+/*
+ * Redirect system calls to kernel system calls.
+ */
+#define mkdir(a, b) ksys_mkdir(a)
+#define rmdir ksys_rmdir
+#define mount ksys_mount
+#define umount ksys_umount
+#define open(a, b, c) ksys_open(a, b)
+#define close ksys_close
+#define link ksys_link
+#define rename ksys_rename
+#define unlink ksys_unlink
+#define read ksys_read
+#define write ksys_write
+#define lseek ksys_lseek
+#define dup ksys_dup
+#define dup2 ksys_dup2
+#define chdir ksys_chdir
+#define stat(a, b) ksys_stat(a, b)
+#define getdents(a, b, c) ksys_getdents(a, b, c)
+#define exit(a) ksys_exit(a)
+
+/* Random numbers */
+/* Random int between lo and hi inclusive */
+#define RAND_MAX INT_MAX
+#define RANDOM(lo, hi) \
+ ((lo) + \
+ (((hi) - (lo) + 1) * (randseed = (randseed * 4096 + 150889) % 714025)) / \
+ 714025)
+
+static unsigned long long randseed = 123456L;
+
+static unsigned long long rand(void)
+{
+ randseed = (randseed * 4096 + 150889) % RAND_MAX;
+ return randseed;
+}
+
+static void srand(unsigned int seed) { randseed = seed; }
diff --git a/kernel/include/types.h b/kernel/include/types.h
new file mode 100644
index 0000000..e159fc1
--- /dev/null
+++ b/kernel/include/types.h
@@ -0,0 +1,31 @@
+#pragma once
+
+/* Kernel and user header (via symlink) */
+
+#define NULL 0
+
+#define packed __attribute__((packed))
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+
+typedef signed long int64_t;
+typedef unsigned long uint64_t;
+typedef signed long intptr_t;
+typedef unsigned long uintptr_t;
+typedef uint64_t size_t;
+typedef int64_t ssize_t;
+typedef int64_t off_t;
+
+typedef int32_t pid_t;
+typedef uint16_t mode_t;
+typedef uint32_t blocknum_t;
+typedef uint32_t ino_t;
+typedef uint32_t devid_t;
+
+typedef uint64_t time_t;
+typedef uint64_t useconds_t; \ No newline at end of file
diff --git a/kernel/include/util/atomic.h b/kernel/include/util/atomic.h
new file mode 100644
index 0000000..2c67e38
--- /dev/null
+++ b/kernel/include/util/atomic.h
@@ -0,0 +1,31 @@
+#ifndef ATOMIC_H
+#define ATOMIC_H
+
+typedef int atomic_t;
+
+#define ATOMIC_INIT(i) (i)
+
+static inline int __atomic_add_unless(atomic_t *a, int v, int u)
+{
+ int c, old;
+ c = __sync_fetch_and_add(a, 0);
+ while (c != u && (old = __sync_val_compare_and_swap(a, c, c + v)) != c)
+ c = old;
+ return c;
+}
+
+static inline void atomic_set(atomic_t *a, int i) { *a = i; }
+
+static inline void atomic_inc(atomic_t *a) { __sync_add_and_fetch(a, 1); }
+
+static inline int atomic_dec_and_test(atomic_t *a)
+{
+ return __sync_sub_and_fetch(a, 1) == 0;
+}
+
+static inline int atomic_inc_not_zero(atomic_t *a)
+{
+ return __atomic_add_unless(a, 1, 0);
+}
+
+#endif \ No newline at end of file
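A hypothetical reference-count wrapper showing how these primitives compose; refcounted_t and the ref_* helpers are illustrative names:

#include "util/atomic.h"

typedef struct refcounted
{
    atomic_t ref;
} refcounted_t;

static void ref_init(refcounted_t *obj)
{
    atomic_set(&obj->ref, 1); /* one initial reference */
}

static int ref_get(refcounted_t *obj)
{
    /* nonzero iff the count was incremented (i.e. it had not hit zero) */
    return atomic_inc_not_zero(&obj->ref);
}

static int ref_put(refcounted_t *obj)
{
    /* nonzero iff this was the final reference */
    return atomic_dec_and_test(&obj->ref);
}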
diff --git a/kernel/include/util/bits.h b/kernel/include/util/bits.h
new file mode 100644
index 0000000..d328574
--- /dev/null
+++ b/kernel/include/util/bits.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "kernel.h"
+#include "types.h"
+
+#define BIT(n) (1 << (n))
+
+static inline void bit_flip(void *addr, uintptr_t bit)
+{
+ uint32_t *map = (uint32_t *)addr;
+ map += (bit >> 5);
+ *map ^= (uint32_t)(1 << (bit & 0x1f));
+}
+
+static inline int bit_check(const void *addr, uintptr_t bit)
+{
+ const uint32_t *map = (const uint32_t *)addr;
+ map += (bit >> 5);
+ return (*map & (1 << (bit & 0x1f)));
+}
+
+#define MOD_POW_2(x, y) ((x) & ((y)-1))
+
+#define IS_POW_2(x) (!MOD_POW_2(x, x))
+
+#define SELECT(condition, trueval, falseval) \
+    (!!(condition) * (trueval) + !(condition) * (falseval))
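A small illustrative bitmap allocator built on bit_check and bit_flip; slot_map and the slot_* helpers are hypothetical:

#include "util/bits.h"

static uint32_t slot_map[4]; /* 128 slots, one bit per slot */

static long slot_alloc(void)
{
    for (uintptr_t i = 0; i < 128; i++)
    {
        if (!bit_check(slot_map, i))
        {
            bit_flip(slot_map, i); /* mark the slot used */
            return (long)i;
        }
    }
    return -1; /* none free */
}

static void slot_free(long i)
{
    bit_flip(slot_map, (uintptr_t)i); /* mark the slot free again */
}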
diff --git a/kernel/include/util/debug.h b/kernel/include/util/debug.h
new file mode 100644
index 0000000..7e6eb91
--- /dev/null
+++ b/kernel/include/util/debug.h
@@ -0,0 +1,305 @@
+#pragma once
+
+#include "globals.h"
+#include "main/interrupt.h"
+#include "mm/page.h"
+#include "proc/spinlock.h"
+#include "types.h"
+#include <main/apic.h>
+
+/* How to create new dbg modes:
+ *
+ * 1) Add a new '#define DBG_NAME DBG_MODE(number)' down below. Make sure the
+ * number you choose is not already being used and is less than 64.
+ * 2) Add a new entry into the DBG_TAB below. Make sure it is above the entry
+ * for "all". The first entry should be the name you want to use to
+ * disable/enable it in the makefile, the second should be the #define'd
+ * name you gave it in step 1 and the third should be a color from the list
+ * directly below this comment. Make sure you include the '\' at the end of
+ * the line with the new entry.
+ *
+ */
+
+/*
+ * These color definitions are from the ANSI specs.
+ * Do a web search for ANSI color codes to find out
+ * more funky shit like this
+ */
+
+#define _NORMAL_ "\x1b[0m"
+#define _BLACK_ "\x1b[30;47m"
+#define _RED_ "\x1b[31;40m"
+#define _GREEN_ "\x1b[32;40m"
+#define _YELLOW_ "\x1b[33;40m"
+#define _BLUE_ "\x1b[34;40m"
+#define _MAGENTA_ "\x1b[35;40m"
+#define _CYAN_ "\x1b[36;40m"
+#define _WHITE_ "\x1b[37;40m"
+
+#define _BRED_ "\x1b[1;31;40m"
+#define _BGREEN_ "\x1b[1;32;40m"
+#define _BYELLOW_ "\x1b[1;33;40m"
+#define _BBLUE_ "\x1b[1;34;40m"
+#define _BMAGENTA_ "\x1b[1;35;40m"
+#define _BCYAN_ "\x1b[1;36;40m"
+#define _BWHITE_ "\x1b[1;37;40m"
+
+#define DBG_MODE(x) (1ULL << (x))
+
+/* These defines list all of the possible debugging
+ * types. They are flags, so make sure to use the
+ * DBG_MODE macro to declare new values. */
+#define DBG_ALL (~0ULL) /* umm, "verbose" */
+#define DBG_CORE DBG_MODE(0) /* core boot code */
+#define DBG_MM DBG_MODE(1) /* memory management */
+#define DBG_INIT DBG_MODE(2) /* boot/init code */
+#define DBG_SCHED DBG_MODE(3) /* swtch, scheduling */
+#define DBG_DISK DBG_MODE(4) /* disk driver */
+#define DBG_TEMP DBG_MODE(5) /* for resolving temporary problems */
+#define DBG_KMALLOC DBG_MODE(6) /* kmalloc, kmem_cache_alloc */
+#define DBG_PAGEALLOC DBG_MODE(7) /* page_alloc, etc. */
+#define DBG_INTR DBG_MODE(8) /* misc. trap/interrupt */
+#define DBG_TERM DBG_MODE(9) /* the terminal device */
+#define DBG_FORK DBG_MODE(10) /* fork(2) */
+#define DBG_PROC DBG_MODE(11) /* process stuff */
+#define DBG_VNREF DBG_MODE(12) /* vnode reference counts */
+#define DBG_PFRAME DBG_MODE(13) /* pframe subsys */
+#define DBG_ERROR DBG_MODE(14) /* error conditions */
+#define DBG_SYSCALL DBG_MODE(15) /* system calls */
+#define DBG_FREF DBG_MODE(16) /* file reference counts */
+#define DBG_PGTBL DBG_MODE(17) /* page table manipulation */
+#define DBG_BRK DBG_MODE(18) /* process break; user memory alloc */
+#define DBG_EXEC DBG_MODE(19) /* new process exec */
+#define DBG_VFS DBG_MODE(20) /* vfs */
+#define DBG_S5FS DBG_MODE(21) /* system V file system */
+#define DBG_KB DBG_MODE(22) /* keyboard */
+#define DBG_THR DBG_MODE(23) /* thread stuff */
+#define DBG_PRINT DBG_MODE(24) /* printdbg.c */
+#define DBG_OSYSCALL DBG_MODE(25) /* other system calls */
+#define DBG_VM DBG_MODE(28) /* VM */
+#define DBG_TEST DBG_MODE(30) /* for testing code */
+#define DBG_TESTPASS DBG_MODE(31) /* for testing code */
+#define DBG_TESTFAIL DBG_MODE(32) /* for testing code */
+
+#define DBG_MEMDEV DBG_MODE(33) /* For memory devices ("null" and "zero") */
+#define DBG_ANON DBG_MODE(34) /* anonymous vm objects */
+#define DBG_VMMAP DBG_MODE(35) /* vm area mappings */
+#define DBG_ELF DBG_MODE(37) /* elf loader */
+#define DBG_USER DBG_MODE(38) /* user land */
+#define DBG_DEFAULT DBG_ERROR /* default modes, 0 for none */
+
+/* This defines the name that is used in the
+ * environment variable to turn on the given
+ * debugging type, along with the color of the debug type */
+/* NOTE that there is an order to these objects - the color chosen for a
+ * debug statement with multiple DBG specifiers will be the first matching
+ * result in the table */
+/* Note that rearranging the table will affect results, and may be beneficial
+ * later */
+#define DBG_TAB \
+ /* General */ \
+ {"error", DBG_ERROR, _BWHITE_}, {"temp", DBG_TEMP, _NORMAL_}, \
+ {"print", DBG_PRINT, _NORMAL_}, {"test", DBG_TEST, _RED_}, \
+ {"testpass", DBG_TESTPASS, _GREEN_}, \
+ {"testfail", DBG_TESTFAIL, _RED_}, /* Kern 1 */ \
+ {"proc", DBG_PROC, _BLUE_}, {"thr", DBG_THR, _CYAN_}, \
+ {"sched", DBG_SCHED, _GREEN_}, \
+ {"init", DBG_INIT, _NORMAL_}, /* Kern 2 */ \
+ {"term", DBG_TERM, _BMAGENTA_}, {"disk", DBG_DISK, _YELLOW_}, \
+ {"memdev", DBG_MEMDEV, _BBLUE_}, /* VFS */ \
+ {"vfs", DBG_VFS, _WHITE_}, {"fref", DBG_FREF, _MAGENTA_}, \
+ {"vnref", DBG_VNREF, _CYAN_}, /* S5FS */ \
+ {"s5fs", DBG_S5FS, _BRED_}, \
+ {"pframe", DBG_PFRAME, _BMAGENTA_}, /* VM */ \
+ {"anon", DBG_ANON, _WHITE_}, {"vmmap", DBG_VMMAP, _BGREEN_}, \
+ {"fork", DBG_FORK, _BYELLOW_}, {"brk", DBG_BRK, _YELLOW_}, \
+ {"exec", DBG_EXEC, _BRED_}, {"elf", DBG_ELF, _BGREEN_}, \
+ {"pgtbl", DBG_PGTBL, _BBLUE_}, {"osyscall", DBG_OSYSCALL, _BMAGENTA_}, \
+ {"vm", DBG_VM, _RED_}, /* Syscalls (VFS - VM) */ \
+ {"syscall", DBG_SYSCALL, _RED_}, /* support code */ \
+ {"intr", DBG_INTR, _BRED_}, {"kmalloc", DBG_KMALLOC, _MAGENTA_}, \
+ {"pagealloc", DBG_PAGEALLOC, _WHITE_}, {"kb", DBG_KB, _BLUE_}, \
+ {"core", DBG_CORE, _GREEN_}, {"mm", DBG_MM, _RED_}, \
+ {"user", DBG_USER, _BYELLOW_}, \
+        /* Note this MUST be last or the color code will break. Also note \
+           that the color specified here is effectively the "default". */  \
+ {"all", DBG_ALL, _NORMAL_}, \
+ { \
+ NULL, 0, NULL \
+ }
+
+extern uint64_t dbg_modes;
+
+/* A common interface for functions which provide human-readable information
+ * about some data structure. Functions implementing this interface should fill
+ * buf with up to size characters to describe the data passed in as data, then
+ * return the number of characters written. If there is not enough space in buf
+ * to write all information then only size characters will be written and size
+ * will be returned. The returned string will be null terminated regardless of
+ * its length. */
+typedef size_t (*dbg_infofunc_t)(const void *data, char *buf, size_t size);
+
+#define DBG_BUFFER_SIZE (PAGE_SIZE)
+
+void dbg_init(void);
+
+void dbg_print(char *fmt, ...) __attribute__((format(printf, 1, 2)));
+
+void dbg_printinfo(dbg_infofunc_t func, const void *data);
+
+const char *dbg_color(uint64_t d_mode);
+
+#if defined(__SMP__) || defined(__KPREEMPT__)
+#define DEBUG_ENTER \
+    uint8_t __ipl = apic_initialized() ? intr_setipl(IPL_HIGH) : IPL_LOW;
+#define DEBUG_EXIT \
+ if (apic_initialized()) \
+ intr_setipl(__ipl);
+#else
+#define DEBUG_ENTER \
+ do \
+ { \
+ } while (0);
+#define DEBUG_EXIT \
+ do \
+ { \
+ } while (0);
+#endif
+
+#ifndef NDEBUG
+#define dbg(mode, ...) \
+ do \
+ { \
+ if (dbg_active(mode)) \
+ { \
+ DEBUG_ENTER \
+ dbg_print("%s", dbg_color(mode)); \
+ dbg_print("C%ld P%ld ", curcore.kc_id, \
+ curproc ? curproc->p_pid : -1L); \
+ dbg_print("%s:%d %s(): ", __FILE__, __LINE__, __func__); \
+ dbg_print(__VA_ARGS__); \
+ dbg_print("%s", _NORMAL_); \
+ DEBUG_EXIT \
+ } \
+ } while (0)
+
+#define dbg_force(mode, ...) \
+ do \
+ { \
+ DEBUG_ENTER \
+ dbg_print("%s", dbg_color(mode)); \
+ dbg_print("C%ld P%ld ", curcore.kc_id, \
+ curproc ? curproc->p_pid : -1L); \
+ dbg_print("%s:%d %s(): ", __FILE__, __LINE__, __func__); \
+ dbg_print(__VA_ARGS__); \
+ dbg_print("%s", _NORMAL_); \
+ DEBUG_EXIT \
+ } while (0)
+
+#define dbgq(mode, ...) \
+ do \
+ { \
+ if (dbg_active(mode)) \
+ { \
+ DEBUG_ENTER \
+ dbg_print("%s", dbg_color(mode)); \
+ dbg_print("C%ld P%ld ", curcore.kc_id, \
+ curproc ? curproc->p_pid : -1L); \
+ dbg_print(__VA_ARGS__); \
+ dbg_print("%s", _NORMAL_); \
+ DEBUG_EXIT \
+ } \
+ } while (0)
+
+#define dbginfo(mode, func, data) \
+ do \
+ { \
+ if (dbg_active(mode)) \
+ { \
+ DEBUG_ENTER \
+ dbg_print("%s", dbg_color(mode)); \
+ dbg_print("C%ld P%ld ", curcore.kc_id, \
+ curproc ? curproc->p_pid : -1L); \
+ dbg_printinfo(func, data); \
+ dbg_print("%s", _NORMAL_); \
+ DEBUG_EXIT \
+ } \
+ } while (0)
+
+#define dbg_active(mode) (dbg_modes & (mode))
+
+void dbg_add_mode(const char *mode);
+
+void dbg_add_modes(const char *modes);
+
+#else
+#define dbg(mode, ...)
+#define dbgq(mode, ...)
+#define dbginfo(mode, func, data)
+#define dbg_active(mode) 0
+#define dbg_add_mode(mode)
+#define dbg_add_modes(modes)
+#endif
+
+noreturn void dbg_panic(const char *file, int line, const char *func,
+ const char *fmt, ...)
+ __attribute__((format(printf, 4, 5)));
+
+#define panic(...) dbg_panic(__FILE__, __LINE__, __func__, __VA_ARGS__)
+
+#ifndef NDEBUG
+#define KASSERT(x) \
+ do \
+ { \
+ if (!(x)) \
+ panic("assertion failed: %s", #x); \
+ } while (0)
+
+#define KASSERT_GENERIC(left, right, comparator, comp_str) \
+ do \
+ { \
+ int __left = (int)(left); \
+ int __right = (int)(right); \
+ if (!comparator(__left, __right)) \
+ { \
+ panic("assertion failed: %s %s %s. Left: %d, Right: %d\n", #left, \
+ comp_str, #right, __left, __right); \
+ } \
+ } while (0)
+
+static long equals(long l, long r)
+{
+ return l == r;
+}
+
+static long notequals(long l, long r) { return l != r; }
+
+static long lessthan(long l, long r) { return l < r; }
+
+static long greaterthan(long l, long r) { return l > r; }
+
+static long lessthaneq(long l, long r) { return l <= r; }
+
+static long greaterthaneq(long l, long r) { return l >= r; }
+
+#define KASSERTEQ(l, r) KASSERT_GENERIC(l, r, equals, "==")
+#define KASSERTNEQ(l, r) KASSERT_GENERIC(l, r, notequals, "!=")
+#define KASSERT_GREATER(l, r) KASSERT_GENERIC(l, r, greaterthan, ">")
+#define KASSERT_LESS(l, r) KASSERT_GENERIC(l, r, lessthan, "<")
+#define KASSERT_GREQ(l, r) KASSERT_GENERIC(l, r, greaterthaneq, ">=")
+#define KASSERT_LESSEQ(l, r) KASSERT_GENERIC(l, r, lessthaneq, "<=")
+#else
+#define KASSERT(x)
+#define KASSERTEQ(l, r)
+#define KASSERT_GREATER(l, r)
+#define KASSERT_LESS(l, r)
+#define KASSERT_GREQ(l, r)
+#define KASSERT_LESSEQ(l, r)
+#endif
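A short sketch of typical usage of the dbg, KASSERT, and panic facilities declared above; example_checks and its arguments are hypothetical:

#include "util/debug.h"

static void example_checks(long nbytes, long capacity)
{
    dbg(DBG_TEMP, "copying %ld of %ld bytes\n", nbytes, capacity);
    KASSERT(nbytes >= 0 && "byte counts should never go negative");
    KASSERT_LESSEQ(nbytes, capacity);
    if (capacity < 0)
    {
        panic("corrupted capacity: %ld", capacity);
    }
}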
diff --git a/kernel/include/util/delay.h b/kernel/include/util/delay.h
new file mode 100644
index 0000000..29cf3b2
--- /dev/null
+++ b/kernel/include/util/delay.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include "types.h"
+#include "util/debug.h"
+
+/* Approximate numbers taken from various points in Linux kernel */
+#define LOOPS_PER_JIFFY (1 << 12)
+#define HZ 100 /* Found this in a random place in the kernel */
+
+/* From arch/x86/lib/delay.c in Linux kernel */
+/*
+ * Precise Delay Loops for i386
+ *
+ * Copyright (C) 1993 Linus Torvalds
+ * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ * Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
+ *
+ * The __delay function must _NOT_ be inlined as its execution time
+ * depends wildly on alignment on many x86 processors. The additional
+ * jump magic is needed to get the timing stable on all the CPU's
+ * we have to worry about.
+ */
+
+static void __delay(unsigned long loops)
+{
+ __asm__ volatile(
+ " test %0,%0 \n"
+ " jz 3f \n"
+ " jmp 1f \n"
+
+ ".align 16 \n"
+ "1: jmp 2f \n"
+
+ ".align 16 \n"
+ "2: dec %0 \n"
+ " jnz 2b \n"
+ "3: dec %0 \n"
+
+ : /* we don't need output */
+ : "a"(loops));
+}
+
+static inline void __const_udelay(unsigned long xloops)
+{
+ int d0;
+
+ xloops *= 4;
+ __asm__ volatile("mull %%edx"
+ : "=d"(xloops), "=&a"(d0)
+ : "1"(xloops), "0"(LOOPS_PER_JIFFY * (HZ / 4)));
+
+ __delay(++xloops);
+}
+
+static inline void __udelay(unsigned long usecs)
+{
+ __const_udelay(usecs * 4295); /* 2**32 / 1000000 */
+}
+
+static inline void __ndelay(unsigned long nsecs)
+{
+ __const_udelay(nsecs * 5); /* 2**32 / 1000000000 */
+}
+
+#define udelay(n) \
+ (__builtin_constant_p(n) ? ((n) > 20000 ? panic("Delay too large!") \
+ : __const_udelay((n)*4295)) \
+ : __udelay(n))
+
+#define ndelay(n) \
+ (__builtin_constant_p(n) \
+ ? ((n) > 20000 ? panic("Delay too large!") : __const_udelay((n)*5)) \
+ : __ndelay(n))
diff --git a/kernel/include/util/gdb.h b/kernel/include/util/gdb.h
new file mode 100644
index 0000000..cc28dbc
--- /dev/null
+++ b/kernel/include/util/gdb.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#define GDB_DEFINE_HOOK(name, ...) \
+ void __py_hook_##name(__VA_ARGS__) {}
+#define GDB_CALL_HOOK(name, ...) __py_hook_##name(__VA_ARGS__)
diff --git a/kernel/include/util/init.h b/kernel/include/util/init.h
new file mode 100644
index 0000000..9be7e3c
--- /dev/null
+++ b/kernel/include/util/init.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#define init_func(func) \
+ __asm__( \
+ ".pushsection .init\n\t" \
+ ".long " #func \
+ "\n\t" \
+ ".string \"" #func \
+ "\"\n\t" \
+ ".popsection\n\t");
+#define init_depends(name) \
+ __asm__( \
+ ".pushsection .init\n\t" \
+ ".long 0\n\t" \
+ ".string \"" #name \
+ "\"\n\t" \
+ ".popsection\n\t");
+
+typedef void (*init_func_t)();
+
+void init_call_all(void);
diff --git a/kernel/include/util/list.h b/kernel/include/util/list.h
new file mode 100644
index 0000000..5fd44c1
--- /dev/null
+++ b/kernel/include/util/list.h
@@ -0,0 +1,224 @@
+#pragma once
+
+#include "kernel.h"
+
+/*
+ * Generic circular doubly linked list implementation.
+ *
+ * list_t is the head of the list.
+ * list_link_t should be included in structures which want to be
+ * linked on a list_t.
+ *
+ * All of the list functions take pointers to list_t and list_link_t
+ * types, unless otherwise specified.
+ *
+ * list_init(list) initializes a list_t to an empty list.
+ *
+ * list_empty(list) returns 1 iff the list is empty.
+ *
+ * Insertion functions.
+ * list_insert_head(list, link) inserts link at the front of the list.
+ * list_insert_tail(list, link) inserts link at the end of the list.
+ * list_insert_before(olink, nlink) inserts nlink before olink in list.
+ *
+ * Removal functions.
+ * Head is list->l_next. Tail is list->l_prev.
+ * The following functions should only be called on non-empty lists.
+ * list_remove(link) removes a specific element from the list.
+ * list_remove_head(list) removes the first element of list.
+ * list_remove_tail(list) removes the last element of list.
+ *
+ * Item accessors.
+ * list_item(link, type, member)
+ *
+ * Given a list_link_t* and the name of the type of structure which contains
+ * the list_link_t and the name of the member corresponding to the list_link_t,
+ * returns a pointer (of type "type*") to the item.
+ *
+ * Example:
+ * struct my_struct { list_link_t my_link };
+ * struct my_struct a;
+ * list_link_init(&a.my_link);
+ *
+ * struct my_struct *b = list_item(&a.my_link, struct my_struct, my_link);
+ * // b should equal &a here
+ *
+ * To iterate over a list,
+ * list_link_t *link;
+ * for (link = list->l_next;
+ * link != list; link = link->l_next)
+ * ...
+ *
+ * Or, use the macros, which will work even if you list_remove() the
+ * current link:
+ * list_iterate(list, iterator, type, member) {
+ * ... use iterator ...
+ * }
+ * (see also list_iterate_reverse for iterating in reverse)
+ *
+ * Where:
+ * - list is a pointer to the list_t to iterate over,
+ * - iterator is a name for the loop variable which will take on the value
+ * of each item in the list,
+ * - type is the type of items in the list,
+ * - member is the name of the field in the item type that is the list_link_t
+ *
+ * Example (from kernel/drivers/chardev.c)
+ * // chardevs is a list_t
+ * // chardev_t has a cd_link member which is a list_link_t
+ * list_iterate(&chardevs, cd, chardev_t, cd_link)
+ * {
+ * if (dev->cd_id == cd->cd_id)
+ * {
+ * return -1;
+ * }
+ * }
+ */
+
+/**
+ * Initialize a list_t.
+ */
+#define LIST_INITIALIZER(list) \
+ { \
+ .l_next = &(list), .l_prev = &(list) \
+ }
+
+/**
+ * Initialize a list link.
+ */
+#define LIST_LINK_INITIALIZER(list_link) \
+ { \
+ .l_next = NULL, .l_prev = NULL \
+ }
+
+typedef struct list
+{
+ struct list *l_next;
+ struct list *l_prev;
+} list_t, list_link_t;
+
+/**
+ * Initialize a list link.
+ */
+void list_link_init(list_link_t *link);
+
+/**
+ * Initialize a list_t.
+ */
+void list_init(list_t *list);
+
+/**
+ * Check if a link is linked to some list.
+ *
+ * @param link The link to check.
+ * @return long 1 if linked, 0 otherwise.
+ */
+long list_link_is_linked(const list_link_t *link);
+
+/**
+ * Check if a list is empty.
+ *
+ * @param list The list to check.
+ * @return long 1 if empty, 0 otherwise.
+ */
+long list_empty(const list_t *list);
+
+/**
+ * Assert that the internal state of a list is sane, and
+ * panic if it is not.
+ *
+ * @param list The list to check for sanity.
+ */
+void list_assert_sanity(const list_t *list);
+
+/**
+ * Insert a new link onto a list before another link.
+ *
+ * @param link The link before which the new link should be inserted.
+ * @param to_insert The new link to be inserted.
+ */
+void list_insert_before(list_link_t *link, list_link_t *to_insert);
+
+/**
+ * Insert a new link at the head (beginning) of a given list.
+ *
+ * @param list The list to insert on.
+ * @param link The new link to insert.
+ */
+void list_insert_head(list_t *list, list_link_t *link);
+
+/**
+ * Insert a new link at the tail (end) of a given list.
+ *
+ * @param list The list to insert on.
+ * @param link The new link to insert.
+ */
+void list_insert_tail(list_t *list, list_link_t *link);
+
+/**
+ * Remove a particular link from the list it's on.
+ *
+ * @param link The link to be removed from its list.
+ */
+void list_remove(list_link_t *link);
+
+/**
+ * Get a pointer to the item that contains the given link.
+ *
+ * For instance, given a list_link_t contained within a proc_t, get a reference
+ * to the proc_t itself.
+ *
+ * @param link The link contained within the item to access.
+ * @param type The type of the outer item struct (e.g., proc_t)
+ * @param member The name of the struct member which is the list_link_t (e.g. p_list_link)
+ *
+ */
+#define list_item(link, type, member) \
+ (type *)((char *)(link)-offsetof(type, member))
+
+/**
+ * Get the item at the head of the list. See list_item for explanation
+ * of type and member.
+ */
+#define list_head(list, type, member) list_item((list)->l_next, type, member)
+
+/**
+ * Get the item at the tail of the list. See list_item for explanation
+ * of type and member.
+ */
+#define list_tail(list, type, member) list_item((list)->l_prev, type, member)
+
+/**
+ * Get the next item in a list that occurs after the given item.
+ *
+ * @param current An item from the list (e.g. a proc_t)
+ * See list_item for explanation of type and member.
+ */
+#define list_next(current, type, member) \
+ list_head(&(current)->member, type, member)
+
+/**
+ * Get the previous item in a list given an item. See list_next for explanation.
+ */
+#define list_prev(current, type, member) \
+ list_tail(&(current)->member, type, member)
+
+/**
+ * Iterate over the elements in a list. See comment at top of list.h for
+ * detailed description.
+ */
+#define list_iterate(list, var, type, member) \
+ for (type *var = list_head(list, type, member), \
+ *__next_##var = list_next(var, type, member); \
+ &var->member != (list); \
+ var = __next_##var, __next_##var = list_next(var, type, member))
+
+/**
+ * Iterate over the elements of a list in reverse. See comment at top of list.h for
+ * detailed description.
+ */
+#define list_iterate_reverse(list, var, type, member) \
+ for (type *var = list_tail(list, type, member), \
+ *__next_##var = list_prev(var, type, member); \
+ &var->member != (list); \
+ var = __next_##var, __next_##var = list_prev(var, type, member))
diff --git a/kernel/include/util/printf.h b/kernel/include/util/printf.h
new file mode 100644
index 0000000..430b156
--- /dev/null
+++ b/kernel/include/util/printf.h
@@ -0,0 +1,87 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: lib.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Aug 2003
+ *
+ * Environment: Xen Minimal OS
+ * Description: Random useful library functions, contains some freebsd stuff
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ *
+ *-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)stdarg.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/i386/include/stdarg.h,v 1.10 1999/08/28 00:44:26 peter Exp
+ *$
+ */
+
+#pragma once
+
+#include "stdarg.h"
+#include <types.h>
+
+/* printing */
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
+
+int snprintf(char *buf, size_t size, const char *fmt, ...);
+
+int scnprintf(char *buf, size_t size, const char *fmt, ...);
+
+// a pretty simple way to avoid kernel buffer overflow attacks, no?
+// int vsprintf(char *buf, const char *fmt, va_list args);
+// int sprintf(char *buf, const char *fmt, ...);
+
+/* A variation on printf designed to be used in debug info functions.
+ * The function takes in a pointer to the address of a string buffer
+ * and a pointer to the size of the buffer. The buffer address pointed
+ * to by str is incremented to point to the null character written at the
+ * end of the new string. The size is decremented by the number of
+ * characters written, not including the null character. The function
+ * returns the number of characters left in the buffer (after taking
+ * into account the null character). */
+int iprintf(char **str, size_t *size, char *fmt, ...)
+ __attribute__((format(printf, 3, 4)));
+
+int vsscanf(const char *buf, const char *fmt, va_list args);
+
+int sscanf(const char *buf, const char *fmt, ...);
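A sketch of the iprintf pattern described above, in the shape of a dbg_infofunc_t-style helper; example_t and example_info are hypothetical:

#include "util/printf.h"

typedef struct example
{
    long e_id;
    const char *e_name;
} example_t;

static size_t example_info(const void *data, char *buf, size_t size)
{
    const example_t *e = data;
    size_t osize = size;
    /* each call advances buf and shrinks size, so fields append cleanly */
    iprintf(&buf, &size, "example %ld:\n", e->e_id);
    iprintf(&buf, &size, "\tname: %s\n", e->e_name);
    return osize - size; /* number of characters written */
}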
diff --git a/kernel/include/util/string.h b/kernel/include/util/string.h
new file mode 100644
index 0000000..04dc0f7
--- /dev/null
+++ b/kernel/include/util/string.h
@@ -0,0 +1,93 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: lib.h
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Aug 2003
+ *
+ * Environment: Xen Minimal OS
+ * Description: Random useful library functions, contains some freebsd stuff
+ *
+ ****************************************************************************
+ * $Id: h-insert.h,v 1.4 2002/11/08 16:03:55 rn Exp $
+ ****************************************************************************
+ *
+ *-
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)stdarg.h 8.1 (Berkeley) 6/10/93
+ * $FreeBSD: src/sys/i386/include/stdarg.h,v 1.10 1999/08/28 00:44:26 peter Exp
+ *$
+ */
+
+#pragma once
+
+#include "stdarg.h"
+#include "types.h"
+
+/* string and memory manipulation */
+int memcmp(const void *cs, const void *ct, size_t count);
+
+void *memcpy(void *dest, const void *src, size_t count);
+
+int strncmp(const char *cs, const char *ct, size_t count);
+
+int strcmp(const char *cs, const char *ct);
+
+char *strcpy(char *dest, const char *src);
+
+char *strncpy(char *dest, const char *src, size_t count);
+
+void *memset(void *s, int c, size_t count);
+
+size_t strnlen(const char *s, size_t count);
+
+size_t strlen(const char *s);
+
+char *strchr(const char *s, int c);
+
+char *strrchr(const char *s, int c);
+
+char *strstr(const char *s1, const char *s2);
+
+char *strcat(char *dest, const char *src);
+
+char *strdup(const char *s);
+
+char *strtok(char *s, const char *d);
+
+/* return string-representation of an errno */
+char *strerror(int errnum);
diff --git a/kernel/include/util/time.h b/kernel/include/util/time.h
new file mode 100644
index 0000000..fe3df18
--- /dev/null
+++ b/kernel/include/util/time.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "types.h"
+#include "util/debug.h"
+
+extern uint64_t timer_tickcount;
+extern uint64_t kernel_preempted_count;
+extern uint64_t user_preempted_count;
+extern uint64_t not_preempted_count;
+extern uint64_t idle_count;
+extern volatile uint64_t jiffies;
+
+void time_init();
+
+void time_spin(time_t ms);
+
+void time_sleep(time_t ms);
+
+long do_usleep(useconds_t usec);
+
+time_t core_uptime();
+
+time_t do_time();
+
+size_t time_stats(char *buf, size_t len);
diff --git a/kernel/include/util/timer.h b/kernel/include/util/timer.h
new file mode 100644
index 0000000..57889f9
--- /dev/null
+++ b/kernel/include/util/timer.h
@@ -0,0 +1,28 @@
+#ifndef TIMER_H
+#define TIMER_H
+
+#include "util/list.h"
+
+typedef struct timer
+{
+ void (*function)(uint64_t data);
+ uint64_t data;
+ uint64_t expires;
+ list_link_t link;
+} timer_t;
+
+void timer_init(timer_t *timer);
+
+void timer_add(timer_t *timer);
+
+int timer_del(timer_t *timer);
+
+int timer_mod(timer_t *timer, int expires);
+
+int timer_pending(timer_t *timer);
+
+int timer_del_sync(timer_t *timer);
+
+void __timers_fire();
+
+#endif \ No newline at end of file
diff --git a/kernel/include/vm/anon.h b/kernel/include/vm/anon.h
new file mode 100644
index 0000000..a116853
--- /dev/null
+++ b/kernel/include/vm/anon.h
@@ -0,0 +1,9 @@
+#pragma once
+
+struct mobj;
+
+void anon_init();
+
+struct mobj *anon_create(void);
+
+extern int anon_count;
diff --git a/kernel/include/vm/brk.h b/kernel/include/vm/brk.h
new file mode 100644
index 0000000..1612b5f
--- /dev/null
+++ b/kernel/include/vm/brk.h
@@ -0,0 +1,3 @@
+#pragma once
+
+long do_brk(void *addr, void **ret);
diff --git a/kernel/include/vm/mmap.h b/kernel/include/vm/mmap.h
new file mode 100644
index 0000000..8c5638c
--- /dev/null
+++ b/kernel/include/vm/mmap.h
@@ -0,0 +1,8 @@
+#include "types.h"
+
+struct proc;
+
+long do_munmap(void *addr, size_t len);
+
+long do_mmap(void *addr, size_t len, int prot, int flags, int fd, off_t off,
+ void **ret);
diff --git a/kernel/include/vm/pagefault.h b/kernel/include/vm/pagefault.h
new file mode 100644
index 0000000..7850727
--- /dev/null
+++ b/kernel/include/vm/pagefault.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "types.h"
+
+#define FAULT_PRESENT 0x01
+#define FAULT_WRITE 0x02
+#define FAULT_USER 0x04
+#define FAULT_RESERVED 0x08
+#define FAULT_EXEC 0x10
+
+void handle_pagefault(uintptr_t vaddr, uintptr_t cause);
diff --git a/kernel/include/vm/shadow.h b/kernel/include/vm/shadow.h
new file mode 100644
index 0000000..57893d5
--- /dev/null
+++ b/kernel/include/vm/shadow.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "mm/mobj.h"
+
+void shadow_init();
+
+mobj_t *shadow_create(mobj_t *shadowed);
+
+void shadow_collapse(mobj_t *o);
+
+extern int shadow_count;
diff --git a/kernel/include/vm/vmmap.h b/kernel/include/vm/vmmap.h
new file mode 100644
index 0000000..e5efba6
--- /dev/null
+++ b/kernel/include/vm/vmmap.h
@@ -0,0 +1,71 @@
+#pragma once
+
+#include "types.h"
+
+#include "util/list.h"
+
+#define VMMAP_DIR_LOHI 1
+#define VMMAP_DIR_HILO 2
+
+struct mobj;
+struct proc;
+struct vnode;
+
+typedef struct vmmap
+{
+ list_t vmm_list; /* list of virtual memory areas */
+ struct proc *vmm_proc; /* the process that corresponds to this vmmap */
+} vmmap_t;
+
+/* Make sure you understand why mapping boundaries are in terms of frame
+ * numbers (page numbers) and not addresses */
+typedef struct vmarea
+{
+ size_t vma_start; /* [starting vfn, */
+ size_t vma_end; /* ending vfn) */
+ size_t vma_off; /* offset from beginning of vma_obj in pages */
+                      /* This field is necessary because a file mapping
+                         does not have to start at offset 0 of the file.
+                         You could, for instance, map pages 10-15 of a
+                         file, and vma_off would be 10. */
+
+ int vma_prot; /* permissions (protections) on mapping, see mman.h */
+ int vma_flags; /* either MAP_SHARED or MAP_PRIVATE. It can also specify
+ MAP_ANON and MAP_FIXED */
+
+ struct vmmap *vma_vmmap; /* address space that this area belongs to */
+ struct mobj *vma_obj; /* the memory object that corresponds to this address region */
+ list_link_t vma_plink; /* link on process vmmap maps list */
+} vmarea_t;
+
+void vmmap_init(void);
+
+vmmap_t *vmmap_create(void);
+
+void vmmap_destroy(vmmap_t **mapp);
+
+void vmmap_collapse(vmmap_t *map);
+
+vmarea_t *vmmap_lookup(vmmap_t *map, size_t vfn);
+
+long vmmap_map(vmmap_t *map, struct vnode *file, size_t lopage, size_t npages,
+ int prot, int flags, off_t off, int dir, vmarea_t **new_vma);
+
+long vmmap_remove(vmmap_t *map, size_t lopage, size_t npages);
+
+long vmmap_is_range_empty(vmmap_t *map, size_t startvfn, size_t npages);
+
+ssize_t vmmap_find_range(vmmap_t *map, size_t npages, int dir);
+
+long vmmap_read(vmmap_t *map, const void *vaddr, void *buf, size_t count);
+
+long vmmap_write(vmmap_t *map, void *vaddr, const void *buf, size_t count);
+
+vmmap_t *vmmap_clone(vmmap_t *map);
+
+size_t vmmap_mapping_info_helper(const void *map, char *buf, size_t size,
+ char *prompt);
+
+size_t vmmap_mapping_info(const void *map, char *buf, size_t size);
+
+void vmmap_insert(vmmap_t *map, vmarea_t *new_vma); \ No newline at end of file
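A worked example (hypothetical numbers) of how vma_start, vma_end, and vma_off relate for a partial file mapping:

/* Mapping pages 10-15 of a file at virtual page 0x1000 would produce a
 * vmarea with
 *
 *   vma_start = 0x1000   first virtual frame number of the mapping
 *   vma_end   = 0x1006   one past the last vfn (6 pages long)
 *   vma_off   = 10       page offset into vma_obj where the area begins
 *
 * so the page backing vfn v is page (v - vma_start + vma_off) of vma_obj. */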
diff --git a/kernel/link.ld b/kernel/link.ld
new file mode 100644
index 0000000..e8d647e
--- /dev/null
+++ b/kernel/link.ld
@@ -0,0 +1,68 @@
+OUTPUT_FORMAT(elf64-x86-64)
+
+KERNEL_LMA = 0x00100000;
+KERNEL_VMA = 0xffff800000000000;
+
+ENTRY(_start)
+STARTUP(entry/entry.o)
+
+SECTIONS {
+ . = KERNEL_VMA + KERNEL_LMA;
+
+ k_start = .;
+
+ .text : AT(ADDR(.text) - KERNEL_VMA) {
+ _code = .;
+ *(.multiboot)
+ *(.text)
+ . = ALIGN(0x1000);
+ }
+
+ csd_start = .;
+ .csd : AT(ADDR(.csd) - KERNEL_VMA) {
+ *(.csd)
+ . = ALIGN(0x1000);
+ }
+ csd_end = .;
+
+ .init : AT(ADDR(.init) - KERNEL_VMA) {
+ kernel_start_init = .;
+ *(.init)
+ . = ALIGN(0x1000);
+ kernel_end_init = .;
+ }
+
+
+ .rodata : AT(ADDR(.rodata) - KERNEL_VMA) {
+ _rodata = .;
+ *(.rodata)
+ . = ALIGN(0x1000);
+ }
+
+ .data : AT(ADDR(.data) - KERNEL_VMA) {
+ _data = .;
+ *(.data)
+ . = ALIGN(0x1000);
+ }
+
+ _edata = .;
+
+ .bss : AT(ADDR(.bss) - KERNEL_VMA) {
+ _bss = .;
+ *(.bss)
+ *(COMMON)
+ . = ALIGN(0x1000);
+ }
+
+ _end = .;
+
+ /DISCARD/ : {
+ *(.comment)
+ *(note.*)
+ }
+ kernel_phys_off = k_start - KERNEL_LMA;
+ kernel_phys_base = k_start - kernel_phys_off;
+ kernel_phys_end = _end - kernel_phys_off;
+ kernel_page_tables = ((_end - k_start) / 0x80000) + 1; /* XXX might be 0x200000 */
+ kernel_text_sectors = ((_end - k_start) / 512) + 1;
+} \ No newline at end of file
diff --git a/kernel/main/acpi.c b/kernel/main/acpi.c
new file mode 100644
index 0000000..cb0f221
--- /dev/null
+++ b/kernel/main/acpi.c
@@ -0,0 +1,161 @@
+#include "main/acpi.h"
+
+#include "boot/config.h"
+#include "mm/page.h"
+#include "types.h"
+#include "util/debug.h"
+#include "util/string.h"
+
+#define XSDT_SIGNATURE (*(uint32_t *)"XSDT")
+#define RSDT_SIGNATURE (*(uint32_t *)"RSDT")
+#define FACP_SIGNATURE (*(uint32_t *)"FACP")
+#define DSDT_SIGNATURE (*(uint32_t *)"DSDT")
+
+#define RSDP_ALIGN 16
+
+#define EBDA_MIN_PADDR 0x80000
+#define EBDA_MAX_PADDR 0xa0000
+#define EBDA_PTR_LOC_PADDR 0x040e
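+
+/* The word at physical address 0x040e in the BIOS data area holds the
+ * real-mode segment of the EBDA; shifting it left by 4 gives the EBDA's
+ * physical address (see __rsdp_search below). */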
+
+#define EBDA_MIN (PHYS_OFFSET + EBDA_MIN_PADDR)
+#define EBDA_MAX (PHYS_OFFSET + EBDA_MAX_PADDR)
+#define EBDA_PTR_LOC (PHYS_OFFSET + EBDA_PTR_LOC_PADDR)
+
+static const uint8_t rsdp_sig[8] = {'R', 'S', 'D', ' ', 'P', 'T', 'R', ' '};
+
+typedef struct rsdp
+{
+ uint8_t rp_sign[8];
+ uint8_t rp_checksum;
+ uint8_t rp_oemid[6];
+ uint8_t rp_rev;
+ uint32_t rp_addr;
+} packed rsdp_t;
+
+typedef struct rsdp_20
+{
+ rsdp_t rsdp;
+ uint32_t length;
+ uint64_t xsdt_addr;
+ uint8_t ext_checksum;
+ uint8_t reserved[3];
+} packed rsdp_20_t;
+
+typedef struct rsd_table
+{
+ acpi_header_t rt_header;
+ uint64_t rt_other[];
+} packed rsd_table_t;
+
+static uint8_t __acpi_checksum(const uint8_t *buf, long size)
+{
+ uint8_t sum = 0;
+ for (long i = 0; i < size; i++)
+ sum += buf[i];
+ return sum;
+}
+
+static rsdp_20_t *__rsdp_search_range(uintptr_t start, uintptr_t end)
+{
+ uintptr_t rsdp_candidate = start;
+ while (rsdp_candidate <= end - sizeof(struct rsdp))
+ {
+ if (memcmp((void *)rsdp_candidate, rsdp_sig, sizeof(rsdp_sig)) == 0 &&
+ __acpi_checksum((uint8_t *)rsdp_candidate, sizeof(rsdp_20_t)) ==
+ 0)
+ {
+ return (rsdp_20_t *)rsdp_candidate;
+ }
+ rsdp_candidate += RSDP_ALIGN;
+ }
+ return NULL;
+}
+
+static void *__rsdp_search()
+{
+ // detect the location of the EBDA from the BIOS data section
+ uintptr_t ebda =
+ ((uintptr_t) * (uint16_t *)EBDA_PTR_LOC << 4) + PHYS_OFFSET;
+ rsdp_20_t *rsdp = 0;
+ if (ebda >= EBDA_MIN && ebda <= EBDA_MAX && ebda % RSDP_ALIGN == 0)
+ {
+ // check only if it's valid
+ rsdp = __rsdp_search_range(ebda, EBDA_MAX);
+ }
+ if (!rsdp)
+ {
+ // Per the ACPI spec, the RSDP may also be located in the BIOS
+ // read-only memory area, 0xe0000 - 0xfffff.
+ rsdp =
+ __rsdp_search_range(PHYS_OFFSET + 0xe0000, PHYS_OFFSET + 0x100000);
+ }
+ return rsdp;
+}
+
+static rsdp_20_t *rsd_ptr = NULL;
+static rsd_table_t *rsd_table = NULL;
+
+static rsd_table_t *_acpi_load_table(uintptr_t paddr)
+{
+ page_mark_reserved(PAGE_ALIGN_DOWN(paddr));
+ return (rsd_table_t *)(PHYS_OFFSET + paddr);
+}
+
+void acpi_init()
+{
+ if (rsd_ptr == NULL)
+ {
+ rsd_ptr = __rsdp_search();
+ KASSERT(rsd_ptr && "Could not find the ACPI Root Descriptor Table.");
+
+ rsd_table = _acpi_load_table(rsd_ptr->xsdt_addr);
+ KASSERT(XSDT_SIGNATURE == rsd_table->rt_header.ah_sign);
+ if (__acpi_checksum((void *)rsd_table, rsd_table->rt_header.ah_size))
+ {
+ panic("Weenix only supports ACPI 2.0 or higher");
+ }
+
+ dbgq(DBG_CORE, "--- ACPI INIT ---\n");
+ dbgq(DBG_CORE, "rsdp addr: %p\n", rsd_ptr);
+ dbgq(DBG_CORE, "rsdt addr: %p\n", rsd_table);
+ dbgq(DBG_CORE, "rev: %i\n", (int)rsd_ptr->rsdp.rp_rev);
+
+ rsd_ptr->rsdp.rp_oemid[5] = 0;
+ dbgq(DBG_CORE, "oem: %s\n", (char *)rsd_ptr->rsdp.rp_oemid);
+
+ // search for all tables listed in the RSDT and checksum them
+ dbgq(DBG_CORE, "ents:\t");
+ size_t headers =
+ (rsd_table->rt_header.ah_size - sizeof(rsd_table->rt_header)) /
+ sizeof(rsd_table->rt_other[0]);
+
+ for (size_t i = 0; i < headers; ++i)
+ {
+ acpi_header_t *header =
+ &_acpi_load_table(rsd_table->rt_other[i])->rt_header;
+ rsd_table->rt_other[i] = (uintptr_t)header;
+
+ dbgq(DBG_CORE, "%.4s ", (char *)&header->ah_sign);
+ KASSERT(0 == __acpi_checksum((void *)header, header->ah_size));
+ }
+ dbgq(DBG_CORE, "\n");
+ }
+}
+
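+/* Return the index'th table listed in the RSDT/XSDT whose signature matches
+ * `signature`, or NULL if no such table exists. */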
+void *acpi_table(uint32_t signature, int index)
+{
+ KASSERT(index >= 0);
+
+ size_t headers =
+ (rsd_table->rt_header.ah_size - sizeof(rsd_table->rt_header)) /
+ sizeof(rsd_table->rt_other[0]);
+
+ for (size_t i = 0; i < headers; ++i)
+ {
+ acpi_header_t *header = (acpi_header_t *)rsd_table->rt_other[i];
+ if (header->ah_sign == signature && 0 == index--)
+ {
+ return header;
+ }
+ }
+ return NULL;
+}
diff --git a/kernel/main/apic.c b/kernel/main/apic.c
new file mode 100644
index 0000000..4d6f21c
--- /dev/null
+++ b/kernel/main/apic.c
@@ -0,0 +1,648 @@
+#include "types.h"
+
+#include "boot/config.h"
+
+#include "main/acpi.h"
+#include "main/apic.h"
+#include "main/cpuid.h"
+#include "main/interrupt.h"
+#include "main/io.h"
+
+#define APIC_SIGNATURE (*(uint32_t *)"APIC")
+
+#define TYPE_LAPIC 0
+#define TYPE_IOAPIC 1
+
+/* The legacy 8259 PIC must be disabled (all of its interrupts masked)
+ * before the APIC can be used.
+ */
+#define PIC_COMPLETE_MASK 0xff
+
+#define PIC1 0x20
+#define PIC1_COMMAND PIC1
+#define PIC1_DATA (PIC1 + 1)
+#define PIC1_VECTOR 0x20
+
+#define PIC2 0xa0
+#define PIC2_COMMAND PIC2
+#define PIC2_DATA (PIC2 + 1)
+#define PIC2_VECTOR 0x28
+
+#define ICW1_ICW4 0x01 /* ICW4 (not) needed */
+#define ICW1_SINGLE 0x02 /* Single (cascade) mode */
+#define ICW1_INTERVAL4 0x04 /* Call address interval 4 (8) */
+#define ICW1_LEVEL 0x08 /* Level triggered (edge) mode */
+#define ICW1_INIT 0x10 /* Initialization - required! */
+
+#define ICW4_8086 0x01 /* 8086/88 (MCS-80/85) mode */
+#define ICW4_AUTO 0x02 /* Auto (normal) EOI */
+#define ICW4_BUF_SLAVE 0x08 /* Buffered mode/slave */
+#define ICW4_BUF_MASTER 0x0C /* Buffered mode/master */
+#define ICW4_SFNM 0x10 /* Special fully nested (not) */
+
+/* For enabling interrupts from the APIC rather than the
+ * Master PIC, use the Interrupt Mode Configuration Register (IMCR)
+ */
+
+#define SELECT_REGISTER 0x22
+#define IMCR_REGISTER 0x70
+#define ENABLE_APIC 0x23
+#define ENABLE_APIC_PORT 0x01
+
+/* For Local APICS */
+#define IA32_APIC_BASE_MSR 0x1b
+#define IA32_APIC_BASE_MSR_ENABLE 0x800
+#define LOCAL_APIC_SPURIOUS_REGISTER 0xf0
+#define LOCAL_APIC_ENABLE_INTERRUPT 0x100
+
+#define LOCAL_APIC_ID 0x20
+#define LOCAL_APIC_VERSION 0x30
+#define LOCAL_APIC_TASKPRIOR 0x80
+#define LOCAL_APIC_EOI 0xb0
+#define LOCAL_APIC_LDR 0xd0
+#define LOCAL_APIC_DFR 0xe0
+#define LOCAL_APIC_SPURIOUS 0xf0
+#define LOCAL_APIC_ESR 0x280
+#define LOCAL_APIC_ICRL 0x300
+#define LOCAL_APIC_ICRH 0x310
+#define LOCAL_APIC_LVT_TMR 0x320
+#define LOCAL_APIC_LVT_PERF 0x340
+#define LOCAL_APIC_LVT_LINT0 0x350
+#define LOCAL_APIC_LVT_LINT1 0x360
+#define LOCAL_APIC_LVT_ERR 0x370
+#define LOCAL_APIC_TMRINITCNT 0x380
+#define LOCAL_APIC_TMRCURRCNT 0x390
+#define LOCAL_APIC_TMRDIV 0x3e0
+#define LOCAL_APIC_LAST 0x38f
+#define LOCAL_APIC_DISABLE 0x10000
+#define LOCAL_APIC_SW_ENABLE 0x100
+#define LOCAL_APIC_CPUFOCUS 0x200
+#define LOCAL_APIC_NMI (4 << 8)
+#define LOCAL_APIC_TMR_PERIODIC 0x20000
+#define LOCAL_APIC_TMR_BASEDIV (1 << 20)
+
+#define APIC_ADDR (apic->at_addr + PHYS_OFFSET)
+#define APIC_REG(x) (*(uint32_t *)(APIC_ADDR + (x)))
+#define LAPICID APIC_REG(LOCAL_APIC_ID)
+#define LAPICVER APIC_REG(LOCAL_APIC_VERSION)
+#define LAPICTPR APIC_REG(LOCAL_APIC_TASKPRIOR)
+#define LAPICSPUR APIC_REG(LOCAL_APIC_SPURIOUS)
+#define LAPICEOI APIC_REG(LOCAL_APIC_EOI)
+#define LAPICDFR APIC_REG(LOCAL_APIC_DFR)
+#define LAPICLDR APIC_REG(LOCAL_APIC_LDR)
+#define LAPICLVTTMR APIC_REG(LOCAL_APIC_LVT_TMR)
+#define LAPICLVTPERF APIC_REG(LOCAL_APIC_LVT_PERF)
+#define LAPICLVTLINT0 APIC_REG(LOCAL_APIC_LVT_LINT0)
+#define LAPICLVTLINT1 APIC_REG(LOCAL_APIC_LVT_LINT1)
+#define LAPICLVTERR APIC_REG(LOCAL_APIC_LVT_ERR)
+#define LAPICTIC APIC_REG(LOCAL_APIC_TMRINITCNT)
+#define LAPICTCC APIC_REG(LOCAL_APIC_TMRCURRCNT)
+#define LAPICTMRDIV APIC_REG(LOCAL_APIC_TMRDIV)
+#define LAPICICRH APIC_REG(LOCAL_APIC_ICRH)
+#define LAPICICRL APIC_REG(LOCAL_APIC_ICRL)
+#define LAPICESR APIC_REG(LOCAL_APIC_ESR)
+
+/* IO APIC */
+#define IOAPIC_IOWIN 0x10
+
+/* Some configuration for the IO APIC */
+#define IOAPIC_ID 0x00
+#define IOAPIC_VER 0x01
+#define IOAPIC_ARB 0x02
+#define IOAPIC_REDTBL 0x03
+
+#define IOAPIC_ADDR (ioapic->at_addr + PHYS_OFFSET)
+#define IOAPIC (*(uint32_t *)IOAPIC_ADDR)
+#define IOAPICWIN (*(uint32_t *)(IOAPIC_ADDR + IOAPIC_IOWIN))
+
+/* Helpful Macros for IO APIC programming */
+#define BIT_SET(data, bit) \
+ do \
+ { \
+ (data) = ((data) | (0x1 << (bit))); \
+ } while (0);
+#define BIT_UNSET(data, bit) \
+ do \
+ { \
+ (data) = ((data) & ~(0x1 << (bit))); \
+ } while (0);
+
+#define IRQ_TO_OFFSET(irq, part) ((uint8_t)((0x10 + (irq * 2) + part)))
+
+typedef struct apic_table
+{
+ struct acpi_header at_header;
+ uint32_t at_addr;
+ uint32_t at_flags;
+} packed apic_table_t;
+
+typedef struct lapic_table
+{
+ uint8_t at_type;
+ uint8_t at_size;
+ uint8_t at_procid;
+ uint8_t at_apicid;
+ uint32_t at_flags;
+} packed lapic_table_t;
+
+typedef struct ioapic_table
+{
+ uint8_t at_type;
+ uint8_t at_size;
+ uint8_t at_apicid;
+ uint8_t at_reserved;
+ uint32_t at_addr;
+ uint32_t at_inti;
+} packed ioapic_table_t;
+
+static apic_table_t *apic = NULL;
+static ioapic_table_t *ioapic = NULL;
+
+// Use MAX_LAPICS + 1 entries so we can guarantee the last entry is null
+static lapic_table_t *lapics[MAX_LAPICS + 1] = {NULL};
+static long max_apicid;
+
+static long initialized = 0;
+
+// Returns the maximum APIC ID
+inline long apic_max_id() { return max_apicid; }
+
+/* [APIC ID------------------------] */
+inline static long __lapic_getid(void) { return (LAPICID >> 24) & 0xff; }
+
+// Returns the APIC ID of the current processor/core
+inline long apic_current_id() { return __lapic_getid(); }
+
+inline static uint32_t __lapic_getver(void) { return LAPICVER & 0xff; }
+
+inline static void __lapic_setspur(uint8_t intr)
+{
+ uint32_t data = LAPICSPUR | LOCAL_APIC_SW_ENABLE;
+ *((uint8_t *)&data) = intr;
+ LAPICSPUR = data;
+}
+
+/* [LOGICID-------------------------] */
+inline static void __lapic_setlogicalid(uint8_t id)
+{
+ LAPICLDR = ((uint32_t)id) << 24;
+}
+
+inline static uint32_t ioapic_read(uint8_t reg_offset)
+{
+ /* Tell IOREGSEL where we want to read from */
+ IOAPIC = reg_offset;
+ return IOAPICWIN;
+}
+
+inline static void ioapic_write(uint8_t reg_offset, uint32_t value)
+{
+ /* Tell IOREGSEL where to write to */
+ IOAPIC = reg_offset;
+ /* Write the value to IOWIN */
+ IOAPICWIN = value;
+}
+
+inline static uint32_t __ioapic_getid(void)
+{
+ return (ioapic_read(IOAPIC_ID) >> 24) & 0x0f;
+}
+
+inline static uint32_t __ioapic_getver(void)
+{
+ return ioapic_read(IOAPIC_VER) & 0xff;
+}
+
+inline static uint32_t __ioapic_getmaxredir(void)
+{
+ return (ioapic_read(IOAPIC_VER) >> 16) & 0xff;
+}
+
+inline static void __ioapic_setredir(uint32_t irq, uint8_t intr)
+{
+ /* Read in the redirect table lower register first */
+ uint32_t data = ioapic_read(IRQ_TO_OFFSET(irq, 0));
+ /* Set the interrupt vector */
+ ((uint8_t *)&data)[0] = intr;
+ /* Set bit 8, unset bits 9,10 to set interrupt delivery mode to lowest
+ * priority */
+ BIT_SET(data, 8);
+ BIT_UNSET(data, 9);
+ BIT_UNSET(data, 10);
+ /* Set bit 11 to set the destination mode to a logical destination */
+ BIT_SET(data, 11);
+ /* Unset bit 13 to set the pin polarity to Active High */
+ BIT_UNSET(data, 13);
+ /* Unset bit 15 to set the trigger mode to Edge */
+ BIT_UNSET(data, 15);
+ /* Write this value to the apic */
+ ioapic_write(IRQ_TO_OFFSET(irq, 0), data);
+ /* Now deal with the higher order register */
+ data = ioapic_read(IRQ_TO_OFFSET(irq, 1));
+ ((uint8_t *)&data)[3] = 0xff;
+ ioapic_write(IRQ_TO_OFFSET(irq, 1), data);
+}
+
+inline static void __ioapic_setmask(uint32_t irq, int mask)
+{
+ uint32_t data = ioapic_read(IRQ_TO_OFFSET(irq, 0));
+ if (mask)
+ {
+ BIT_SET(data, 16);
+ }
+ else
+ {
+ BIT_UNSET(data, 16);
+ }
+ ioapic_write(IRQ_TO_OFFSET(irq, 0), data);
+}
+
+static uint32_t apic_exists(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+ cpuid(CPUID_GETFEATURES, &eax, &ebx, &ecx, &edx);
+ return edx & CPUID_FEAT_EDX_APIC;
+}
+
+static void apic_set_base(uint32_t apic)
+{
+ uint32_t edx = 0;
+ uint32_t eax = (apic & 0xfffff000) | IA32_APIC_BASE_MSR_ENABLE;
+ edx = 0;
+ cpuid_set_msr(IA32_APIC_BASE_MSR, eax, edx);
+}
+
+static uint32_t apic_get_base(void)
+{
+ uint32_t eax, edx;
+ cpuid_get_msr(IA32_APIC_BASE_MSR, &eax, &edx);
+ return (eax & 0xfffff000);
+}
+
+static long __apic_err()
+{
+ dbg(DBG_PRINT, "[+] APIC Error: 0x%x", LAPICESR);
+ __asm__("cli; hlt");
+ return 0;
+}
+
+void apic_enable()
+{
+ // [MODE---------------------------]
+ // Use the flat destination model (all ones in the DFR).
+ LAPICDFR = 0xffffffff;
+
+ KASSERT(apic_current_id() < 8);
+ __lapic_setlogicalid((uint8_t)(1 << apic_current_id()));
+ LAPICLVTTMR = LOCAL_APIC_DISABLE;
+ LAPICLVTPERF = LOCAL_APIC_NMI;
+ LAPICLVTLINT0 = LOCAL_APIC_DISABLE;
+ LAPICLVTLINT1 = LOCAL_APIC_DISABLE;
+ LAPICLVTERR = INTR_APICERR;
+ LAPICTPR = 0;
+ apic_set_base(apic_get_base());
+ apic_setspur(INTR_SPURIOUS);
+ intr_register(INTR_APICERR, __apic_err);
+}
+
+void apic_disable_periodic_timer()
+{
+ LAPICLVTTMR = LOCAL_APIC_DISABLE;
+ LAPICLVTPERF = LOCAL_APIC_NMI;
+ LAPICLVTLINT0 = LOCAL_APIC_DISABLE;
+ LAPICLVTLINT1 = LOCAL_APIC_DISABLE;
+ LAPICTPR = 0;
+}
+
+/* get_cpu_bus_frequency - Uses PIT to determine APIC frequency in Hz (ticks per
+ * second). NOTE: NOT SMP FRIENDLY! Note: For more info, visit the osdev wiki
+ * page on the Programmable Interval Timer. */
+static uint32_t get_cpu_bus_frequency()
+{
+ static uint32_t freq = 0;
+ if (!freq)
+ {
+ /* Division rate: 0b1011 corresponds to division by 1, which does
+ * nothing. */
+ LAPICTMRDIV = 0b1011;
+
+ /* 0x61 controls the PC speaker.
+ * Clearing bit 1 prevents any sound.
+ * Setting bit 0 connects the speaker to the output of PIT channel 2. */
+ outb(0x61, (uint8_t)((inb(0x61) & 0xfd) | 1));
+
+ /* Control word 0xb2 = 0b10110010:
+ * bits 7-6 = 10 (channel 2), bits 5-4 = 11 (lobyte/hibyte access),
+ * bits 3-1 = 001 (mode 1, hardware one-shot), bit 0 = 0 (binary counting). */
+ outb(0x43, 0xb2);
+
+ /* The two outb's send the 16-bit reload value 0x2e9b = 11931, roughly
+ * 1/100th of the PIT oscillator rate, i.e. 10 ms; the inb in between
+ * likely just provides a short I/O delay. */
+ outb(0x42, 0x9b);
+ inb(0x60);
+ outb(0x42, 0x2e);
+
+ /* Reset the one-shot counter by clearing and resetting bit 0. */
+ uint32_t tmp = (uint32_t)(inb(0x61) & 0xfe);
+ outb(0x61, (uint8_t)tmp);
+ outb(0x61, (uint8_t)(tmp | 1));
+ /* Reset APIC's initial countdown value. */
+ LAPICTIC = 0xffffffff;
+ /* PC speaker sets bit 5 when it hits 0. */
+ while (!(inb(0x61) & 0x20))
+ ;
+ /* Stop the APIC timer */
+ LAPICLVTTMR = LOCAL_APIC_DISABLE;
+ /* Subtract current count from the initial count to get total ticks per
+ * second. */
+ freq = (LAPICTIC - LAPICTCC) * 100;
+ dbgq(DBG_CORE, "CPU Bus Freq: %u ticks per second\n", freq);
+ }
+ return freq;
+}
+
+/* apic_enable_periodic_timer - Starts the periodic timer (continuously send
+ * interrupts) at a given frequency. For more information, refer to: Intel
+ * System Programming Guide, Vol 3A Part 1, 10.5.4. */
+void apic_enable_periodic_timer(uint32_t freq)
+{
+ // TODO: Check this math! Don't assume it's correct...
+
+ uint32_t ticks_per_second = get_cpu_bus_frequency();
+ /* Demand at least the desired precision. */
+ if (ticks_per_second < freq)
+ {
+ panic(
+ "apic timer is not precise enough for desired frequency\n");
+ }
+
+ /* TODO: Pretty sure this can be more precise using the initial count
+ * properly. */
+
+ /* Round the bus frequency to the nearest multiple of the desired
+ * frequency. If bus/freq is large, the rounding error gets amortized to a
+ * degree that should be acceptable for Weenix. */
+ uint32_t rem = ticks_per_second % freq;
+ if (rem > (freq / 2))
+ ticks_per_second += (freq - rem);
+ else
+ ticks_per_second -= rem;
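+ /* e.g. with freq = 100: a bus rate of 1,000,037 ticks/s rounds down to
+ * 1,000,000, while 1,000,063 rounds up to 1,000,100. */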
+ // TODO: Provide a warning when there is a lot of drift, e.g. more than
+ // 1/10th inaccuracy per interval
+
+ /* Divide configuration. */
+ uint32_t div = 0b0111; /* Starts at division by 1. */
+ uint32_t tmp = ticks_per_second;
+ for (int i = 1; i < 7; i++)
+ { /* Max division is 2^7. */
+ /* Don't cut the freq in half if it would ruin divisibility. */
+ if ((tmp >> 1) % freq != 0)
+ break;
+ if ((tmp >> 1) < freq)
+ break;
+ /* Cut freq in half. */
+ tmp >>= 1;
+ /* Increment the order of division (1, 2, 4, ...). */
+ div++;
+ }
+
+ uint32_t tmpdiv = div;
+
+ /* Clear bit 3, which probably artificially overflowed. */
+ div &= 0b0111;
+
+ /* APIC DIV register skips bit 2, so if set, move it to bit 3. */
+ if (div & 0b0100)
+ {
+ div &= 0b0011; /* Clear bit 2. */
+ div |= 0b1000; /* Set bit 3. */
+ }
+
+ /* Set up three registers to configure timer:
+ * 1) Initial count: count down from this value, send interrupt upon hitting
+ * 0. */
+ LAPICTIC = tmp / freq;
+ /* 2) Divide config: calculated above to cut the bus clock. */
+ LAPICTMRDIV = div;
+ /* 3) LVT timer: use a periodic timer and raise the provided interrupt
+ * vector. */
+ LAPICLVTTMR = LOCAL_APIC_TMR_PERIODIC | INTR_APICTIMER;
+}
+
+static void apic_disable_8259()
+{
+ dbgq(DBG_CORE, "--- DISABLE 8259 PIC ---\n");
+ /* disable 8259 PICs by initializing them and masking all interrupts */
+ /* the first step is initialize them normally */
+ outb(PIC1_COMMAND, ICW1_INIT + ICW1_ICW4);
+ io_wait();
+ outb(PIC2_COMMAND, ICW1_INIT + ICW1_ICW4);
+ io_wait();
+ outb(PIC1_DATA, PIC1_VECTOR);
+ io_wait();
+ outb(PIC2_DATA, PIC2_VECTOR);
+ io_wait();
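+ /* Tell the master PIC there is a slave on IRQ2 (0x04) and give the slave
+ * its cascade identity (0x02). */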
+ outb(PIC1_DATA, 0x04);
+ io_wait();
+ outb(PIC2_DATA, 0x02);
+ io_wait();
+ outb(PIC1_DATA, ICW4_8086);
+ io_wait();
+ outb(PIC2_DATA, ICW4_8086);
+ /* Now mask all interrupts */
+ dbgq(DBG_CORE, "Masking all interrupts on the i8259 PIC\n");
+ outb(PIC1_DATA, PIC_COMPLETE_MASK);
+ outb(PIC2_DATA, PIC_COMPLETE_MASK);
+}
+
+static void map_apic_addr(uintptr_t paddr)
+{
+ page_mark_reserved((void *)paddr);
+ pt_map(pt_get(), paddr, paddr + PHYS_OFFSET, PT_WRITE | PT_PRESENT,
+ PT_WRITE | PT_PRESENT);
+}
+
+void apic_init()
+{
+ uint8_t *ptr = acpi_table(APIC_SIGNATURE, 0);
+ apic = (apic_table_t *)ptr;
+ KASSERT(NULL != apic && "APIC table not found in ACPI.");
+
+ apic_disable_8259();
+
+ dbgq(DBG_CORE, "--- APIC INIT ---\n");
+ dbgq(DBG_CORE, "local apic paddr: 0x%x\n", apic->at_addr);
+ dbgq(DBG_CORE, "PC-AT compatible: %i\n", apic->at_flags & 0x1);
+ KASSERT(PAGE_ALIGNED((void *)(uintptr_t)apic->at_addr));
+
+ KASSERT(apic->at_addr < 0xffffffff);
+
+ map_apic_addr(apic->at_addr);
+
+ /* Get the tables for the local APIC and IO APICS */
+ uint8_t off = sizeof(*apic);
+ while (off < apic->at_header.ah_size)
+ {
+ uint8_t type = *(ptr + off);
+ uint8_t size = *(ptr + off + 1);
+ lapic_table_t *lapic = NULL;
+ if (TYPE_LAPIC == type)
+ {
+ KASSERT(apic_exists() && "Local APIC does not exist");
+ KASSERT(sizeof(lapic_table_t) == size);
+ lapic = (lapic_table_t *)(ptr + off);
+ KASSERT(lapic->at_apicid < MAX_LAPICS &&
+ "Weenix only supports MAX_LAPICS local APICs");
+ lapics[lapic->at_apicid] = lapic;
+
+ page_mark_reserved(PAGE_ALIGN_DOWN((uintptr_t)lapic - PHYS_OFFSET));
+ max_apicid = lapic->at_apicid;
+
+ dbgq(DBG_CORE, "LAPIC:\n");
+ dbgq(DBG_CORE, " id: 0x%.2x\n",
+ (uint32_t)lapic->at_apicid);
+ dbgq(DBG_CORE, " processor: 0x%.3x\n",
+ (uint32_t)lapic->at_procid);
+ dbgq(DBG_CORE, " enabled: %i\n", apic->at_flags & 0x1);
+ }
+ else if (TYPE_IOAPIC == type)
+ {
+ KASSERT(apic_exists() && "IO APIC does not exist");
+ KASSERT(sizeof(ioapic_table_t) == size);
+ KASSERT(NULL == ioapic && "Weenix only supports a single IO APIC");
+ ioapic = (ioapic_table_t *)(ptr + off);
+ page_mark_reserved(
+ PAGE_ALIGN_DOWN((uintptr_t)ioapic - PHYS_OFFSET));
+ map_apic_addr(ioapic->at_addr);
+
+ dbgq(DBG_CORE, "IOAPIC:\n");
+ dbgq(DBG_CORE, " id: 0x%.2x\n",
+ (uint32_t)ioapic->at_apicid);
+ dbgq(DBG_CORE, " base paddr: 0x%.8x\n", ioapic->at_addr);
+ dbgq(DBG_CORE, " inti addr: 0x%.8x\n", ioapic->at_inti);
+ KASSERT(PAGE_ALIGNED((void *)(uintptr_t)ioapic->at_addr));
+ }
+ else
+ {
+ dbgq(DBG_CORE, "Unknown APIC type: 0x%x\n", (uint32_t)type);
+ }
+ off += size;
+ }
+ KASSERT(NULL != lapics[apic_current_id()] &&
+ "Could not find a local APIC device");
+ KASSERT(NULL != ioapic && "Could not find an IO APIC");
+
+ initialized = 1;
+}
+
+inline long apic_initialized() { return initialized; }
+
+inline uint8_t apic_getipl() { return (uint8_t)LAPICTPR; }
+
+inline void apic_setipl(uint8_t ipl) { LAPICTPR = ipl; }
+
+inline void apic_setspur(uint8_t intr)
+{
+ dbg(DBG_CORE, "mapping spurious interrupts to %u\n", intr);
+ __lapic_setspur(intr);
+}
+
+inline void apic_eoi() { LAPICEOI = 0x0; }
+
+void apic_setredir(uint32_t irq, uint8_t intr)
+{
+ dbg(DBG_CORE, "redirecting irq %u to interrupt %u\n", irq, intr);
+ __ioapic_setredir(irq, intr);
+ __ioapic_setmask(irq, 0);
+}
+
+void apic_start_processor(uint8_t processor, uint8_t execution_page)
+{
+ // [+] TODO FIX MAGIC NUMBERS
+ KASSERT(processor < 8);
+ uint32_t icr_low = 0;
+ icr_low |= 0;
+ icr_low |= DESTINATION_MODE_INIT << 8;
+ BIT_UNSET(icr_low, 11); // physical destination
+
+ BIT_SET(icr_low, 14);
+ BIT_UNSET(icr_low, 15);
+
+ dbg(DBG_CORE, "Sending IPI: ICR_LOW = 0x%.8x, ICR_HIGH = 0x%.8x\n", icr_low,
+ processor << 24);
+ LAPICICRH = processor << 24;
+ LAPICICRL = icr_low;
+
+ apic_wait_ipi();
+
+ icr_low = 0;
+ icr_low |= execution_page;
+ icr_low |= DESTINATION_MODE_SIPI << 8;
+ BIT_UNSET(icr_low, 11); // physical destination
+
+ BIT_SET(icr_low, 14);
+ BIT_UNSET(icr_low, 15);
+ dbg(DBG_CORE, "Sending IPI: ICR_LOW = 0x%.8x, ICR_HIGH = 0x%.8x\n", icr_low,
+ processor << 24);
+
+ LAPICICRH = processor << 24;
+ LAPICICRL = icr_low;
+
+ apic_wait_ipi();
+}
+
+void apic_send_ipi(uint8_t target, ipi_destination_mode mode, uint8_t vector)
+{
+ // See https://wiki.osdev.org/APIC#Interrupt_Command_Register for a
+ // description of how this works. This function only supports targeting a
+ // single APIC, instead of using the special destination modes. Since we
+ // already parse the APIC table, it's more reliable to interrupt a specific
+ // processor.
+ KASSERT(target < 8);
+
+ uint32_t icr_low = 0;
+ icr_low |= vector; // bits 0-7 are the vector number
+ icr_low |= mode << 8; // bits 8-10 are the destination mode
+ BIT_SET(icr_low, 11); // logical destination
+
+ BIT_SET(icr_low, 14);
+
+ dbgq(DBG_CORE, "Sending IPI: ICR_LOW = 0x%.8x, ICR_HIGH = 0x%.8x\n",
+ icr_low, (1U << target) << 24);
+
+ // Bits 24-27 of ICR_HIGH are the target logical APIC ID. Setting ICR_LOW
+ // sends the interrupt, so we have to set this first
+ LAPICICRH = (1U << target) << 24;
+ // send the IPI
+ LAPICICRL = icr_low;
+}
+
+void apic_broadcast_ipi(ipi_destination_mode mode, uint8_t vector,
+ long include_self)
+{
+ uint32_t icr_low = 0;
+ icr_low |= vector;
+ icr_low |= mode << 8;
+ BIT_SET(icr_low, 11);
+ BIT_SET(icr_low, 14);
+
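+ /* Bits 18-19 select the destination shorthand: 0b10 = all including self,
+ * 0b11 = all excluding self. */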
+ if (!include_self)
+ BIT_SET(icr_low, 18);
+ BIT_SET(icr_low, 19);
+
+ LAPICICRH = 0;
+ LAPICICRL = icr_low;
+}
+
+/**
+ * Wait for the last IPI sent to be acknowledged by the other processor.
+ *
+ * Note: this is separate from apic_send_ipi because there are circumstances
+ * where we don't want to wait.
+ */
+void apic_wait_ipi()
+{
+ // Bit 12 of ICR_LOW is the delivery status flag.
+ while (LAPICICRL & (1 << 12))
+ ;
+}
diff --git a/kernel/main/gdt.c b/kernel/main/gdt.c
new file mode 100644
index 0000000..9bc8282
--- /dev/null
+++ b/kernel/main/gdt.c
@@ -0,0 +1,129 @@
+#include "main/gdt.h"
+#include "globals.h"
+
+#include "util/debug.h"
+#include "util/printf.h"
+#include "util/string.h"
+
+typedef struct gdt_entry
+{
+ uint16_t ge_limitlo;
+ uint16_t ge_baselo;
+ uint8_t ge_basemid;
+ uint8_t ge_access;
+ uint8_t ge_flags;
+ uint8_t ge_basehi;
+} packed gdt_entry_t;
+
+static gdt_entry_t gdt[GDT_COUNT] CORE_SPECIFIC_DATA;
+
+typedef struct tss_entry
+{
+ uint32_t ts_reserved1;
+ uint64_t ts_rsp0;
+ uint64_t ts_rsp1;
+ uint64_t ts_rsp2;
+ uint64_t ts_reserved2;
+ uint64_t ts_ist1;
+ uint64_t ts_ist2;
+ uint64_t ts_ist3;
+ uint64_t ts_ist4;
+ uint64_t ts_ist5;
+ uint64_t ts_ist6;
+ uint64_t ts_ist7;
+ uint64_t ts_reserved3;
+ uint16_t ts_iopb;
+ uint16_t ts_reserved4;
+} packed tss_entry_t;
+
+typedef struct gdt_location
+{
+ uint16_t gl_size;
+ uint64_t gl_offset;
+} packed gdt_location_t;
+
+static gdt_location_t gdtl = {.gl_size = GDT_COUNT * sizeof(gdt_entry_t),
+ .gl_offset = (uint64_t)&gdt};
+
+static tss_entry_t tss CORE_SPECIFIC_DATA;
+
+void gdt_init(void)
+{
+ memset(gdt, 0, sizeof(gdt));
+ gdt_set_entry(GDT_KERNEL_TEXT, 0x0, 0xFFFFF, 0, 1, 0, 1);
+ gdt_set_entry(GDT_KERNEL_DATA, 0x0, 0xFFFFF, 0, 0, 0, 1);
+ gdt_set_entry(GDT_USER_TEXT, 0x0, 0xFFFFF, 3, 1, 0, 1);
+ gdt_set_entry(GDT_USER_DATA, 0x0, 0xFFFFF, 3, 0, 0, 1);
+
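+ /* In long mode the TSS descriptor occupies two GDT slots: the first is a
+ * system descriptor (S bit clear, type 0x9 = available 64-bit TSS) and the
+ * second holds bits 63:32 of the TSS base address. */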
+ uintptr_t tss_pointer = (uintptr_t)&tss;
+ gdt_set_entry(GDT_TSS, (uint32_t)tss_pointer, sizeof(tss), 0, 1, 0, 0);
+ gdt[GDT_TSS / 8].ge_access &= ~(0b10000);
+ gdt[GDT_TSS / 8].ge_access |= 0b1;
+ gdt[GDT_TSS / 8].ge_flags &= ~(0b10000000);
+
+ uint64_t tss_higher_half = ((uint64_t)tss_pointer) >> 32;
+ memcpy(&gdt[GDT_TSS / 8 + 1], &tss_higher_half, 8);
+
+ memset(&tss, 0, sizeof(tss));
+ tss.ts_iopb = sizeof(tss);
+
+ gdt_location_t *data = &gdtl;
+ int segment = GDT_TSS;
+
+ dbg(DBG_CORE, "Installing GDT and TR\n");
+ __asm__ volatile("lgdt (%0); ltr %1" ::"p"(data), "m"(segment));
+}
+
+void gdt_set_kernel_stack(void *addr) { tss.ts_rsp0 = (uint64_t)addr; }
+
+void gdt_set_entry(uint32_t segment, uint32_t base, uint32_t limit,
+ uint8_t ring, int exec, int dir, int rw)
+{
+ KASSERT(segment < GDT_COUNT * 8 && 0 == segment % 8);
+ KASSERT(ring <= 3);
+ KASSERT(limit <= 0xFFFFF);
+
+ int index = segment / 8;
+ gdt[index].ge_limitlo = (uint16_t)limit;
+ gdt[index].ge_baselo = (uint16_t)base;
+ gdt[index].ge_basemid = (uint8_t)(base >> 16);
+ gdt[index].ge_basehi = (uint8_t)(base >> 24);
+
+ // For x86-64, set the L bit to indicate a 64-bit descriptor and clear Sz
+ // Having both L and Sz set is reserved for future use
+ gdt[index].ge_flags = (uint8_t)(0b10100000 | (limit >> 16));
+
+ gdt[index].ge_access = 0b10000000;
+ gdt[index].ge_access |= (ring << 5);
+ gdt[index].ge_access |= 0b10000;
+ if (exec)
+ {
+ gdt[index].ge_access |= 0b1000;
+ }
+ if (dir)
+ {
+ gdt[index].ge_access |= 0b100;
+ }
+ if (rw)
+ {
+ gdt[index].ge_access |= 0b10;
+ }
+}
+
+void gdt_clear(uint32_t segment)
+{
+ KASSERT(segment < GDT_COUNT * 8 && 0 == segment % 8);
+ memset(&gdt[segment / 8], 0, sizeof(gdt[segment / 8]));
+}
+
+size_t gdt_tss_info(const void *arg, char *buf, size_t osize)
+{
+ size_t size = osize;
+
+ KASSERT(NULL == arg);
+
+ iprintf(&buf, &size, "TSS:\n");
+ iprintf(&buf, &size, "kstack: 0x%p\n", (void *)tss.ts_rsp0);
+
+ return size;
+}
diff --git a/kernel/main/gdt.gdb b/kernel/main/gdt.gdb
new file mode 100644
index 0000000..9dbf37a
--- /dev/null
+++ b/kernel/main/gdt.gdb
@@ -0,0 +1,3 @@
+define tss
+ kinfo gdt_tss_info
+end
diff --git a/kernel/main/interrupt.c b/kernel/main/interrupt.c
new file mode 100644
index 0000000..d3f6655
--- /dev/null
+++ b/kernel/main/interrupt.c
@@ -0,0 +1,1077 @@
+#include "errno.h"
+#include "globals.h"
+#include "types.h"
+#include <api/syscall.h>
+
+#include "util/debug.h"
+#include "util/string.h"
+
+#include "main/apic.h"
+#include "main/gdt.h"
+
+#define MAX_INTERRUPTS 256
+
+/* Convenient definitions for intr_desc.attr */
+
+#define IDT_DESC_TRAP 0x01
+#define IDT_DESC_BIT16 0x06
+#define IDT_DESC_BIT32 0x0E
+#define IDT_DESC_RING0 0x00
+#define IDT_DESC_RING1 0x40
+#define IDT_DESC_RING2 0x20
+#define IDT_DESC_RING3 0x60
+#define IDT_DESC_PRESENT 0x80
+
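+/* Assembly stubs for every interrupt vector. Exceptions that push a hardware
+ * error code use INTR_ERRCODE; all others use INTR_NOERRCODE, which pushes a
+ * dummy 0 so the saved-register layout is identical. Each stub pushes the
+ * vector number and all general-purpose registers, calls interrupt_handler,
+ * restores the registers, discards the vector and error code (add $16, %rsp),
+ * and returns with iretq. */
+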
+#define INTR(isr) (__intr_handler##isr)
+
+#define INTR_ERRCODE(isr) \
+ extern intr_handler_t __intr_handler##isr; \
+ __asm__(".global __intr_handler" #isr \
+ "\n" \
+ "__intr_handler" #isr \
+ ":\n\t" \
+ "pushq $" #isr \
+ "\n\t" \
+ "pushq %rdi\n\t" \
+ "pushq %rsi\n\t" \
+ "pushq %rdx\n\t" \
+ "pushq %rcx\n\t" \
+ "pushq %rax\n\t" \
+ "pushq %r8\n\t" \
+ "pushq %r9\n\t" \
+ "pushq %r10\n\t" \
+ "pushq %r11\n\t" \
+ "pushq %rbx\n\t" \
+ "pushq %rbp\n\t" \
+ "pushq %r12\n\t" \
+ "pushq %r13\n\t" \
+ "pushq %r14\n\t" \
+ "pushq %r15\n\t" \
+ "call interrupt_handler\n\t" \
+ "popq %r15\n\t" \
+ "popq %r14\n\t" \
+ "popq %r13\n\t" \
+ "popq %r12\n\t" \
+ "popq %rbp\n\t" \
+ "popq %rbx\n\t" \
+ "popq %r11\n\t" \
+ "popq %r10\n\t" \
+ "popq %r9\n\t" \
+ "popq %r8\n\t" \
+ "popq %rax\n\t" \
+ "popq %rcx\n\t" \
+ "popq %rdx\n\t" \
+ "popq %rsi\n\t" \
+ "popq %rdi\n\t" \
+ "add $16, %rsp\n\t" \
+ "iretq\n");
+
+#define INTR_NOERRCODE(isr) \
+ extern intr_handler_t __intr_handler##isr; \
+ __asm__(".global __intr_handler" #isr \
+ "\n" \
+ "__intr_handler" #isr \
+ ":\n\t" \
+ "pushq $0x0\n\t" \
+ "pushq $" #isr \
+ "\n\t" \
+ "pushq %rdi\n\t" \
+ "pushq %rsi\n\t" \
+ "pushq %rdx\n\t" \
+ "pushq %rcx\n\t" \
+ "pushq %rax\n\t" \
+ "pushq %r8\n\t" \
+ "pushq %r9\n\t" \
+ "pushq %r10\n\t" \
+ "pushq %r11\n\t" \
+ "pushq %rbx\n\t" \
+ "pushq %rbp\n\t" \
+ "pushq %r12\n\t" \
+ "pushq %r13\n\t" \
+ "pushq %r14\n\t" \
+ "pushq %r15\n\t" \
+ "call interrupt_handler\n\t" \
+ "popq %r15\n\t" \
+ "popq %r14\n\t" \
+ "popq %r13\n\t" \
+ "popq %r12\n\t" \
+ "popq %rbp\n\t" \
+ "popq %rbx\n\t" \
+ "popq %r11\n\t" \
+ "popq %r10\n\t" \
+ "popq %r9\n\t" \
+ "popq %r8\n\t" \
+ "popq %rax\n\t" \
+ "popq %rcx\n\t" \
+ "popq %rdx\n\t" \
+ "popq %rsi\n\t" \
+ "popq %rdi\n\t" \
+ "add $16, %rsp\n\t" \
+ "iretq\n\t");
+
+INTR_NOERRCODE(0)
+INTR_NOERRCODE(1)
+INTR_NOERRCODE(2)
+INTR_NOERRCODE(3)
+INTR_NOERRCODE(4)
+INTR_NOERRCODE(5)
+INTR_NOERRCODE(6)
+INTR_NOERRCODE(7)
+INTR_ERRCODE(8)
+INTR_NOERRCODE(9)
+INTR_ERRCODE(10)
+INTR_ERRCODE(11)
+INTR_ERRCODE(12)
+INTR_ERRCODE(13)
+INTR_ERRCODE(14)
+INTR_NOERRCODE(15)
+INTR_NOERRCODE(16)
+INTR_ERRCODE(17)
+INTR_NOERRCODE(18)
+INTR_NOERRCODE(19)
+INTR_NOERRCODE(20)
+INTR_NOERRCODE(21)
+INTR_NOERRCODE(22)
+INTR_NOERRCODE(23)
+INTR_NOERRCODE(24)
+INTR_NOERRCODE(25)
+INTR_NOERRCODE(26)
+INTR_NOERRCODE(27)
+INTR_NOERRCODE(28)
+INTR_NOERRCODE(29)
+INTR_NOERRCODE(30)
+INTR_NOERRCODE(31)
+INTR_NOERRCODE(32)
+INTR_NOERRCODE(33)
+INTR_NOERRCODE(34)
+INTR_NOERRCODE(35)
+INTR_NOERRCODE(36)
+INTR_NOERRCODE(37)
+INTR_NOERRCODE(38)
+INTR_NOERRCODE(39)
+INTR_NOERRCODE(40)
+INTR_NOERRCODE(41)
+INTR_NOERRCODE(42)
+INTR_NOERRCODE(43)
+INTR_NOERRCODE(44)
+INTR_NOERRCODE(45)
+INTR_NOERRCODE(46)
+INTR_NOERRCODE(47)
+INTR_NOERRCODE(48)
+INTR_NOERRCODE(49)
+INTR_NOERRCODE(50)
+INTR_NOERRCODE(51)
+INTR_NOERRCODE(52)
+INTR_NOERRCODE(53)
+INTR_NOERRCODE(54)
+INTR_NOERRCODE(55)
+INTR_NOERRCODE(56)
+INTR_NOERRCODE(57)
+INTR_NOERRCODE(58)
+INTR_NOERRCODE(59)
+INTR_NOERRCODE(60)
+INTR_NOERRCODE(61)
+INTR_NOERRCODE(62)
+INTR_NOERRCODE(63)
+INTR_NOERRCODE(64)
+INTR_NOERRCODE(65)
+INTR_NOERRCODE(66)
+INTR_NOERRCODE(67)
+INTR_NOERRCODE(68)
+INTR_NOERRCODE(69)
+INTR_NOERRCODE(70)
+INTR_NOERRCODE(71)
+INTR_NOERRCODE(72)
+INTR_NOERRCODE(73)
+INTR_NOERRCODE(74)
+INTR_NOERRCODE(75)
+INTR_NOERRCODE(76)
+INTR_NOERRCODE(77)
+INTR_NOERRCODE(78)
+INTR_NOERRCODE(79)
+INTR_NOERRCODE(80)
+INTR_NOERRCODE(81)
+INTR_NOERRCODE(82)
+INTR_NOERRCODE(83)
+INTR_NOERRCODE(84)
+INTR_NOERRCODE(85)
+INTR_NOERRCODE(86)
+INTR_NOERRCODE(87)
+INTR_NOERRCODE(88)
+INTR_NOERRCODE(89)
+INTR_NOERRCODE(90)
+INTR_NOERRCODE(91)
+INTR_NOERRCODE(92)
+INTR_NOERRCODE(93)
+INTR_NOERRCODE(94)
+INTR_NOERRCODE(95)
+INTR_NOERRCODE(96)
+INTR_NOERRCODE(97)
+INTR_NOERRCODE(98)
+INTR_NOERRCODE(99)
+INTR_NOERRCODE(100)
+INTR_NOERRCODE(101)
+INTR_NOERRCODE(102)
+INTR_NOERRCODE(103)
+INTR_NOERRCODE(104)
+INTR_NOERRCODE(105)
+INTR_NOERRCODE(106)
+INTR_NOERRCODE(107)
+INTR_NOERRCODE(108)
+INTR_NOERRCODE(109)
+INTR_NOERRCODE(110)
+INTR_NOERRCODE(111)
+INTR_NOERRCODE(112)
+INTR_NOERRCODE(113)
+INTR_NOERRCODE(114)
+INTR_NOERRCODE(115)
+INTR_NOERRCODE(116)
+INTR_NOERRCODE(117)
+INTR_NOERRCODE(118)
+INTR_NOERRCODE(119)
+INTR_NOERRCODE(120)
+INTR_NOERRCODE(121)
+INTR_NOERRCODE(122)
+INTR_NOERRCODE(123)
+INTR_NOERRCODE(124)
+INTR_NOERRCODE(125)
+INTR_NOERRCODE(126)
+INTR_NOERRCODE(127)
+INTR_NOERRCODE(128)
+INTR_NOERRCODE(129)
+INTR_NOERRCODE(130)
+INTR_NOERRCODE(131)
+INTR_NOERRCODE(132)
+INTR_NOERRCODE(133)
+INTR_NOERRCODE(134)
+INTR_NOERRCODE(135)
+INTR_NOERRCODE(136)
+INTR_NOERRCODE(137)
+INTR_NOERRCODE(138)
+INTR_NOERRCODE(139)
+INTR_NOERRCODE(140)
+INTR_NOERRCODE(141)
+INTR_NOERRCODE(142)
+INTR_NOERRCODE(143)
+INTR_NOERRCODE(144)
+INTR_NOERRCODE(145)
+INTR_NOERRCODE(146)
+INTR_NOERRCODE(147)
+INTR_NOERRCODE(148)
+INTR_NOERRCODE(149)
+INTR_NOERRCODE(150)
+INTR_NOERRCODE(151)
+INTR_NOERRCODE(152)
+INTR_NOERRCODE(153)
+INTR_NOERRCODE(154)
+INTR_NOERRCODE(155)
+INTR_NOERRCODE(156)
+INTR_NOERRCODE(157)
+INTR_NOERRCODE(158)
+INTR_NOERRCODE(159)
+INTR_NOERRCODE(160)
+INTR_NOERRCODE(161)
+INTR_NOERRCODE(162)
+INTR_NOERRCODE(163)
+INTR_NOERRCODE(164)
+INTR_NOERRCODE(165)
+INTR_NOERRCODE(166)
+INTR_NOERRCODE(167)
+INTR_NOERRCODE(168)
+INTR_NOERRCODE(169)
+INTR_NOERRCODE(170)
+INTR_NOERRCODE(171)
+INTR_NOERRCODE(172)
+INTR_NOERRCODE(173)
+INTR_NOERRCODE(174)
+INTR_NOERRCODE(175)
+INTR_NOERRCODE(176)
+INTR_NOERRCODE(177)
+INTR_NOERRCODE(178)
+INTR_NOERRCODE(179)
+INTR_NOERRCODE(180)
+INTR_NOERRCODE(181)
+INTR_NOERRCODE(182)
+INTR_NOERRCODE(183)
+INTR_NOERRCODE(184)
+INTR_NOERRCODE(185)
+INTR_NOERRCODE(186)
+INTR_NOERRCODE(187)
+INTR_NOERRCODE(188)
+INTR_NOERRCODE(189)
+INTR_NOERRCODE(190)
+INTR_NOERRCODE(191)
+INTR_NOERRCODE(192)
+INTR_NOERRCODE(193)
+INTR_NOERRCODE(194)
+INTR_NOERRCODE(195)
+INTR_NOERRCODE(196)
+INTR_NOERRCODE(197)
+INTR_NOERRCODE(198)
+INTR_NOERRCODE(199)
+INTR_NOERRCODE(200)
+INTR_NOERRCODE(201)
+INTR_NOERRCODE(202)
+INTR_NOERRCODE(203)
+INTR_NOERRCODE(204)
+INTR_NOERRCODE(205)
+INTR_NOERRCODE(206)
+INTR_NOERRCODE(207)
+INTR_NOERRCODE(208)
+INTR_NOERRCODE(209)
+INTR_NOERRCODE(210)
+INTR_NOERRCODE(211)
+INTR_NOERRCODE(212)
+INTR_NOERRCODE(213)
+INTR_NOERRCODE(214)
+INTR_NOERRCODE(215)
+INTR_NOERRCODE(216)
+INTR_NOERRCODE(217)
+INTR_NOERRCODE(218)
+INTR_NOERRCODE(219)
+INTR_NOERRCODE(220)
+INTR_NOERRCODE(221)
+INTR_NOERRCODE(222)
+INTR_NOERRCODE(223)
+INTR_NOERRCODE(224)
+INTR_NOERRCODE(225)
+INTR_NOERRCODE(226)
+INTR_NOERRCODE(227)
+INTR_NOERRCODE(228)
+INTR_NOERRCODE(229)
+INTR_NOERRCODE(230)
+INTR_NOERRCODE(231)
+INTR_NOERRCODE(232)
+INTR_NOERRCODE(233)
+INTR_NOERRCODE(234)
+INTR_NOERRCODE(235)
+INTR_NOERRCODE(236)
+INTR_NOERRCODE(237)
+INTR_NOERRCODE(238)
+INTR_NOERRCODE(239)
+INTR_NOERRCODE(240)
+INTR_NOERRCODE(241)
+INTR_NOERRCODE(242)
+INTR_NOERRCODE(243)
+INTR_NOERRCODE(244)
+INTR_NOERRCODE(245)
+INTR_NOERRCODE(246)
+INTR_NOERRCODE(247)
+INTR_NOERRCODE(248)
+INTR_NOERRCODE(249)
+INTR_NOERRCODE(250)
+INTR_NOERRCODE(251)
+INTR_NOERRCODE(252)
+INTR_NOERRCODE(253)
+INTR_NOERRCODE(254)
+INTR_NOERRCODE(255)
+
+typedef struct intr_desc
+{
+ uint16_t offset1;
+ uint16_t selector;
+ uint8_t ist;
+ uint8_t attr; // type and attributes
+ uint16_t offset2; // offset bits 16..31
+ uint32_t offset3; // offset bits 32..63
+ uint32_t zero;
+} packed intr_desc_t;
+
+typedef struct intr_info
+{
+ uint16_t size;
+ uintptr_t base;
+} packed intr_info_t;
+
+static intr_desc_t intr_table[MAX_INTERRUPTS];
+static intr_handler_t intr_handlers[MAX_INTERRUPTS];
+static int32_t intr_mappings[MAX_INTERRUPTS];
+
+intr_info_t intr_data;
+
+/* This variable is updated when an interrupt occurs to
+ * point to the saved registers of the interrupted context.
+ * When it is non-NULL the processor is in an interrupt
+ * context, otherwise it is in a non-interrupt process.
+ * This variable is maintained for easy reference by
+ * debuggers. */
+static regs_t *_intr_regs CORE_SPECIFIC_DATA;
+
+inline uint8_t intr_setipl(uint8_t ipl)
+{
+ uint8_t oldipl = apic_getipl();
+ apic_setipl(ipl);
+ return oldipl;
+}
+
+inline uint8_t intr_getipl() { return apic_getipl(); }
+
+static __attribute__((used)) void interrupt_handler(regs_t regs)
+{
+ intr_handler_t handler = intr_handlers[regs.r_intr];
+ _intr_regs = &regs;
+ if (handler)
+ {
+ if ((regs.r_cs & 0x3) == 0x3)
+ {
+ // KASSERT(preemption_enabled()); TODO figure out why
+ // this sometimes fails!!
+ }
+ if (!handler(&regs))
+ apic_eoi();
+ }
+ else
+ {
+ panic("Unhandled interrupt 0x%x\n", (int)regs.r_intr);
+ }
+ _intr_regs = NULL;
+}
+
+int32_t intr_map(uint16_t irq, uint8_t intr)
+{
+ KASSERT(INTR_SPURIOUS != intr);
+
+ int32_t oldirq = intr_mappings[intr];
+ intr_mappings[intr] = irq;
+ apic_setredir(irq, intr);
+ return oldirq;
+}
+
+intr_handler_t intr_register(uint8_t intr, intr_handler_t handler)
+{
+ intr_handler_t old = intr_handlers[intr];
+ intr_handlers[intr] = handler;
+ return old;
+}
+
+// lol
+void dump_registers(regs_t *regs)
+{
+ dbg(DBG_PRINT,
+ "Registers:\nintr=0x%08lx\nerr=0x%08lx\nrip=0x%08lx\ncs=0x%"
+ "08lx\nrflags=0x%08lx\nrsp=0x%08lx\n"
+ "ss=0x%08lx\nrdi=0x%08lx\nrsi=0x%08lx\nrdx=0x%08lx\nrcx=0x%08lx\nrax="
+ "0x%08lx\nr8=0x%08lx\n"
+ "r9=0x%08lx\nr10=0x%08lx\nr11=0x%08lx\nrbx=0x%08lx\nrbp=0x%08lx\nr12="
+ "0x%08lx\nr13=0x%08lx\n"
+ "r14=0x%08lx\nr15=0x%08lx\n",
+ regs->r_intr, regs->r_err, regs->r_rip, regs->r_cs, regs->r_rflags,
+ regs->r_rsp, regs->r_ss, regs->r_rdi, regs->r_rsi, regs->r_rdx,
+ regs->r_rcx, regs->r_rax, regs->r_r8, regs->r_r9, regs->r_r10,
+ regs->r_r11, regs->r_rbx, regs->r_rbp, regs->r_r12, regs->r_r13,
+ regs->r_r14, regs->r_r15);
+}
+
+static long __intr_gpf_handler(regs_t *regs)
+{
+ // check if we're in userland
+ if ((regs->r_cs & 0x3) == 0x3)
+ {
+ // most likely accessed a non-canonical address
+ do_exit(EFAULT);
+ }
+ else
+ {
+ dump_registers(regs);
+ panic("\n\nTriggered a General Protection Fault\n");
+ }
+ return 0;
+}
+
+static long __intr_divide_by_zero_handler(regs_t *regs)
+{
+ // check if we're in userland
+ if ((regs->r_cs & 0x3) == 0x3)
+ {
+ do_exit(EPERM);
+ }
+ else
+ {
+ dump_registers(regs);
+ panic("\n\nTriggered a Divide by Zero exception\n");
+ }
+ return 0;
+}
+
+static long __intr_inval_opcode_handler(regs_t *regs)
+{
+ // check if we're in userland
+ if ((regs->r_cs & 0x3) == 0x3)
+ {
+ do_exit(EPERM);
+ }
+ else
+ {
+ dump_registers(regs);
+ panic("\n\nTriggered an Invalid Opcode exception\n");
+ }
+ return 0;
+}
+
+static long __intr_spurious(regs_t *regs)
+{
+ dbg(DBG_CORE, "ignoring spurious interrupt\n");
+ return 0;
+}
+
+static void __intr_set_entry(uint8_t isr, uintptr_t addr, uint8_t seg,
+ uint8_t flags)
+{
+ // [+] TODO MAGIC NUMBERS
+ intr_table[isr].offset1 = (uint16_t)((addr)&0xffff);
+ intr_table[isr].offset2 = (uint16_t)(((addr) >> 16) & 0xffff);
+ intr_table[isr].offset3 = (uint32_t)((addr) >> 32);
+ intr_table[isr].zero = 0;
+ intr_table[isr].attr = flags;
+ intr_table[isr].selector = seg;
+ intr_table[isr].ist = 0;
+}
+
+static void __intr_set_entries();
+
+void intr_init()
+{
+ static long inited = 0;
+ intr_info_t *data = &intr_data;
+
+ if (!inited)
+ {
+ // global interrupt table
+ inited = 1;
+
+ // initialize intr_data
+ data->size = sizeof(intr_desc_t) * MAX_INTERRUPTS - 1;
+ data->base = (uintptr_t)intr_table;
+
+ memset(intr_handlers, 0, sizeof(intr_handlers));
+ memset(intr_mappings, -1, sizeof(intr_mappings));
+
+ __intr_set_entries();
+ }
+ __asm__("lidt (%0)" ::"p"(data));
+
+ intr_register(INTR_SPURIOUS, __intr_spurious);
+ intr_register(INTR_DIVIDE_BY_ZERO, __intr_divide_by_zero_handler);
+ intr_register(INTR_GPF, __intr_gpf_handler);
+ intr_register(INTR_INVALID_OPCODE, __intr_inval_opcode_handler);
+}
+
+static void __intr_set_entries()
+{
+ __intr_set_entry(0, (uintptr_t)&INTR(0), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(1, (uintptr_t)&INTR(1), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(2, (uintptr_t)&INTR(2), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(3, (uintptr_t)&INTR(3), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(4, (uintptr_t)&INTR(4), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(5, (uintptr_t)&INTR(5), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(6, (uintptr_t)&INTR(6), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(7, (uintptr_t)&INTR(7), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(8, (uintptr_t)&INTR(8), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(9, (uintptr_t)&INTR(9), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(10, (uintptr_t)&INTR(10), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(11, (uintptr_t)&INTR(11), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(12, (uintptr_t)&INTR(12), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(13, (uintptr_t)&INTR(13), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(14, (uintptr_t)&INTR(14), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(15, (uintptr_t)&INTR(15), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(16, (uintptr_t)&INTR(16), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(17, (uintptr_t)&INTR(17), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(18, (uintptr_t)&INTR(18), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(19, (uintptr_t)&INTR(19), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(20, (uintptr_t)&INTR(20), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(21, (uintptr_t)&INTR(21), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(22, (uintptr_t)&INTR(22), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(23, (uintptr_t)&INTR(23), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(24, (uintptr_t)&INTR(24), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(25, (uintptr_t)&INTR(25), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(26, (uintptr_t)&INTR(26), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(27, (uintptr_t)&INTR(27), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(28, (uintptr_t)&INTR(28), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(29, (uintptr_t)&INTR(29), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(30, (uintptr_t)&INTR(30), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(31, (uintptr_t)&INTR(31), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(32, (uintptr_t)&INTR(32), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(33, (uintptr_t)&INTR(33), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(34, (uintptr_t)&INTR(34), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(35, (uintptr_t)&INTR(35), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(36, (uintptr_t)&INTR(36), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(37, (uintptr_t)&INTR(37), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(38, (uintptr_t)&INTR(38), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(39, (uintptr_t)&INTR(39), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(40, (uintptr_t)&INTR(40), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(41, (uintptr_t)&INTR(41), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(42, (uintptr_t)&INTR(42), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(43, (uintptr_t)&INTR(43), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(44, (uintptr_t)&INTR(44), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(45, (uintptr_t)&INTR(45), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ /* BEWARE - this is the interrupt table entry for userland syscalls. It
+ * differs from all the others. */
+ __intr_set_entry(
+ 46, (uintptr_t)&INTR(46), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_TRAP | IDT_DESC_RING3);
+ /* */
+ __intr_set_entry(47, (uintptr_t)&INTR(47), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(48, (uintptr_t)&INTR(48), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(49, (uintptr_t)&INTR(49), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(50, (uintptr_t)&INTR(50), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(51, (uintptr_t)&INTR(51), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(52, (uintptr_t)&INTR(52), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(53, (uintptr_t)&INTR(53), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(54, (uintptr_t)&INTR(54), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(55, (uintptr_t)&INTR(55), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(56, (uintptr_t)&INTR(56), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(57, (uintptr_t)&INTR(57), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(58, (uintptr_t)&INTR(58), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(59, (uintptr_t)&INTR(59), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(60, (uintptr_t)&INTR(60), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(61, (uintptr_t)&INTR(61), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(62, (uintptr_t)&INTR(62), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(63, (uintptr_t)&INTR(63), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(64, (uintptr_t)&INTR(64), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(65, (uintptr_t)&INTR(65), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(66, (uintptr_t)&INTR(66), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(67, (uintptr_t)&INTR(67), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(68, (uintptr_t)&INTR(68), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(69, (uintptr_t)&INTR(69), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(70, (uintptr_t)&INTR(70), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(71, (uintptr_t)&INTR(71), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(72, (uintptr_t)&INTR(72), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(73, (uintptr_t)&INTR(73), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(74, (uintptr_t)&INTR(74), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(75, (uintptr_t)&INTR(75), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(76, (uintptr_t)&INTR(76), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(77, (uintptr_t)&INTR(77), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(78, (uintptr_t)&INTR(78), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(79, (uintptr_t)&INTR(79), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(80, (uintptr_t)&INTR(80), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(81, (uintptr_t)&INTR(81), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(82, (uintptr_t)&INTR(82), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(83, (uintptr_t)&INTR(83), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(84, (uintptr_t)&INTR(84), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(85, (uintptr_t)&INTR(85), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(86, (uintptr_t)&INTR(86), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(87, (uintptr_t)&INTR(87), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(88, (uintptr_t)&INTR(88), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(89, (uintptr_t)&INTR(89), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(90, (uintptr_t)&INTR(90), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(91, (uintptr_t)&INTR(91), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(92, (uintptr_t)&INTR(92), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(93, (uintptr_t)&INTR(93), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(94, (uintptr_t)&INTR(94), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(95, (uintptr_t)&INTR(95), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(96, (uintptr_t)&INTR(96), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(97, (uintptr_t)&INTR(97), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(98, (uintptr_t)&INTR(98), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(99, (uintptr_t)&INTR(99), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(100, (uintptr_t)&INTR(100), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(101, (uintptr_t)&INTR(101), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(102, (uintptr_t)&INTR(102), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(103, (uintptr_t)&INTR(103), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(104, (uintptr_t)&INTR(104), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(105, (uintptr_t)&INTR(105), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(106, (uintptr_t)&INTR(106), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(107, (uintptr_t)&INTR(107), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(108, (uintptr_t)&INTR(108), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(109, (uintptr_t)&INTR(109), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(110, (uintptr_t)&INTR(110), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(111, (uintptr_t)&INTR(111), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(112, (uintptr_t)&INTR(112), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(113, (uintptr_t)&INTR(113), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(114, (uintptr_t)&INTR(114), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(115, (uintptr_t)&INTR(115), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(116, (uintptr_t)&INTR(116), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(117, (uintptr_t)&INTR(117), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(118, (uintptr_t)&INTR(118), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(119, (uintptr_t)&INTR(119), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(120, (uintptr_t)&INTR(120), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(121, (uintptr_t)&INTR(121), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(122, (uintptr_t)&INTR(122), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(123, (uintptr_t)&INTR(123), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(124, (uintptr_t)&INTR(124), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(125, (uintptr_t)&INTR(125), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(126, (uintptr_t)&INTR(126), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(127, (uintptr_t)&INTR(127), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(128, (uintptr_t)&INTR(128), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(129, (uintptr_t)&INTR(129), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(130, (uintptr_t)&INTR(130), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(131, (uintptr_t)&INTR(131), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(132, (uintptr_t)&INTR(132), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(133, (uintptr_t)&INTR(133), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(134, (uintptr_t)&INTR(134), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(135, (uintptr_t)&INTR(135), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(136, (uintptr_t)&INTR(136), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(137, (uintptr_t)&INTR(137), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(138, (uintptr_t)&INTR(138), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(139, (uintptr_t)&INTR(139), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(140, (uintptr_t)&INTR(140), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(141, (uintptr_t)&INTR(141), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(142, (uintptr_t)&INTR(142), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(143, (uintptr_t)&INTR(143), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(144, (uintptr_t)&INTR(144), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(145, (uintptr_t)&INTR(145), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(146, (uintptr_t)&INTR(146), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(147, (uintptr_t)&INTR(147), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(148, (uintptr_t)&INTR(148), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(149, (uintptr_t)&INTR(149), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(150, (uintptr_t)&INTR(150), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(151, (uintptr_t)&INTR(151), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(152, (uintptr_t)&INTR(152), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(153, (uintptr_t)&INTR(153), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(154, (uintptr_t)&INTR(154), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(155, (uintptr_t)&INTR(155), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(156, (uintptr_t)&INTR(156), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(157, (uintptr_t)&INTR(157), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(158, (uintptr_t)&INTR(158), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(159, (uintptr_t)&INTR(159), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(160, (uintptr_t)&INTR(160), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(161, (uintptr_t)&INTR(161), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(162, (uintptr_t)&INTR(162), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(163, (uintptr_t)&INTR(163), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(164, (uintptr_t)&INTR(164), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(165, (uintptr_t)&INTR(165), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(166, (uintptr_t)&INTR(166), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(167, (uintptr_t)&INTR(167), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(168, (uintptr_t)&INTR(168), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(169, (uintptr_t)&INTR(169), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(170, (uintptr_t)&INTR(170), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(171, (uintptr_t)&INTR(171), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(172, (uintptr_t)&INTR(172), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(173, (uintptr_t)&INTR(173), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(174, (uintptr_t)&INTR(174), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(175, (uintptr_t)&INTR(175), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(176, (uintptr_t)&INTR(176), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(177, (uintptr_t)&INTR(177), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(178, (uintptr_t)&INTR(178), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(179, (uintptr_t)&INTR(179), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(180, (uintptr_t)&INTR(180), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(181, (uintptr_t)&INTR(181), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(182, (uintptr_t)&INTR(182), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(183, (uintptr_t)&INTR(183), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(184, (uintptr_t)&INTR(184), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(185, (uintptr_t)&INTR(185), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(186, (uintptr_t)&INTR(186), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(187, (uintptr_t)&INTR(187), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(188, (uintptr_t)&INTR(188), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(189, (uintptr_t)&INTR(189), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(190, (uintptr_t)&INTR(190), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(191, (uintptr_t)&INTR(191), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(192, (uintptr_t)&INTR(192), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(193, (uintptr_t)&INTR(193), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(194, (uintptr_t)&INTR(194), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(195, (uintptr_t)&INTR(195), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(196, (uintptr_t)&INTR(196), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(197, (uintptr_t)&INTR(197), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(198, (uintptr_t)&INTR(198), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(199, (uintptr_t)&INTR(199), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(200, (uintptr_t)&INTR(200), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(201, (uintptr_t)&INTR(201), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(202, (uintptr_t)&INTR(202), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(203, (uintptr_t)&INTR(203), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(204, (uintptr_t)&INTR(204), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(205, (uintptr_t)&INTR(205), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(206, (uintptr_t)&INTR(206), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(207, (uintptr_t)&INTR(207), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(208, (uintptr_t)&INTR(208), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(209, (uintptr_t)&INTR(209), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(210, (uintptr_t)&INTR(210), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(211, (uintptr_t)&INTR(211), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(212, (uintptr_t)&INTR(212), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(213, (uintptr_t)&INTR(213), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(214, (uintptr_t)&INTR(214), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(215, (uintptr_t)&INTR(215), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(216, (uintptr_t)&INTR(216), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(217, (uintptr_t)&INTR(217), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(218, (uintptr_t)&INTR(218), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(219, (uintptr_t)&INTR(219), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(220, (uintptr_t)&INTR(220), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(221, (uintptr_t)&INTR(221), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(222, (uintptr_t)&INTR(222), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(223, (uintptr_t)&INTR(223), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(224, (uintptr_t)&INTR(224), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(225, (uintptr_t)&INTR(225), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(226, (uintptr_t)&INTR(226), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(227, (uintptr_t)&INTR(227), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(228, (uintptr_t)&INTR(228), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(229, (uintptr_t)&INTR(229), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(230, (uintptr_t)&INTR(230), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(231, (uintptr_t)&INTR(231), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(232, (uintptr_t)&INTR(232), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(233, (uintptr_t)&INTR(233), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(234, (uintptr_t)&INTR(234), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(235, (uintptr_t)&INTR(235), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(236, (uintptr_t)&INTR(236), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(237, (uintptr_t)&INTR(237), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(238, (uintptr_t)&INTR(238), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(239, (uintptr_t)&INTR(239), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(240, (uintptr_t)&INTR(240), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(241, (uintptr_t)&INTR(241), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(242, (uintptr_t)&INTR(242), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(243, (uintptr_t)&INTR(243), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(244, (uintptr_t)&INTR(244), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(245, (uintptr_t)&INTR(245), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(246, (uintptr_t)&INTR(246), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(247, (uintptr_t)&INTR(247), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(248, (uintptr_t)&INTR(248), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(249, (uintptr_t)&INTR(249), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(250, (uintptr_t)&INTR(250), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(251, (uintptr_t)&INTR(251), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(252, (uintptr_t)&INTR(252), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(253, (uintptr_t)&INTR(253), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(254, (uintptr_t)&INTR(254), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+ __intr_set_entry(255, (uintptr_t)&INTR(255), GDT_KERNEL_TEXT,
+ IDT_DESC_PRESENT | IDT_DESC_BIT32 | IDT_DESC_RING0);
+}
diff --git a/kernel/main/kmain.c b/kernel/main/kmain.c
new file mode 100644
index 0000000..6807328
--- /dev/null
+++ b/kernel/main/kmain.c
@@ -0,0 +1,200 @@
+#include "errno.h"
+#include "globals.h"
+#include "types.h"
+#include <api/exec.h>
+#include <drivers/screen.h>
+#include <drivers/tty/tty.h>
+#include <drivers/tty/vterminal.h>
+#include <main/io.h>
+#include <mm/mm.h>
+#include <mm/slab.h>
+#include <test/kshell/kshell.h>
+#include <util/time.h>
+#include <vm/anon.h>
+#include <vm/shadow.h>
+
+#include "util/debug.h"
+#include "util/gdb.h"
+#include "util/printf.h"
+#include "util/string.h"
+
+#include "main/acpi.h"
+#include "main/apic.h"
+#include "main/inits.h"
+
+#include "drivers/dev.h"
+#include "drivers/pcie.h"
+
+#include "api/syscall.h"
+
+#include "fs/fcntl.h"
+#include "fs/vfs.h"
+#include "fs/vfs_syscall.h"
+#include "fs/vnode.h"
+
+#include "test/driverstest.h"
+
+GDB_DEFINE_HOOK(boot)
+
+GDB_DEFINE_HOOK(initialized)
+
+GDB_DEFINE_HOOK(shutdown)
+
+static void initproc_start();
+
+typedef void (*init_func_t)();
+static init_func_t init_funcs[] = {
+ dbg_init,
+ intr_init,
+ page_init,
+ pt_init,
+ acpi_init,
+ apic_init,
+ core_init,
+ slab_init,
+ pframe_init,
+ pci_init,
+ vga_init,
+#ifdef __VM__
+ anon_init,
+ shadow_init,
+#endif
+ vmmap_init,
+ proc_init,
+ kthread_init,
+#ifdef __DRIVERS__
+ chardev_init,
+ blockdev_init,
+#endif
+ kshell_init,
+ file_init,
+ pipe_init,
+ syscall_init,
+ elf64_init,
+
+ proc_idleproc_init,
+};
+
+/*
+ * Call the init functions (in order!), then run the init process
+ * (initproc_start)
+ */
+void kmain()
+{
+ GDB_CALL_HOOK(boot);
+
+ for (size_t i = 0; i < sizeof(init_funcs) / sizeof(init_funcs[0]); i++)
+ init_funcs[i]();
+
+ initproc_start();
+ panic("\nReturned to kmain()\n");
+}
+
+/*
+ * Make:
+ * 1) /dev/null
+ * 2) /dev/zero
+ * 3) /dev/ttyX for 0 <= X < __NTERMS__
+ * 4) /dev/hdaX for 0 <= X < __NDISKS__
+ */
+static void make_devices()
+{
+ long status = do_mkdir("/dev");
+ KASSERT(!status || status == -EEXIST);
+
+ status = do_mknod("/dev/null", S_IFCHR, MEM_NULL_DEVID);
+ KASSERT(!status || status == -EEXIST);
+ status = do_mknod("/dev/zero", S_IFCHR, MEM_ZERO_DEVID);
+ KASSERT(!status || status == -EEXIST);
+
+ char path[32] = {0};
+ for (long i = 0; i < __NTERMS__; i++)
+ {
+ snprintf(path, sizeof(path), "/dev/tty%ld", i);
+ dbg(DBG_INIT, "Creating tty mknod with path %s\n", path);
+ status = do_mknod(path, S_IFCHR, MKDEVID(TTY_MAJOR, i));
+ KASSERT(!status || status == -EEXIST);
+ }
+
+ for (long i = 0; i < __NDISKS__; i++)
+ {
+ snprintf(path, sizeof(path), "/dev/hda%ld", i);
+ dbg(DBG_INIT, "Creating disk mknod with path %s\n", path);
+ status = do_mknod(path, S_IFBLK, MKDEVID(DISK_MAJOR, i));
+ KASSERT(!status || status == -EEXIST);
+ }
+}
+
+/*
+ * The function executed by the init process. Finish up all initialization now
+ * that we have a proper thread context.
+ *
+ * This function will require edits over the course of the project:
+ *
+ * - Before finishing drivers, this is where your tests lie. You can, however,
+ * have them in a separate test function which can even be in a separate file
+ * (see handout).
+ *
+ * - After finishing drivers but before starting VM, you should start __NTERMS__
+ * processes running kshells (see kernel/test/kshell/kshell.c, specifically
+ * kshell_proc_run). Testing here amounts to defining a new kshell command
+ * that runs your tests.
+ *
+ * - During and after VM, you should use kernel_execve. When starting, you
+ *   will probably want to kernel_execve the program you wish to test directly.
+ *   Eventually, you will want to kernel_execve "/sbin/init" and run your
+ *   tests from the userland shell (by typing in test commands).
+ *
+ * Note: The init process should wait on all of its children to finish before
+ * returning from this function (at which point the system will shut down).
+ */
+static void *initproc_run(long arg1, void *arg2)
+{
+#ifdef __VFS__
+ dbg(DBG_INIT, "Initializing VFS...\n");
+ vfs_init();
+ make_devices();
+#endif
+
+ NOT_YET_IMPLEMENTED("PROCS: GDB_DEFINE_HOOK");
+
+ return NULL;
+}
+
+/*
+ * Sets up the initial process and prepares it to run.
+ *
+ * Hints:
+ * Use proc_create() to create the initial process.
+ * Use kthread_create() to create the initial process's only thread.
+ * Make sure the thread is set up to start running initproc_run() (values for
+ * arg1 and arg2 do not matter, they can be 0 and NULL).
+ * Use sched_make_runnable() to make the thread runnable.
+ * Use context_make_active() with the context of the current core (curcore)
+ * to start the scheduler.
+ */
+void initproc_start()
+{
+ NOT_YET_IMPLEMENTED("PROCS: GDB_DEFINE_HOOK");
+}
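+
+/*
+ * A minimal sketch of the setup described in the hints above. This is not the
+ * committed implementation; the exact signatures of proc_create(),
+ * kthread_create(), sched_make_runnable(), and context_make_active() are
+ * assumptions based on the hint text:
+ *
+ *   proc_t *initproc = proc_create("init");
+ *   kthread_t *initthr = kthread_create(initproc, initproc_run, 0, NULL);
+ *   sched_make_runnable(initthr);
+ *   context_make_active(&curcore.kc_ctx);
+ */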
+
+void initproc_finish()
+{
+#ifdef __VFS__
+ if (vfs_shutdown())
+ panic("vfs shutdown FAILED!!\n");
+
+#endif
+
+#ifdef __DRIVERS__
+ screen_print_shutdown();
+#endif
+
+ /* sleep forever */
+ while (1)
+ {
+ __asm__ volatile("cli; hlt;");
+ }
+
+ panic("should not get here");
+}
diff --git a/kernel/main/smp.c b/kernel/main/smp.c
new file mode 100644
index 0000000..fb85469
--- /dev/null
+++ b/kernel/main/smp.c
@@ -0,0 +1,138 @@
+#include "globals.h"
+#include "types.h"
+#include <main/gdt.h>
+
+#include "main/apic.h"
+#include "main/inits.h"
+
+#include "mm/tlb.h"
+
+#include "util/string.h"
+#include "util/time.h"
+
+static long smp_processor_count;
+
+extern uintptr_t smp_initialization_start;
+extern uintptr_t smp_initialization_end;
+#define smp_initialization_start ((uintptr_t)(&smp_initialization_start))
+#define smp_initialization_end ((uintptr_t)(&smp_initialization_end))
+#define smp_initialization_size \
+ (smp_initialization_end - smp_initialization_start)
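+// Note: the two macros above deliberately shadow the extern declarations with
+// the same names. Because the preprocessor does not re-expand a macro within
+// its own definition, the &smp_initialization_start inside the macro body
+// still refers to the linker-provided symbol, so every later use of the name
+// yields that symbol's address as a uintptr_t.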
+
+static void smp_start_processor(uint8_t apic_id);
+static long smp_stop_processor(regs_t *regs);
+
+extern void *csd_start;
+extern void *csd_end;
+#define CSD_START ((uintptr_t)&csd_start)
+#define CSD_END ((uintptr_t)&csd_end)
+#define CSD_PAGES (uintptr_t)((CSD_END - CSD_START) >> PAGE_SHIFT)
+
+core_t curcore CORE_SPECIFIC_DATA;
+uintptr_t csd_vaddr_table[MAX_LAPICS] = {0};
+
+void map_in_core_specific_data(pml4_t *pml4)
+{
+ pt_map_range(pml4, curcore.kc_csdpaddr, CSD_START, CSD_END,
+ PT_PRESENT | PT_WRITE, PT_PRESENT | PT_WRITE);
+ uintptr_t mapped_paddr = pt_virt_to_phys_helper(pml4, (uintptr_t)&curcore);
+ uintptr_t expected_paddr =
+ (uintptr_t)GET_CSD(curcore.kc_id, core_t, curcore) - PHYS_OFFSET;
+ uintptr_t expected_paddr2 =
+ pt_virt_to_phys_helper(pt_get(), (uintptr_t)&curcore);
+ KASSERT(mapped_paddr == expected_paddr);
+ KASSERT(expected_paddr == expected_paddr2);
+}
+
+long is_core_specific_data(void *addr)
+{
+ return (uintptr_t)addr >= CSD_START && (uintptr_t)addr < CSD_END;
+}
+
+void core_init()
+{
+ // the order of operations is important here
+ pt_init();
+ pt_set(pt_create());
+
+ uintptr_t csd_paddr = (uintptr_t)page_alloc_n(CSD_PAGES);
+ if (!csd_paddr)
+ panic("not enough memory for core-specific data!");
+ csd_vaddr_table[apic_current_id()] =
+ csd_paddr; // still in PHYSMAP region; still a VMA
+ csd_paddr -= PHYS_OFFSET;
+
+ dbg(DBG_CORE, "mapping in core specific data to 0x%p\n", (void *)csd_paddr);
+ pt_map_range(pt_get(), csd_paddr, CSD_START, CSD_END, PT_PRESENT | PT_WRITE,
+ PT_PRESENT | PT_WRITE);
+ tlb_flush_all();
+
+ memset((void *)CSD_START, 0, CSD_END - CSD_START);
+
+ curcore.kc_id = apic_current_id();
+ curcore.kc_queue = NULL;
+ curcore.kc_csdpaddr = csd_paddr;
+
+ intr_init();
+ gdt_init();
+
+ apic_enable();
+ time_init();
+ sched_init();
+
+ void *stack = page_alloc();
+ KASSERT(stack != NULL);
+
+ context_setup_raw(&curcore.kc_ctx, core_switch, stack, PAGE_SIZE, pt_get());
+}
+
+void __attribute__((used)) smp_processor_entry()
+{
+ core_init();
+ dbg_force(DBG_CORE, "started C%ld!\n", curcore.kc_id);
+ smp_processor_count++;
+
+ KASSERT(!intr_enabled());
+ preemption_disable();
+ proc_idleproc_init();
+ context_make_active(&curcore.kc_ctx);
+}
+
+/*
+ * Prepare for SMP by copying the real-mode trampoline code into the
+ * first 1mb of memory.
+ */
+void smp_init()
+{
+ NOT_YET_IMPLEMENTED("SMP: ***none***");
+}
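+
+/*
+ * A hypothetical sketch of the copy described above (mirroring the memcpy
+ * already used by smp_start_processor() below); whether smp_init also kicks
+ * off the application processors here is not specified by this file:
+ *
+ *   memcpy((void *)PHYS_OFFSET, (void *)smp_initialization_start,
+ *          smp_initialization_size);
+ */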
+
+// Intel Vol. 3A 10-11, 10.4.7.3
+static void smp_start_processor(uint8_t apic_id)
+{
+ // TODO: not necessarily true that apic_id == processor_id
+ dbg_force(DBG_CORE, "Booting C%d\n", apic_id);
+
+ memcpy((void *)PHYS_OFFSET, (void *)smp_initialization_start,
+ smp_initialization_size);
+
+ // First, send an INIT IPI
+
+ long prev_count = smp_processor_count;
+ apic_start_processor(apic_id, 0);
+
+ while (smp_processor_count == prev_count)
+ ;
+}
+
+static long smp_stop_processor(regs_t *regs)
+{
+ char buf[2048];
+ time_stats(buf, sizeof(buf));
+
+ dbg_force(DBG_CORE, "\n%s\nhalted cleanly!\n\n", buf);
+
+ __asm__ volatile("cli; hlt;");
+
+ return 0;
+}
diff --git a/kernel/main/smp_trampoline.S b/kernel/main/smp_trampoline.S
new file mode 100644
index 0000000..9273f0f
--- /dev/null
+++ b/kernel/main/smp_trampoline.S
@@ -0,0 +1,81 @@
+#define CR0_PG 0x80000000
+#define CR0_PE 0x00000001
+
+#define CR4_PAE 0x00000020
+#define CR4_PGE 0x00000080
+
+#define PHYSADDR(x) (x - smp_initialization_start)
+
+# NOTE: THIS CODE REQUIRES THAT IT BE PLACED STARTING AT PHYSICAL ADDRESS 0x0
+
+.file "smp_trampoline.S"
+.global smp_initialization_start, smp_initialization_end
+
+smp_initialization_start:
+
+/* When we initialize a processor, it starts in 16-bit real mode */
+.code16
+.align 0x1000
+smp_processor_init:
+ cli
+
+ // enable PAE
+ mov $(CR4_PAE | CR4_PGE), %eax
+ mov %eax, %cr4
+
+ mov $PHYSADDR(pml4), %eax
+ mov %eax, %cr3
+
+ // enter long mode
+ mov $0xC0000080, %ecx
+ rdmsr
+ or $0x100, %eax
+ wrmsr
+
+ lgdt PHYSADDR(GDTPointer)
+
+ // Enable paging AND protection simultaneously
+ movl %cr0, %eax
+ or $(CR0_PG | CR0_PE), %eax
+ movl %eax, %cr0
+
+ ljmp $0x8, $PHYSADDR(smp_trampoline)
+
+.code64
+smp_trampoline:
+ movabsq $(0xffff880000000000 + PHYSADDR(stack_pointer)), %rsp
+ xor %rbp, %rbp
+ movabsq $smp_processor_entry, %rax
+ call *%rax
+
+
+.align 16
+GDT64:
+ GDTNull:
+ .quad 0
+ GDTKernelCode:
+ // base = 0x0, limit = 0x0
+ // flags: present, ring 0, executable, readable, 64bit
+ .word 0, 0
+ .byte 0, 0x9a, 0x20, 0
+ GDTEnd:
+ GDTPointer:
+ .word GDTEnd - GDT64 - 1 // size of gdt - 1
+ .long PHYSADDR(GDT64) // pointer to gdt
+
+.align 0x1000
+pml4: // maps the first 1GB of RAM at 0x0000000000000000, 0xffff800000000000, and 0xffff880000000000
+ .quad PHYSADDR(pdpt) + 3
+ .fill 255,8,0
+ .quad PHYSADDR(pdpt) + 3
+ .fill 15,8,0
+ .quad PHYSADDR(pdpt) + 3
+ .fill 239,8,0
+pdpt:
+ .quad 0x0000000000000083
+ .fill 511,8,0
+
+.skip 0x1000
+stack_pointer:
+
+smp_initialization_end:
diff --git a/kernel/mm/memcheck.py b/kernel/mm/memcheck.py
new file mode 100644
index 0000000..49f40fd
--- /dev/null
+++ b/kernel/mm/memcheck.py
@@ -0,0 +1,158 @@
+import gdb
+
+import weenix
+import weenix.kmem
+import weenix.stack
+
+
+class SlabAllocation:
+ def __init__(self, addr, stack, allocator, initialization):
+ self.addr = addr
+ self.stack = stack
+ self.allocator = allocator
+ self.initialization = initialization
+
+
+class PageAllocation:
+ def __init__(self, addr, stack, npages, slabdata, initialization):
+ self.addr = addr
+ self.stack = stack
+ self.npages = npages
+ self.slabdata = slabdata
+ self.initialization = initialization
+
+
+class Memcheck:
+ def __init__(self):
+ self._slab_alloc_count = 0
+ self._slab_free_count = 0
+ self._slab_invalid_free = 0
+ self._slab_allocated = {}
+ self._page_alloc_count = 0
+ self._page_free_count = 0
+ self._page_invalid_free = 0
+ self._page_allocated = {}
+ self._initialized = False
+ weenix.Hook("slab_obj_alloc", self._slab_alloc_callback)
+ weenix.Hook("slab_obj_free", self._slab_free_callback)
+ weenix.Hook("page_alloc", self._page_alloc_callback)
+ weenix.Hook("page_free", self._page_free_callback)
+ weenix.Hook("initialized", self._initialized_callback)
+ weenix.Hook("shutdown", self._shutdown_callback)
+
+ def _slab_alloc_callback(self, args):
+ addr = args["addr"]
+ if int(addr, 16) == 0:
+ return False
+ stack = weenix.stack.Stack(gdb.newest_frame().older())
+ allocator = weenix.kmem.SlabAllocator(
+ gdb.Value(int(args["allocator"].split(" ")[0], 16)).cast(
+ gdb.lookup_type("void").pointer()
+ )
+ )
+ self._slab_allocated[addr] = SlabAllocation(
+ addr, stack, allocator, not self._initialized
+ )
+ if self._initialized:
+ self._slab_alloc_count += 1
+ return False
+
+ def _slab_free_callback(self, args):
+ if not args["addr"] in self._slab_allocated:
+ self._slab_invalid_free += 1
+ print("Invalid free of address " + args["addr"] + ":")
+ print(weenix.stack.Stack(gdb.newest_frame().older()))
+ else:
+ if not self._slab_allocated[args["addr"]].initialization:
+ self._slab_free_count += 1
+ del self._slab_allocated[args["addr"]]
+ return False
+
+ def _page_alloc_callback(self, args):
+ addr = args["addr"]
+ if int(addr, 16) == 0:
+ return False
+ stack = weenix.stack.Stack(gdb.newest_frame().older())
+ slabdata = stack.contains("_slab_allocator_grow")
+ self._page_allocated[addr] = PageAllocation(
+ addr, stack, int(args["npages"]), slabdata, not self._initialized
+ )
+ if self._initialized and not slabdata:
+ self._page_alloc_count += 1
+ return False
+
+ def _page_free_callback(self, args):
+ if not args["addr"] in self._page_allocated:
+ self._page_invalid_free += 1
+ print("Invalid free of address " + args["addr"] + ":")
+ print(weenix.stack.Stack(gdb.newest_frame().older()))
+ elif self._page_allocated[args["addr"]].npages != int(args["npages"]):
+ self._page_invalid_free += 1
+ print(
+ "Address "
+ + args["addr"]
+ + " allocated for "
+ + str(self._page_allocated[args["addr"]].npages)
+ + " pages:"
+ )
+ print(self._page_allocated[args["addr"]].stack)
+ print("but freed with " + args["npages"] + " pages:")
+ print(weenix.stack.Stack(gdb.newest_frame().older()))
+ del self._page_allocated[args["addr"]]
+ else:
+ if (
+ not self._page_allocated[args["addr"]].initialization
+ and not self._page_allocated[args["addr"]].slabdata
+ ):
+ self._page_free_count += 1
+ del self._page_allocated[args["addr"]]
+ return False
+
+ def _initialized_callback(self, args):
+ self._initialized = True
+ return False
+
+ def _shutdown_callback(self, args):
+ size = 0
+ for alloc in self._slab_allocated.values():
+ if not alloc.initialization:
+ size += alloc.allocator.size()
+ print(
+ 'Leaked {0} bytes from "{1}" at {2}:'.format(
+ alloc.allocator.size(), alloc.allocator.name(), alloc.addr
+ )
+ )
+ print(alloc.stack)
+ npages = 0
+ for page in self._page_allocated.values():
+ if not page.initialization and not page.slabdata:
+ npages += page.npages
+ print("Leaked {0} pages at {1}:".format(page.npages, page.addr))
+ print(page.stack)
+ print(
+ "{0} slab allocs, {1} frees ({2} bytes leaked)".format(
+ self._slab_alloc_count, self._slab_free_count, size
+ )
+ )
+ print(
+ "{0} page allocs, {1} frees ({2} pages leaked)".format(
+ self._page_alloc_count, self._page_free_count, npages
+ )
+ )
+ print("{0} invalid slab frees".format(self._slab_invalid_free))
+ print("{0} invalid page frees".format(self._page_invalid_free))
+ return False
+
+
+class MemcheckFlag(weenix.Flag):
+ def __init__(self):
+ weenix.Flag.__init__(self, "memcheck", gdb.COMMAND_DATA)
+
+ def callback(self, value):
+ if value:
+ Memcheck()
+
+
+MemcheckFlag()
diff --git a/kernel/mm/mobj.c b/kernel/mm/mobj.c
new file mode 100644
index 0000000..4b9c80f
--- /dev/null
+++ b/kernel/mm/mobj.c
@@ -0,0 +1,313 @@
+#include "errno.h"
+
+#include "mm/mobj.h"
+#include "mm/pframe.h"
+
+#include "util/debug.h"
+#include <util/string.h>
+
+/*
+ * Initialize o according to type and ops. If ops do not specify a
+ * get_pframe function, set it to the default, mobj_default_get_pframe.
+ * Do the same with the destructor function pointer.
+ *
+ * Upon return, the refcount of the mobj should be 1.
+ */
+void mobj_init(mobj_t *o, long type, mobj_ops_t *ops)
+{
+ o->mo_type = type;
+
+ memcpy(&o->mo_ops, ops, sizeof(mobj_ops_t));
+
+ if (!o->mo_ops.get_pframe)
+ {
+ o->mo_ops.get_pframe = mobj_default_get_pframe;
+ KASSERT(o->mo_ops.fill_pframe);
+ KASSERT(o->mo_ops.flush_pframe);
+ }
+ if (!o->mo_ops.destructor)
+ {
+ o->mo_ops.destructor = mobj_default_destructor;
+ }
+
+ kmutex_init(&o->mo_mutex);
+
+ o->mo_refcount = ATOMIC_INIT(1);
+ list_init(&o->mo_pframes);
+}
+
+/*
+ * Lock the mobj's mutex
+ */
+inline void mobj_lock(mobj_t *o) { kmutex_lock(&o->mo_mutex); }
+
+/*
+ * Unlock the mobj's mutex
+ */
+inline void mobj_unlock(mobj_t *o) { kmutex_unlock(&o->mo_mutex); }
+
+/*
+ * Increment refcount
+ */
+void mobj_ref(mobj_t *o)
+{
+ atomic_inc(&o->mo_refcount);
+}
+
+void mobj_put_locked(mobj_t **op)
+{
+ mobj_unlock(*op);
+ mobj_put(op);
+}
+
+/*
+ * Decrement refcount, and set *op = NULL.
+ * If the refcount drops to 0, call the destructor; otherwise, unlock the mobj.
+ */
+void mobj_put(mobj_t **op)
+{
+ mobj_t *o = *op;
+ KASSERT(o->mo_refcount);
+ *op = NULL;
+
+ dbg(DBG_ERROR, "count: %d\n", o->mo_refcount);
+ if (atomic_dec_and_test(&o->mo_refcount))
+ {
+ dbg(DBG_ERROR, "count: %d\n", o->mo_refcount);
+
+ KASSERT(!kmutex_owns_mutex(&o->mo_mutex));
+ o->mo_ops.destructor(o);
+ }
+ else
+ {
+ dbg(DBG_ERROR, "count: %d\n", o->mo_refcount);
+ }
+}
+
+/*
+ * Find a pframe that already exists in the memory object's mo_pframes list.
+ * If a pframe is found, it must be locked upon return from this function using
+ * pf_mutex.
+ */
+void mobj_find_pframe(mobj_t *o, uint64_t pagenum, pframe_t **pfp)
+{
+ KASSERT(kmutex_owns_mutex(&o->mo_mutex));
+ list_iterate(&o->mo_pframes, pf, pframe_t, pf_link)
+ {
+ if (pf->pf_pagenum == pagenum)
+ {
+ kmutex_lock(&pf->pf_mutex);
+ *pfp = pf;
+ return;
+ }
+ }
+ *pfp = NULL;
+}
+
+/*
+ * Wrapper around the memory object's get_pframe function
+ * Assert a sane state of the world surrounding the call to get_pframe
+ */
+long mobj_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite,
+ pframe_t **pfp)
+{
+ KASSERT(kmutex_owns_mutex(&o->mo_mutex));
+ *pfp = NULL;
+ long ret = o->mo_ops.get_pframe(o, pagenum, forwrite, pfp);
+ KASSERT((!*pfp && ret) || kmutex_owns_mutex(&(*pfp)->pf_mutex));
+ return ret;
+}
+
+/*
+ * Create and initialize a pframe and add it to the mobj's mo_pframes list.
+ * Upon successful return, the pframe's pf_mutex is locked.
+ */
+#ifdef OLD
+static void mobj_create_pframe(mobj_t *o, uint64_t pagenum, pframe_t **pfp)
+#endif
+void mobj_create_pframe(mobj_t *o, uint64_t pagenum, uint64_t loc, pframe_t **pfp)
+{
+ KASSERT(kmutex_owns_mutex(&o->mo_mutex));
+ pframe_t *pf = pframe_create();
+ if (pf)
+ {
+ kmutex_lock(&pf->pf_mutex);
+
+ pf->pf_pagenum = pagenum;
+ pf->pf_loc = loc;
+ list_insert_tail(&o->mo_pframes, &pf->pf_link);
+ }
+ KASSERT(!pf || kmutex_owns_mutex(&pf->pf_mutex));
+ *pfp = pf;
+}
+
+/*
+ * The default get pframe that is at the center of the mobj/pframe subsystem.
+ * This is the routine that is used when the memory object does not have a
+ * get_pframe function associated with it (or called in the case of shadow objects
+ * when the forwrite flag is set).
+ *
+ * First, check if a pframe already exists in the mobj, creating one as
+ * necessary. Then, ensure that the pframe's contents are loaded: i.e. that
+ * pf->pf_addr is non-null. You will want to use page_alloc() and the mobj's
+ * fill_pframe function pointer. Finally, if forwrite is true, mark the pframe
+ * as dirtied. The resulting pframe should be set in *pfp.
+ *
+ * Note that upon failure, *pfp MUST be null. As always, make sure you clean up
+ * properly in all error cases (especially if fill_pframe fails).
+ *
+ * Upon successful return, *pfp refers to the found pframe and MUST be locked.
+ *
+ * Error cases mobj_default_get_pframe is responsible for generating:
+ * - ENOMEM: either cannot create the pframe or cannot allocate memory for
+ * the pframe's contents
+ */
+long mobj_default_get_pframe(mobj_t *o, uint64_t pagenum, long forwrite,
+ pframe_t **pfp)
+{
+ KASSERT(kmutex_owns_mutex(&o->mo_mutex));
+ *pfp = NULL;
+ pframe_t *pf = NULL;
+ mobj_find_pframe(o, pagenum, &pf);
+ if (!pf)
+ {
+ mobj_create_pframe(o, pagenum, 0, &pf); // XXX is zero correct???
+ }
+ if (!pf)
+ {
+ return -ENOMEM;
+ }
+ KASSERT(kmutex_owns_mutex(&pf->pf_mutex));
+ if (!pf->pf_addr)
+ {
+ KASSERT(!pf->pf_dirty &&
+ "dirtied page doesn't have a physical address");
+ pf->pf_addr = page_alloc();
+ if (!pf->pf_addr)
+ {
+ return -ENOMEM;
+ }
+
+ dbg(DBG_PFRAME, "filling pframe 0x%p (mobj 0x%p page %lu)\n", pf, o,
+ pf->pf_pagenum);
+ KASSERT(o->mo_ops.fill_pframe);
+ long ret = o->mo_ops.fill_pframe(o, pf);
+ if (ret)
+ {
+ page_free(pf->pf_addr);
+ pf->pf_addr = NULL;
+ kmutex_unlock(&pf->pf_mutex);
+ return ret;
+ }
+ }
+ pf->pf_dirty |= forwrite;
+ *pfp = pf;
+ return 0;
+}
+
+/*
+ * If the pframe is dirty, call the mobj's flush_pframe; if flush_pframe returns
+ * successfully, clear pf_dirty flag and return 0. Otherwise, return what
+ * flush_pframe returned.
+ *
+ * Both o and pf must be locked when calling this function
+ */
+long mobj_flush_pframe(mobj_t *o, pframe_t *pf)
+{
+ KASSERT(kmutex_owns_mutex(&o->mo_mutex));
+ KASSERT(kmutex_owns_mutex(&pf->pf_mutex));
+ KASSERT(pf->pf_addr && "cannot flush a frame not in memory!");
+ dbg(DBG_PFRAME, "pf 0x%p, mobj 0x%p, page %lu\n", pf, o, pf->pf_pagenum);
+ if (pf->pf_dirty)
+ {
+ KASSERT(o->mo_ops.flush_pframe);
+ long ret = o->mo_ops.flush_pframe(o, pf);
+ if (ret)
+ return ret;
+ pf->pf_dirty = 0;
+ }
+ KASSERT(!pf->pf_dirty);
+ return 0;
+}
+
+/*
+ * Iterate through the pframes of the mobj and try to flush each one.
+ * If any of them fail, let that reflect in the return value.
+ *
+ * The mobj o must be locked when calling this function
+ */
+long mobj_flush(mobj_t *o)
+{
+ long ret = 0;
+ KASSERT(kmutex_owns_mutex(&o->mo_mutex));
+ list_iterate(&o->mo_pframes, pf, pframe_t, pf_link)
+ {
+ kmutex_lock(&pf->pf_mutex); // get the pframe (lock it)
+ if (pf->pf_addr)
+ {
+ ret |= mobj_flush_pframe(o, pf);
+ }
+ pframe_release(&pf);
+ }
+ return ret;
+}
+
+/*
+ * Attempt to flush the pframe. If the flush succeeds, then free the pframe's
+ * contents (pf->pf_addr) using page_free, remove the pframe from the mobj's
+ * list and call pframe_free.
+ *
+ * Upon successful return, *pfp MUST be null. If the function returns an error
+ * code, *pfp must be unchanged.
+ */
+long mobj_free_pframe(mobj_t *o, pframe_t **pfp)
+{
+ pframe_t *pf = *pfp;
+
+ if (pf->pf_addr)
+ {
+ long ret = mobj_flush_pframe(o, pf);
+ if (ret)
+ return ret;
+
+ // [+] TODO REMOVE THIS SECTION WHEN FLUSH DOES IT (I.E. WHEN WE HAVE
+ // SUPPORT FOR FREEING PFRAME'S IN USE BY UNMAPPING THEM FROM PAGE
+ // TABLES THAT USE THEM)
+ if (pf->pf_addr)
+ {
+ page_free(pf->pf_addr);
+ pf->pf_addr = NULL;
+ }
+ }
+ *pfp = NULL;
+ list_remove(&pf->pf_link);
+ pframe_free(&pf);
+ return 0;
+}
+
+/*
+ * Simply flush the memory object
+ */
+void mobj_default_destructor(mobj_t *o)
+{
+ mobj_lock(o);
+ KASSERT(kmutex_owns_mutex(&o->mo_mutex));
+
+ long ret = 0;
+ list_iterate(&o->mo_pframes, pf, pframe_t, pf_link)
+ {
+ kmutex_lock(&pf->pf_mutex); // get the pframe (lock it)
+ ret |= mobj_free_pframe(o, &pf);
+ }
+
+ if (ret)
+ {
+ dbg(DBG_MM,
+ "WARNING: flushing pframes in mobj destructor failed for one or "
+ "more frames\n"
+ "This means the memory for the pframe will be leaked!");
+ }
+
+ KASSERT(!kmutex_has_waiters(&o->mo_mutex));
+ mobj_unlock(o);
+}
diff --git a/kernel/mm/page.c b/kernel/mm/page.c
new file mode 100644
index 0000000..b42dca4
--- /dev/null
+++ b/kernel/mm/page.c
@@ -0,0 +1,658 @@
+// SMP.1 + SMP.3
+// spinlocks + mask interrupts
+#include "kernel.h"
+#include "types.h"
+#include <boot/multiboot_macros.h>
+
+#include "boot/config.h"
+
+#include "mm/mm.h"
+#include "mm/page.h"
+
+#include "util/debug.h"
+#include "util/gdb.h"
+#include "util/string.h"
+
+#include "multiboot.h"
+
+// BTREE === Binary Tree (not an actual B-Tree)
+
+// Algorithmic optimization ideas:
+// - keep a "min free idx" pointer for each order (and a "count free" at each
+//   order)
+// - delay cascading availability bits up the tree until needed; this would
+//   prevent state "thrashing". Can be done with a cascaded_order flag that
+//   equals the highest order which we have cascaded up to. For a given
+//   allocation, if the required order is > cascaded_order, then we cascade up
+//   to the required order.
+
+// get ready for bit manipulation heaven :)
+
+typedef uintptr_t btree_word;
+
+#define BTREE_ROW_START_INDEX(order) \
+ (((uintptr_t)1 << (max_order - (order))) - 1)
+#define BTREE_ROW_END_INDEX(order) ((BTREE_ROW_START_INDEX(order) << 1) | 1)
+#define BTREE_INDEX_TO_ADDR(idx, order) \
+ (((1 << (order)) * ((idx)-BTREE_ROW_START_INDEX(order))) << PAGE_SHIFT)
+#define BTREE_ADDR_TO_INDEX(addr, order) \
+ (BTREE_ROW_START_INDEX(order) + \
+ ((((uintptr_t)(addr)) >> PAGE_SHIFT) / (1 << (order))))
+
+#define BTREE_LEAF_START_INDEX BTREE_ROW_START_INDEX(0)
+#define BTREE_ADDR_TO_LEAF_INDEX(addr) BTREE_ADDR_TO_INDEX(addr, 0)
+#define BTREE_LEAF_INDEX_TO_ADDR(idx) BTREE_INDEX_TO_ADDR(idx, 0)
+
+#define BTREE_NUM_BITS (sizeof(btree_word) << 3)
+#define BTREE_WORD_POS(idx) ((idx) / BTREE_NUM_BITS)
+#define BTREE_BIT_POS(idx) ((idx) & (BTREE_NUM_BITS - 1))
+#define BTREE_AVAILABILITY_MASK(idx) \
+ ((uintptr_t)1 << (BTREE_NUM_BITS - 1 - BTREE_BIT_POS(idx)))
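+
+// Worked example of the index arithmetic above (illustrative values only),
+// assuming max_order == 3 and PAGE_SHIFT == 12:
+//   BTREE_ROW_START_INDEX(3) == (1 << 0) - 1 == 0        (the root)
+//   BTREE_ROW_START_INDEX(0) == (1 << 3) - 1 == 7        (leaves at indices 7..14)
+//   BTREE_ROW_END_INDEX(0)   == (7 << 1) | 1 == 15       (one past the last leaf)
+//   BTREE_INDEX_TO_ADDR(9, 0) == ((1 << 0) * (9 - 7)) << 12 == 0x2000
+//   BTREE_ADDR_TO_INDEX(0x2000, 0) == 7 + ((0x2000 >> 12) / (1 << 0)) == 9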
+
+// we really don't want branching here (predictor would be quite bad and
+// branches are slowwwww)
+#define BTREE_SIBLING(idx) ((idx)-1 + (((idx)&1) << 1))
+// uintptr_t btree_sibling(uintptr_t idx) {
+// // in a 0-indexed binary tree, a sibling of an odd node is its right
+// neighbor --> add 1
+// // and the sibling of an even node is its left neighbor --> subtract 1
+// // so we need: (idx % 2) ? (idx + 1) : (idx - 1);
+// uintptr_t odd_addend = idx & 1; // 1 if odd, 0 if even
+// uintptr_t even_addend = (uintptr_t) -1 + odd_addend; // 0 if odd, -1 if
+// even return idx + odd_addend + even_addend; return idx + (idx & 1) +
+// ((uintptr_t) -1 + (idx & 1)); return idx - 1 + ((idx & 1) << 1);
+// // now it looks like: always subtract 1, add 2 if odd. which works :)
+// }
+
+// get the left sibling (odd) of a pair; idx may already be the left sibling or
+// may be the right sibling (even) subtract 1 from idx if it's even --> subtract
+// 1 from LSB and add it back in
+#define BTREE_LEFT_SIBLING(idx) ((idx) + (((idx)&1) - 1))
+
+#define BTREE_PARENT(idx) (((idx)-1) >> 1)
+#define BTREE_LEFT_CHILD(idx) (((idx) << 1) + 1)
+#define BTREE_RIGHT_CHILD(idx) (((idx) + 1) << 1)
+#define BTREE_IS_LEFT_CHILD(idx) ((idx)&1)
+#define BTREE_IS_RIGHT_CHILD(idx) (!BTREE_IS_LEFT_CHILD(idx))
+
+#define BTREE_IS_AVAILABLE(idx) \
+ (btree[BTREE_WORD_POS(idx)] & BTREE_AVAILABILITY_MASK(idx))
+#define BTREE_MARK_AVAILABLE(idx) \
+ (btree[BTREE_WORD_POS(idx)] |= BTREE_AVAILABILITY_MASK(idx))
+#define BTREE_MARK_UNAVAILABLE(idx) \
+ (btree[BTREE_WORD_POS(idx)] &= ~BTREE_AVAILABILITY_MASK(idx))
+
+// potential optimization: use these when clearing pairs. something about the
+// following is apparently buggy though (causes fault) #define
+// BTREE_SIBLING_AVAILABILITY_MASK(idx) (BTREE_AVAILABILITY_MASK(idx) |
+// BTREE_IS_AVAILABLE(BTREE_SIBLING(idx))) #define
+// BTREE_MARK_SIBLINGS_AVAILABLE(idx) (btree[BTREE_WORD_POS(idx)] |=
+// BTREE_SIBLING_AVAILABILITY_MASK(idx)) #define
+// BTREE_MARK_SIBLINGS_UNAVAILABLE(idx) (btree[BTREE_WORD_POS(idx)] &=
+// ~BTREE_SIBLING_AVAILABILITY_MASK(idx))
+
+GDB_DEFINE_HOOK(page_alloc, void *addr, size_t npages)
+
+GDB_DEFINE_HOOK(page_free, void *addr, size_t npages)
+
+static size_t page_freecount;
+
+// if you rename these variables, update them in the macros above
+static size_t
+ max_pages; // max number of pages as determined by RAM, NOT max_order
+static size_t max_order; // max depth of binary tree
+
+static btree_word *btree;
+static uintptr_t *min_available_idx_by_order;
+static size_t *count_available_by_order;
+
+static char *type_strings[] = {"ERROR: type = 0", "Available", "Reserved",
+ "ACPI Reclaimable", "ACPI NVS", "GRUB Bad Ram"};
+static size_t type_count = sizeof(type_strings) / sizeof(type_strings[0]);
+
+inline void *physmap_start() { return (void *)PHYS_OFFSET; }
+
+inline void *physmap_end()
+{
+ return (void *)(PHYS_OFFSET + (max_pages << PAGE_SHIFT));
+}
+
+#undef DEBUG_PHYSICAL_PAGING
+
+static inline void _btree_expensive_sanity_check()
+{
+#ifdef DEBUG_PHYSICAL_PAGING
+ size_t available = 0;
+ for (unsigned order = 0; order <= max_order; order++)
+ {
+ long checked_first = 0;
+ unsigned order_count = 0;
+ uintptr_t max = BTREE_ROW_END_INDEX(order);
+
+ for (uintptr_t idx = BTREE_ROW_START_INDEX(order); idx < max; idx++)
+ {
+ if (BTREE_IS_AVAILABLE(idx))
+ {
+ if (!checked_first)
+ {
+ KASSERT(min_available_idx_by_order[order] == idx);
+ checked_first = 1;
+ }
+ available += (1 << order);
+ order_count++;
+ KASSERT(BTREE_INDEX_TO_ADDR(idx + 1, order) <= physmap_end());
+ }
+ }
+ if (!checked_first)
+ {
+ KASSERT(min_available_idx_by_order[order] == max);
+ }
+ KASSERT(count_available_by_order[order] == order_count);
+ }
+ KASSERT(available == page_freecount);
+#endif
+}
+
+void page_init()
+{
+ uintptr_t ram = 0;
+ uintptr_t memory_available_for_use = 0;
+
+ // detect amount of RAM and memory available for use immediately after
+ // kernel before any reserved region
+
+ KASSERT(PAGE_ALIGNED(mb_tag) && (uintptr_t)mb_tag == KERNEL_PHYS_END);
+
+ for (struct multiboot_tag *tag = mb_tag + 1;
+ tag->type != MULTIBOOT_TAG_TYPE_END; tag += TAG_SIZE(tag->size))
+ {
+ if (tag->type != MULTIBOOT_TAG_TYPE_MMAP)
+ {
+ continue;
+ }
+ struct multiboot_tag_mmap *mmap = (struct multiboot_tag_mmap *)tag;
+ dbg(DBG_PAGEALLOC, "Physical memory map (%d entries):\n",
+ mmap->size / mmap->entry_size);
+ for (unsigned i = 0; i < mmap->size / mmap->entry_size; i++)
+ {
+ struct multiboot_mmap_entry *entry = &mmap->entries[i];
+ dbgq(DBG_MM, "\t[0x%p-0x%p) (%llu bytes): %s\n",
+ (void *)entry->addr, (void *)(entry->addr + entry->len),
+ entry->len,
+ entry->type < type_count ? type_strings[entry->type]
+ : "Unknown");
+ if (entry->type != MULTIBOOT_MEMORY_AVAILABLE)
+ {
+ continue;
+ }
+
+ if (entry->addr < KERNEL_PHYS_END &&
+ entry->addr + entry->len > KERNEL_PHYS_END)
+ {
+ memory_available_for_use =
+ entry->addr + entry->len - KERNEL_PHYS_END;
+ }
+
+ if (entry->addr + entry->len > ram)
+ {
+ ram = entry->addr + entry->len;
+ }
+ }
+ }
+
+ // check we have enough space available following the kernel to map in all
+ // of RAM detected
+ max_pages = ram >> PAGE_SHIFT;
+ max_order = 0;
+ size_t npages = max_pages;
+ while (npages)
+ {
+ max_order++;
+ npages >>= 1;
+ }
+
+ // we may have more RAM than we can map in with the single memory hole
+ // after the kernel; keep shrinking the maximum order until we find a size
+ // that fits (this can obviously be done more intelligently, but this also
+ // works)
+ size_t btree_size;
+ size_t metadata_size;
+ while (max_order)
+ {
+ // the tree has roughly 2^(max_order+1) nodes (one bit each), and one byte
+ // holds 8 bits, so we need 2^(max_order-2) bytes for the binary tree
+ btree_size = 1UL << (max_order - 2);
+ metadata_size = sizeof(uintptr_t) * (max_order + 1) +
+ sizeof(size_t) * (max_order + 1);
+
+ if (memory_available_for_use >= btree_size + metadata_size)
+ {
+ break;
+ }
+ if (max_pages ==
+ (ram >> PAGE_SHIFT))
+ { // only print first time we shrink
+ dbg(DBG_PAGEALLOC,
+ "Warning! Need 0x%p B of memory to map in 0x%p B of RAM, but "
+ "only have 0x%p available!",
+ (void *)(btree_size + metadata_size), (void *)ram,
+ (void *)memory_available_for_use);
+ }
+ max_order--;
+ max_pages = 1UL << max_order;
+ }
+ if (max_pages !=
+ (ram >> PAGE_SHIFT))
+ { // only print if we shrank available RAM
+ dbg(DBG_PAGEALLOC, "Supporting only up to 0x%p B of RAM!",
+ (void *)(max_pages << PAGE_SHIFT));
+ }
+
+ btree = (btree_word
+ *)(KERNEL_PHYS_END +
+ PAGE_SIZE); // 1 page padding for the multiboot information
+ memset(btree, 0, btree_size);
+
+ min_available_idx_by_order = (uintptr_t *)((uintptr_t)btree + btree_size);
+ for (unsigned order = 0; order <= max_order; order++)
+ {
+ min_available_idx_by_order[order] = BTREE_ROW_END_INDEX(order);
+ }
+
+ count_available_by_order =
+ min_available_idx_by_order + sizeof(uintptr_t) * (max_order + 1);
+ memset(count_available_by_order, 0, sizeof(size_t) * (max_order + 1));
+
+ page_freecount = 0;
+
+ uintptr_t reserved_ram_start = KERNEL_PHYS_BASE;
+ uintptr_t reserved_ram_end =
+ KERNEL_PHYS_END + PAGE_SIZE + btree_size + metadata_size;
+
+ for (struct multiboot_tag *tag = mb_tag + 1;
+ tag->type != MULTIBOOT_TAG_TYPE_END; tag += TAG_SIZE(tag->size))
+ {
+ if (tag->type != MULTIBOOT_TAG_TYPE_MMAP)
+ {
+ continue;
+ }
+ struct multiboot_tag_mmap *mmap = (struct multiboot_tag_mmap *)tag;
+ for (unsigned i = 0; i < mmap->size / mmap->entry_size; i++)
+ {
+ struct multiboot_mmap_entry *entry = &mmap->entries[i];
+ if (entry->type != MULTIBOOT_MEMORY_AVAILABLE)
+ {
+ continue;
+ }
+ uintptr_t addr = entry->addr;
+ uintptr_t len = entry->len;
+
+ if (addr >= reserved_ram_start && addr < reserved_ram_end)
+ {
+ if (len <= reserved_ram_end - addr)
+ {
+ continue;
+ }
+ len -= reserved_ram_end - addr;
+ addr = reserved_ram_end;
+ }
+ if (addr < reserved_ram_start && addr + len > reserved_ram_start)
+ {
+ len = reserved_ram_start - addr;
+ }
+
+ // TODO [+] see why removing this crashes SMP
+ if (addr < reserved_ram_start)
+ {
+ continue;
+ }
+
+ page_add_range((void *)addr, (void *)(addr + len));
+ }
+ }
+
+ page_mark_reserved(0); // don't allocate the first page of memory
+
+ size_t bytes = page_freecount << PAGE_SHIFT;
+ size_t gigabytes = (bytes >> 30);
+ bytes -= (gigabytes << 30);
+ size_t megabytes = (bytes >> 20);
+ bytes -= (megabytes << 20);
+ size_t kilobytes = (bytes >> 10);
+ bytes -= (kilobytes << 10);
+ KASSERT(bytes == 0);
+
+ dbg(DBG_PAGEALLOC,
+ "Amount of physical memory available for use: %lu GB, %lu MB, and %lu "
+ "KB; [0x%p, 0x%p)\n",
+ gigabytes, megabytes, kilobytes, physmap_start(), physmap_end());
+ _btree_expensive_sanity_check();
+}
+
+void page_init_finish()
+{
+ btree = (btree_word *)((uintptr_t)btree + PHYS_OFFSET);
+ min_available_idx_by_order =
+ (uintptr_t *)((uintptr_t)min_available_idx_by_order + PHYS_OFFSET);
+ count_available_by_order =
+ (uintptr_t *)((uintptr_t)count_available_by_order + PHYS_OFFSET);
+}
+
+static void _btree_update_metadata_after_removal(size_t order, size_t idx)
+{
+ // [+] TODO Intel-specific optimizations, see BSF, BSR, REPE CMPS/SCAS
+ if (count_available_by_order[order])
+ {
+ if (idx == min_available_idx_by_order[order])
+ {
+ uintptr_t word_idx = BTREE_WORD_POS(idx);
+ if (btree[word_idx] &&
+ word_idx == BTREE_WORD_POS(BTREE_ROW_START_INDEX(order)))
+ {
+ // mask off bits to the left of BTREE_BIT_POS(idx); i.e.
+ // consider only positions > than BTREE_BIT_POS(idx) in
+ // btree[word_idx] when idx is the old index of the first
+ // available node for the given order. This is to avoid setting
+ // min available for an order x to be an index that actually
+ // belongs to order (x + 1) (in the row above).
+ btree_word copy =
+ btree[word_idx] &
+ ((1UL << (BTREE_NUM_BITS - BTREE_BIT_POS(idx))) - 1);
+ unsigned bit_idx = BTREE_NUM_BITS;
+ while (copy != 0 && bit_idx > BTREE_BIT_POS(idx))
+ {
+ bit_idx--;
+ copy = copy >> 1;
+ }
+ if (BTREE_IS_AVAILABLE(word_idx * BTREE_NUM_BITS + bit_idx))
+ {
+ min_available_idx_by_order[order] =
+ word_idx * BTREE_NUM_BITS + bit_idx;
+ return;
+ }
+ word_idx++;
+ }
+ while (!btree[word_idx])
+ word_idx++;
+ btree_word copy = btree[word_idx];
+ unsigned bit_idx = BTREE_NUM_BITS;
+ while (copy != 0)
+ {
+ bit_idx--;
+ copy = copy >> 1;
+ }
+ uintptr_t min_available = word_idx * BTREE_NUM_BITS + bit_idx;
+ if (min_available > BTREE_ROW_END_INDEX(order))
+ {
+ min_available = BTREE_ROW_END_INDEX(order);
+ }
+ min_available_idx_by_order[order] = min_available;
+ }
+ }
+ else
+ {
+ min_available_idx_by_order[order] = BTREE_ROW_END_INDEX(order);
+ }
+}
+
+static void _btree_mark_available(uintptr_t idx, size_t order)
+{
+ KASSERT(!BTREE_IS_AVAILABLE(idx));
+ BTREE_MARK_AVAILABLE(idx);
+
+ uintptr_t start = BTREE_INDEX_TO_ADDR(idx, order);
+ uintptr_t end = BTREE_INDEX_TO_ADDR(idx + 1, order);
+ dbg(DBG_MM, "marking available (0x%p, 0x%p)\n", (void *)start, (void *)end);
+ KASSERT(!(0xb1000 >= start && 0xb1000 < end));
+
+ count_available_by_order[order]++;
+ if (idx < min_available_idx_by_order[order])
+ {
+ min_available_idx_by_order[order] = idx;
+ }
+
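+ // buddy merge: while the sibling block is also free, clear both children
+ // and mark their parent free one order up, repeating toward the root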
+ while (idx > 0 && BTREE_IS_AVAILABLE(BTREE_SIBLING(idx)))
+ {
+ BTREE_MARK_UNAVAILABLE(idx);
+ BTREE_MARK_UNAVAILABLE(BTREE_SIBLING(idx));
+
+ count_available_by_order[order] -= 2;
+ _btree_update_metadata_after_removal(order, BTREE_LEFT_SIBLING(idx));
+
+ idx = BTREE_PARENT(idx);
+ order++;
+ BTREE_MARK_AVAILABLE(idx);
+ count_available_by_order[order]++;
+ if (idx < min_available_idx_by_order[order])
+ {
+ min_available_idx_by_order[order] = idx;
+ }
+ }
+}
+
+static void _btree_mark_range_available(uintptr_t leaf_idx, size_t npages)
+{
+ // could be optimized further so that we don't need to keep traversing from
+ // leaf to max order: we could instead jump to the parent's (right) sibling
+ // when we are a right child, and jump to the left child while npages > what
+ // would be allocated. But for now, this works and seems fast enough...
+ // TODO potential optimization
+ while (npages)
+ {
+ uintptr_t idx = leaf_idx;
+ size_t order = 0;
+ while (BTREE_IS_LEFT_CHILD(idx) && (2UL << order) <= npages)
+ {
+ idx = BTREE_PARENT(idx);
+ order++;
+ }
+ _btree_mark_available(idx, order);
+ npages -= 1 << order;
+ leaf_idx += 1 << order;
+ }
+}
+
+void page_add_range(void *start, void *end)
+{
+ dbg(DBG_MM, "Page system adding range [0x%p, 0x%p)\n", start, end);
+ KASSERT(end > start);
+ if (start == 0)
+ {
+ start = PAGE_ALIGN_UP(1);
+ if (end <= start)
+ {
+ return;
+ }
+ }
+ start = PAGE_ALIGN_UP(start);
+ end = PAGE_ALIGN_DOWN(end);
+ size_t npages = ((uintptr_t)end - (uintptr_t)start) >> PAGE_SHIFT;
+ _btree_mark_range_available(BTREE_ADDR_TO_LEAF_INDEX(start), npages);
+ page_freecount += npages;
+ _btree_expensive_sanity_check();
+}
+
+void *page_alloc() { return page_alloc_n(1); }
+
+void *page_alloc_bounded(void *max_paddr)
+{
+ return page_alloc_n_bounded(1, max_paddr);
+}
+
+void page_free(void *addr) { page_free_n(addr, 1); }
+
+static void *_btree_alloc(size_t npages, uintptr_t idx, size_t smallest_order,
+ size_t actual_order)
+{
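+ // split: starting from the free block at idx (of actual_order), repeatedly
+ // mark it unavailable, mark both of its children available, and descend into
+ // the left child until we are down to smallest_order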
+ while (actual_order != smallest_order)
+ {
+ BTREE_MARK_UNAVAILABLE(idx);
+ count_available_by_order[actual_order]--;
+ _btree_update_metadata_after_removal(actual_order, idx);
+
+ idx = BTREE_LEFT_CHILD(idx);
+ BTREE_MARK_AVAILABLE(idx);
+ BTREE_MARK_AVAILABLE(BTREE_SIBLING(idx));
+ actual_order--;
+
+ count_available_by_order[actual_order] += 2;
+ if (idx < min_available_idx_by_order[actual_order])
+ {
+ min_available_idx_by_order[actual_order] = idx;
+ }
+ _btree_expensive_sanity_check();
+ }
+
+ // actually allocate the 2^smallest_order pages by marking them unavailable
+ BTREE_MARK_UNAVAILABLE(idx);
+ count_available_by_order[actual_order]--;
+ _btree_update_metadata_after_removal(actual_order, idx);
+
+ uintptr_t allocated_idx = idx;
+ size_t allocated_order = actual_order;
+ while (allocated_order-- > 0)
+ allocated_idx = BTREE_LEFT_CHILD(allocated_idx);
+
+ KASSERT(BTREE_LEAF_INDEX_TO_ADDR(allocated_idx));
+
+ // we allocated some 2^smallest_order of pages; it is possible they asked
+ // for fewer than 2^smallest_order pages; make sure we mark as available the
+ // remaining (2^smallest_order - npages) pages.
+ _btree_mark_range_available(allocated_idx + npages,
+ (1 << smallest_order) - npages);
+
+ // while (over_allocated > 0 && (1 << reclaimed_order) < over_allocated
+ // && next_leaf_to_reclaim < max_reclaim_idx) {
+ // BTREE_MARK_AVAILABLE(idx);
+ // count_available_by_order[reclaimed_order]++;
+ // if (idx < min_available_idx_by_order[reclaimed_order]) {
+ // min_available_idx_by_order[reclaimed_order] = idx;
+ // }
+ // over_allocated -= (1 << reclaimed_order);
+ // next_leaf_to_reclaim += (2 << reclaimed_order);
+ // idx = BTREE_SIBLING(BTREE_PARENT(idx));
+ // reclaimed_order++;
+ // }
+
+ page_freecount -= npages;
+
+ uintptr_t addr = BTREE_LEAF_INDEX_TO_ADDR(allocated_idx);
+ dbgq(DBG_MM, "page_alloc_n(%lu): [0x%p, 0x%p)\t\t%lu pages remain\n",
+ npages, (void *)(PHYS_OFFSET + addr),
+ (void *)(PHYS_OFFSET + addr + (npages << PAGE_SHIFT)), page_freecount);
+ _btree_expensive_sanity_check();
+ return (void *)(addr + PHYS_OFFSET);
+}
+
+void *page_alloc_n(size_t npages)
+{
+ return page_alloc_n_bounded(npages, (void *)~0UL);
+}
+
+// this is really only used for setting up initial page tables
+// this memory will be immediately overwritten, so no need to poison the memory
+void *page_alloc_n_bounded(size_t npages, void *max_paddr)
+{
+ KASSERT(npages > 0 && npages <= (1UL << max_order));
+ if (npages > page_freecount)
+ {
+ return 0;
+ }
+ // a note on max_pages: so long as we never mark a page that is beyond our
+ // RAM as available, we will never allocate it. So put all those checks at
+ // the free and map functions
+
+ // find the smallest order that will fit npages
+ uintptr_t max_page_number =
+ ((uintptr_t)max_paddr >> PAGE_SHIFT) - npages + 1;
+
+ // [+] TODO intel-specific optimization possible here?
+ size_t smallest_order = 0;
+ while ((1UL << smallest_order) < npages)
+ smallest_order++;
+
+ for (size_t actual_order = smallest_order; actual_order <= max_order;
+ actual_order++)
+ {
+ if (!count_available_by_order[actual_order])
+ {
+ continue;
+ }
+ uintptr_t idx = min_available_idx_by_order[actual_order];
+ KASSERT(idx >= BTREE_ROW_START_INDEX(actual_order) &&
+ idx < BTREE_ROW_END_INDEX(actual_order));
+ if ((idx - BTREE_ROW_START_INDEX(actual_order)) * (1 << actual_order) <
+ max_page_number)
+ {
+ KASSERT((idx - BTREE_ROW_START_INDEX(actual_order)) *
+ (1 << actual_order) <
+ max_pages);
+
+ void *ret = _btree_alloc(npages, idx, smallest_order, actual_order);
+ KASSERT(((uintptr_t)ret + (npages << PAGE_SHIFT)) <=
+ (uintptr_t)physmap_end());
+ return ret;
+ }
+ }
+ return 0;
+}
+
+void page_free_n(void *addr, size_t npages)
+{
+ dbgq(DBG_MM, "page_free_n(%lu): [0x%p, 0x%p)\t\t%lu pages remain\n", npages,
+ addr, (void *)((uintptr_t)addr + (npages << PAGE_SHIFT)),
+ page_freecount);
+ GDB_CALL_HOOK(page_free, addr, npages);
+ KASSERT(npages > 0 && npages <= (1UL << max_order) && PAGE_ALIGNED(addr));
+ uintptr_t idx = BTREE_ADDR_TO_LEAF_INDEX((uintptr_t)addr - PHYS_OFFSET);
+ KASSERT(idx + npages - BTREE_LEAF_START_INDEX <= max_pages);
+ _btree_mark_range_available(idx, npages);
+ page_freecount += npages;
+ _btree_expensive_sanity_check();
+}
+
+void page_mark_reserved(void *paddr)
+{
+ if ((uintptr_t)paddr > (max_pages << PAGE_SHIFT))
+ return;
+
+ dbgq(DBG_MM, "page_mark_reserved(0x%p): [0x%p, 0x%p)\n",
+ (void *)((uintptr_t)paddr + PHYS_OFFSET),
+ (void *)((uintptr_t)paddr + PHYS_OFFSET),
+ (void *)((uintptr_t)paddr + PHYS_OFFSET + PAGE_SIZE));
+
+ KASSERT(PAGE_ALIGNED(paddr));
+ uintptr_t idx = BTREE_ADDR_TO_LEAF_INDEX(paddr);
+ size_t order = 0;
+ while (idx && !BTREE_IS_AVAILABLE(idx))
+ {
+ idx = BTREE_PARENT(idx);
+ order++;
+ }
+ if (!BTREE_IS_AVAILABLE(idx))
+ {
+ return; // can sometimes be a part of reserved RAM anyway
+ }
+
+ BTREE_MARK_UNAVAILABLE(idx);
+ count_available_by_order[order]--;
+ _btree_update_metadata_after_removal(order, idx);
+
+ uintptr_t unavailable_leaf_idx = BTREE_ADDR_TO_LEAF_INDEX(paddr);
+ uintptr_t still_available_leaf_idx_start =
+ BTREE_ADDR_TO_LEAF_INDEX(BTREE_INDEX_TO_ADDR(idx, order));
+ uintptr_t still_available_leaf_idx_end =
+ BTREE_ADDR_TO_LEAF_INDEX(BTREE_INDEX_TO_ADDR(idx + 1, order));
+
+ _btree_mark_range_available(
+ still_available_leaf_idx_start,
+ unavailable_leaf_idx - still_available_leaf_idx_start);
+ _btree_mark_range_available(
+ unavailable_leaf_idx + 1,
+ still_available_leaf_idx_end - unavailable_leaf_idx - 1);
+
+ page_freecount--;
+
+ _btree_expensive_sanity_check();
+}
+
+size_t page_free_count() { return page_freecount; }
diff --git a/kernel/mm/page.py b/kernel/mm/page.py
new file mode 100644
index 0000000..9dfedf0
--- /dev/null
+++ b/kernel/mm/page.py
@@ -0,0 +1,47 @@
+import gdb
+
+import weenix
+import weenix.kmem
+
+
+class PageCommand(weenix.Command):
+ def __init__(self):
+ weenix.Command.__init__(self, "page", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
+
+ def invoke(self, args, tty):
+ total = 0
+ print("pagesize: {0}".format(weenix.kmem.pagesize()))
+
+ names = list()
+ blobs = list()
+ pages = list()
+ bytes = list()
+
+ for order, count in weenix.kmem.freepages().items():
+ pcount = count * (1 << order)
+ bcount = pcount * weenix.kmem.pagesize()
+ names.append("freepages[{0}]:".format(order))
+ blobs.append("{0} blob{1}".format(count, " " if (count == 1) else "s"))
+ pages.append("{0} page{1}".format(pcount, " " if (pcount == 1) else "s"))
+ bytes.append("{0} byte{1}".format(bcount, " " if (bcount == 1) else "s"))
+ total += count * (1 << order)
+
+ names.append("total:")
+ blobs.append("")
+ pages.append("{0} page{1}".format(total, " " if (total == 1) else "s"))
+ bytes.append("{0} bytes".format(total * weenix.kmem.pagesize()))
+
+ namewidth = max(list(map(lambda x: len(x), names)))
+ blobwidth = max(list(map(lambda x: len(x), blobs)))
+ pagewidth = max(list(map(lambda x: len(x), pages)))
+ bytewidth = max(list(map(lambda x: len(x), bytes)))
+
+ for name, blob, page, byte in zip(names, blobs, pages, bytes):
+ print(
+ "{1:<{0}} {3:>{2}} {5:>{4}} {7:>{6}}".format(
+ namewidth, name, blobwidth, blob, pagewidth, page, bytewidth, byte
+ )
+ )
+
+
+PageCommand()
diff --git a/kernel/mm/pagecache.c b/kernel/mm/pagecache.c
new file mode 100644
index 0000000..b1763ba
--- /dev/null
+++ b/kernel/mm/pagecache.c
@@ -0,0 +1,23 @@
+#include "errno.h"
+#include "globals.h"
+#include "kernel.h"
+#include "util/debug.h"
+
+#include "mm/pframe.h"
+
+long pagecache_get_page(pframe_t *pf) {
+ if (pf->pf_addr) {
+ // all set
+ return 1;
+ }
+ //Somehow load the page
+ KASSERT(0 && "page not in pagecache");
+ return 0;
+}
+
+#ifdef NO
+void pagecache_newsource(pframe_t *pf, blockdev_t *dev, long loc) {
+ pf->pf_srcdev.pf_dev = dev;
+ pf->pf_loc = loc;
+}
+#endif \ No newline at end of file
diff --git a/kernel/mm/pagetable.c b/kernel/mm/pagetable.c
new file mode 100644
index 0000000..daf49ef
--- /dev/null
+++ b/kernel/mm/pagetable.c
@@ -0,0 +1,873 @@
+#include "errno.h"
+#include "globals.h"
+#include "kernel.h"
+#include "types.h"
+
+#include "mm/mm.h"
+#include "mm/pframe.h"
+#include "mm/mobj.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+#include "vm/pagefault.h"
+
+typedef enum
+{
+ UNMAPPED,
+ PAGE_4KB,
+ PAGE_2MB,
+ PAGE_1GB
+} vaddr_map_status;
+
+static pml4_t *global_kernel_only_pml4;
+
+void pt_set(pml4_t *pml4)
+{
+ KASSERT((void *)pml4 >= physmap_start());
+ uintptr_t phys_addr = pt_virt_to_phys((uintptr_t)pml4);
+ __asm__ volatile("movq %0, %%cr3" ::"r"(phys_addr)
+ : "memory");
+}
+
+/*
+ * Don't use this for proc_create. You want each new proc to have a copy
+ * of the current page table (see pt_create).
+ *
+ * Returns a pointer to the current pagetable (a virtual address).
+ */
+inline pml4_t *pt_get()
+{
+ uintptr_t pml4;
+ __asm__ volatile("movq %%cr3, %0"
+ : "=r"(pml4));
+ return (pml4_t *)(pml4 + PHYS_OFFSET);
+}
+
+vaddr_map_status _vaddr_status(pml4_t *pml4, uintptr_t vaddr)
+{
+ uint64_t idx;
+ pml4_t *table = pml4;
+
+ idx = PML4E(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ return UNMAPPED;
+ }
+ table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PDP (1GB pages)
+ idx = PDPE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ return UNMAPPED;
+ }
+ if (IS_1GB_PAGE(table->phys[idx]))
+ {
+ return PAGE_1GB;
+ }
+ table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PD (2MB pages)
+ idx = PDE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ return UNMAPPED;
+ }
+ if (IS_2MB_PAGE(table->phys[idx]))
+ {
+ return PAGE_2MB;
+ }
+ table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PT (4KB pages)
+ idx = PTE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ return UNMAPPED;
+ }
+ return PAGE_4KB;
+}
+
+uintptr_t pt_virt_to_phys_helper(pml4_t *table, uintptr_t vaddr)
+{
+ if (vaddr >= (uintptr_t)physmap_start() &&
+ vaddr < (uintptr_t)physmap_end())
+ {
+ return vaddr - PHYS_OFFSET;
+ }
+
+ uint64_t idx;
+
+ // PML4
+ idx = PML4E(vaddr);
+ KASSERT(IS_PRESENT(table->phys[idx]));
+ table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PDP (1GB pages)
+ idx = PDPE(vaddr);
+ KASSERT(IS_PRESENT(table->phys[idx]));
+ if (USE_1GB_PAGES && IS_1GB_PAGE(table->phys[idx]))
+ {
+ return PAGE_ALIGN_DOWN_1GB(table->phys[idx]) + PAGE_OFFSET_1GB(vaddr);
+ }
+ table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PD (2MB pages)
+ idx = PDE(vaddr);
+ KASSERT(IS_PRESENT(table->phys[idx]));
+ if (USE_2MB_PAGES && IS_2MB_PAGE(table->phys[idx]))
+ {
+ return PAGE_ALIGN_DOWN_2MB(table->phys[idx]) + PAGE_OFFSET_2MB(vaddr);
+ }
+ table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PT (4KB pages)
+ idx = PTE(vaddr);
+
+ KASSERT(IS_PRESENT(table->phys[idx]));
+
+ return (uintptr_t)PAGE_ALIGN_DOWN(table->phys[idx]) + PAGE_OFFSET(vaddr);
+}
+
+uintptr_t pt_virt_to_phys(uintptr_t vaddr)
+{
+ if (vaddr >= (uintptr_t)physmap_start() &&
+ vaddr < (uintptr_t)physmap_end())
+ {
+ // if the address is within the PHYS_MAP region, then subtract the
+ // PHYS_OFFSET to get the physical address. There is a one-to-one mapping
+ // between virtual and physical addresses in this region.
+ return vaddr - PHYS_OFFSET;
+ }
+ return pt_virt_to_phys_helper(pt_get(), vaddr);
+}
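+
+/*
+ * Example (illustrative): for any physical address pa within RAM,
+ * pt_virt_to_phys(PHYS_OFFSET + pa) == pa, since the physmap region is a
+ * one-to-one mapping; only addresses outside physmap require the full
+ * pt_virt_to_phys_helper() table walk above.
+ */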
+
+void _fill_pt(pt_t *pt, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax)
+{
+ for (uintptr_t idx = PTE(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax;
+ idx++, paddr += PAGE_SIZE, vaddr += PAGE_SIZE)
+ {
+ pt->phys[idx] = (uintptr_t)paddr | PT_PRESENT | PT_WRITE;
+ }
+}
+
+long _fill_pd(pd_t *pd, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax,
+ uintptr_t max_paddr)
+{
+ for (uintptr_t idx = PDE(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax;
+ idx++, paddr += PT_VADDR_SIZE, vaddr += PT_VADDR_SIZE)
+ {
+ KASSERT(!IS_PRESENT(pd->phys[idx]));
+#if USE_2MB_PAGES
+ if (vmax - vaddr >= PT_VADDR_SIZE)
+ {
+ pd->phys[idx] = paddr | PT_PRESENT | PT_WRITE | PT_SIZE;
+ continue;
+ }
+#endif
+
+ uintptr_t pt = (uintptr_t)page_alloc_bounded((void *)max_paddr);
+ if (!pt)
+ {
+ return 1;
+ }
+ pt -= PHYS_OFFSET;
+
+ memset((void *)pt, 0, PAGE_SIZE);
+ pd->phys[idx] = pt | PT_PRESENT | PT_WRITE;
+ _fill_pt((pt_t *)pt, paddr, vaddr, vmax);
+ }
+ return 0;
+}
+
+long _fill_pdp(pdp_t *pdp, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax,
+ uintptr_t max_paddr)
+{
+ for (uintptr_t idx = PDPE(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax;
+ idx++, paddr += PD_VADDR_SIZE, vaddr += PD_VADDR_SIZE)
+ {
+ KASSERT(!IS_PRESENT(pdp->phys[idx]));
+#if USE_1GB_PAGES
+ if (vmax - vaddr >= PD_VADDR_SIZE)
+ {
+ pdp->phys[idx] = paddr | PT_PRESENT | PT_WRITE | PT_SIZE;
+ continue;
+ }
+#endif
+
+ uintptr_t pd = (uintptr_t)page_alloc_bounded((void *)max_paddr);
+ if (!pd)
+ {
+ return 1;
+ }
+ pd -= PHYS_OFFSET;
+
+ memset((void *)pd, 0, PAGE_SIZE);
+ pdp->phys[idx] = pd | PT_PRESENT | PT_WRITE;
+ if (_fill_pd((pd_t *)pd, paddr, vaddr, vmax, max_paddr))
+ {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+long _fill_pml4(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, uintptr_t vmax,
+ uintptr_t max_paddr)
+{
+ for (uintptr_t idx = PML4E(vaddr); idx < PT_ENTRY_COUNT && vaddr < vmax;
+ idx++, paddr += PDP_VADDR_SIZE, vaddr += PDP_VADDR_SIZE)
+ {
+ KASSERT(!IS_PRESENT(pml4->phys[idx]));
+
+ uintptr_t pdp = (uintptr_t)page_alloc_bounded((void *)max_paddr);
+ if (!pdp)
+ {
+ return 1;
+ }
+ pdp -= PHYS_OFFSET;
+
+ memset((void *)pdp, 0, PAGE_SIZE);
+ pml4->phys[idx] = pdp | PT_PRESENT | PT_WRITE;
+ if (_fill_pdp((pdp_t *)pdp, paddr, vaddr, vmax, max_paddr))
+ {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+long pt_map(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr, uint32_t pdflags,
+ uint32_t ptflags)
+{
+ return pt_map_range(pml4, paddr, vaddr, vaddr + PAGE_SIZE, pdflags,
+ ptflags);
+}
+
+long pt_map_range(pml4_t *pml4, uintptr_t paddr, uintptr_t vaddr,
+ uintptr_t vmax, uint32_t pdflags, uint32_t ptflags)
+{
+ dbg(DBG_PGTBL, "[0x%p, 0x%p) mapped to 0x%p; pml4: 0x%p\n", (void *)vaddr,
+ (void *)vmax, (void *)paddr, pml4);
+ KASSERT(PAGE_ALIGNED(paddr) && PAGE_ALIGNED(vaddr) && PAGE_ALIGNED(vmax));
+ KASSERT(vmax > vaddr && (ptflags & PAGE_MASK) == 0 &&
+ (pdflags & PAGE_MASK) == 0);
+ KASSERT((pdflags & PT_USER) == (ptflags & PT_USER));
+ KASSERT(!(pdflags & PT_SIZE) && !(ptflags & PT_SIZE));
+
+ while (vaddr < vmax)
+ {
+ uint64_t size = vmax - vaddr;
+
+ uint64_t idx = PML4E(vaddr);
+ pml4_t *table = pml4;
+
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ uintptr_t page = (uintptr_t)page_alloc();
+ if (!page)
+ {
+ return -ENOMEM;
+ }
+ memset((void *)page, 0, PAGE_SIZE);
+ KASSERT(pt_virt_to_phys(page) == page - PHYS_OFFSET);
+ KASSERT(*(uintptr_t *)page == 0);
+ table->phys[idx] = (page - PHYS_OFFSET) | pdflags;
+ }
+ else
+ {
+ // can't split up if control flags don't match, so liberally include
+ // all of them
+ table->phys[idx] |= pdflags;
+ }
+ table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PDP (1GB pages)
+ idx = PDPE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+#if USE_1GB_PAGES
+ if (PAGE_ALIGNED_1GB(vaddr) && size > PAGE_SIZE_1GB)
+ {
+ table->phys[idx] = (uintptr_t)paddr | ptflags | PT_SIZE;
+ paddr += PAGE_SIZE_1GB;
+ vaddr += PAGE_SIZE_1GB;
+ continue;
+ }
+#endif
+ uintptr_t page = (uintptr_t)page_alloc();
+ if (!page)
+ {
+ return -ENOMEM;
+ }
+ memset((void *)page, 0, PAGE_SIZE);
+ table->phys[idx] = (page - PHYS_OFFSET) | pdflags;
+ }
+ else if (IS_1GB_PAGE(table->phys[idx]))
+ {
+ if (PAGE_SAME_1GB(table->phys[idx], paddr) &&
+ PAGE_OFFSET_1GB(paddr) == PAGE_OFFSET_1GB(vaddr) &&
+ PAGE_CONTROL_FLAGS(table->phys[idx]) - PT_SIZE == pdflags)
+ {
+ vaddr = PAGE_ALIGN_UP_1GB(vaddr + 1);
+ continue;
+ }
+ pd_t *pd = page_alloc();
+ if (!pd)
+ {
+ return -ENOMEM;
+ }
+ for (unsigned i = 0; i < PT_ENTRY_COUNT; i++)
+ {
+ pd->phys[i] =
+ table->phys[idx] +
+ i * PAGE_SIZE_2MB; // keeps all flags, including PT_SIZE
+ }
+ table->phys[idx] =
+ ((uintptr_t)pd - PHYS_OFFSET) |
+ pdflags; // overwrite flags as well for particular entry
+ }
+ else
+ {
+ table->phys[idx] |= pdflags;
+ }
+ table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PD (2MB pages)
+ idx = PDE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+#if USE_2MB_PAGES
+ if (PAGE_ALIGNED_2MB(vaddr) && size > PAGE_SIZE_2MB)
+ {
+ table->phys[idx] = (uintptr_t)paddr | ptflags | PT_SIZE;
+ paddr += PAGE_SIZE_2MB;
+ vaddr += PAGE_SIZE_2MB;
+ continue;
+ }
+#endif
+ uintptr_t page = (uintptr_t)page_alloc();
+ if (!page)
+ {
+ return -ENOMEM;
+ }
+ memset((void *)page, 0, PAGE_SIZE);
+ table->phys[idx] = (page - PHYS_OFFSET) | pdflags;
+ }
+ else if (IS_2MB_PAGE(table->phys[idx]))
+ {
+ if (PAGE_SAME_2MB(table->phys[idx], paddr) &&
+ PAGE_OFFSET_2MB(paddr) == PAGE_OFFSET_2MB(vaddr) &&
+ PAGE_CONTROL_FLAGS(table->phys[idx]) - PT_SIZE == ptflags)
+ {
+ vaddr = PAGE_ALIGN_UP_2MB(vaddr + 1);
+ continue;
+ }
+ pt_t *pt = page_alloc();
+ if (!pt)
+ {
+ return -ENOMEM;
+ }
+ for (unsigned i = 0; i < PT_ENTRY_COUNT; i++)
+ {
+ pt->phys[i] = table->phys[idx] + i * PAGE_SIZE -
+ PT_SIZE; // remove PT_SIZE flag
+ }
+ table->phys[idx] =
+ ((uintptr_t)pt - PHYS_OFFSET) | pdflags; // overwrite flags
+ }
+ else
+ {
+ table->phys[idx] |= pdflags;
+ }
+ table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PT (4KB pages)
+
+ idx = PTE(vaddr);
+ table->phys[idx] = (uintptr_t)paddr | ptflags;
+
+ KASSERT(IS_PRESENT(table->phys[idx]));
+
+ paddr += PAGE_SIZE;
+ vaddr += PAGE_SIZE;
+ }
+
+ return 0;
+}
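+
+/*
+ * Usage sketch (illustrative; `pml4`, `page` and `uaddr` are hypothetical
+ * names): to map one physmap-backed page `page` at a user virtual address
+ * `uaddr`, a caller would pass matching PT_USER bits at both levels, as the
+ * KASSERTs above require:
+ *
+ *     pt_map(pml4, (uintptr_t)page - PHYS_OFFSET, uaddr,
+ *            PT_PRESENT | PT_WRITE | PT_USER,
+ *            PT_PRESENT | PT_WRITE | PT_USER);
+ */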
+
+static long _pt_fault_handler(regs_t *regs)
+{
+ uintptr_t vaddr;
+ /* Get the address where the fault occurred */
+ __asm__ volatile("movq %%cr2, %0"
+ : "=r"(vaddr));
+ uintptr_t cause = regs->r_err;
+
+ /* Check if pagefault was in user space (otherwise, BAD!) */
+ if (cause & FAULT_USER)
+ {
+ handle_pagefault(vaddr, cause);
+ }
+ else
+ {
+ dump_registers(regs);
+ panic("\nKernel page fault at vaddr 0x%p\n", (void *)vaddr);
+ }
+ return 0;
+}
+
+void pt_init()
+{
+ static long inited = 0;
+ if (!inited)
+ {
+ inited = 1;
+ // allocate a page to set up the new page table structure
+ // important caveat: we have not mapped in the physmap region, which
+        // is where the addresses from page_alloc come from, so we use the
+        // actual physical address of the page, which we request to be in the
+ // first 4MB of RAM, as they are identity-mapped by the boot-time
+ // page tables
+ uintptr_t max_paddr = (1UL << 22);
+ pml4_t *pml4 = page_alloc_bounded((void *)max_paddr);
+ if (!pml4)
+ panic("ran out of memory in pt_init");
+ pml4 = (pml4_t *)((uintptr_t)pml4 - PHYS_OFFSET);
+ KASSERT((uintptr_t)pml4 < max_paddr);
+ memset(pml4, 0, PAGE_SIZE);
+
+        // map the kernel into its expected virtual memory address
+ if (_fill_pml4(pml4, KERNEL_PHYS_BASE, KERNEL_VMA + KERNEL_PHYS_BASE,
+ KERNEL_VMA + KERNEL_PHYS_END, max_paddr))
+ panic("ran out of memory in pt_init");
+
+ // map in physmap
+ if (_fill_pml4(pml4, 0, (uintptr_t)physmap_start(),
+ (uintptr_t)physmap_end(), max_paddr))
+ panic("ran out of memory in pt_init");
+
+ page_init_finish();
+
+        // use the kernel memory address synonym for pml4 instead of its
+        // physical identity-mapped address, and make the MMU use the new pml4
+ pt_set((pml4_t *)((uintptr_t)pml4 + PHYS_OFFSET));
+ global_kernel_only_pml4 = (pml4_t *)((uintptr_t)pml4 + PHYS_OFFSET);
+ // pt_unmap_range(global_kernel_only_pml4, USER_MEM_LOW, USER_MEM_HIGH);
+ intr_register(INTR_PAGE_FAULT, _pt_fault_handler);
+ }
+ pt_set(global_kernel_only_pml4);
+}
+
+pt_t *clone_pt(pt_t *pt)
+{
+ pt_t *clone = page_alloc();
+ dbg(DBG_PRINT, "cloning pt at 0x%p to 0x%p\n", pt, clone);
+ if (clone)
+ {
+ memcpy(clone, pt, PAGE_SIZE);
+ }
+ return clone;
+}
+
+pd_t *clone_pd(pd_t *pd)
+{
+ pd_t *clone = page_alloc();
+ dbg(DBG_PRINT, "cloning pd at 0x%p to 0x%p\n", pd, clone);
+ if (!clone)
+ {
+ return NULL;
+ }
+ memset(clone, 0, PAGE_SIZE); // in case the clone fails, need to know what
+ // we have allocated
+ for (unsigned i = 0; i < PT_ENTRY_COUNT; i++)
+ {
+ // dbg(DBG_PRINT, "checking pd i = %u\n", i);
+ if (pd->phys[i])
+ {
+ if (IS_2MB_PAGE(pd->phys[i]))
+ {
+ clone->phys[i] = pd->phys[i];
+ continue;
+ }
+ pt_t *cloned_pt =
+ clone_pt((pt_t *)((pd->phys[i] & PAGE_MASK) + PHYS_OFFSET));
+ if (!cloned_pt)
+ {
+ return NULL;
+ }
+ clone->phys[i] = (((uintptr_t)cloned_pt) - PHYS_OFFSET) |
+ PAGE_FLAGS(pd->phys[i]);
+ }
+ else
+ {
+ clone->phys[i] = 0;
+ }
+ }
+ return clone;
+}
+
+pdp_t *clone_pdp(pdp_t *pdp)
+{
+ pdp_t *clone = page_alloc();
+ dbg(DBG_PRINT, "cloning pdp at 0x%p to 0x%p\n", pdp, clone);
+ if (!clone)
+ {
+ return NULL;
+ }
+ memset(clone, 0, PAGE_SIZE); // in case the clone fails, need to know what
+ // we have allocated
+ for (unsigned i = 0; i < PT_ENTRY_COUNT; i++)
+ {
+ // dbg(DBG_PRINT, "checking pdp i = %u\n", i);
+ if (pdp->phys[i])
+ {
+ if (IS_1GB_PAGE(pdp->phys[i]))
+ {
+ clone->phys[i] = pdp->phys[i];
+ continue;
+ }
+ pd_t *cloned_pd =
+ clone_pd((pd_t *)((pdp->phys[i] & PAGE_MASK) + PHYS_OFFSET));
+ if (!cloned_pd)
+ {
+ return NULL;
+ }
+ clone->phys[i] = (((uintptr_t)cloned_pd) - PHYS_OFFSET) |
+ PAGE_FLAGS(pdp->phys[i]);
+ }
+ else
+ {
+ clone->phys[i] = 0;
+ }
+ }
+ return clone;
+}
+
+pml4_t *clone_pml4(pml4_t *pml4, long include_user_mappings)
+{
+ pml4_t *clone = page_alloc();
+ dbg(DBG_PRINT, "cloning pml4 at 0x%p to 0x%p\n", pml4, clone);
+ if (!clone)
+ {
+ return NULL;
+ }
+ memset(clone, 0, PAGE_SIZE); // in case the clone fails, need to know what
+ // we have allocated
+ for (uintptr_t i = include_user_mappings ? 0 : PT_ENTRY_COUNT / 2;
+ i < PT_ENTRY_COUNT; i++)
+ {
+ // dbg(DBG_PRINT, "checking pml4 i = %u\n", i);
+ if (pml4->phys[i])
+ {
+ pdp_t *cloned_pdp =
+ clone_pdp((pdp_t *)((pml4->phys[i] & PAGE_MASK) + PHYS_OFFSET));
+ if (!cloned_pdp)
+ {
+ pt_destroy(clone);
+ return NULL;
+ }
+ clone->phys[i] = (((uintptr_t)cloned_pdp) - PHYS_OFFSET) |
+ PAGE_FLAGS(pml4->phys[i]);
+ }
+ else
+ {
+ clone->phys[i] = 0;
+ }
+ }
+ return clone;
+}
+
+pml4_t *pt_create() { return clone_pml4(pt_get(), 0); }
+
+void pt_destroy_helper(pt_t *pt, long depth)
+{
+ // 4 = pml4, 3 = pdp, 2 = pd, 1 = pt
+ if (depth != 1)
+ {
+ for (uintptr_t i = 0; i < PT_ENTRY_COUNT; i++)
+ {
+ if (!pt->phys[i] || (PT_SIZE & pt->phys[i]))
+ {
+ continue;
+ }
+ KASSERT(IS_PRESENT(pt->phys[i]) && (pt->phys[i] & PAGE_MASK));
+ pt_destroy_helper((pt_t *)((pt->phys[i] & PAGE_MASK) + PHYS_OFFSET),
+ depth - 1);
+ pt->phys[i] = 0;
+ }
+ }
+ page_free(pt);
+}
+
+void pt_destroy(pml4_t *pml4) { pt_destroy_helper(pml4, 4); }
+
+void pt_unmap(pml4_t *pml4, uintptr_t vaddr)
+{
+ pt_unmap_range(pml4, vaddr, vaddr + PAGE_SIZE);
+}
+
+void pt_unmap_range(pml4_t *pml4, uintptr_t vaddr, uintptr_t vmax)
+{
+ // TODO reclaim pages on-the-fly?
+
+ dbg(DBG_PGTBL, "virt[0x%p, 0x%p); pml4: 0x%p\n", (void *)vaddr,
+ (void *)vmax, pml4);
+ KASSERT(PAGE_ALIGNED(vaddr) && PAGE_ALIGNED(vmax) && vmax > vaddr);
+
+ uintptr_t vaddr_start = vaddr;
+
+ while (vaddr < vmax)
+ {
+ uint64_t size = vmax - vaddr;
+
+ uint64_t idx = PML4E(vaddr);
+ pml4_t *table = pml4;
+
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ vaddr = PAGE_ALIGN_UP_512GB(vaddr + 1);
+ continue;
+ }
+ table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PDP (1GB pages)
+ idx = PDPE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+            vaddr = PAGE_ALIGN_UP_1GB(vaddr + 1);
+ continue;
+ }
+ if (IS_1GB_PAGE(table->phys[idx]))
+ {
+ if (PAGE_ALIGNED_1GB(vaddr) && size >= PAGE_SIZE_1GB)
+ {
+ table->phys[idx] = 0;
+ vaddr += PAGE_SIZE_1GB;
+ }
+ else
+ {
+ pd_t *pd = page_alloc();
+ if (!pd)
+ {
+ panic(
+ "Ran out of memory during pt_unmap_range; recovery "
+ "from this situation has not yet been implemented!");
+ }
+ uint64_t unmap_start = PDE(vaddr);
+ uint64_t unmap_end =
+ PAGE_SAME_1GB(vaddr, vmax) ? PDE(vmax) : 512;
+ for (unsigned i = 0; i < unmap_start; i++)
+ {
+ pd->phys[i] = table->phys[idx] +
+ i * PAGE_SIZE_2MB; // keeps all flags,
+ // including PT_SIZE
+ }
+ memset(&pd->phys[unmap_start], 0,
+ sizeof(uint64_t) * (unmap_end - unmap_start));
+ vaddr += (unmap_end - unmap_start) * PAGE_SIZE_2MB;
+                for (uintptr_t i = unmap_end; i < PT_ENTRY_COUNT; i++)
+ {
+ pd->phys[i] = table->phys[idx] +
+ i * PAGE_SIZE_2MB; // keeps all flags,
+ // including PT_SIZE
+ }
+ table->phys[idx] = ((uintptr_t)pd - PHYS_OFFSET) |
+ PAGE_CONTROL_FLAGS(table->phys[idx]);
+ }
+ continue;
+ }
+ table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PD (2MB pages)
+ idx = PDE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ vaddr = PAGE_ALIGN_UP_2MB(vaddr + 1);
+ continue;
+ }
+ if (IS_2MB_PAGE(table->phys[idx]))
+ {
+ if (PAGE_ALIGNED_2MB(vaddr) && size >= PAGE_SIZE_2MB)
+ {
+ table->phys[idx] = 0;
+ vaddr += PAGE_SIZE_2MB;
+ }
+ else
+ {
+ pt_t *pt = page_alloc();
+ if (!pt)
+ {
+ panic(
+ "Ran out of memory during pt_unmap_range; recovery "
+ "from this situation has not yet been implemented!");
+ }
+ uint64_t unmap_start = PTE(vaddr);
+ uint64_t unmap_end =
+ PAGE_SAME_2MB(vaddr, vmax) ? PTE(vmax) : 512;
+ for (unsigned i = 0; i < unmap_start; i++)
+ {
+ pt->phys[i] = table->phys[idx] + i * PAGE_SIZE -
+ PT_SIZE; // remove PT_SIZE flag
+ }
+ memset(&pt->phys[unmap_start], 0,
+ sizeof(uint64_t) * (unmap_end - unmap_start));
+ vaddr += (unmap_end - unmap_start) * PAGE_SIZE;
+                for (uintptr_t i = unmap_end; i < PT_ENTRY_COUNT; i++)
+ {
+ pt->phys[i] = table->phys[idx] + i * PAGE_SIZE -
+ PT_SIZE; // remove PT_SIZE flag
+ }
+ table->phys[idx] =
+ ((uintptr_t)pt - PHYS_OFFSET) |
+ (PAGE_CONTROL_FLAGS(table->phys[idx]) - PT_SIZE);
+ }
+ continue;
+ }
+ table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PT (4KB pages)
+ idx = PTE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ vaddr += PAGE_SIZE;
+ continue;
+ }
+ table->phys[idx] = 0;
+
+ vaddr += PAGE_SIZE;
+ }
+ KASSERT(_vaddr_status(pml4, vaddr_start) == UNMAPPED);
+}
+
+static char *entry_strings[] = {
+ "4KB",
+ "2MB",
+ "1GB",
+ "512GB",
+};
+
+inline long _vaddr_status_detailed(pml4_t *pml4, uintptr_t vaddr)
+{
+ uintptr_t idx;
+ pml4_t *table = pml4;
+
+ idx = PML4E(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ return -4;
+ }
+ table = (pdp_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PDP (1GB pages)
+ idx = PDPE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ return -3;
+ }
+ if (IS_1GB_PAGE(table->phys[idx]))
+ {
+ return 3;
+ }
+ table = (pd_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PD (2MB pages)
+ idx = PDE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ return -2;
+ }
+ if (IS_2MB_PAGE(table->phys[idx]))
+ {
+ return 2;
+ }
+ table = (pt_t *)((table->phys[idx] & PAGE_MASK) + PHYS_OFFSET);
+
+ // PT (4KB pages)
+ idx = PTE(vaddr);
+ if (!IS_PRESENT(table->phys[idx]))
+ {
+ return -1;
+ }
+ return 1;
+}
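+
+/*
+ * Return-value encoding of _vaddr_status_detailed (for reference): a positive
+ * value means vaddr is mapped by a leaf at that level (1 = 4KB PTE, 2 = 2MB
+ * PDE, 3 = 1GB PDPE); a negative value means the walk hit a non-present entry
+ * at that level (-1 = PT, -2 = PD, -3 = PDP, -4 = PML4). The switch in
+ * check_invalid_mappings() below uses this to decide how far to skip ahead.
+ */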
+
+void check_invalid_mappings(pml4_t *pml4, vmmap_t *vmmap, char *prompt)
+{
+ // checks that anything that is mapped in pml4 actually should be according
+ // to vmmap
+
+ uintptr_t vaddr = USER_MEM_LOW;
+ while (vaddr < USER_MEM_HIGH)
+ {
+ long state = _vaddr_status_detailed(pml4, vaddr);
+ if (state > 0)
+ {
+ uintptr_t paddr = pt_virt_to_phys_helper(pml4, vaddr);
+
+ vmarea_t *vma = vmmap_lookup(vmmap, ADDR_TO_PN(vaddr));
+ if (!vma)
+ {
+ dbg(DBG_PGTBL,
+ "[+] %s: pml4 0x%p, 0x%p (paddr: 0x%p) cannot be found in "
+ "vmmap!\n",
+ prompt, pml4, (void *)vaddr, (void *)paddr);
+ pt_unmap(pml4, vaddr);
+ }
+ else
+ {
+ pframe_t *pf = NULL;
+ uintptr_t pagenum =
+ vma->vma_off + (ADDR_TO_PN(vaddr) - vma->vma_start);
+
+ mobj_lock(vma->vma_obj);
+ long ret = mobj_get_pframe(vma->vma_obj, pagenum, 0, &pf);
+ mobj_unlock(vma->vma_obj);
+ if (ret)
+ {
+ dbg(DBG_PGTBL,
+ "[+] %s: pml4 0x%p, the page frame for virtual address "
+ "0x%p (mapping to 0x%p) could not be found!\n",
+ prompt, pml4, (void *)vaddr, (void *)paddr);
+ pt_unmap(pml4, vaddr);
+ }
+ else
+ {
+ uintptr_t pf_paddr =
+ pt_virt_to_phys_helper(pml4, (uintptr_t)pf->pf_addr);
+ if (pf_paddr != paddr)
+ {
+ dbg(DBG_PGTBL,
+ "[+] %s: pml4 0x%p, 0x%p (paddr: 0x%p) supposed to "
+ "be 0x%p (obj: 0x%p, %lu)\n",
+ prompt, pml4, (void *)vaddr, (void *)paddr,
+ (void *)pf_paddr, vma->vma_obj, pf->pf_pagenum);
+ pt_unmap(pml4, vaddr);
+ }
+ }
+ }
+ }
+ switch (state)
+ {
+ case 1:
+ case -1:
+ vaddr = (uintptr_t)PAGE_ALIGN_UP(vaddr + 1);
+ break;
+ case -2:
+ vaddr = (uintptr_t)PAGE_ALIGN_UP_2MB(vaddr + 1);
+ break;
+ case -3:
+ vaddr = (uintptr_t)PAGE_ALIGN_UP_1GB(vaddr + 1);
+ break;
+ case -4:
+ vaddr = (uintptr_t)PAGE_ALIGN_UP_512GB(vaddr + 1);
+ break;
+ case 2:
+ case 3:
+ default:
+ panic("should not get here!");
+ }
+ }
+}
diff --git a/kernel/mm/pagetable.gdb b/kernel/mm/pagetable.gdb
new file mode 100644
index 0000000..b145804
--- /dev/null
+++ b/kernel/mm/pagetable.gdb
@@ -0,0 +1,25 @@
+define pagetable
+ if $argc > 0
+ set $proc = proc_lookup($arg0)
+ if $proc != NULL
+ printf "Process %i (%s):\n", $proc->p_pid, $proc->p_name
+ set $pagedir = $proc->p_pml4
+ else
+ printf "No process with PID %i exists\n", $arg0
+ set $pagedir = NULL
+ end
+ else
+ printf "Current mappings:\n"
+ set $pagedir = current_pagedir
+ end
+
+ if $pagedir != NULL
+        kinfo pt_mapping_info $pagedir
+ end
+end
+document pagetable
+Without arguments displays current page table mappings in the form
+"[vstart, vend) => [pstart, pend)". Takes an optional integer argument
+to specify the PID of a process whose page table mappings should be
+printed instead.
+end
diff --git a/kernel/mm/pframe.c b/kernel/mm/pframe.c
new file mode 100644
index 0000000..6eff123
--- /dev/null
+++ b/kernel/mm/pframe.c
@@ -0,0 +1,59 @@
+#include "globals.h"
+
+#include "mm/pframe.h"
+#include "mm/slab.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+static slab_allocator_t *pframe_allocator;
+
+void pframe_init()
+{
+ pframe_allocator = slab_allocator_create("pframe", sizeof(pframe_t));
+ KASSERT(pframe_allocator);
+}
+
+/*
+ * Create a pframe and initialize its members appropriately.
+ */
+pframe_t *pframe_create()
+{
+ pframe_t *pf = slab_obj_alloc(pframe_allocator);
+ if (!pf)
+ {
+ return NULL;
+ }
+ memset(pf, 0, sizeof(pframe_t));
+ kmutex_init(&pf->pf_mutex);
+ list_link_init(&pf->pf_link);
+ return pf;
+}
+
+/*
+ * Free the pframe (don't forget to unlock the mutex) and set *pfp = NULL
+ *
+ * The pframe must be locked, its contents not in memory (pf->pf_addr == NULL),
+ * not be dirty, and not be linked into a memory object's list.
+ */
+void pframe_free(pframe_t **pfp)
+{
+ KASSERT(kmutex_owns_mutex(&(*pfp)->pf_mutex));
+ KASSERT(!(*pfp)->pf_addr);
+ KASSERT(!(*pfp)->pf_dirty);
+ KASSERT(!list_link_is_linked(&(*pfp)->pf_link));
+ kmutex_unlock(&(*pfp)->pf_mutex);
+ slab_obj_free(pframe_allocator, *pfp);
+ *pfp = NULL;
+}
+
+/*
+ * Unlock the pframe and set *pfp = NULL
+ */
+void pframe_release(pframe_t **pfp)
+{
+ pframe_t *pf = *pfp;
+ KASSERT(kmutex_owns_mutex(&pf->pf_mutex));
+ *pfp = NULL;
+ kmutex_unlock(&pf->pf_mutex);
+}
diff --git a/kernel/mm/slab.c b/kernel/mm/slab.c
new file mode 100644
index 0000000..bec70d1
--- /dev/null
+++ b/kernel/mm/slab.c
@@ -0,0 +1,550 @@
+// SMP.1 + SMP.3
+// spinlocks + mask interrupts
+/*
+ * slab_alloc.c - Kernel memory allocator
+ * Jason Lango <jal@cs.brown.edu>
+ *
+ * This implementation is based on the description of slab allocation
+ * (used in Solaris and Linux) from UNIX Internals: The New Frontiers,
+ * by Uresh Vahalia.
+ *
+ * Note that there is no need for locking in allocation and deallocation
+ * because the allocator never blocks and is never used by an interrupt
+ * handler. Hurray for non-preemptible kernels!
+ *
+ * darmanio: ^ lol, look at me now :D
+ */
+
+#include "types.h"
+
+#include "mm/mm.h"
+#include "mm/page.h"
+#include "mm/slab.h"
+
+#include "proc/spinlock.h"
+
+#include "util/debug.h"
+#include "util/gdb.h"
+#include "util/string.h"
+
+#ifdef SLAB_REDZONE
+#define front_rz(obj) (*(uintptr_t *)(obj))
+#define rear_rz(cache, obj) \
+ (*(uintptr_t *)(((uintptr_t)(obj)) + (cache)->sa_objsize - \
+ sizeof(uintptr_t)))
+
+#define VERIFY_REDZONES(cache, obj) \
+ do \
+ { \
+ if (front_rz(obj) != SLAB_REDZONE) \
+ panic("alloc: red-zone check failed: *(0x%p)=0x%.8lx\n", \
+ (void *)&front_rz(obj), front_rz(obj)); \
+ if (rear_rz(cache, obj) != SLAB_REDZONE) \
+ panic("alloc: red-zone check failed: *(0x%p)=0x%.8lx\n", \
+ (void *)&rear_rz(cache, obj), rear_rz(cache, obj)); \
+    } while (0)
+
+#endif
+
+struct slab
+{
+ struct slab *s_next; /* link on list of slabs */
+ size_t s_inuse; /* number of allocated objs */
+ void *s_free; /* head of obj free list */
+ void *s_addr; /* start address */
+};
+
+typedef struct slab_allocator
+{
+ const char *sa_name; /* user-provided name */
+ size_t sa_objsize; /* object size */
+ struct slab *sa_slabs; /* head of slab list */
+ size_t sa_order; /* npages = (1 << order) */
+ size_t sa_slab_nobjs; /* number of objs per slab */
+ struct slab_allocator *sa_next; /* link on global list of allocators */
+} slab_allocator_t;
+
+/* Stored at the end of every object to keep track of the
+ associated slab when allocated or a pointer to the next free object */
+typedef struct slab_bufctl
+{
+ union {
+ void *sb_next; /* next free object */
+ struct slab *sb_slab; /* containing slab */
+ } u;
+#ifdef SLAB_CHECK_FREE
+    uint8_t sb_free; /* true if the object is free */
+#endif
+} slab_bufctl_t;
+#define sb_next u.sb_next
+#define sb_slab u.sb_slab
+
+/* Returns a pointer to the start of the bufctl struct */
+#define obj_bufctl(allocator, obj) \
+ ((slab_bufctl_t *)(((uintptr_t)(obj)) + (allocator)->sa_objsize))
+/* Given a pointer to bufctrl, returns a pointer to the start of the object */
+#define bufctl_obj(allocator, buf) \
+ ((void *)(((uintptr_t)(buf)) - (allocator)->sa_objsize))
+/* Given a pointer to the object, returns a pointer to the next object (after bufctl) */
+#define next_obj(allocator, obj) \
+ ((void *)(((uintptr_t)(obj)) + (allocator)->sa_objsize + \
+ sizeof(slab_bufctl_t)))
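+
+/*
+ * In-memory layout of a slab (illustrative):
+ *
+ *   s_addr -> | obj 0 | bufctl 0 | obj 1 | bufctl 1 | ... | obj N-1 | bufctl N-1 | struct slab |
+ *
+ * Each bufctl immediately follows its object, and the struct slab itself sits
+ * at the very end of the page block; this is why _slab_size() below charges
+ * sizeof(slab_bufctl_t) per object plus a single sizeof(struct slab).
+ */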
+
+GDB_DEFINE_HOOK(slab_obj_alloc, void *addr, slab_allocator_t *allocator)
+
+GDB_DEFINE_HOOK(slab_obj_free, void *addr, slab_allocator_t *allocator)
+
+/* Head of global list of slab allocators. This is used in the python gdb script */
+static slab_allocator_t *slab_allocators = NULL;
+
+/* Special case - allocator for allocation of slab_allocator objects. */
+static slab_allocator_t slab_allocator_allocator;
+
+/*
+ * This constant defines how many orders of magnitude (in page block
+ * sizes) we'll search for an optimal slab size (past the smallest
+ * possible slab size).
+ */
+#define SLAB_MAX_ORDER 5
+
+/**
+ * Given the object size and the number of objects, calculates
+ * the size of the slab. Each object includes a slab_bufctl_t,
+ * and each slab includes a slab struct.
+*/
+static size_t _slab_size(size_t objsize, size_t nobjs)
+{
+ return (nobjs * (objsize + sizeof(slab_bufctl_t)) + sizeof(struct slab));
+}
+
+/**
+ * Given the object size and the order, calculate how many objects
+ * can fit in a certain number of pages (excluding the slab struct).
+ *
+ * PAGE_SIZE << order effectively is just PAGE_SIZE * 2^order.
+*/
+static size_t _slab_nobjs(size_t objsize, size_t order)
+{
+ return (((PAGE_SIZE << order) - sizeof(struct slab)) /
+ (objsize + sizeof(slab_bufctl_t)));
+}
+
+static size_t _slab_waste(size_t objsize, size_t order)
+{
+ /* Waste is defined as the amount of unused space in the page
+ * block, that is the number of bytes in the page block minus
+ * the optimal slab size for that particular block size.
+ */
+ return ((PAGE_SIZE << order) -
+ _slab_size(objsize, _slab_nobjs(objsize, order)));
+}
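+
+/*
+ * Worked example (illustrative; the concrete numbers assume 4KB pages, a
+ * 16-byte slab_bufctl_t, and a 32-byte struct slab, which can differ per
+ * build): for 100-byte objects at order 0,
+ *   _slab_nobjs = (4096 - 32) / 116 = 35 objects,
+ *   _slab_size  = 35 * 116 + 32    = 4092 bytes,
+ *   _slab_waste = 4096 - 4092      = 4 bytes.
+ * _calc_slab_size() below repeats this for larger orders and keeps whichever
+ * order wastes the least space.
+ */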
+
+static void _calc_slab_size(slab_allocator_t *allocator)
+{
+ size_t best_order;
+ size_t best_waste;
+ size_t order;
+ size_t minorder;
+ size_t minsize;
+ size_t waste;
+
+ /* Find the minimum page block size that this slab requires. */
+ minsize = _slab_size(allocator->sa_objsize, 1);
+ for (minorder = 0; minorder < PAGE_NSIZES; minorder++)
+ {
+ if ((PAGE_SIZE << minorder) >= minsize)
+ {
+ break;
+ }
+ }
+ if (minorder == PAGE_NSIZES)
+ panic("unable to find minorder\n");
+
+ /* Start the search with the minimum block size for this slab. */
+ best_order = minorder;
+ best_waste = _slab_waste(allocator->sa_objsize, minorder);
+
+ dbg(DBG_MM, "calc_slab_size: minorder %lu, waste %lu\n", minorder,
+ best_waste);
+
+ /* Find the optimal number of objects per slab and slab size,
+ * up to a predefined (somewhat arbitrary) limit on the number
+ * of pages per slab.
+ */
+ for (order = minorder + 1; order < SLAB_MAX_ORDER; order++)
+ {
+ if ((waste = _slab_waste(allocator->sa_objsize, order)) < best_waste)
+ {
+ best_waste = waste;
+ best_order = order;
+ dbg(DBG_MM, "calc_slab_size: replacing with order %lu, waste %lu\n",
+ best_order, best_waste);
+ }
+ }
+
+ /* Finally, the best page block size wins.
+ */
+ allocator->sa_order = best_order;
+ allocator->sa_slab_nobjs = _slab_nobjs(allocator->sa_objsize, best_order);
+ KASSERT(allocator->sa_slab_nobjs);
+}
+
+/*
+ * Initializes a given allocator using the name and size passed in.
+*/
+static void _allocator_init(slab_allocator_t *allocator, const char *name,
+ size_t size)
+{
+#ifdef SLAB_REDZONE
+ /*
+ * Add space for the front and rear red-zones.
+ */
+ size += 2 * sizeof(uintptr_t);
+#endif
+
+ if (!name)
+ {
+ name = "<unnamed>";
+ }
+
+ allocator->sa_name = name;
+ allocator->sa_objsize = size;
+ allocator->sa_slabs = NULL;
+ // this will set the fields sa_order and the number of objects per slab
+ _calc_slab_size(allocator);
+
+ /* Add cache to global cache list. */
+ allocator->sa_next = slab_allocators;
+ slab_allocators = allocator;
+
+ dbg(DBG_MM, "Initialized new slab allocator:\n");
+ dbgq(DBG_MM, " Name: \"%s\" (0x%p)\n", allocator->sa_name,
+ allocator);
+ dbgq(DBG_MM, " Object Size: %lu\n", allocator->sa_objsize);
+ dbgq(DBG_MM, " Order: %lu\n", allocator->sa_order);
+ dbgq(DBG_MM, " Slab Capacity: %lu\n", allocator->sa_slab_nobjs);
+}
+
+/*
+ * Given a name and size of object will create a slab_allocator
+ * to manage slabs that store objects of size `size`, along with
+ * some metadata.
+*/
+slab_allocator_t *slab_allocator_create(const char *name, size_t size)
+{
+ slab_allocator_t *allocator;
+
+ allocator = (slab_allocator_t *)slab_obj_alloc(&slab_allocator_allocator);
+ if (!allocator)
+ {
+ return NULL;
+ }
+
+ _allocator_init(allocator, name, size);
+ return allocator;
+}
+
+/*
+ * Free a given allocator.
+*/
+void slab_allocator_destroy(slab_allocator_t *allocator)
+{
+ slab_obj_free(&slab_allocator_allocator, allocator);
+}
+
+/*
+ * In the event that a slab with free objects is not found,
+ * this routine will be called.
+*/
+static long _slab_allocator_grow(slab_allocator_t *allocator)
+{
+ void *addr;
+ void *obj;
+ struct slab *slab;
+
+ addr = page_alloc_n(1UL << allocator->sa_order);
+ if (!addr)
+ {
+ return 0;
+ }
+
+ /* Initialize each bufctl to be free and point to the next object. */
+ obj = addr;
+ for (size_t i = 0; i < (allocator->sa_slab_nobjs - 1); i++)
+ {
+#ifdef SLAB_CHECK_FREE
+ obj_bufctl(allocator, obj)->sb_free = 1;
+#endif
+ obj = obj_bufctl(allocator, obj)->sb_next = next_obj(allocator, obj);
+ }
+
+ /* The last bufctl is the tail of the list. */
+#ifdef SLAB_CHECK_FREE
+ obj_bufctl(allocator, obj)->sb_free = 1;
+#endif
+ obj_bufctl(allocator, obj)->sb_next = NULL;
+
+ /* After the last object comes the slab structure itself. */
+ slab = (struct slab *)next_obj(allocator, obj);
+
+ /*
+ * The first object in the slab will be the head of the free
+ * list and the start address of the slab.
+ */
+ slab->s_free = addr;
+ slab->s_addr = addr;
+ slab->s_inuse = 0;
+
+ /* Initialize objects. */
+ obj = addr;
+ for (size_t i = 0; i < allocator->sa_slab_nobjs; i++)
+ {
+#ifdef SLAB_REDZONE
+ front_rz(obj) = SLAB_REDZONE;
+ rear_rz(allocator, obj) = SLAB_REDZONE;
+#endif
+ obj = next_obj(allocator, obj);
+ }
+
+ dbg(DBG_MM, "Growing cache \"%s\" (0x%p), new slab 0x%p (%lu pages)\n",
+ allocator->sa_name, allocator, slab, 1UL << allocator->sa_order);
+
+ /* Place this slab into the cache. */
+ slab->s_next = allocator->sa_slabs;
+ allocator->sa_slabs = slab;
+
+ return 1;
+}
+
+/*
+ * Given an allocator, will allocate an object.
+*/
+void *slab_obj_alloc(slab_allocator_t *allocator)
+{
+ struct slab *slab;
+ void *obj;
+
+ /* Find a slab with a free object. */
+ for (;;)
+ {
+ slab = allocator->sa_slabs;
+ while (slab && (slab->s_inuse == allocator->sa_slab_nobjs))
+ slab = slab->s_next;
+ if (slab && (slab->s_inuse < allocator->sa_slab_nobjs))
+ {
+ break;
+ }
+ if (!_slab_allocator_grow(allocator))
+ {
+ return NULL;
+ }
+ }
+
+ /*
+ * Remove an object from the slab's free list. We'll use the
+ * free list pointer to store a pointer back to the containing
+ * slab.
+ */
+ obj = slab->s_free;
+ slab->s_free = obj_bufctl(allocator, obj)->sb_next;
+ obj_bufctl(allocator, obj)->sb_slab = slab;
+#ifdef SLAB_CHECK_FREE
+ obj_bufctl(allocator, obj)->sb_free = 0;
+#endif
+
+ slab->s_inuse++;
+
+ dbg(DBG_MM,
+ "Allocated object 0x%p from \"%s\" (0x%p), "
+ "slab 0x%p, inuse %lu\n",
+        obj, allocator->sa_name, allocator, slab, slab->s_inuse);
+
+#ifdef SLAB_REDZONE
+ VERIFY_REDZONES(allocator, obj);
+
+ /*
+ * Make object pointer point past the first red-zone.
+ */
+ obj = (void *)((uintptr_t)obj + sizeof(uintptr_t));
+#endif
+
+ GDB_CALL_HOOK(slab_obj_alloc, obj, allocator);
+ return obj;
+}
+
+void slab_obj_free(slab_allocator_t *allocator, void *obj)
+{
+ struct slab *slab;
+ GDB_CALL_HOOK(slab_obj_free, obj, allocator);
+
+#ifdef SLAB_REDZONE
+ /* Move pointer back to verify that the REDZONE is unchanged. */
+ obj = (void *)((uintptr_t)obj - sizeof(uintptr_t));
+
+ VERIFY_REDZONES(allocator, obj);
+#endif
+
+#ifdef SLAB_CHECK_FREE
+ KASSERT(!obj_bufctl(allocator, obj)->sb_free && "INVALID FREE!");
+ obj_bufctl(allocator, obj)->sb_free = 1;
+#endif
+
+ slab = obj_bufctl(allocator, obj)->sb_slab;
+
+ /* Place this object back on the slab's free list. */
+ obj_bufctl(allocator, obj)->sb_next = slab->s_free;
+ slab->s_free = obj;
+
+ slab->s_inuse--;
+
+ dbg(DBG_MM, "Freed object 0x%p from \"%s\" (0x%p), slab 0x%p, inuse %lu\n",
+ obj, allocator->sa_name, allocator, slab, slab->s_inuse);
+}
+
+/*
+ * Reclaims as much memory (up to a target) from
+ * unused slabs as possible
+ * @param target - target number of pages to reclaim. If negative,
+ * try to reclaim as many pages as possible
+ * @return number of pages freed
+ */
+long slab_allocators_reclaim(long target)
+{
+ panic("slab_allocators_reclaim NYI for SMP");
+ // spinlock_lock(&allocator->sa_lock);
+ // int npages_freed = 0, npages;
+
+ // slab_allocator_t *a;
+ // struct slab *s, **prev;
+
+ // /* Go through all caches */
+ // for (a = slab_allocators; NULL != a; a = a->sa_next) {
+ // prev = &(a->sa_slabs);
+ // s = a->sa_slabs;
+ // while (NULL != s) {
+ // struct slab *next = s->s_next;
+ // if (0 == s->s_inuse) {
+ // /* Free Slab */
+ // (*prev) = next;
+ // npages = 1 << a->sa_order;
+
+ // page_free_n(s->s_addr, npages);
+ // npages_freed += npages;
+ // } else {
+ // prev = &(s->s_next);
+ // }
+ // /* Check if target was met */
+ // if ((target > 0) && (npages_freed >= target)) {
+ // return npages_freed;
+ // }
+ // s = next;
+ // }
+ // }
+ // spinlock_unlock(&allocator->sa_lock);
+ // return npages_freed;
+}
+
+#define KMALLOC_SIZE_MIN_ORDER (6)
+#define KMALLOC_SIZE_MAX_ORDER (18)
+
+static slab_allocator_t
+ *kmalloc_allocators[KMALLOC_SIZE_MAX_ORDER - KMALLOC_SIZE_MIN_ORDER + 1];
+
+/* Note that kmalloc_allocator_names should be modified to remain consistent
+ * with KMALLOC_SIZE_MIN_ORDER ... KMALLOC_SIZE_MAX_ORDER.
+ */
+static const char *kmalloc_allocator_names[] = {
+ "size-64", "size-128", "size-256", "size-512", "size-1024",
+ "size-2048", "size-4096", "size-8192", "size-16384", "size-32768",
+ "size-65536", "size-131072", "size-262144"};
+
+void *kmalloc(size_t size)
+{
+ size += sizeof(slab_allocator_t *);
+
+ /*
+ * Find the first power of two bucket bigger than the
+ * requested size, and allocate from it.
+ */
+ slab_allocator_t **cs = kmalloc_allocators;
+ for (size_t order = KMALLOC_SIZE_MIN_ORDER; order <= KMALLOC_SIZE_MAX_ORDER;
+ order++, cs++)
+ {
+ if ((1UL << order) >= size)
+ {
+ void *addr = slab_obj_alloc(*cs);
+ if (!addr)
+ {
+ dbg(DBG_MM, "WARNING: kmalloc out of memory\n");
+ return NULL;
+ }
+#ifdef MM_POISON
+ memset(addr, MM_POISON_ALLOC, size);
+#endif /* MM_POISON */
+ *((slab_allocator_t **)addr) = *cs;
+ return (void *)(((slab_allocator_t **)addr) + 1);
+ }
+ }
+
+ panic("size bigger than maxorder %ld\n", size);
+}
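+
+/*
+ * Layout of a kmalloc'd block (illustrative): the owning allocator pointer is
+ * stored in the first sizeof(slab_allocator_t *) bytes of the slab object and
+ * the caller receives the address just past it, which kfree() below reverses:
+ *
+ *   | slab_allocator_t * | caller-visible bytes ... |
+ *   ^ slab object start    ^ pointer returned by kmalloc
+ */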
+
+__attribute__((used)) static void *malloc(size_t size)
+{
+ /* This function is used by gdb to allocate memory
+ * within the kernel, no code in the kernel should
+ * call it. */
+ return kmalloc(size);
+}
+
+void kfree(void *addr)
+{
+ addr = (void *)(((slab_allocator_t **)addr) - 1);
+ slab_allocator_t *sa = *(slab_allocator_t **)addr;
+
+#ifdef MM_POISON
+ /* If poisoning is enabled, wipe the memory given in
+ * this object, as specified by the cache object size
+ * (minus red-zone overhead, if any).
+ */
+ size_t objsize = sa->sa_objsize;
+#ifdef SLAB_REDZONE
+ objsize -= sizeof(uintptr_t) * 2;
+#endif /* SLAB_REDZONE */
+ memset(addr, MM_POISON_FREE, objsize);
+#endif /* MM_POISON */
+
+ slab_obj_free(sa, addr);
+}
+
+__attribute__((used)) static void free(void *addr)
+{
+ /* This function is used by gdb to free memory allocated
+ * by malloc, no code in the kernel should call it. */
+ kfree(addr);
+}
+
+void slab_init()
+{
+ /* Special case initialization of the allocator for `slab_allocator_t`s */
+ /* In other words, initializes a slab allocator for other slab allocators. */
+ _allocator_init(&slab_allocator_allocator, "slab_allocators",
+ sizeof(slab_allocator_t));
+
+ /*
+ * Allocate the power of two buckets for generic
+ * kmalloc/kfree.
+ */
+ slab_allocator_t **cs = kmalloc_allocators;
+ for (size_t order = KMALLOC_SIZE_MIN_ORDER; order <= KMALLOC_SIZE_MAX_ORDER;
+ order++, cs++)
+ {
+ if (NULL ==
+ (*cs = slab_allocator_create(
+ kmalloc_allocator_names[order - KMALLOC_SIZE_MIN_ORDER],
+ (1UL << order))))
+ {
+ panic("Couldn't create kmalloc allocators!\n");
+ }
+ }
+}
diff --git a/kernel/mm/slab.py b/kernel/mm/slab.py
new file mode 100644
index 0000000..1b0c8fb
--- /dev/null
+++ b/kernel/mm/slab.py
@@ -0,0 +1,55 @@
+import gdb
+
+import weenix
+import weenix.kmem
+
+
+class SlabCommand(weenix.Command):
+ def __init__(self):
+ weenix.Command.__init__(self, "slab", gdb.COMMAND_DATA)
+
+ def _allocators(self):
+ l = list()
+ for alloc in weenix.kmem.allocators():
+ l.append(alloc)
+ return l
+
+ def invoke(self, args, tty):
+ names = list()
+ slabs = list()
+ sizes = list()
+ counts = list()
+
+ names.append("")
+ slabs.append("slabs")
+ sizes.append("objsize")
+ counts.append("allocated")
+
+ for alloc in weenix.kmem.allocators():
+ names.append(alloc.name())
+ slabs.append(str(len(list(alloc.slabs()))))
+ sizes.append(str(alloc.size()))
+ counts.append(str(len(list(alloc.objs()))))
+
+ namewidth = max(map(lambda x: len(x), names))
+ slabwidth = max(map(lambda x: len(x), slabs))
+ sizewidth = max(map(lambda x: len(x), sizes))
+ countwidth = max(map(lambda x: len(x), counts))
+
+ for name, slab, size, count in zip(names, slabs, sizes, counts):
+ print(
+ "{1:<{0}} {3:>{2}} {5:>{4}} {7:>{6}}".format(
+ namewidth, name, slabwidth, slab, sizewidth, size, countwidth, count
+ )
+ )
+
+    def complete(self, line, word):
+        l = [alloc.name() for alloc in self._allocators()]
+        l = [name for name in l if name.startswith(word)]
+        for used in line.split():
+            l = [name for name in l if name != used]
+        l.sort()
+        return l
+
+
+SlabCommand()
diff --git a/kernel/proc/context.c b/kernel/proc/context.c
new file mode 100644
index 0000000..b1902d8
--- /dev/null
+++ b/kernel/proc/context.c
@@ -0,0 +1,150 @@
+
+#include "proc/context.h"
+#include "proc/kthread.h"
+#include <main/cpuid.h>
+
+#include "main/apic.h"
+#include "main/gdt.h"
+
+typedef struct context_initial_func_args
+{
+ context_func_t func;
+ long arg1;
+ void *arg2;
+} packed context_initial_func_args_t;
+
+static void __context_thread_initial_func(context_initial_func_args_t args)
+{
+ preemption_reset();
+ apic_setipl(IPL_LOW);
+ intr_enable();
+
+ void *result = (args.func)(args.arg1, args.arg2);
+ kthread_exit(result);
+
+ panic("\nReturned from kthread_exit.\n");
+}
+
+void context_setup_raw(context_t *c, void (*func)(), void *kstack,
+ size_t kstacksz, pml4_t *pml4)
+{
+ KASSERT(NULL != pml4);
+ KASSERT(PAGE_ALIGNED(kstack));
+ c->c_kstack = (uintptr_t)kstack;
+ c->c_kstacksz = kstacksz;
+ c->c_pml4 = pml4;
+ c->c_rsp = (uintptr_t)kstack + kstacksz;
+ c->c_rsp -= sizeof(uintptr_t);
+ *((uintptr_t *)c->c_rsp) = 0;
+ c->c_rbp = c->c_rsp;
+ c->c_rip = (uintptr_t)func;
+}
+
+/*
+ * Initializes a context_t struct with the given parameters. arg1 and arg2 will
+ * appear as arguments to the function passed in when this context is first
+ * used.
+ */
+void context_setup(context_t *c, context_func_t func, long arg1, void *arg2,
+ void *kstack, size_t kstacksz, pml4_t *pml4)
+{
+ KASSERT(NULL != pml4);
+ KASSERT(PAGE_ALIGNED(kstack));
+
+ c->c_kstack = (uintptr_t)kstack;
+ c->c_kstacksz = kstacksz;
+ c->c_pml4 = pml4;
+
+ /* put the arguments for __context_thread_initial_func onto the
+ * stack */
+ c->c_rsp = (uintptr_t)kstack + kstacksz;
+ c->c_rsp -= sizeof(arg2);
+ *(void **)c->c_rsp = arg2;
+ c->c_rsp -= sizeof(arg1);
+ *(long *)c->c_rsp = arg1;
+ c->c_rsp -= sizeof(context_func_t);
+ *(context_func_t *)c->c_rsp = func;
+ // Take space for the function return address (unused)
+ c->c_rsp -= sizeof(uintptr_t);
+
+ c->c_rbp = c->c_rsp;
+ c->c_rip = (uintptr_t)__context_thread_initial_func;
+}
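+
+/*
+ * Resulting initial stack layout for context_setup() (illustrative, highest
+ * address first):
+ *
+ *   kstack + kstacksz -> +---------------------------+
+ *                        | arg2                      |
+ *                        | arg1                      |
+ *                        | func                      |
+ *   c_rsp, c_rbp      -> | (unused return address)   |
+ *                        +---------------------------+
+ *
+ * When execution first "returns" into __context_thread_initial_func, the three
+ * slots above the dummy return address are read as its by-value
+ * context_initial_func_args_t argument.
+ */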
+
+/*
+ * WARNING!! POTENTIAL EDITOR BEWARE!!
+ * IF YOU REMOVE THE PT_SET CALLS BELOW,
+ * YOU ***MUST*** DEAL WITH SMP TLB SHOOTDOWN
+ *
+ * IN OTHER WORDS, THINK *VERY* CAREFULLY BEFORE
+ * REMOVING THE CALLS TO PT_SET BELOW
+ */
+
+void context_make_active(context_t *c)
+{
+ // gdt_set_kernel_stack((void *)((uintptr_t)c->c_kstack + c->c_kstacksz));
+ pt_set(c->c_pml4);
+
+ /* Switch stacks and run the thread */
+ __asm__ volatile(
+ "movq %0,%%rbp\n\t" /* update rbp */
+ "movq %1,%%rsp\n\t" /* update rsp */
+ "push %2\n\t" /* save rip */
+ "ret" /* jump to new rip */
+ ::"m"(c->c_rbp),
+ "m"(c->c_rsp), "m"(c->c_rip));
+}
+
+void context_switch(context_t *oldc, context_t *newc)
+{
+ gdt_set_kernel_stack(
+ (void *)((uintptr_t)newc->c_kstack + newc->c_kstacksz));
+
+ // sanity check that core-specific data is being managed (paged in)
+ // correctly
+ KASSERT(oldc->c_pml4 == pt_get());
+ uintptr_t curthr_paddr =
+ pt_virt_to_phys_helper(oldc->c_pml4, (uintptr_t)&curthr);
+ uintptr_t new_curthr_paddr =
+ pt_virt_to_phys_helper(newc->c_pml4, (uintptr_t)&curthr);
+
+ kthread_t *prev_curthr = curthr;
+ pt_set(newc->c_pml4);
+ KASSERT(pt_get() == newc->c_pml4);
+
+ KASSERT(curthr_paddr == new_curthr_paddr);
+ KASSERT(prev_curthr == curthr);
+
+ /*
+ * Save the current value of the stack pointer and the frame pointer into
+ * the old context. Set the instruction pointer to the return address
+ * (whoever called us).
+ */
+ __asm__ volatile(
+ "pushfq;" /* save RFLAGS on the stack */
+ "pushq %%rbp \n" /* save base pointer */
+ "pushq %%rbx \n" /* save other callee-saved registers */
+ "pushq %%r12 \n"
+ "pushq %%r13 \n"
+ "pushq %%r14 \n"
+ "pushq %%r15 \n"
+ "movq %%rsp, %0 \n" /* save RSP into oldc */
+ "movq %2, %%rsp \n" /* restore RSP from newc */
+ "pushq %%rax\n\t"
+ "movabs $1f, %%rax \n\t" /* save RIP into oldc (saves the label '1'
+ below) */
+ "mov %%rax, %1\n\t"
+ "popq %%rax\n\t"
+ "pushq %3 \n\t" /* restore RIP */
+ "ret \n\t"
+ "1:\t" /* this is where oldc starts executing later */
+ "popq %%r15 \n\t" /* restore callee-saved registers */
+ "popq %%r14 \n\t"
+ "popq %%r13 \n\t"
+ "popq %%r12 \n\t"
+ "popq %%rbx \n\t"
+ "popq %%rbp \n\t" /* restore base pointer */
+ "popfq" /* restore RFLAGS */
+ : "=m"(oldc->c_rsp), "=m"(oldc->c_rip)
+ : "m"(newc->c_rsp), "m"(newc->c_rip));
+}
diff --git a/kernel/proc/fork.c b/kernel/proc/fork.c
new file mode 100644
index 0000000..358b891
--- /dev/null
+++ b/kernel/proc/fork.c
@@ -0,0 +1,62 @@
+#include "errno.h"
+#include "globals.h"
+#include "types.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+#include "mm/mm.h"
+#include "mm/mman.h"
+#include "mm/pframe.h"
+#include "mm/tlb.h"
+
+#include "fs/vnode.h"
+
+#include "vm/shadow.h"
+
+#include "api/exec.h"
+
+/* Pushes the appropriate things onto the kernel stack of a newly forked thread
+ * so that it can begin execution in userland_entry.
+ * regs: registers the new thread should have on execution
+ * kstack: location of the new thread's kernel stack
+ * Returns the new stack pointer on success. */
+static uintptr_t fork_setup_stack(const regs_t *regs, void *kstack)
+{
+    /* Leave room for a kernel-side dummy return address, the regs_t copy, and
+     * a userland dummy return address */
+ uint64_t rsp =
+ ((uint64_t)kstack) + DEFAULT_STACK_SIZE - (sizeof(regs_t) + 16);
+ memcpy((void *)(rsp + 8), regs, sizeof(regs_t)); /* Copy over struct */
+ return rsp;
+}
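+
+/*
+ * Resulting child kernel stack prepared by fork_setup_stack() (illustrative,
+ * highest address first):
+ *
+ *   kstack + DEFAULT_STACK_SIZE -> +---------------------------+
+ *                                  | userland dummy return (8) |
+ *                                  | regs_t copy               |
+ *   returned rsp                -> | kernel dummy return (8)   |
+ *                                  +---------------------------+
+ */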
+
+/*
+ * This function implements the fork(2) system call.
+ *
+ * TODO:
+ * 1) Use proc_create() and kthread_clone() to set up a new process and thread. If
+ * either fails, perform any appropriate cleanup.
+ * 2) Finish any initialization work for the new process and thread.
+ * 3) Fix the values of the registers and the rest of the kthread's ctx.
+ * Some registers can be accessed from the cloned kthread's context (see the context_t
+ * and kthread_t structs for more details):
+ * a) We want the child process to also enter userland execution.
+ * For this, the instruction pointer should point to userland_entry (see exec.c).
+ * b) Remember that the only difference between the parent and child processes
+ * is the return value of fork(). This value is returned in the RAX register,
+ * and the return value should be 0 for the child. The parent's return value would
+ * be the process id of the newly created child process.
+ * c) Before the process begins execution in userland_entry,
+ * we need to push all registers onto the kernel stack of the kthread.
+ * Use fork_setup_stack to do this, and set RSP accordingly.
+ * d) Use pt_unmap_range and tlb_flush_all on the parent in advance of
+ * copy-on-write.
+ * 4) Prepare the child process to be run on the CPU.
+ * 5) Return the child's process id to the parent.
+ */
+long do_fork(struct regs *regs)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
diff --git a/kernel/proc/kmutex.c b/kernel/proc/kmutex.c
new file mode 100644
index 0000000..0433468
--- /dev/null
+++ b/kernel/proc/kmutex.c
@@ -0,0 +1,88 @@
+// SMP.1 + SMP.3
+// spinlock + mask interrupts
+#include "proc/kmutex.h"
+#include "globals.h"
+#include "main/interrupt.h"
+#include <errno.h>
+
+/*
+ * IMPORTANT: Mutexes can _NEVER_ be locked or unlocked from an
+ * interrupt context. Mutexes are _ONLY_ lock or unlocked from a
+ * thread context.
+ */
+
+/*
+ * Checks for the specific deadlock case where:
+ * curthr wants mtx, but the owner of mtx is waiting on a mutex that curthr is
+ * holding
+ */
+#define DEBUG_DEADLOCKS 1
+void detect_deadlocks(kmutex_t *mtx)
+{
+#if DEBUG_DEADLOCKS
+ list_iterate(&curthr->kt_mutexes, held, kmutex_t, km_link)
+ {
+ list_iterate(&held->km_waitq.tq_list, waiter, kthread_t, kt_qlink)
+ {
+ if (waiter == mtx->km_holder)
+ {
+ panic(
+ "detected deadlock between P%d and P%d (mutexes 0x%p, "
+ "0x%p)\n",
+ curproc->p_pid, waiter->kt_proc->p_pid, held, mtx);
+ }
+ }
+ }
+#endif
+}
+
+/*
+ * Initializes the members of mtx
+ */
+void kmutex_init(kmutex_t *mtx)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
+
+/*
+ * Obtains a mutex, potentially blocking.
+ *
+ * Hints:
+ * You are strongly advised to maintain the kt_mutexes member of curthr and call
+ * detect_deadlocks() to help debugging.
+ */
+void kmutex_lock(kmutex_t *mtx)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
+
+/*
+ * Releases a mutex.
+ *
+ * Hints:
+ * Again, you are strongly advised to maintain kt_mutexes.
+ * Use sched_wakeup_on() to hand off the mutex - think carefully about how
+ * these two functions interact to ensure that the mutex's km_holder is
+ * properly set before the new owner is runnable.
+ */
+void kmutex_unlock(kmutex_t *mtx)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
+
+/*
+ * Checks if mtx's wait queue is empty.
+ */
+long kmutex_has_waiters(kmutex_t *mtx)
+{
+ return !sched_queue_empty(&mtx->km_waitq);
+ ;
+}
+
+/*
+ * Checks if the current thread owns mtx.
+ */
+inline long kmutex_owns_mutex(kmutex_t *mtx)
+{
+ return curthr && mtx->km_holder == curthr;
+}
diff --git a/kernel/proc/kthread.c b/kernel/proc/kthread.c
new file mode 100644
index 0000000..f1c541c
--- /dev/null
+++ b/kernel/proc/kthread.c
@@ -0,0 +1,136 @@
+// SMP.1 for non-curthr actions; none for curthr
+#include "config.h"
+#include "globals.h"
+#include "mm/slab.h"
+#include "util/debug.h"
+#include "util/string.h"
+
+/*==========
+ * Variables
+ *=========*/
+
+/*
+ * Global variable maintaining the current thread on the cpu
+ */
+kthread_t *curthr CORE_SPECIFIC_DATA;
+
+/*
+ * Private slab for kthread structs
+ */
+static slab_allocator_t *kthread_allocator = NULL;
+
+/*=================
+ * Helper functions
+ *================*/
+
+/*
+ * Allocates a new kernel stack. Returns null when not enough memory.
+ */
+static char *alloc_stack() { return page_alloc_n(DEFAULT_STACK_SIZE_PAGES); }
+
+/*
+ * Frees an existing kernel stack.
+ */
+static void free_stack(char *stack)
+{
+ page_free_n(stack, DEFAULT_STACK_SIZE_PAGES);
+}
+
+/*==========
+ * Functions
+ *=========*/
+
+/*
+ * Initializes the kthread_allocator.
+ */
+void kthread_init()
+{
+ KASSERT(__builtin_popcount(DEFAULT_STACK_SIZE_PAGES) == 1 &&
+ "stack size should be a power of 2 pages to reduce fragmentation");
+ kthread_allocator = slab_allocator_create("kthread", sizeof(kthread_t));
+ KASSERT(kthread_allocator);
+}
+
+/*
+ * Creates and initializes a thread.
+ * Returns a new kthread, or NULL on failure.
+ *
+ * Hints:
+ * Use kthread_allocator to allocate a kthread
+ * Use alloc_stack() to allocate a kernel stack
+ * Use context_setup() to set up the thread's context -
+ * also use DEFAULT_STACK_SIZE and the process's pagetable (p_pml4)
+ * Remember to initialize all the thread's fields
+ * Remember to add the thread to proc's threads list
+ * Initialize the thread's kt_state to KT_NO_STATE
+ * Initialize the thread's kt_recent_core to ~0UL (unsigned -1)
+ */
+kthread_t *kthread_create(proc_t *proc, kthread_func_t func, long arg1,
+ void *arg2)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+ return NULL;
+}
+
+/*
+ * Creates and initializes a thread that is a clone of thr.
+ * Returns a new kthread, or null on failure.
+ *
+ * P.S. Note that you do not need to implement this function until VM.
+ *
+ * Hints:
+ * The only parts of the context that must be initialized are c_kstack and
+ * c_kstacksz. The thread's process should be set outside of this function. Copy
+ * over thr's retval, errno, and cancelled; other fields should be freshly
+ * initialized. See kthread_create() for more hints.
+ */
+kthread_t *kthread_clone(kthread_t *thr)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return NULL;
+}
+
+/*
+ * Free the thread's stack, remove it from its process's list of threads, and
+ * free the kthread_t struct itself. Protect access to the kthread using its
+ * kt_lock.
+ *
+ * You cannot destroy curthr.
+ */
+void kthread_destroy(kthread_t *thr)
+{
+ KASSERT(thr != curthr);
+ KASSERT(thr && thr->kt_kstack);
+ if (thr->kt_state != KT_EXITED)
+ panic("destroying thread in state %d\n", thr->kt_state);
+ free_stack(thr->kt_kstack);
+ if (list_link_is_linked(&thr->kt_plink))
+ list_remove(&thr->kt_plink);
+
+ slab_obj_free(kthread_allocator, thr);
+}
+
+/*
+ * Sets the thread's return value and cancels the thread.
+ *
+ * Note: Check out the use of check_curthr_cancelled() in syscall_handler()
+ * to see how a thread eventually notices it is cancelled and handles exiting
+ * itself.
+ *
+ * Hints:
+ * This should not be called on curthr.
+ * Use sched_cancel() to actually mark the thread as cancelled. This way you
+ * can take care of all cancellation cases.
+ */
+void kthread_cancel(kthread_t *thr, void *retval)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
+
+/*
+ * Wrapper around proc_thread_exiting().
+ */
+void kthread_exit(void *retval)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
diff --git a/kernel/proc/kthread.gdb b/kernel/proc/kthread.gdb
new file mode 100644
index 0000000..9c6e160
--- /dev/null
+++ b/kernel/proc/kthread.gdb
@@ -0,0 +1,39 @@
+define kstack
+ if $argc == 0
+ set $kthr = curthr
+ else
+ set $kthr = $arg0
+ end
+
+    set $save_rip = $rip
+    set $save_rbp = $rbp
+    set $save_rsp = $rsp
+
+    # Note: assumes the 64-bit regs_t field names (r_rip/r_rbp/r_rsp); the
+    # context_t fields below match proc/context.c.
+    if ($kthr == curthr) && (_intr_regs != NULL)
+        set $rip = _intr_regs->r_rip
+        set $rbp = _intr_regs->r_rbp
+        set $rsp = _intr_regs->r_rsp
+        info stack
+    else
+        if $kthr != curthr
+            set $rip = $kthr->kt_ctx.c_rip
+            set $rbp = $kthr->kt_ctx.c_rbp
+            set $rsp = $kthr->kt_ctx.c_rsp
+            info stack
+        else
+            info stack
+        end
+    end
+
+    set $rip = $save_rip
+    set $rbp = $save_rbp
+    set $rsp = $save_rsp
+end
+document kstack
+usage: kstack [kthread_t*]
+Takes a single, optional kthread_t as an argument.
+If no argument is given curthr is used instead. This
+command prints the current stack of the given thread.
+This includes detecting whether the given thread has
+been interrupted, and looking up the interrupted
+stack, rather than the interrupt stack (useful for
+viewing the stack trace which caused a page-fault).
+end \ No newline at end of file
diff --git a/kernel/proc/proc.c b/kernel/proc/proc.c
new file mode 100644
index 0000000..17ff5db
--- /dev/null
+++ b/kernel/proc/proc.c
@@ -0,0 +1,440 @@
+// SMP.1 + SMP.3
+// spinlock + mask interrupts
+#include "config.h"
+#include "errno.h"
+#include "fs/file.h"
+#include "fs/vfs.h"
+#include "fs/vnode.h"
+#include "globals.h"
+#include "kernel.h"
+#include "mm/slab.h"
+#include "util/debug.h"
+#include "util/printf.h"
+#include "util/string.h"
+#include "util/time.h"
+#include <drivers/screen.h>
+#include <fs/vfs_syscall.h>
+#include <main/apic.h>
+
+/*==========
+ * Variables
+ *=========*/
+
+/*
+ * Global variable that maintains the current process
+ */
+proc_t *curproc CORE_SPECIFIC_DATA;
+
+/*
+ * Global list of all processes (except for the idle process) and its lock
+ */
+static list_t proc_list = LIST_INITIALIZER(proc_list);
+
+/*
+ * Allocator for process descriptors
+ */
+static slab_allocator_t *proc_allocator = NULL;
+
+/*
+ * Statically allocated idle process
+ * Each core has its own idleproc, so the idleproc is stored in static memory
+ * rather than in the global process list
+ */
+proc_t idleproc CORE_SPECIFIC_DATA;
+
+/*
+ * Pointer to the init process
+ */
+static proc_t *proc_initproc = NULL;
+
+/*===============
+ * System startup
+ *==============*/
+
+/*
+ * Initializes the allocator for process descriptors.
+ */
+void proc_init()
+{
+ proc_allocator = slab_allocator_create("proc", sizeof(proc_t));
+ KASSERT(proc_allocator);
+}
+
+/*
+ * Initializes idleproc for the current core. Sets initial values for curproc
+ * and curthr.
+ */
+void proc_idleproc_init()
+{
+ proc_t *proc = &idleproc;
+
+ proc->p_pid = 0;
+ list_init(&proc->p_threads);
+ list_init(&proc->p_children);
+ proc->p_pproc = NULL;
+
+ list_link_init(&proc->p_child_link);
+ list_link_init(&proc->p_list_link);
+
+ proc->p_status = 0;
+ proc->p_state = PROC_RUNNING;
+
+ memset(&proc->p_wait, 0, sizeof(ktqueue_t)); // should not be used
+
+ proc->p_pml4 = pt_get();
+ proc->p_vmmap = vmmap_create();
+
+ proc->p_cwd = NULL;
+
+ memset(proc->p_files, 0, sizeof(proc->p_files));
+
+ char name[8];
+ snprintf(name, sizeof(name), "idle%ld", curcore.kc_id);
+ strncpy(proc->p_name, name, PROC_NAME_LEN);
+ proc->p_name[PROC_NAME_LEN - 1] = '\0';
+
+ dbg(DBG_PROC, "created %s\n", proc->p_name);
+ curproc = &idleproc;
+ curthr = NULL;
+}
+
+/*=================
+ * Helper functions
+ *================*/
+
+/*
+ * Gets the next available process ID (pid).
+ */
+static pid_t next_pid = 1;
+static pid_t _proc_getid()
+{
+ pid_t pid = next_pid;
+restart:
+ list_iterate(&proc_list, p, proc_t, p_list_link)
+ {
+ if (p->p_pid == pid)
+ {
+ pid = pid + 1 == PROC_MAX_COUNT ? 1 : pid + 1;
+ if (pid == next_pid)
+ {
+ return -1;
+ }
+ else
+ {
+ goto restart;
+ }
+ }
+ }
+ next_pid = pid + 1 == PROC_MAX_COUNT ? 1 : pid + 1;
+ KASSERT(pid);
+ return pid;
+}
+
+/*
+ * Searches the global process list for the process descriptor corresponding to
+ * a pid.
+ */
+proc_t *proc_lookup(pid_t pid)
+{
+ if (pid == 0)
+ {
+ return &idleproc;
+ }
+ list_iterate(&proc_list, p, proc_t, p_list_link)
+ {
+ if (p->p_pid == pid)
+ {
+ return p;
+ }
+ }
+ return NULL;
+}
+
+/*==========
+ * Functions
+ *=========*/
+
+/*
+ * Creates a new process with the given name.
+ * Returns the newly created process, or NULL on failure.
+ *
+ * Hints:
+ * Use _proc_getid() to get a new pid.
+ * Allocate a new proc_t with the process slab allocator (proc_allocator).
+ * Use pt_create() to create a new page table (p_pml4).
+ * If the newly created process is the init process (i.e. the generated PID
+ * matches the init process's PID, given by the macro PID_INIT), set the
+ * global proc_initproc to the created process.
+ *
+ * There is some setup to be done for VFS and VM - remember to return to this
+ * function! For VFS, clone and ref the files from curproc. For VM, clone the
+ * vmmap from curproc.
+ *
+ * Be sure to free resources appropriately if proc_create() fails midway!
+ */
+proc_t *proc_create(const char *name)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+ return NULL;
+}
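+
+/*
+ * Illustrative sketch only, kept under #if 0 so the stub above remains the
+ * single definition. It mirrors the setup done in proc_idleproc_init() and
+ * follows the hints; slab_obj_alloc() is assumed to be the allocation
+ * counterpart of slab_obj_free(), and the VFS/VM steps are left as comments.
+ */
+#if 0
+proc_t *proc_create(const char *name)
+{
+    pid_t pid = _proc_getid();
+    if (pid < 0)
+        return NULL;
+
+    proc_t *proc = slab_obj_alloc(proc_allocator); /* assumed allocator API */
+    if (!proc)
+        return NULL;
+    memset(proc, 0, sizeof(proc_t));
+
+    proc->p_pid = pid;
+    if (pid == PID_INIT)
+        proc_initproc = proc;
+
+    list_init(&proc->p_threads);
+    list_init(&proc->p_children);
+    list_link_init(&proc->p_child_link);
+    list_link_init(&proc->p_list_link);
+    sched_queue_init(&proc->p_wait); /* assumed to be visible here; idleproc uses memset instead */
+
+    proc->p_pproc = curproc;
+    proc->p_state = PROC_RUNNING;
+    proc->p_status = 0;
+
+    proc->p_pml4 = pt_create();
+    if (!proc->p_pml4)
+    {
+        slab_obj_free(proc_allocator, proc);
+        return NULL;
+    }
+
+    strncpy(proc->p_name, name, PROC_NAME_LEN);
+    proc->p_name[PROC_NAME_LEN - 1] = '\0';
+
+    list_insert_tail(&proc_list, &proc->p_list_link);
+    list_insert_tail(&curproc->p_children, &proc->p_child_link);
+
+    /* VFS: clone and ref curproc's open files. VM: clone curproc's vmmap. */
+    return proc;
+}
+#endif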
+
+/*
+ * Helper for proc_thread_exiting() that cleans up resources from the current
+ * process in preparation for its destruction (which occurs later via proc_destroy()).
+ * Reparents child processes to the init process, or initiates Weenix shutdown
+ * if the current process is the init process.
+ *
+ * Hints:
+ * You won't have much to clean up until VFS and VM -- remember to revisit this
+ * function later!
+ * **VFS/VM** - there may be some repeat code in proc_destroy(). The init process
+ * does not have a parent process and thus cleans itself up, hence why we need to clean up
+ * here as well.
+ *
+ * Remember to set the state and status of the process.
+ * The init process' PID is given by PID_INIT.
+ * Use initproc_finish() to shutdown Weenix when cleaning up the init process.
+ */
+void proc_cleanup(long status)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
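+
+/*
+ * Illustrative sketch only, kept under #if 0. PROC_DEAD is an assumed name for
+ * the exited process state (only PROC_RUNNING is visible in this file), and the
+ * VFS/VM cleanup is left as a comment.
+ */
+#if 0
+void proc_cleanup(long status)
+{
+    curproc->p_status = status;
+    curproc->p_state = PROC_DEAD; /* assumed name of the exited state */
+
+    /* VFS/VM: close open files, put p_cwd, destroy the vmmap (later). */
+
+    if (curproc->p_pid == PID_INIT)
+    {
+        initproc_finish(); /* shuts down Weenix when init exits */
+    }
+
+    /* Reparent any children to the init process. */
+    list_iterate(&curproc->p_children, child, proc_t, p_child_link)
+    {
+        list_remove(&child->p_child_link);
+        child->p_pproc = proc_initproc;
+        list_insert_tail(&proc_initproc->p_children, &child->p_child_link);
+    }
+}
+#endif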
+
+/*
+ * Cleans up the current process and the current thread, broadcasts on its
+ * parent's p_wait, then forces a context switch. After this, the process is
+ * essentially dead -- this function does not return. The parent must eventually
+ * finish destroying the process.
+ *
+ * Hints:
+ * Use proc_cleanup() to clean up the current process. As retval specifies the current
+ * thread's return value, you should pass (long)retval as the status argument to
+ * proc_cleanup().
+ * Remember to set the exit state and return value of the current thread after calling
+ * proc_cleanup(), as this may block and cause the thread's state to be overwritten.
+ * The context switch should be performed by a call to sched_switch().
+ */
+void proc_thread_exiting(void *retval)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
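+
+/*
+ * Illustrative sketch only, kept under #if 0. kt_retval is an assumed field
+ * name, and passing NULL to sched_switch() (so the exiting thread is never
+ * re-enqueued) is an assumption based on the kc_queue check in core_switch().
+ */
+#if 0
+void proc_thread_exiting(void *retval)
+{
+    proc_cleanup((long)retval);
+
+    /* Set these after proc_cleanup(), which may block and change kt_state. */
+    curthr->kt_state = KT_EXITED;
+    curthr->kt_retval = retval; /* assumed field name */
+
+    sched_broadcast_on(&curproc->p_pproc->p_wait);
+    sched_switch(NULL); /* never returns */
+}
+#endif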
+
+/*
+ * Cancels all the threads of proc. This should never be called on curproc.
+ *
+ * Hints:
+ * The status argument should be passed to kthread_cancel() as the retval.
+ */
+void proc_kill(proc_t *proc, long status)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
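+
+/*
+ * Illustrative sketch only, kept under #if 0: cancel every thread of proc,
+ * handing status through as the retval, per the hint above.
+ */
+#if 0
+void proc_kill(proc_t *proc, long status)
+{
+    KASSERT(proc != curproc);
+    list_iterate(&proc->p_threads, thr, kthread_t, kt_plink)
+    {
+        kthread_cancel(thr, (void *)status);
+    }
+}
+#endif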
+
+/*
+ * Kills all processes that are not curproc and not a direct child of idleproc (i.e.,
+ * the init process), then kills the current process.
+ *
+ * Hints:
+ * The PID of the idle process is given by PID_IDLE.
+ * Processes should be killed with a status of -1.
+ * Use do_exit() to kill the current process.
+ */
+void proc_kill_all()
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
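+
+/*
+ * Illustrative sketch only, kept under #if 0: skip curproc and direct children
+ * of the idle process (such as init), kill everything else with status -1, then
+ * exit the current process.
+ */
+#if 0
+void proc_kill_all()
+{
+    list_iterate(&proc_list, proc, proc_t, p_list_link)
+    {
+        if (proc != curproc && proc->p_pproc &&
+            proc->p_pproc->p_pid != PID_IDLE)
+        {
+            proc_kill(proc, -1);
+        }
+    }
+    do_exit(-1);
+}
+#endif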
+
+/*
+ * Destroy / free everything from proc. Be sure to remember reference counting
+ * when working on VFS.
+ *
+ * In contrast with proc_cleanup() (in which a process begins to clean itself up), this
+ * will be called on proc by some other process to complete its cleanup.
+ * I.e., the process we are destroying should not be curproc.
+ */
+void proc_destroy(proc_t *proc)
+{
+ list_remove(&proc->p_list_link);
+
+ list_iterate(&proc->p_threads, thr, kthread_t, kt_plink)
+ {
+ kthread_destroy(thr);
+ }
+
+#ifdef __VFS__
+ for (int fd = 0; fd < NFILES; fd++)
+ {
+ if (proc->p_files[fd])
+ fput(proc->p_files + fd);
+ }
+ if (proc->p_cwd)
+ {
+ vput(&proc->p_cwd);
+ }
+#endif
+
+#ifdef __VM__
+ if (proc->p_vmmap)
+ vmmap_destroy(&proc->p_vmmap);
+#endif
+
+ dbg(DBG_THR, "destroying P%d\n", proc->p_pid);
+
+ KASSERT(proc->p_pml4);
+ pt_destroy(proc->p_pml4);
+
+ slab_obj_free(proc_allocator, proc);
+}
+
+/*=============
+ * System calls
+ *============*/
+
+/*
+ * Waits for a child process identified by pid to exit. Finishes destroying the
+ * process and optionally returns the child's status in status.
+ *
+ * If pid is a positive integer, tries to clean up the process specified by pid.
+ * If pid is -1, cleans up any child process of curproc that exits.
+ *
+ * Returns the pid of the child process that exited, or error cases:
+ * - ENOTSUP: pid is 0, a negative number not equal to -1,
+ * or options are specified (options does not equal 0)
+ * - ECHILD: pid is a positive integer but not a child of curproc, or
+ * pid is -1 and the process has no children
+ *
+ * Hints:
+ * Use sched_sleep_on() to be notified of a child process exiting.
+ * Destroy an exited process by removing it from any lists and calling
+ * proc_destroy(). Remember to set status (if it was provided) to the child's
+ * status before destroying the process.
+ * If waiting on a specific child PID, wakeups from other exiting child
+ * processes should be ignored.
+ * If waiting on any child (-1), do_waitpid can return when *any* child has exited,
+ * it does not have to return the one that exited earliest.
+ * Which field can you use to determine whether a given process exited?
+ */
+pid_t do_waitpid(pid_t pid, int *status, int options)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+ return 0;
+}
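+
+/*
+ * Illustrative sketch only, kept under #if 0. PROC_DEAD is an assumed name for
+ * the exited process state; everything else follows the hints above.
+ */
+#if 0
+pid_t do_waitpid(pid_t pid, int *status, int options)
+{
+    if (options != 0 || pid == 0 || pid < -1)
+        return -ENOTSUP;
+    if (list_empty(&curproc->p_children))
+        return -ECHILD;
+
+    for (;;)
+    {
+        proc_t *dead = NULL;
+        long have_match = 0;
+        list_iterate(&curproc->p_children, child, proc_t, p_child_link)
+        {
+            if (pid != -1 && child->p_pid != pid)
+                continue;
+            have_match = 1;
+            if (child->p_state == PROC_DEAD) /* assumed state name */
+            {
+                dead = child;
+                break;
+            }
+        }
+        if (!have_match)
+            return -ECHILD; /* pid is not a child of curproc */
+
+        if (dead)
+        {
+            pid_t ret = dead->p_pid;
+            if (status)
+                *status = (int)dead->p_status;
+            list_remove(&dead->p_child_link);
+            proc_destroy(dead);
+            return ret;
+        }
+
+        /* No matching child has exited yet; sleep until one does. */
+        sched_sleep_on(&curproc->p_wait);
+    }
+}
+#endif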
+
+/*
+ * Wrapper around kthread_exit.
+ */
+void do_exit(long status)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
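+
+/*
+ * Illustrative sketch only, kept under #if 0: the status becomes the thread's
+ * retval, matching how proc_thread_exiting() casts it back to a long.
+ */
+#if 0
+void do_exit(long status)
+{
+    kthread_exit((void *)status);
+}
+#endif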
+
+/*==========
+ * Debugging
+ *=========*/
+
+size_t proc_info(const void *arg, char *buf, size_t osize)
+{
+ const proc_t *p = (proc_t *)arg;
+ size_t size = osize;
+ proc_t *child;
+
+ KASSERT(NULL != p);
+ KASSERT(NULL != buf);
+
+ iprintf(&buf, &size, "pid: %i\n", p->p_pid);
+ iprintf(&buf, &size, "name: %s\n", p->p_name);
+ if (NULL != p->p_pproc)
+ {
+ iprintf(&buf, &size, "parent: %i (%s)\n", p->p_pproc->p_pid,
+ p->p_pproc->p_name);
+ }
+ else
+ {
+ iprintf(&buf, &size, "parent: -\n");
+ }
+
+ if (list_empty(&p->p_children))
+ {
+ iprintf(&buf, &size, "children: -\n");
+ }
+ else
+ {
+ iprintf(&buf, &size, "children:\n");
+ }
+ list_iterate(&p->p_children, child, proc_t, p_child_link)
+ {
+ iprintf(&buf, &size, " %i (%s)\n", child->p_pid, child->p_name);
+ }
+
+ iprintf(&buf, &size, "status: %ld\n", p->p_status);
+ iprintf(&buf, &size, "state: %i\n", p->p_state);
+
+#ifdef __VFS__
+#ifdef __GETCWD__
+ if (NULL != p->p_cwd)
+ {
+ char cwd[256];
+ lookup_dirpath(p->p_cwd, cwd, sizeof(cwd));
+ iprintf(&buf, &size, "cwd: %-s\n", cwd);
+ }
+ else
+ {
+ iprintf(&buf, &size, "cwd: -\n");
+ }
+#endif /* __GETCWD__ */
+#endif
+
+#ifdef __VM__
+ iprintf(&buf, &size, "start brk: 0x%p\n", p->p_start_brk);
+ iprintf(&buf, &size, "brk: 0x%p\n", p->p_brk);
+#endif
+
+ return size;
+}
+
+size_t proc_list_info(const void *arg, char *buf, size_t osize)
+{
+ size_t size = osize;
+
+ KASSERT(NULL == arg);
+ KASSERT(NULL != buf);
+
+#if defined(__VFS__) && defined(__GETCWD__)
+ iprintf(&buf, &size, "%5s %-13s %-18s %-s\n", "PID", "NAME", "PARENT",
+ "CWD");
+#else
+ iprintf(&buf, &size, "%5s %-13s %-s\n", "PID", "NAME", "PARENT");
+#endif
+
+ list_iterate(&proc_list, p, proc_t, p_list_link)
+ {
+ char parent[64];
+ if (NULL != p->p_pproc)
+ {
+ snprintf(parent, sizeof(parent), "%3i (%s)", p->p_pproc->p_pid,
+ p->p_pproc->p_name);
+ }
+ else
+ {
+ snprintf(parent, sizeof(parent), " -");
+ }
+
+#if defined(__VFS__) && defined(__GETCWD__)
+ if (NULL != p->p_cwd)
+ {
+ char cwd[256];
+ lookup_dirpath(p->p_cwd, cwd, sizeof(cwd));
+ iprintf(&buf, &size, " %3i %-13s %-18s %-s\n", p->p_pid, p->p_name,
+ parent, cwd);
+ }
+ else
+ {
+ iprintf(&buf, &size, " %3i %-13s %-18s -\n", p->p_pid, p->p_name,
+ parent);
+ }
+#else
+ iprintf(&buf, &size, " %3i %-13s %-s\n", p->p_pid, p->p_name, parent);
+#endif
+ }
+ return size;
+}
diff --git a/kernel/proc/proc.py b/kernel/proc/proc.py
new file mode 100644
index 0000000..11a5f31
--- /dev/null
+++ b/kernel/proc/proc.py
@@ -0,0 +1,38 @@
+import gdb
+
+import weenix
+import weenix.list
+import weenix.proc
+
+
+class ProcCommand(weenix.Command):
+ """proc [<pids...>]
+ Prints information about the listed pids. If no
+ pids are listed the full process tree is printed."""
+
+ def __init__(self):
+ weenix.Command.__init__(self, "proc", gdb.COMMAND_DATA)
+
+ def invoke(self, args, tty):
+ print("invoking...")
+ if (len(args.strip()) == 0):
+ print(weenix.proc.str_proc_tree())
+ else:
+ for pid in args.split():
+ if (pid == "curproc"):
+ print(weenix.proc.curproc())
+ else:
+ print(weenix.proc.lookup(pid))
+
+ def complete(self, line, word):
+ print("completing...")
+ l = map(lambda x: str(x.pid()), weenix.proc.iter())
+ l.append("curproc")
+ l = filter(lambda x: x.startswith(word), l)
+ for used in line.split():
+ l = filter(lambda x: x != used, l)
+ l.sort()
+ return l
+
+
+ProcCommand()
diff --git a/kernel/proc/sched.c b/kernel/proc/sched.c
new file mode 100644
index 0000000..9162875
--- /dev/null
+++ b/kernel/proc/sched.c
@@ -0,0 +1,368 @@
+// SMP.1 + SMP.2 + SMP.3 + SMP.4
+// spinlocks + mask interrupts
+#include "api/syscall.h"
+#include "errno.h"
+#include "fs/vfs.h"
+#include "globals.h"
+#include "main/apic.h"
+#include "main/inits.h"
+#include "types.h"
+#include "util/debug.h"
+#include <util/time.h>
+
+/*==========
+ * Variables
+ *=========*/
+
+/*
+ * The run queue of threads waiting to be run.
+ */
+static ktqueue_t kt_runq CORE_SPECIFIC_DATA;
+
+/*
+ * Helper tracking most recent thread context before a context_switch().
+ */
+static context_t *last_thread_context CORE_SPECIFIC_DATA;
+
+/*===================
+ * Preemption helpers
+ *==================*/
+
+inline void preemption_disable()
+{
+ if (curthr)
+ curthr->kt_preemption_count++;
+}
+
+inline void preemption_enable()
+{
+ if (curthr)
+ {
+ KASSERT(curthr->kt_preemption_count);
+ curthr->kt_preemption_count--;
+ }
+}
+
+inline void preemption_reset()
+{
+ KASSERT(curthr);
+ curthr->kt_preemption_count = 0;
+}
+
+inline long preemption_enabled()
+{
+ return curthr && !curthr->kt_preemption_count;
+}
+
+/*==================
+ * ktqueue functions
+ *=================*/
+
+/*
+ * Initializes queue.
+ */
+void sched_queue_init(ktqueue_t *queue)
+{
+ list_init(&queue->tq_list);
+ queue->tq_size = 0;
+}
+
+/*
+ * Adds thr to the tail of queue.
+ *
+ * queue must be locked
+ */
+static void ktqueue_enqueue(ktqueue_t *queue, kthread_t *thr)
+{
+ KASSERT(!thr->kt_wchan);
+
+ list_assert_sanity(&queue->tq_list);
+ /* Because of the way core-specific data is handled, we add to the front
+ * of the queue (and remove from the back). */
+ list_insert_head(&queue->tq_list, &thr->kt_qlink);
+ list_assert_sanity(&queue->tq_list);
+
+ thr->kt_wchan = queue;
+ queue->tq_size++;
+}
+
+/*
+ * Removes and returns a thread from the head of queue.
+ *
+ * queue must be locked
+ */
+static kthread_t *ktqueue_dequeue(ktqueue_t *queue)
+{
+ if (sched_queue_empty(queue))
+ {
+ return NULL;
+ }
+
+ list_assert_sanity(&queue->tq_list);
+
+ list_link_t *link = queue->tq_list.l_prev;
+ kthread_t *thr = list_item(link, kthread_t, kt_qlink);
+ list_remove(link);
+ thr->kt_wchan = NULL;
+
+ list_assert_sanity(&queue->tq_list);
+
+ queue->tq_size--;
+ return thr;
+}
+
+/*
+ * Removes thr from queue
+ *
+ * queue must be locked
+ */
+static void ktqueue_remove(ktqueue_t *queue, kthread_t *thr)
+{
+ // KASSERT(spinlock_ownslock(&queue->tq_lock));
+ KASSERT(thr->kt_qlink.l_next && thr->kt_qlink.l_prev);
+ list_remove(&thr->kt_qlink);
+ thr->kt_wchan = NULL;
+ queue->tq_size--;
+ list_assert_sanity(&queue->tq_list);
+}
+
+/*
+ * Returns 1 if queue is empty, 0 if it's not.
+ *
+ * If using this for branching / conditional logic on the queue, it should be
+ * locked for this call to avoid a TOCTTOU bug. This is, however, up to the
+ * caller and not enforced at this level.
+ */
+inline long sched_queue_empty(ktqueue_t *queue) { return queue->tq_size == 0; }
+
+/*==========
+ * Functions
+ *=========*/
+
+/*
+ * Initializes the run queue.
+ */
+void sched_init(void)
+{
+ sched_queue_init(GET_CSD(curcore.kc_id, ktqueue_t, kt_runq));
+}
+
+/*
+ * Puts curthr into the cancellable sleep state, and calls sched_switch() with
+ * the passed in arguments. Cancellable sleep means that the thread can be woken
+ * up from sleep for two reasons:
+ * 1. The event it is waiting for has occurred.
+ * 2. It was cancelled.
+ *
+ * Returns 0, or:
+ * - EINTR: If curthr is cancelled before or after the call to sched_switch()
+ *
+ * Hints:
+ * Do not enqueue the thread directly, let sched_switch handle this.
+ */
+long sched_cancellable_sleep_on(ktqueue_t *queue)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+ return 0;
+}
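+
+/*
+ * Illustrative sketch only, kept under #if 0. kt_cancelled and
+ * KT_SLEEP_CANCELLABLE are assumed names for the cancellation flag and the
+ * cancellable sleep state.
+ */
+#if 0
+long sched_cancellable_sleep_on(ktqueue_t *queue)
+{
+    if (curthr->kt_cancelled) /* cancelled before we ever slept */
+        return -EINTR;
+
+    curthr->kt_state = KT_SLEEP_CANCELLABLE;
+    sched_switch(queue); /* sched_switch() handles the enqueue */
+
+    return curthr->kt_cancelled ? -EINTR : 0;
+}
+#endif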
+
+/*
+ * If the given thread is in a cancellable sleep, removes it from whatever queue
+ * it is sleeping on and makes the thread runnable again.
+ *
+ * Regardless of the thread's state, this should mark the thread as cancelled.
+ */
+void sched_cancel(kthread_t *thr)
+{
+ // KASSERT(spinlock_ownslock(&thr->kt_lock));
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
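+
+/*
+ * Illustrative sketch only, kept under #if 0. kt_cancelled and
+ * KT_SLEEP_CANCELLABLE are assumed names; kt_wchan is the queue the thread is
+ * sleeping on (see ktqueue_enqueue()).
+ */
+#if 0
+void sched_cancel(kthread_t *thr)
+{
+    thr->kt_cancelled = 1; /* always mark the thread as cancelled */
+    if (thr->kt_state == KT_SLEEP_CANCELLABLE)
+    {
+        ktqueue_remove(thr->kt_wchan, thr);
+        sched_make_runnable(thr);
+    }
+}
+#endif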
+
+/*
+ * Switches into the context of the current core, which is constantly in a loop
+ * in core_switch() to choose a new runnable thread and switch into its thread
+ * context.
+ *
+ * We want to switch to the current core because the idle process handles the
+ * actual switching of the threads. Please see section 3.3 Boot Sequence for
+ * a more in-depth explanation of the idle process and its
+ * relationship with core_switch().
+ *
+ * Hints:
+ * curthr state must NOT be KT_ON_CPU upon entry.
+ * To ensure that curthr is enqueued on queue only once it is no longer executing,
+ * set the kc_queue field of curcore (the current core) to the queue. See
+ * core_switch() to see how the queue is handled.
+ *
+ * Protect the context switch from interrupts: Use intr_disable(), intr_setipl(),
+ * intr_enable(), and IPL_LOW.
+ *
+ * Even though we want to disable interrupts while modifying the run queue,
+ * core_switch() will actually enable interrupts before sleeping,
+ * but it doesn't modify the IPL. Because we want an interrupt of any level
+ * to wake up the idling core, IPL should be set to IPL_LOW.
+ *
+ * Do not directly call core_switch. The curcore's thread is stuck in a loop
+ * inside core_switch, so switching to its context brings you there.
+ *
+ * For debugging purposes, you may find it useful to set
+ * last_thread_context to the context of the current thread here before the call
+ * to context_switch.
+ */
+void sched_switch(ktqueue_t *queue)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
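+
+/*
+ * Illustrative sketch only, kept under #if 0. The exact ordering of the
+ * interrupt calls is an assumption; the queue handoff via curcore.kc_queue
+ * matches the check at the top of core_switch() below.
+ */
+#if 0
+void sched_switch(ktqueue_t *queue)
+{
+    KASSERT(curthr->kt_state != KT_ON_CPU);
+
+    intr_disable();
+    intr_setipl(IPL_LOW); /* any interrupt should wake an idling core */
+
+    /* core_switch() enqueues curthr on this queue once it is off the CPU. */
+    curcore.kc_queue = queue;
+
+    last_thread_context = &curthr->kt_ctx; /* purely for debugging */
+    context_switch(&curthr->kt_ctx, &curcore.kc_ctx);
+
+    /* Execution resumes here once this thread is scheduled again. */
+    intr_enable();
+}
+#endif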
+
+/*
+ * Set the state of the current thread to runnable and sched_switch() with the
+ * current core's runq. Protect access to the thread via its lock.
+ */
+void sched_yield()
+{
+ KASSERT(curthr->kt_state == KT_ON_CPU);
+ curthr->kt_state = KT_RUNNABLE;
+ sched_switch(&kt_runq);
+}
+
+/*
+ * Makes the given thread runnable by setting its state and enqueuing it in the
+ * run queue (kt_runq).
+ *
+ * Hints:
+ * Cannot be called on curthr (it is already running).
+ * Because this can be called from an interrupt context, temporarily mask
+ * interrupts. Use intr_setipl() and IPL_HIGH in order to avoid being interrupted
+ * while modifying the queue.
+ */
+void sched_make_runnable(kthread_t *thr)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
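+
+/*
+ * Illustrative sketch only, kept under #if 0. It assumes intr_setipl() returns
+ * the previous IPL so that it can be restored afterwards; if it does not, the
+ * old IPL would have to be read separately.
+ */
+#if 0
+void sched_make_runnable(kthread_t *thr)
+{
+    KASSERT(thr != curthr);
+
+    uint8_t old_ipl = intr_setipl(IPL_HIGH); /* assumed return value */
+    thr->kt_state = KT_RUNNABLE;
+    ktqueue_enqueue(&kt_runq, thr);
+    intr_setipl(old_ipl);
+}
+#endif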
+
+/*
+ * Places curthr in an uninterruptible sleep on q. I.e. if the thread is cancelled
+ * while sleeping, it will NOT notice until it is woken up by the event it's
+ * waiting for.
+ *
+ * Hints:
+ * Temporarily mask interrupts using intr_setipl() and IPL_HIGH.
+ * IPL should be set to IPL_HIGH because the act of changing the thread's state
+ * and enqueuing the thread on the queue should not be interrupted
+ * (as sched_wakeup_on() could be called from an interrupt context).
+ *
+ * Do not enqueue the thread directly, let sched_switch() handle this (pass q
+ * to sched_switch()).
+ */
+void sched_sleep_on(ktqueue_t *q)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
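+
+/*
+ * Illustrative sketch only, kept under #if 0. KT_SLEEP is an assumed name for
+ * the uninterruptible sleep state, and intr_setipl() is assumed to return the
+ * previous IPL.
+ */
+#if 0
+void sched_sleep_on(ktqueue_t *q)
+{
+    uint8_t old_ipl = intr_setipl(IPL_HIGH); /* assumed return value */
+    curthr->kt_state = KT_SLEEP;
+    sched_switch(q); /* sched_switch() handles the enqueue */
+    intr_setipl(old_ipl);
+}
+#endif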
+
+/*
+ * Wakes up a thread on the given queue by taking it off the queue and
+ * making it runnable. If given an empty queue, do nothing.
+ *
+ * Hints:
+ * Make sure to set *ktp (if it is provided--i.e. ktp is not NULL) to the
+ * dequeued thread before making it runnable. This allows the caller to get a
+ * handle to the thread that was woken up (useful, for instance, when
+ * implementing unlock() on a mutex: the mutex can wake up a sleeping thread
+ * and make it the new owner).
+ */
+void sched_wakeup_on(ktqueue_t *q, kthread_t **ktp)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
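+
+/*
+ * Illustrative sketch only, kept under #if 0: dequeue one thread (if any),
+ * report it through ktp, and make it runnable.
+ */
+#if 0
+void sched_wakeup_on(ktqueue_t *q, kthread_t **ktp)
+{
+    if (sched_queue_empty(q))
+        return;
+
+    kthread_t *thr = ktqueue_dequeue(q);
+    if (ktp)
+        *ktp = thr;
+    sched_make_runnable(thr);
+}
+#endif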
+
+/*
+ * Wake up all the threads on the given queue by making them all runnable.
+ */
+void sched_broadcast_on(ktqueue_t *q)
+{
+ NOT_YET_IMPLEMENTED("PROCS: ***none***");
+}
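+
+/*
+ * Illustrative sketch only, kept under #if 0: drain the queue, making each
+ * dequeued thread runnable.
+ */
+#if 0
+void sched_broadcast_on(ktqueue_t *q)
+{
+    while (!sched_queue_empty(q))
+    {
+        sched_make_runnable(ktqueue_dequeue(q));
+    }
+}
+#endif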
+
+/*===============
+ * Functions: SMP
+ *==============*/
+
+/*
+ * A sad, but functional, attempt at load balancing when a core is idle
+ */
+#define LOAD_BALANCING_IDLE_THRESHOLD 4096
+static inline kthread_t *load_balance()
+{
+ return NULL;
+}
+
+/*
+ * The meat of our SMP-system.
+ *
+ * You will want to (in this exact order):
+ * 1) perform the operations on curcore.kc_queue and curcore.kc_lock
+ * 2) set curproc to idleproc, and curthr to NULL
+ * 3) try to get the next thread to run:
+ *    a) try to use your own runq (kt_runq), which is core-specific data
+ *    b) if, using core_uptime(), at least LOAD_BALANCING_IDLE_THRESHOLD has
+ *       passed, then call load_balance() to try to get the next thread to run
+ *    c) if neither (a) nor (b) works, the core is idle. Wait for an interrupt
+ *       using intr_wait(). Note that you will need to re-disable interrupts
+ *       after returning from intr_wait().
+ * 4) ensure the context's PML4 for the selected thread is correctly set up with
+ *    curcore's core-specific data. Use kt_recent_core and
+ *    map_in_core_specific_data.
+ * 5) set curthr and curproc
+ * 6) context_switch out
+ */
+void core_switch()
+{
+ while (1)
+ {
+ KASSERT(!intr_enabled());
+ KASSERT(!curthr || curthr->kt_state != KT_ON_CPU);
+
+ if (curcore.kc_queue)
+ {
+ ktqueue_enqueue(curcore.kc_queue, curthr);
+ }
+
+ curproc = &idleproc;
+ curthr = NULL;
+
+ kthread_t *next_thread = NULL;
+
+ size_t idle_start = core_uptime();
+ while (1)
+ {
+ next_thread = ktqueue_dequeue(&kt_runq);
+
+ if (!next_thread &&
+ core_uptime() - idle_start >= LOAD_BALANCING_IDLE_THRESHOLD)
+ next_thread = load_balance();
+
+ if (next_thread)
+ break;
+
+ intr_wait();
+ intr_disable();
+ }
+
+ KASSERT(next_thread->kt_state == KT_RUNNABLE);
+ KASSERT(next_thread->kt_proc);
+
+ if (curcore.kc_id != next_thread->kt_recent_core)
+ {
+ map_in_core_specific_data(next_thread->kt_ctx.c_pml4);
+ next_thread->kt_recent_core = curcore.kc_id;
+ }
+
+ uintptr_t mapped_paddr = pt_virt_to_phys_helper(
+ next_thread->kt_ctx.c_pml4, (uintptr_t)&next_thread);
+ uintptr_t expected_paddr =
+ pt_virt_to_phys_helper(pt_get(), (uintptr_t)&next_thread);
+ KASSERT(mapped_paddr == expected_paddr);
+
+ curthr = next_thread;
+ curthr->kt_state = KT_ON_CPU;
+ curproc = curthr->kt_proc;
+ context_switch(&curcore.kc_ctx, &curthr->kt_ctx);
+ }
+} \ No newline at end of file
diff --git a/kernel/proc/spinlock.c b/kernel/proc/spinlock.c
new file mode 100644
index 0000000..bf89b8e
--- /dev/null
+++ b/kernel/proc/spinlock.c
@@ -0,0 +1,21 @@
+#include "globals.h"
+#include "main/apic.h"
+
+void spinlock_init(spinlock_t *lock) { lock->s_locked = 0; }
+
+inline void spinlock_lock(spinlock_t *lock)
+{
+    // __sync_bool_compare_and_swap is a GCC intrinsic for atomic compare-and-swap.
+    // If lock->s_locked is 0, it is set to 1 and __sync_bool_compare_and_swap
+    // returns true. Otherwise, lock->s_locked is left at 1 and
+    // __sync_bool_compare_and_swap returns false.
+}
+
+inline void spinlock_unlock(spinlock_t *lock)
+{
+}
+
+inline long spinlock_ownslock(spinlock_t *lock)
+{
+ return 1;
+}
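+
+/*
+ * Illustrative sketch only, kept under #if 0: a minimal test-and-set spinlock
+ * built on the GCC atomics mentioned above. The s_locked field name comes from
+ * spinlock_init(); __sync_lock_release() is a standard GCC builtin.
+ */
+#if 0
+inline void spinlock_lock(spinlock_t *lock)
+{
+    /* Spin until the compare-and-swap from 0 to 1 succeeds. */
+    while (!__sync_bool_compare_and_swap(&lock->s_locked, 0, 1))
+        ;
+}
+
+inline void spinlock_unlock(spinlock_t *lock)
+{
+    __sync_lock_release(&lock->s_locked); /* atomically stores 0 with release semantics */
+}
+#endif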
diff --git a/kernel/test/Submodules b/kernel/test/Submodules
new file mode 100644
index 0000000..3227c36
--- /dev/null
+++ b/kernel/test/Submodules
@@ -0,0 +1 @@
+kshell
diff --git a/kernel/test/driverstest.c b/kernel/test/driverstest.c
new file mode 100644
index 0000000..0ed5e1d
--- /dev/null
+++ b/kernel/test/driverstest.c
@@ -0,0 +1,288 @@
+#include "errno.h"
+#include "globals.h"
+
+#include "test/usertest.h"
+#include "test/proctest.h"
+
+#include "util/debug.h"
+#include "util/printf.h"
+#include "util/string.h"
+
+#include "proc/proc.h"
+#include "proc/kthread.h"
+#include "proc/sched.h"
+
+#include "drivers/tty/tty.h"
+#include "drivers/dev.h"
+#include "drivers/blockdev.h"
+#include "drivers/keyboard.h"
+
+#define TEST_STR_1 "hello\n"
+#define TEST_STR_2 "different string\n"
+#define TEST_STR_3 "test"
+#define TEST_BUF_SZ 10
+#define NUM_PROCS 3
+#define BLOCK_NUM 0
+
+// TODO: need to change this to use the MOD macro
+
+void* kthread_write(long arg1, void* arg2) {
+ chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, arg1));
+ tty_t* tty = cd_to_tty(cd);
+
+ int count = 0;
+ while (count < 2) {
+ if (count == 0) {
+ for (size_t i = 0; i < strlen(TEST_STR_1); i++) {
+ ldisc_key_pressed(&tty->tty_ldisc, TEST_STR_1[i]);
+ }
+ } else {
+ for (size_t i = 0; i < strlen(TEST_STR_2); i++) {
+ ldisc_key_pressed(&tty->tty_ldisc, TEST_STR_2[i]);
+ }
+ }
+ count++;
+ }
+ return NULL;
+}
+
+void* kthread_read1(long arg1, void* arg2) {
+ chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, arg1));
+ char buf[32];
+ memset(buf, 0, 32);
+ size_t num_bytes = cd->cd_ops->read(cd, 0, buf, strlen(TEST_STR_1));
+ test_assert(num_bytes == strlen(TEST_STR_1), "number of bytes is incorrect");
+ test_assert(!strncmp(buf, TEST_STR_1, strlen(TEST_STR_1)), "resulting strings are not equal");
+
+ return NULL;
+}
+
+void* kthread_read2(long arg1, void* arg2) {
+ chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, arg1));
+
+ char buf[32];
+ memset(buf, 0, 32);
+ size_t num_bytes = cd->cd_ops->read(cd, 0, buf, strlen(TEST_STR_2));
+ test_assert(num_bytes == strlen(TEST_STR_2), "number of bytes is incorrect");
+ test_assert(!strncmp(buf, TEST_STR_2, strlen(TEST_STR_2)), "resulting strings are not equal");
+
+ return NULL;
+}
+
+long test_concurrent_reads() {
+ proc_t* proc_write = proc_create("process_write");
+ kthread_t* kt_write = kthread_create(proc_write, kthread_write, 0, NULL);
+
+ proc_t* proc_1 = proc_create("process_1_read");
+ kthread_t* kthread_1 = kthread_create(proc_1, kthread_read1, 0, NULL);
+
+ proc_t* proc_2 = proc_create("process_2_read");
+ kthread_t* kthread_2 = kthread_create(proc_2, kthread_read2, 0, NULL);
+
+ sched_make_runnable(kthread_1);
+ sched_make_runnable(kthread_2);
+ sched_make_runnable(kt_write);
+
+ while (do_waitpid(-1, NULL, 0) != -ECHILD)
+ ;
+
+ return 0;
+}
+
+/**
+ * Function for each kthread to write the order in which they were spawned
+ * to the character device.
+*/
+void* kthread_concurrent_write(long arg1, void* arg2) {
+ chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, 0));
+ char buf[32];
+ memset(buf, 0, 32);
+ snprintf(buf, 32, "thread_%d\n", (int)arg1);
+ size_t num_bytes = cd->cd_ops->write(cd, 0, buf, strlen(buf));
+ test_assert(num_bytes == strlen(buf), "number of bytes written is not correct");
+ return NULL;
+}
+
+long test_concurrent_writes() {
+ char proc_name[32];
+ for (int i = 0; i < NUM_PROCS; i++) {
+ memset(proc_name, 0, 32);
+ snprintf(proc_name, 32, "process_concurrent_write_%d", i);
+ proc_t* proc_write = proc_create(proc_name);
+ kthread_t* kt_write = kthread_create(proc_write, kthread_concurrent_write, i, NULL);
+ sched_make_runnable(kt_write);
+ }
+
+ while (do_waitpid(-1, NULL, 0) != -ECHILD)
+ ;
+
+ return 0;
+}
+
+void* kthread_write_disk(long arg1, void* arg2) {
+ // write to disk here
+ void* page_of_data = page_alloc();
+ // memset it to be some random character
+ memset(page_of_data, 'F', BLOCK_SIZE);
+ blockdev_t* bd = blockdev_lookup(MKDEVID(DISK_MAJOR, 0));
+ long ret = bd->bd_ops->write_block(bd, (char*)page_of_data, arg1, 1);
+ test_assert(ret == 0, "the write operation failed");
+
+ return NULL;
+}
+
+void* kthread_read_disk(long arg1, void* arg2) {
+ // read that same block of data here
+ // not going to memset it because we are reading that amount
+ void* page_of_data_to_read = page_alloc_n(2);
+ void* data_expected = page_alloc_n(2);
+ memset(data_expected, 'F', BLOCK_SIZE);
+ blockdev_t* bd = blockdev_lookup(MKDEVID(DISK_MAJOR, 0));
+ test_assert(!PAGE_ALIGNED((char*)page_of_data_to_read+1), "not page aligned");
+ long ret = bd->bd_ops->read_block(bd, (char*)page_of_data_to_read+1, arg1, 1);
+ test_assert(ret == 0, "the read operation failed");
+ test_assert(0 == memcmp((char*)page_of_data_to_read+1, data_expected, BLOCK_SIZE), "bytes are not equal");
+ page_free_n(page_of_data_to_read, 2);
+ page_free_n(data_expected, 2);
+ return NULL;
+}
+
+/*
+ First write to disk and then attempt to read from disk
+*/
+long test_disk_write_and_read() {
+ proc_t* proc_write = proc_create("process_write");
+ kthread_t* kt_write = kthread_create(proc_write, kthread_write_disk, BLOCK_NUM, NULL);
+
+ proc_t* proc_read = proc_create("process_read");
+ kthread_t* kt_read = kthread_create(proc_read, kthread_read_disk, BLOCK_NUM, NULL);
+
+ sched_make_runnable(kt_write);
+ sched_make_runnable(kt_read);
+
+ while (do_waitpid(-1, NULL, 0) != -ECHILD)
+ ;
+
+ return 0;
+}
+
+/*
+ Tests inputting a character and a newline character
+*/
+long test_basic_line_discipline() {
+ chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, 0));
+ tty_t* tty = cd_to_tty(cd);
+ ldisc_t* ldisc = &tty->tty_ldisc;
+ ldisc_key_pressed(ldisc, 't');
+
+ test_assert(ldisc->ldisc_buffer[ldisc->ldisc_tail] == 't', "character not inputted into buffer correctly");
+    test_assert(ldisc->ldisc_head != ldisc->ldisc_cooked && ldisc->ldisc_tail != ldisc->ldisc_head, "pointers are not updated correctly");
+
+ size_t previous_head_val = ldisc->ldisc_head;
+ ldisc_key_pressed(ldisc, '\n');
+ test_assert(ldisc->ldisc_head == previous_head_val + 1, "ldisc_head should have been incremented past newline character");
+ test_assert(ldisc->ldisc_cooked == ldisc->ldisc_head, "ldisc_cooked should be equal to ldisc_head");
+
+ // reset line discipline for other tests before returning
+ ldisc->ldisc_head = ldisc->ldisc_cooked = ldisc->ldisc_tail = 0;
+ return 0;
+}
+
+/*
+ Tests removing a character
+*/
+long test_backspace() {
+ chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, 0));
+ tty_t* tty = cd_to_tty(cd);
+ ldisc_t* ldisc = &tty->tty_ldisc;
+ size_t previous_head_val = ldisc->ldisc_head;
+ ldisc_key_pressed(ldisc, 't');
+ ldisc_key_pressed(ldisc, '\b');
+ test_assert(ldisc->ldisc_head == previous_head_val, "Backspace should move the ldisc_head back by 1");
+
+ // testing there should be no characters to remove
+ ldisc_key_pressed(ldisc, '\b');
+ test_assert(ldisc->ldisc_head == previous_head_val, "This backspace should result in a no-op");
+
+ // reset line discipline for other tests before returning
+ ldisc->ldisc_head = ldisc->ldisc_cooked = ldisc->ldisc_tail = 0;
+ return 0;
+}
+
+void* kthread_wait_for_eot(long arg1, void* arg2) {
+ chardev_t* cd = (chardev_t*)arg2;
+ char buf[32];
+ memset(buf, 0, 32);
+ size_t num_bytes = cd->cd_ops->read(cd, 0, buf, TEST_BUF_SZ);
+ test_assert(num_bytes == strlen(TEST_STR_3), "number of bytes is incorrect");
+ test_assert(!strncmp(buf, TEST_STR_3, strlen(TEST_STR_3)), "resulting strings are not equal");
+ return NULL;
+}
+
+/*
+ Tests the behavior for EOT
+*/
+long test_eot() {
+ chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, 0));
+ tty_t* tty = cd_to_tty(cd);
+ ldisc_t* ldisc = &tty->tty_ldisc;
+
+ proc_t* proc_read = proc_create("process_read");
+ kthread_t* kt_read = kthread_create(proc_read, kthread_wait_for_eot, 0, cd);
+ sched_make_runnable(kt_read);
+ // allow the other process to run first so it can block before typing
+ sched_yield();
+
+ size_t prev_tail_value = ldisc->ldisc_tail;
+ for (size_t i = 0; i < strlen(TEST_STR_3); i++) {
+ ldisc_key_pressed(ldisc, TEST_STR_3[i]);
+ }
+ ldisc_key_pressed(ldisc, EOT);
+ test_assert(ldisc->ldisc_head == ldisc->ldisc_cooked, "ldisc_head should be equal to ldisc_cooked");
+
+ // allow the other thread to read
+ while (do_waitpid(-1, NULL, 0) != -ECHILD)
+ ;
+ test_assert(ldisc->ldisc_tail == prev_tail_value + strlen(TEST_STR_3) + 1, "ldisc_tail should be past the EOT char");
+ ldisc->ldisc_head = ldisc->ldisc_tail = ldisc->ldisc_cooked = 0;
+ return 0;
+}
+
+/*
+ Tests the behavior for ETX
+*/
+long test_etx() {
+ chardev_t* cd = chardev_lookup(MKDEVID(TTY_MAJOR, 0));
+ tty_t* tty = cd_to_tty(cd);
+ ldisc_t* ldisc = &tty->tty_ldisc;
+ size_t previous_head_value = ldisc->ldisc_head;
+
+ // "press" two characters
+ ldisc_key_pressed(ldisc, 't');
+ ldisc_key_pressed(ldisc, 'e');
+ ldisc_key_pressed(ldisc, ETX);
+
+ test_assert(previous_head_value + 1 == ldisc->ldisc_head, "ldisc_head should only be one past where it used to be");
+ test_assert(ldisc->ldisc_head == ldisc->ldisc_cooked, "ldisc should be a cooked blank line");
+
+ // reset line discipline for other tests before returning
+ ldisc->ldisc_head = ldisc->ldisc_cooked = ldisc->ldisc_tail = 0;
+ return 0;
+}
+
+long driverstest_main(long arg1, void* arg2)
+{
+ dbg(DBG_TEST, "\nStarting Drivers tests\n");
+ test_init();
+
+ test_basic_line_discipline();
+ test_backspace();
+ test_eot();
+ test_etx();
+ test_concurrent_reads();
+ test_concurrent_writes();
+ test_disk_write_and_read();
+
+ test_fini();
+ return 0;
+} \ No newline at end of file
diff --git a/kernel/test/kshell/command.c b/kernel/test/kshell/command.c
new file mode 100644
index 0000000..836b743
--- /dev/null
+++ b/kernel/test/kshell/command.c
@@ -0,0 +1,46 @@
+#include "command.h"
+
+#include "mm/kmalloc.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+kshell_command_t *kshell_command_create(const char *name,
+ kshell_cmd_func_t cmd_func,
+ const char *desc)
+{
+ kshell_command_t *cmd;
+ size_t len;
+
+ KASSERT(NULL != name);
+ KASSERT(NULL != cmd_func);
+
+ cmd = (kshell_command_t *)kmalloc(sizeof(kshell_command_t));
+ if (NULL == cmd)
+ {
+ return NULL;
+ }
+
+ len = strnlen(name, KSH_CMD_NAME_LEN);
+ strncpy(cmd->kc_name, name, len);
+ cmd->kc_name[len] = '\0';
+
+ cmd->kc_cmd_func = cmd_func;
+
+ if (NULL != desc)
+ {
+ len = strnlen(desc, KSH_DESC_LEN);
+ strncpy(cmd->kc_desc, desc, len);
+ cmd->kc_desc[len] = '\0';
+ }
+ else
+ {
+ cmd->kc_desc[0] = '\0';
+ }
+
+ list_link_init(&cmd->kc_commands_link);
+
+ return cmd;
+}
+
+void kshell_command_destroy(kshell_command_t *cmd) { kfree(cmd); }
diff --git a/kernel/test/kshell/command.h b/kernel/test/kshell/command.h
new file mode 100644
index 0000000..96a5cb0
--- /dev/null
+++ b/kernel/test/kshell/command.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include "priv.h"
+
+#include "test/kshell/kshell.h"
+
+typedef struct kshell_command
+{
+ char kc_name[KSH_CMD_NAME_LEN + 1];
+ kshell_cmd_func_t kc_cmd_func;
+ char kc_desc[KSH_DESC_LEN + 1];
+
+ list_link_t kc_commands_link;
+} kshell_command_t;
+
+kshell_command_t *kshell_command_create(const char *name,
+ kshell_cmd_func_t cmd_func,
+ const char *desc);
+
+void kshell_command_destroy(kshell_command_t *cmd);
diff --git a/kernel/test/kshell/commands.c b/kernel/test/kshell/commands.c
new file mode 100644
index 0000000..5ad5b11
--- /dev/null
+++ b/kernel/test/kshell/commands.c
@@ -0,0 +1,404 @@
+#include "commands.h"
+#include "errno.h"
+
+#include "command.h"
+
+#ifdef __VFS__
+
+#include "fs/fcntl.h"
+#include "fs/vfs_syscall.h"
+#include "fs/vnode.h"
+
+#endif
+
+#include "test/kshell/io.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+list_t kshell_commands_list = LIST_INITIALIZER(kshell_commands_list);
+
+long kshell_help(kshell_t *ksh, size_t argc, char **argv)
+{
+ /* Print a list of available commands */
+ char spaces[KSH_CMD_NAME_LEN];
+ memset(spaces, ' ', KSH_CMD_NAME_LEN);
+
+ kprintf(ksh, "Available commands:\n");
+ list_iterate(&kshell_commands_list, cmd, kshell_command_t,
+ kc_commands_link)
+ {
+ KASSERT(NULL != cmd);
+ size_t namelen = strnlen(cmd->kc_name, KSH_CMD_NAME_LEN);
+ spaces[KSH_CMD_NAME_LEN - namelen] = '\0';
+ kprintf(ksh, "%s%s%s\n", cmd->kc_name, spaces, cmd->kc_desc);
+ spaces[KSH_CMD_NAME_LEN - namelen] = ' ';
+ }
+
+ return 0;
+}
+
+long kshell_exit(kshell_t *ksh, size_t argc, char **argv)
+{
+ panic("kshell: kshell_exit should NEVER be called");
+}
+
+long kshell_clear(kshell_t *ksh, size_t argc, char **argv)
+{
+ kprintf(ksh, "\033[2J\033[1;1H");
+
+ // kprintf(ksh, "\033[10A");
+ return 0;
+}
+
+long kshell_halt(kshell_t *ksh, size_t argc, char **argv)
+{
+ proc_kill_all();
+ return 0;
+}
+
+long kshell_echo(kshell_t *ksh, size_t argc, char **argv)
+{
+ if (argc == 1)
+ {
+ kprintf(ksh, "\n");
+ }
+ else
+ {
+ for (size_t i = 1; i < argc - 1; i++)
+ {
+ kprintf(ksh, "%s ", argv[i]);
+ }
+ kprintf(ksh, "%s\n", argv[argc - 1]);
+ }
+
+ return 0;
+}
+
+#ifdef __VFS__
+
+long kshell_cat(kshell_t *ksh, size_t argc, char **argv)
+{
+ if (argc < 2)
+ {
+ kprintf(ksh, "Usage: cat <files>\n");
+ return 0;
+ }
+
+ char buf[KSH_BUF_SIZE];
+ for (size_t i = 1; i < argc; i++)
+ {
+ int fd = (int)do_open(argv[i], O_RDONLY);
+ if (fd < 0)
+ {
+ kprintf(ksh, "Error opening file: %s\n", argv[i]);
+ continue;
+ }
+
+ long retval;
+ while ((retval = do_read(fd, buf, KSH_BUF_SIZE)) > 0)
+ {
+ retval = kshell_write_all(ksh, buf, (size_t)retval);
+ if (retval < 0)
+ break;
+ }
+ if (retval < 0)
+ {
+ kprintf(ksh, "Error reading or writing %s: %s\n", argv[i], strerror((int)-retval));
+ }
+
+ retval = do_close(fd);
+ if (retval < 0)
+ {
+ panic("kshell: Error closing file %s: %s\n", argv[i],
+ strerror((int)-retval));
+ }
+ }
+
+ return 0;
+}
+
+long kshell_ls(kshell_t *ksh, size_t argc, char **argv)
+{
+ size_t arglen;
+ long ret;
+ int fd;
+ dirent_t dirent;
+ stat_t statbuf;
+ char direntname[KSH_BUF_SIZE];
+
+ memset(direntname, '\0', KSH_BUF_SIZE);
+
+ if (argc > 2)
+ {
+ kprintf(ksh, "Usage: ls <directory>\n");
+ return 0;
+ }
+ else if (argc == 2)
+ {
+ if ((ret = do_stat(argv[1], &statbuf)) < 0)
+ {
+ if (ret == -ENOENT)
+ {
+ kprintf(ksh, "%s does not exist\n", argv[1]);
+ return 0;
+ }
+ else
+ {
+ return ret;
+ }
+ }
+ if (!S_ISDIR(statbuf.st_mode))
+ {
+ kprintf(ksh, "%s is not a directory\n", argv[1]);
+ return 0;
+ }
+
+ fd = (int)do_open(argv[1], O_RDONLY);
+ if (fd < 0)
+ {
+ kprintf(ksh, "Could not find directory: %s\n", argv[1]);
+ return 0;
+ }
+ arglen = strnlen(argv[1], KSH_BUF_SIZE);
+ }
+ else
+ {
+ KASSERT(argc == 1);
+ fd = (int)do_open(".", O_RDONLY);
+ if (fd < 0)
+ {
+ kprintf(ksh, "Could not find directory: .\n");
+ return 0;
+ }
+ arglen = 1;
+ }
+
+ if (argc == 2)
+ memcpy(direntname, argv[1], arglen);
+ else
+ direntname[0] = '.';
+
+ direntname[arglen] = '/';
+ direntname[arglen + NAME_LEN + 1] = '\0';
+
+ while ((ret = do_getdent(fd, &dirent)) == sizeof(dirent_t))
+ {
+ memcpy(direntname + arglen + 1, dirent.d_name, NAME_LEN + 1);
+ ret = do_stat(direntname, &statbuf);
+ if (ret < 0)
+ {
+ kprintf(ksh, "Error stat\'ing `%s`: %s\n", dirent.d_name, strerror((int)-ret));
+ continue;
+ }
+ if (S_ISDIR(statbuf.st_mode))
+ {
+ kprintf(ksh, "%s/\n", dirent.d_name);
+ }
+ else
+ {
+ kprintf(ksh, "%s\n", dirent.d_name);
+ }
+ }
+
+ do_close(fd);
+ return ret;
+}
+
+long kshell_cd(kshell_t *ksh, size_t argc, char **argv)
+{
+ KASSERT(ksh && argc && argv);
+ if (argc < 2)
+ {
+ kprintf(ksh, "Usage: cd <directory>\n");
+ return 0;
+ }
+
+ long ret = do_chdir(argv[1]);
+ if (ret < 0)
+ {
+ kprintf(ksh, "cd: `%s`: %s\n", argv[1], strerror((int)-ret));
+ }
+ return 0;
+}
+
+long kshell_rm(kshell_t *ksh, size_t argc, char **argv)
+{
+ KASSERT(ksh && argc && argv);
+
+ if (argc < 2)
+ {
+ kprintf(ksh, "Usage: rm <file>\n");
+ return 0;
+ }
+
+ long ret = do_unlink(argv[1]);
+ if (ret < 0)
+ {
+ kprintf(ksh, "rm: `%s`: %s\n", argv[1], strerror((int)-ret));
+ }
+
+ return 0;
+}
+
+long kshell_link(kshell_t *ksh, size_t argc, char **argv)
+{
+ KASSERT(ksh && argc && argv);
+
+ if (argc < 3)
+ {
+ kprintf(ksh, "Usage: link <src> <dst>\n");
+ return 0;
+ }
+
+ long ret = do_link(argv[1], argv[2]);
+ if (ret < 0)
+ {
+ kprintf(ksh, "Error linking %s to %s: %s\n", argv[1], argv[2], strerror((int)-ret));
+ }
+
+ return 0;
+}
+
+long kshell_rmdir(kshell_t *ksh, size_t argc, char **argv)
+{
+ KASSERT(ksh && argc && argv);
+ if (argc < 2)
+ {
+ kprintf(ksh, "Usage: rmdir DIRECTORY...\n");
+ return 1;
+ }
+
+ long exit_val = 0;
+ for (size_t i = 1; i < argc; i++)
+ {
+ long ret = do_rmdir(argv[i]);
+ if (ret < 0)
+ {
+ kprintf(ksh, "rmdir: failed to remove directory `%s': %s\n",
+ argv[i], strerror((int)-ret));
+ exit_val = 1;
+ }
+ }
+
+ return exit_val;
+}
+
+long kshell_mkdir(kshell_t *ksh, size_t argc, char **argv)
+{
+ KASSERT(ksh && argc && argv);
+ if (argc < 2)
+ {
+ kprintf(ksh, "Usage: mkdir DIRECTORY...\n");
+ return 1;
+ }
+
+ long exit_val = 0;
+ for (size_t i = 1; i < argc; i++)
+ {
+ long ret = do_mkdir(argv[i]);
+ if (ret < 0)
+ {
+ kprintf(ksh, "mkdir: failed to create directory `%s': %s\n",
+ argv[i], strerror((int)-ret));
+ exit_val = 1;
+ }
+ }
+
+ return exit_val;
+}
+
+static const char *get_file_type_str(int mode)
+{
+ if (S_ISCHR(mode))
+ {
+ return "character special file";
+ }
+ else if (S_ISDIR(mode))
+ {
+ return "directory";
+ }
+ else if (S_ISBLK(mode))
+ {
+ return "block special file";
+ }
+ else if (S_ISREG(mode))
+ {
+ return "regular file";
+ }
+ else if (S_ISLNK(mode))
+ {
+ return "symbolic link";
+ }
+ else
+ {
+ return "unknown";
+ }
+}
+
+long kshell_stat(kshell_t *ksh, size_t argc, char **argv)
+{
+ KASSERT(ksh && argc && argv);
+ long exit_val = 0;
+
+ if (argc < 2)
+ {
+ kprintf(ksh, "Usage: stat FILE...\n");
+ return 1;
+ }
+
+ for (size_t i = 1; i < argc; i++)
+ {
+ stat_t buf;
+ long ret = do_stat(argv[i], &buf);
+ if (ret < 0)
+ {
+ kprintf(ksh, "Cannot stat `%s': %s\n", argv[i],
+ strerror((int)-ret));
+ exit_val = 1;
+ continue;
+ }
+ const char *file_type_str = get_file_type_str(buf.st_mode);
+ kprintf(ksh, "File: `%s'\n", argv[i]);
+ kprintf(ksh, "Size: %d\n", buf.st_size);
+ kprintf(ksh, "Blocks: %d\n", buf.st_blocks);
+ kprintf(ksh, "IO Block: %d\n", buf.st_blksize);
+ kprintf(ksh, "%s\n", file_type_str);
+ kprintf(ksh, "Inode: %d\n", buf.st_ino);
+ kprintf(ksh, "Links: %d\n", buf.st_nlink);
+ }
+
+ return exit_val;
+}
+
+long vfstest_main(int, void *);
+
+long kshell_vfs_test(kshell_t *ksh, size_t argc, char **argv)
+{
+ kprintf(ksh, "TEST VFS: Testing... Please wait.\n");
+
+ long ret = vfstest_main(1, NULL);
+
+ kprintf(ksh, "TEST VFS: testing complete, check console for results\n");
+
+ return ret;
+}
+
+#endif
+
+#ifdef __S5FS__
+
+long s5fstest_main(int, void *);
+
+long kshell_s5fstest(kshell_t *ksh, size_t argc, char **argv)
+{
+ kprintf(ksh, "TEST S5FS: Testing... Please wait.\n");
+
+ long ret = s5fstest_main(1, NULL);
+
+ kprintf(ksh, "TEST S5FS: testing complete, check console for results\n");
+
+ return ret;
+}
+
+#endif
diff --git a/kernel/test/kshell/commands.h b/kernel/test/kshell/commands.h
new file mode 100644
index 0000000..bf0bf1a
--- /dev/null
+++ b/kernel/test/kshell/commands.h
@@ -0,0 +1,32 @@
+#pragma once
+
+#include "test/kshell/kshell.h"
+
+#define KSHELL_CMD(name) \
+ long kshell_##name(kshell_t *ksh, size_t argc, char **argv)
+
+KSHELL_CMD(help);
+
+KSHELL_CMD(exit);
+
+KSHELL_CMD(halt);
+
+KSHELL_CMD(echo);
+
+KSHELL_CMD(clear);
+
+#ifdef __VFS__
+KSHELL_CMD(cat);
+KSHELL_CMD(ls);
+KSHELL_CMD(cd);
+KSHELL_CMD(rm);
+KSHELL_CMD(link);
+KSHELL_CMD(rmdir);
+KSHELL_CMD(mkdir);
+KSHELL_CMD(stat);
+KSHELL_CMD(vfs_test);
+#endif
+
+#ifdef __S5FS__
+KSHELL_CMD(s5fstest);
+#endif
diff --git a/kernel/test/kshell/io.c b/kernel/test/kshell/io.c
new file mode 100644
index 0000000..65d816d
--- /dev/null
+++ b/kernel/test/kshell/io.c
@@ -0,0 +1,78 @@
+#include "test/kshell/io.h"
+#include "util/debug.h"
+
+#include "priv.h"
+
+#ifndef __VFS__
+
+#include "drivers/chardev.h"
+
+#endif
+
+#ifdef __VFS__
+
+#include "fs/vfs_syscall.h"
+
+#endif
+
+#include "util/printf.h"
+#include "util/string.h"
+
+/*
+ * If VFS is enabled, we can just use the syscalls.
+ *
+ * If VFS is not enabled, then we need to explicitly call the byte
+ * device.
+ */
+
+#ifdef __VFS__
+
+long kshell_write(kshell_t *ksh, const void *buf, size_t nbytes)
+{
+ long retval = do_write(ksh->ksh_out_fd, buf, nbytes);
+ KASSERT(retval < 0 || (size_t)retval == nbytes);
+ return retval;
+}
+
+long kshell_read(kshell_t *ksh, void *buf, size_t nbytes)
+{
+ return do_read(ksh->ksh_in_fd, buf, nbytes);
+}
+
+long kshell_write_all(kshell_t *ksh, void *buf, size_t nbytes)
+{
+ /* See comment in kshell_write */
+ return kshell_write(ksh, buf, nbytes);
+}
+
+#else
+
+long kshell_read(kshell_t *ksh, void *buf, size_t nbytes)
+{
+ return ksh->ksh_cd->cd_ops->read(ksh->ksh_cd, 0, buf, nbytes);
+}
+
+long kshell_write(kshell_t *ksh, const void *buf, size_t nbytes)
+{
+ return ksh->ksh_cd->cd_ops->write(ksh->ksh_cd, 0, buf, nbytes);
+}
+
+#endif
+
+void kprint(kshell_t *ksh, const char *fmt, va_list args)
+{
+ char buf[KSH_BUF_SIZE];
+ size_t count;
+
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ count = strnlen(buf, sizeof(buf));
+ kshell_write(ksh, buf, count);
+}
+
+void kprintf(kshell_t *ksh, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ kprint(ksh, fmt, args);
+ va_end(args);
+}
diff --git a/kernel/test/kshell/kshell.c b/kernel/test/kshell/kshell.c
new file mode 100644
index 0000000..a26c42c
--- /dev/null
+++ b/kernel/test/kshell/kshell.c
@@ -0,0 +1,504 @@
+#include "test/kshell/kshell.h"
+#include <util/printf.h>
+
+#include "config.h"
+
+#include "command.h"
+#include "commands.h"
+#include "tokenizer.h"
+
+#ifndef __VFS__
+
+#include "drivers/chardev.h"
+#include "drivers/tty/tty.h"
+
+#endif
+
+#include "mm/kmalloc.h"
+
+#include "proc/proc.h"
+
+#ifdef __VFS__
+
+#include "fs/fcntl.h"
+#include "fs/open.h"
+#include "fs/vfs_syscall.h"
+
+#endif
+
+#include "test/kshell/io.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+void *kshell_proc_run(long tty, void *arg2)
+{
+ // Create kernel shell on given TTY
+ kshell_t *kshell = kshell_create((uint8_t)tty);
+ if (!kshell)
+ {
+ do_exit(-1);
+ }
+
+ while (kshell_execute_next(kshell) > 0)
+ ;
+ kshell_destroy(kshell);
+ return NULL;
+}
+
+void kshell_init()
+{
+ kshell_add_command("help", kshell_help,
+ "prints a list of available commands");
+ kshell_add_command("echo", kshell_echo, "display a line of text");
+ kshell_add_command("clear", kshell_clear, "clears the screen");
+#ifdef __VFS__
+ kshell_add_command("cat", kshell_cat,
+ "concatenate files and print on the standard output");
+ kshell_add_command("ls", kshell_ls, "list directory contents");
+ kshell_add_command("cd", kshell_cd, "change the working directory");
+ kshell_add_command("rm", kshell_rm, "remove files");
+ kshell_add_command("link", kshell_link,
+ "call the link function to create a link to a file");
+ kshell_add_command("rmdir", kshell_rmdir, "remove empty directories");
+ kshell_add_command("mkdir", kshell_mkdir, "make directories");
+ kshell_add_command("stat", kshell_stat, "display file status");
+ kshell_add_command("vfstest", kshell_vfs_test, "runs VFS tests");
+#endif
+
+#ifdef __S5FS__
+ kshell_add_command("s5fstest", kshell_s5fstest, "runs S5FS tests");
+#endif
+
+ kshell_add_command("halt", kshell_halt, "halts the systems");
+ kshell_add_command("exit", kshell_exit, "exits the shell");
+}
+
+void kshell_add_command(const char *name, kshell_cmd_func_t cmd_func,
+ const char *desc)
+{
+ kshell_command_t *cmd;
+
+ cmd = kshell_command_create(name, cmd_func, desc);
+ KASSERT(NULL != cmd);
+ list_insert_tail(&kshell_commands_list, &cmd->kc_commands_link);
+
+ dprintf("Added %s command\n", name);
+}
+
+kshell_t *kshell_create(uint8_t ttyid)
+{
+ kshell_t *ksh;
+
+ ksh = (kshell_t *)kmalloc(sizeof(kshell_t));
+ if (NULL == ksh)
+ {
+ dprintf("Not enough memory to create kshell\n");
+ return NULL;
+ }
+
+#ifdef __VFS__
+ long fd;
+ char tty_path[MAXPATHLEN];
+
+ snprintf(tty_path, sizeof(tty_path), "/dev/tty%u", ttyid);
+ if ((fd = do_open(tty_path, O_RDWR)) < 0)
+ {
+ dprintf("Couldn't open %s\n", tty_path);
+ kfree(ksh);
+ return NULL;
+ }
+ ksh->ksh_out_fd = ksh->ksh_in_fd = ksh->ksh_fd = (int)fd;
+#else
+ chardev_t *cd;
+ cd = chardev_lookup(MKDEVID(TTY_MAJOR, ttyid));
+ if (NULL == cd)
+ {
+ dprintf("Couldn't find TTY with ID %u\n", ttyid);
+ kfree(ksh);
+ return NULL;
+ }
+ ksh->ksh_cd = cd;
+#endif
+
+ dprintf("kshell successfully created on TTY %u\n", ttyid);
+ return ksh;
+}
+
+void kshell_destroy(kshell_t *ksh)
+{
+ KASSERT(NULL != ksh);
+ kprintf(ksh, "Bye!\n");
+#ifdef __VFS__
+ if (do_close(ksh->ksh_fd) < 0)
+ {
+ panic("Error closing TTY file descriptor\n");
+ }
+ dprintf("kshell with file descriptor %d destroyed\n", ksh->ksh_fd);
+#else
+ dprintf("kshell on byte device %u destroyed\n", ksh->ksh_cd->cd_id);
+#endif
+ kfree(ksh);
+}
+
+/**
+ * Removes the token from the input line it came from, replacing it
+ * with spaces.
+ *
+ * @param ksh the kshell
+ * @param token the token to scrub
+ */
+static void kshell_scrub_token(kshell_t *ksh, kshell_token_t *token)
+{
+ KASSERT(NULL != ksh);
+ KASSERT(NULL != token);
+ KASSERT(NULL != token->kt_text);
+
+ memset(token->kt_text, ' ', token->kt_textlen);
+}
+
+/**
+ * Finds the redirection operators ('<' and '>') in the input line,
+ * stores the name of the file to redirect stdout in in redirect_out
+ * and the name of the file to redirect stdin in redirect_in, and
+ * removes any trace of the redirection from the input line.
+ *
+ * @param ksh the kshell
+ * @param line the input line
+ * @param redirect_in buffer to store the name of the file to redirect
+ * stdin from. Buffer size assumed to be at least MAXPATHLEN
+ * @param redirect_out buffer to store the name of the file to stdout
+ * to. Buffer size assumed to be at least MAXPATHLEN
+ * @param append out parameter containing true if the file stdout is
+ * being redirected to should be appended to
+ * @return 0 on success and <0 on error
+ */
+static long kshell_find_redirection(kshell_t *ksh, char *line,
+ char *redirect_in, char *redirect_out,
+ int *append)
+{
+ long retval;
+ kshell_token_t token;
+
+ while ((retval = kshell_next_token(ksh, line, &token)) > 0)
+ {
+ KASSERT(token.kt_type != KTT_EOL);
+ line += retval;
+
+ if (token.kt_type == KTT_WORD)
+ {
+ continue;
+ }
+
+ char *redirect = NULL;
+ if (token.kt_type == KTT_REDIRECT_OUT)
+ {
+ redirect = redirect_out;
+ *append = 0;
+ }
+ else if (token.kt_type == KTT_REDIRECT_OUT_APPEND)
+ {
+ redirect = redirect_out;
+ *append = 1;
+ }
+ else if (token.kt_type == KTT_REDIRECT_IN)
+ {
+ redirect = redirect_in;
+ }
+ kshell_scrub_token(ksh, &token);
+
+ if ((retval = kshell_next_token(ksh, line, &token)) == 0)
+ {
+ goto unexpected_token;
+ }
+ KASSERT(retval > 0);
+
+ if (token.kt_type != KTT_WORD)
+ {
+ goto unexpected_token;
+ }
+ strncpy(redirect, token.kt_text, token.kt_textlen);
+ redirect[token.kt_textlen] = '\0';
+ kshell_scrub_token(ksh, &token);
+ }
+ return 0;
+
+unexpected_token:
+ kprintf(ksh, "kshell: Unexpected token '%s'\n",
+ kshell_token_type_str(token.kt_type));
+ return -1;
+}
+
+/**
+ * Ignoring whitespace, finds the next argument from a string.
+ *
+ * @param ksh the kshell
+ * @param line the string to find arguments in
+ * @param arg out parameter which should point to the beginning of the
+ * next argument if any were found
+ * @param arglen the length of the argument if any were found
+ * @return 0 if no argument was found, and the number of bytes read
+ * otherwise
+ */
+static long kshell_find_next_arg(kshell_t *ksh, char *line, char **arg,
+ size_t *arglen)
+{
+ long retval;
+ kshell_token_t token;
+
+ if ((retval = kshell_next_token(ksh, line, &token)) == 0)
+ {
+ KASSERT(token.kt_type == KTT_EOL);
+ return retval;
+ }
+ KASSERT(token.kt_type == KTT_WORD);
+ *arg = token.kt_text;
+ *arglen = token.kt_textlen;
+
+ /*
+ * This is a little hacky, but not awful.
+ *
+ * If we find a '\0', there are no more arguments
+ * left. However, we still need to return a nonzero value to
+ * alert the calling function about the argument we just
+ * found. Since there are no more arguments, we aren't
+ * overwriting anything by setting the next byte to '\0'. We
+ * also know that we aren't writing into invalid memory
+ * because in the struct definition for kshell_t, we declared
+ * ksh_buf to have KSH_BUF_SIZE + 1 bytes.
+ */
+ if (line[retval] == '\0')
+ {
+ line[retval + 1] = '\0';
+ }
+ else
+ {
+ line[retval] = '\0';
+ }
+ return retval;
+}
+
+/**
+ * Finds the arguments of the command just read into a kshell. This should
+ * be called directly after returning from a read.
+ *
+ * @param buf the buffer to extract arguments from
+ * @param argv out parameter containing an array of null-terminated
+ * strings, one for each argument
+ * @param max_args the maximum number of arguments to find
+ * @param argc out parameter containing the number of arguments found
+ */
+static void kshell_get_args(kshell_t *ksh, char *buf, char **argv,
+ size_t max_args, size_t *argc)
+{
+ size_t arglen;
+
+ KASSERT(NULL != buf);
+ KASSERT(NULL != argv);
+ KASSERT(max_args > 0);
+ KASSERT(NULL != argc);
+
+ *argc = 0;
+ while (kshell_find_next_arg(ksh, buf, argv + *argc, &arglen) &&
+ *argc < max_args)
+ {
+ buf = argv[*argc] + arglen + 1;
+ ++(*argc);
+ }
+ if (*argc >= max_args)
+ {
+ dprintf("Too many arguments\n");
+ }
+}
+
+kshell_command_t *kshell_lookup_command(const char *name, size_t namelen)
+{
+ if (namelen > KSH_CMD_NAME_LEN)
+ {
+ namelen = KSH_CMD_NAME_LEN;
+ }
+
+ list_iterate(&kshell_commands_list, cmd, kshell_command_t,
+ kc_commands_link)
+ {
+ KASSERT(NULL != cmd);
+ if ((strncmp(cmd->kc_name, name, namelen) == 0) &&
+ (namelen == strnlen(cmd->kc_name, KSH_CMD_NAME_LEN)))
+ {
+ return cmd;
+ }
+ }
+ return NULL;
+}
+
+#ifdef __VFS__
+
+/**
+ * If stdin or stdout has been redirected to a file, closes the file
+ * and directs I/O back to stdin and stdout.
+ *
+ * @param the kshell
+ */
+void kshell_undirect(kshell_t *ksh)
+{
+ KASSERT(NULL != ksh);
+
+ if (ksh->ksh_in_fd != ksh->ksh_fd)
+ {
+ if (do_close(ksh->ksh_in_fd) < 0)
+ {
+ panic("kshell: Error closing file descriptor %d\n", ksh->ksh_in_fd);
+ }
+ ksh->ksh_in_fd = ksh->ksh_fd;
+ }
+ if (ksh->ksh_out_fd != ksh->ksh_fd)
+ {
+ if (do_close(ksh->ksh_out_fd) < 0)
+ {
+ panic("kshell: Error closing file descriptor %d\n",
+ ksh->ksh_out_fd);
+ }
+ ksh->ksh_out_fd = ksh->ksh_fd;
+ }
+}
+
+/**
+ * Redirects stdin and stdout.
+ *
+ * @param ksh the kshell
+ * @param redirect_in the name of the file to redirect stdin from
+ * @param redirect_out the name of the file to redirect stdout to
+ * @param append if true, output will be appended
+ * @return 0 on success and <0 on error. If this returns <0, no streams
+ * will be redirected.
+ */
+long kshell_redirect(kshell_t *ksh, const char *redirect_in,
+ const char *redirect_out, int append)
+{
+ long fd;
+
+ KASSERT(NULL != ksh);
+ KASSERT(NULL != redirect_in);
+ KASSERT(NULL != redirect_out);
+
+ if (redirect_in[0] != '\0')
+ {
+ if ((fd = do_open(redirect_in, O_RDONLY | O_CREAT)) < 0)
+ {
+ kprintf(ksh, "kshell: %s: Error opening file\n", redirect_in);
+ goto error;
+ }
+ ksh->ksh_in_fd = (int)fd;
+ }
+ if (redirect_out[0] != '\0')
+ {
+ int flags = append ? O_WRONLY | O_CREAT | O_APPEND : O_WRONLY | O_CREAT | O_TRUNC;
+ if ((fd = do_open(redirect_out, flags)) < 0)
+ {
+ kprintf(ksh, "kshell: %s: Error opening file\n", redirect_out);
+ goto error;
+ }
+ ksh->ksh_out_fd = fd;
+ }
+ return 0;
+
+error:
+ kshell_undirect(ksh);
+ return fd;
+}
+
+#endif
+
+long kshell_execute_next(kshell_t *ksh)
+{
+ static const char *kshell_prompt = "kshell$";
+
+ long nbytes, retval;
+ kshell_command_t *cmd;
+ char *args[KSH_MAX_ARGS];
+ size_t argc;
+ char redirect_in[MAXPATHLEN];
+ char redirect_out[MAXPATHLEN];
+ int append;
+
+ /*
+ * Need that extra byte at the end. See comment in
+ * kshell_find_next_arg.
+ */
+ char buf[KSH_BUF_SIZE + 1];
+
+ KASSERT(NULL != ksh);
+
+ kprintf(ksh, "%s ", kshell_prompt);
+
+ if ((nbytes = kshell_read(ksh, buf, KSH_BUF_SIZE)) <= 0)
+ {
+ return nbytes;
+ }
+ if (nbytes == 1)
+ {
+ return 1;
+ }
+ if (buf[nbytes - 1] == '\n')
+ {
+ /* Overwrite the newline with a null terminator */
+ buf[--nbytes] = '\0';
+ }
+ else
+ {
+ /* Add the null terminator to the end */
+ buf[nbytes] = '\0';
+ }
+
+ /* Even though we can't redirect I/O to files before VFS, we
+ * still want to scrub out any reference to redirection before
+ * passing the line off to kshell_get_args */
+ redirect_in[0] = redirect_out[0] = '\0';
+ if (kshell_find_redirection(ksh, buf, redirect_in, redirect_out, &append) <
+ 0)
+ {
+ goto done;
+ }
+#ifdef __VFS__
+ if ((retval = kshell_redirect(ksh, redirect_in, redirect_out, append)) <
+ 0)
+ {
+ dprintf("Error redirecting I/O\n");
+ goto done;
+ }
+#endif
+
+ kshell_get_args(ksh, buf, args, KSH_MAX_ARGS, &argc);
+ if (argc == 0)
+ {
+ goto done;
+ }
+
+ dprintf("Attempting to execute command '%s'\n", args[0]);
+
+ if (strncmp(args[0], "exit", strlen("exit")) == 0)
+ {
+ nbytes = 0;
+ goto done;
+ }
+
+ if ((cmd = kshell_lookup_command(args[0], strlen(args[0]))) == NULL)
+ {
+ kprintf(ksh, "kshell: %s not a valid command\n", args[0]);
+ }
+ else
+ {
+ if ((retval = cmd->kc_cmd_func(ksh, argc, args)) < 0)
+ {
+ nbytes = retval;
+ goto done;
+ }
+ }
+ goto done;
+
+done:
+#ifdef __VFS__
+ kshell_undirect(ksh);
+#endif
+ return nbytes;
+}
diff --git a/kernel/test/kshell/priv.h b/kernel/test/kshell/priv.h
new file mode 100644
index 0000000..65c9493
--- /dev/null
+++ b/kernel/test/kshell/priv.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include "test/kshell/kshell.h"
+
+#include "util/list.h"
+
+#define dprintf(x, args...) dbg(DBG_TEST, x, ##args)
+
+#define KSH_BUF_SIZE \
+ 1024 /* This really just needs to be as large as \
+ * the line discipline buffer */
+#define KSH_CMD_NAME_LEN 16
+#define KSH_MAX_ARGS 128
+#define KSH_DESC_LEN 64
+
+struct chardev;
+struct kshell_command;
+
+struct kshell
+{
+ /* If we have a filesystem, we can write to the file
+ * descriptor. Otherwise, we need to write to a byte device */
+#ifdef __VFS__
+ int ksh_fd;
+
+ /* Used for redirection */
+ int ksh_out_fd;
+ int ksh_in_fd;
+#else
+ struct chardev *ksh_cd;
+#endif
+};
+
+extern list_t kshell_commands_list;
+
+/**
+ * Searches for a shell command with a specified name.
+ *
+ * @param name name of the command to search for
+ * @param namelen length of name
+ * @return the command, if it exists, or NULL
+ */
+struct kshell_command *kshell_lookup_command(const char *name, size_t namelen);
diff --git a/kernel/test/kshell/tokenizer.c b/kernel/test/kshell/tokenizer.c
new file mode 100644
index 0000000..9406668
--- /dev/null
+++ b/kernel/test/kshell/tokenizer.c
@@ -0,0 +1,74 @@
+#include "tokenizer.h"
+
+#include <ctype.h>
+
+#include "util/debug.h"
+
+#define EOL '\0'
+
+const char *ksh_tok_type_str[] = {"text", "<", ">", ">>", "end of line", ""};
+
+long kshell_next_token(kshell_t *ksh, char *line, kshell_token_t *token)
+{
+ KASSERT(NULL != ksh);
+ KASSERT(NULL != line);
+ KASSERT(NULL != token);
+
+ size_t i = 0;
+ while (line[i] != EOL && isspace(line[i]))
+ ++i;
+ token->kt_text = line + i;
+
+ /* Determine the token type */
+ switch (line[i])
+ {
+ case EOL:
+ token->kt_type = KTT_EOL;
+ token->kt_textlen = 0;
+ break;
+ case '<':
+ token->kt_type = KTT_REDIRECT_IN;
+ token->kt_textlen = 1;
+ /* advance past '<'; i still counts any leading whitespace we skipped */
+ i += 1;
+ break;
+ case '>':
+ if (line[i + 1] == '>')
+ {
+ token->kt_type = KTT_REDIRECT_OUT_APPEND;
+ token->kt_textlen = 2;
+ i += 2;
+ }
+ else
+ {
+ token->kt_type = KTT_REDIRECT_OUT;
+ token->kt_textlen = 1;
+ i += 1;
+ }
+ break;
+ default:
+ token->kt_type = KTT_WORD;
+ token->kt_textlen = 0;
+ break;
+ }
+
+ switch (token->kt_type)
+ {
+ case KTT_WORD:
+ while (!isspace(line[i]) && line[i] != '<' && line[i] != '>' &&
+ line[i] != EOL)
+ {
+ ++i;
+ ++token->kt_textlen;
+ }
+ break;
+ case KTT_EOL:
+ return 0;
+ default:
+ break;
+ }
+
+ return i;
+}
+
+const char *kshell_token_type_str(kshell_token_type_t type)
+{
+ KASSERT(type < KTT_MAX);
+ return ksh_tok_type_str[type];
+}
diff --git a/kernel/test/kshell/tokenizer.h b/kernel/test/kshell/tokenizer.h
new file mode 100644
index 0000000..9c49026
--- /dev/null
+++ b/kernel/test/kshell/tokenizer.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include "types.h"
+
+#include "test/kshell/kshell.h"
+
+typedef enum kshell_token_type
+{
+ KTT_WORD,
+ KTT_REDIRECT_IN, /* '<' */
+ KTT_REDIRECT_OUT, /* '>' */
+ KTT_REDIRECT_OUT_APPEND, /* '>>' */
+ KTT_EOL,
+
+ KTT_MAX /* Number of token types */
+} kshell_token_type_t;
+
+typedef struct kshell_token
+{
+ kshell_token_type_t kt_type;
+ char *kt_text;
+ size_t kt_textlen;
+} kshell_token_t;
+
+/**
+ * Finds the next token in the input line.
+ *
+ * Note: To find multiple tokens from the same line, advance the line
+ * pointer by the number of bytes processed before the next call to
+ * kshell_next_token.
+ *
+ * @param ksh the kshell
+ * @param line the input line to tokenize
+ * @param token out parameter containing the next token found
+ * @return 0 if no more tokens, otherwise, number of bytes processed
+ */
+long kshell_next_token(kshell_t *ksh, char *line, kshell_token_t *token);
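+
+/*
+ * A minimal driving-loop sketch (illustrative only; assumes the caller has a
+ * kshell_t *ksh and a mutable char *line):
+ *
+ * kshell_token_t tok;
+ * long used;
+ * while ((used = kshell_next_token(ksh, line, &tok)) > 0)
+ * {
+ * // tok.kt_type, tok.kt_text, and tok.kt_textlen describe the token
+ * line += used;
+ * }
+ */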
+
+const char *kshell_token_type_str(kshell_token_type_t type);
diff --git a/kernel/test/pipes.c b/kernel/test/pipes.c
new file mode 100644
index 0000000..ee4f195
--- /dev/null
+++ b/kernel/test/pipes.c
@@ -0,0 +1,133 @@
+#include "errno.h"
+#include "globals.h"
+
+#include "fs/file.h"
+#include "fs/pipe.h"
+#include "fs/vfs_syscall.h"
+
+#include "test/kshell/io.h"
+#include "test/kshell/kshell.h"
+
+#define IMAX 256
+#define JMAX 16
+#define KMAX 16
+#define ISTEP (JMAX * KMAX)
+
+static kthread_t *make_proc_and_thread(char *name, kthread_func_t func,
+ int arg1, void *arg2)
+{
+ proc_t *proc = proc_create(name);
+ if (!proc)
+ {
+ return NULL;
+ }
+
+ int i;
+ for (i = 0; i < NFILES; ++i)
+ {
+ proc->p_files[i] = curproc->p_files[i];
+ if (proc->p_files[i])
+ {
+ fref(proc->p_files[i]);
+ }
+ }
+ return kthread_create(proc, func, arg1, arg2);
+}
+
+static void *producer(long arg1, void *arg2)
+{
+ int fd = (int)arg1;
+ kshell_t *ksh = (kshell_t *)arg2;
+
+ kprintf(ksh, "Producing bytes...\n");
+
+ unsigned char buf[KMAX];
+ int i, j, k;
+ for (i = 0; i < IMAX; ++i)
+ {
+ for (j = 0; j < JMAX; ++j)
+ {
+ for (k = 0; k < KMAX; ++k)
+ {
+ buf[k] = (unsigned char)(i ^ (j * KMAX + k));
+ }
+ kprintf(ksh, "Writing bytes %d to %d\n", i * ISTEP + j * KMAX,
+ i * ISTEP + (j + 1) * KMAX);
+ if (do_write(fd, buf, KMAX) == -EPIPE)
+ {
+ kprintf(ksh, "Got EPIPE\n");
+ goto out;
+ }
+ }
+ kprintf(ksh, "Wrote %d bytes\n", (i + 1) * ISTEP);
+ }
+out:
+ return NULL;
+}
+
+static void *consumer(long arg1, void *arg2)
+{
+ int fd = (int)arg1;
+ kshell_t *ksh = (kshell_t *)arg2;
+
+ kprintf(ksh, "Consuming bytes...\n");
+ unsigned char buf[KMAX];
+ int i, j, k;
+ for (i = 0; i < IMAX; ++i)
+ {
+ for (j = 0; j < JMAX; ++j)
+ {
+ kprintf(ksh, "Reading bytes %d to %d\n", i * ISTEP + j * KMAX,
+ i * ISTEP + (j + 1) * KMAX);
+ if (do_read(fd, buf, KMAX) == 0)
+ {
+ kprintf(ksh, "End of pipe\n");
+ goto out;
+ }
+ for (k = 0; k < KMAX; ++k)
+ {
+ if (buf[k] != (i ^ (j * KMAX + k)))
+ {
+ kprintf(ksh, "Byte %d incorrect (expected %2x, got %2x)\n",
+ i * ISTEP + j * KMAX + k, (i ^ (j * KMAX + k)),
+ buf[k]);
+ }
+ }
+ }
+ kprintf(ksh, "Read %d bytes\n", (i + 1) * ISTEP);
+ }
+out:
+ return NULL;
+}
+
+static int test_pipes(kshell_t *ksh, int argc, char **argv)
+{
+ int pfds[2];
+ int err = do_pipe(pfds);
+ if (err < 0)
+ {
+ kprintf(ksh, "Failed to create pipe\n");
+ /* pfds is not valid if do_pipe failed, so bail out early */
+ return err;
+ }
+ kprintf(ksh, "Created pipe with read fd %d and write fd %d\n", pfds[0],
+ pfds[1]);
+
+ sched_make_runnable(
+ make_proc_and_thread("producer", producer, pfds[1], ksh));
+ kprintf(ksh, "Created producer process\n");
+ sched_make_runnable(
+ make_proc_and_thread("consumer", consumer, pfds[0], ksh));
+ kprintf(ksh, "Created consumer process\n");
+
+ do_waitpid(-1, 0, 0);
+ do_waitpid(-1, 0, 0);
+ return 0;
+}
+
+#ifdef __PIPES__
+static __attribute__((unused)) void test_pipes_init()
+{
+ kshell_add_command("test_pipes", test_pipes, "run pipe tests");
+}
+init_func(test_pipes_init);
+init_depends(kshell_init);
+#endif /* __PIPES__ */
diff --git a/kernel/test/proctest.c b/kernel/test/proctest.c
new file mode 100644
index 0000000..31067cd
--- /dev/null
+++ b/kernel/test/proctest.c
@@ -0,0 +1,57 @@
+#include "errno.h"
+#include "globals.h"
+
+#include "test/proctest.h"
+#include "test/usertest.h"
+
+#include "util/debug.h"
+#include "util/printf.h"
+#include "util/string.h"
+
+#include "proc/kthread.h"
+#include "proc/proc.h"
+#include "proc/sched.h"
+
+/*
+ * Set up a testing function for the process to execute.
+*/
+void *test_func(long arg1, void *arg2)
+{
+ proc_t *proc_as_arg = (proc_t *)arg2;
+ test_assert(arg1 == proc_as_arg->p_pid, "Arguments are not set up correctly");
+ test_assert(proc_as_arg->p_state == PROC_RUNNING, "Process state is not running");
+ test_assert(list_empty(&proc_as_arg->p_children), "There should be no child processes");
+ return NULL;
+}
+
+void test_termination()
+{
+ int num_procs_created = 0;
+ proc_t *new_proc1 = proc_create("proc test 1");
+ kthread_t *new_kthread1 = kthread_create(new_proc1, test_func, 2, new_proc1);
+ num_procs_created++;
+ sched_make_runnable(new_kthread1);
+
+ int count = 0;
+ int status;
+ while (do_waitpid(-1, &status, 0) != -ECHILD)
+ {
+ test_assert(status == 0, "Returned status not set correctly");
+ count++;
+ }
+ test_assert(count == num_procs_created,
+ "Expected: %d, Actual: %d number of processes have been cleaned up\n", num_procs_created, count);
+}
+
+long proctest_main(long arg1, void *arg2)
+{
+ dbg(DBG_TEST, "\nStarting Procs tests\n");
+ test_init();
+ test_termination();
+
+ // Add more tests here!
+ // We highly recommend looking at section 3.8 on the handout for help!
+
+ test_fini();
+ return 0;
+}
\ No newline at end of file
diff --git a/kernel/test/s5fstest.c b/kernel/test/s5fstest.c
new file mode 100644
index 0000000..c60ee32
--- /dev/null
+++ b/kernel/test/s5fstest.c
@@ -0,0 +1,251 @@
+//
+// Tests some edge cases of s5fs
+//
+
+#include "errno.h"
+#include "globals.h"
+
+#include "test/usertest.h"
+
+#include "util/debug.h"
+#include "util/printf.h"
+#include "util/string.h"
+
+#include "fs/fcntl.h"
+#include "fs/lseek.h"
+#include "fs/s5fs/s5fs.h"
+#include "fs/vfs_syscall.h"
+
+#define BUFSIZE 256
+#define BIG_BUFSIZE 2056
+
+static void get_file_name(char *buf, size_t sz, long fileno)
+{
+ snprintf(buf, sz, "file%ld", fileno);
+}
+
+// Write to a file until it is either filled up or we get an error.
+static long write_until_fail(int fd)
+{
+ size_t total_written = 0;
+ char buf[BIG_BUFSIZE] = {42};
+ while (total_written < S5_MAX_FILE_SIZE)
+ {
+ long res = do_write(fd, buf, BIG_BUFSIZE);
+ if (res < 0)
+ {
+ return res;
+ }
+ total_written += res;
+ }
+ KASSERT(total_written == S5_MAX_FILE_SIZE);
+ KASSERT(do_lseek(fd, 0, SEEK_END) == S5_MAX_FILE_SIZE);
+
+ return 0;
+}
+
+// Read n bytes from the file and check that they're all 0.
+// We read in increments of BIG_BUFSIZE because n may be very large
+// (e.g. on the order of a million bytes).
+static long is_first_n_bytes_zero(int fd, size_t n)
+{
+ size_t total_read = 0;
+ while (total_read < n)
+ {
+ size_t amt_to_read = MIN(BIG_BUFSIZE, n - total_read);
+ char buf[BIG_BUFSIZE] = {1};
+ long res = do_read(fd, buf, amt_to_read);
+ if ((size_t)res != amt_to_read)
+ {
+ dbg(DBG_TESTFAIL, "do_read result was %ld\n", res);
+ return 0;
+ }
+ total_read += res;
+
+ // Check everything that we read is indeed 0
+ // TODO use gcc intrinsic to just scan for first non-zero
+ for (size_t i = 0; i < amt_to_read; i++)
+ {
+ if (buf[i])
+ {
+ dbg(DBG_TESTFAIL, "buf contains char %d\n", buf[i]);
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+static void test_running_out_of_inodes()
+{
+ // Open a ton of files until we get an error
+ long res;
+ long fileno = 0;
+ char filename[BUFSIZE];
+
+ // open files til we get an error
+ while (1)
+ {
+ get_file_name(filename, BUFSIZE, fileno);
+ res = do_open(filename, O_RDONLY | O_CREAT);
+ if (res >= 0)
+ {
+ fileno++;
+ test_assert(do_close((int)res) == 0, "couldn't close");
+ }
+ else
+ {
+ break;
+ }
+ }
+ test_assert(res == -ENOSPC, "Did not get ENOSPC error");
+
+ // make sure mkdir fails now that we're out of inodes
+ res = do_mkdir("directory");
+ test_assert(res < 0, "do_mkdir worked!?");
+ test_assert(res == -ENOSPC, "unexpected error");
+
+ res = do_mknod("nod", S_IFCHR, 123);
+ test_assert(res != 0, "mknod worked!?");
+ test_assert(res == -ENOSPC, "wrong error code");
+
+ // the last file we tried to open failed
+ fileno--;
+
+ do
+ {
+ get_file_name(filename, BUFSIZE, fileno);
+ res = do_unlink(filename);
+ test_assert(res == 0, "couldnt unlink");
+ fileno--;
+ } while (fileno >= 0);
+
+ // Now we've freed all the files, try to create another file
+ int fd = (int)do_open("file", O_RDONLY | O_CREAT);
+ test_assert(fd >= 0, "Still cannot create files");
+ test_assert(do_close(fd) == 0, "Could not do_close fd");
+ test_assert(do_unlink("file") == 0, "Could not remove file");
+}
+
+static void test_filling_file()
+{
+ long res = 0;
+ int fd = (int)do_open("hugefile", O_RDWR | O_CREAT);
+ KASSERT(fd >= 0);
+
+ res = write_until_fail(fd);
+ test_assert(res == 0, "Did not write to entire file");
+
+ // make sure all other writes are unsuccessful / don't complete
+ char buf[BIG_BUFSIZE] = {0};
+ res = do_write(fd, buf, sizeof(buf));
+ test_assert(res < 0, "Able to write although the file is full");
+ test_assert(res == -EFBIG || res == -EINVAL, "Wrong error code");
+
+ test_assert(do_close(fd) == 0, "couldnt close hugefile");
+ test_assert(do_unlink("hugefile") == 0, "couldnt unlink hugefile");
+}
+
+// Fill up the disk. To do this, we fill up one entire file, then start
+// filling another; the second file's writes should eventually fail with
+// ENOSPC.
+static void test_running_out_of_blocks()
+{
+ long res = 0;
+
+ int fd1 = (int)do_open("fullfile", O_RDWR | O_CREAT);
+
+ res = write_until_fail(fd1);
+ test_assert(res == 0, "Ran out of space quicker than we expected");
+ test_assert(do_close(fd1) == 0, "could not close");
+
+ int fd2 = (int)do_open("partiallyfullfile", O_RDWR | O_CREAT);
+ res = write_until_fail(fd2);
+ test_assert(res == -ENOSPC, "Did not get nospc error");
+
+ test_assert(do_close(fd2) == 0, "could not close");
+
+ test_assert(do_unlink("fullfile") == 0, "couldnt do_unlink file");
+ test_assert(do_unlink("partiallyfullfile") == 0, "couldnt do_unlink file");
+}
+
+// Open a new file, write to some random address in the file,
+// and make sure everything up to that is all 0s.
+static int test_sparseness_direct_blocks()
+{
+ const char *filename = "sparsefile";
+ int fd = (int)do_open(filename, O_RDWR | O_CREAT);
+
+ // Now write to some random address that'll be in a direct block
+ const int addr = 10000;
+ const char *b = "iboros";
+ const size_t sz = strlen(b);
+
+ test_assert(do_lseek(fd, addr, SEEK_SET) == addr, "couldnt seek");
+ test_assert((size_t)do_write(fd, b, sz) == sz,
+ "couldnt write to random address");
+
+ test_assert(do_lseek(fd, 0, SEEK_SET) == 0, "couldnt seek back to begin");
+ test_assert(is_first_n_bytes_zero(fd, addr) == 1,
+ "sparseness for direct blocks failed");
+
+ // Get rid of this file
+ test_assert(do_close(fd) == 0, "couldn't close file");
+ test_assert(do_unlink(filename) == 0, "couldnt unlink file");
+
+ return 0;
+}
+
+static int test_sparseness_indirect_blocks()
+{
+ const char *filename = "bigsparsefile";
+ int fd = (int)do_open(filename, O_RDWR | O_CREAT);
+
+ // Now write to some random address that'll be in an indirect block
+ const int addr = 1000000;
+ const char *b = "iboros";
+ const size_t sz = strlen(b);
+
+ test_assert(do_lseek(fd, addr, SEEK_SET) == addr, "couldnt seek");
+ test_assert((size_t)do_write(fd, b, sz) == sz,
+ "couldnt write to random address");
+
+ test_assert(do_lseek(fd, 0, SEEK_SET) == 0, "couldnt seek back to begin");
+ test_assert(is_first_n_bytes_zero(fd, addr) == 1,
+ "sparseness for indirect blocks failed");
+
+ // Get rid of this file
+ test_assert(do_close(fd) == 0, "couldn't close file");
+ test_assert(do_unlink(filename) == 0, "couldnt unlink file");
+
+ return 0;
+}
+
+long s5fstest_main(int arg0, void *arg1)
+{
+ dbg(DBG_TEST, "\nStarting S5FS test\n");
+
+ test_init();
+
+ KASSERT(do_mkdir("s5fstest") == 0);
+ KASSERT(do_chdir("s5fstest") == 0);
+ dbg(DBG_TEST, "Test dir initialized\n");
+
+ dbg(DBG_TEST, "Testing sparseness for direct blocks\n");
+ test_sparseness_direct_blocks();
+ dbg(DBG_TEST, "Testing sparseness for indirect blocks\n");
+ test_sparseness_indirect_blocks();
+
+ dbg(DBG_TEST, "Testing running out of inodes\n");
+ test_running_out_of_inodes();
+ dbg(DBG_TEST, "Testing filling a file to max capacity\n");
+ test_filling_file();
+ dbg(DBG_TEST, "Testing using all available blocks on disk\n");
+ test_running_out_of_blocks();
+
+ test_assert(do_chdir("..") == 0, "");
+ test_assert(do_rmdir("s5fstest") == 0, "");
+
+ test_fini();
+
+ return 0;
+}
\ No newline at end of file
diff --git a/kernel/test/usertest.c b/kernel/test/usertest.c
new file mode 100644
index 0000000..aa3c231
--- /dev/null
+++ b/kernel/test/usertest.c
@@ -0,0 +1,174 @@
+#include "kernel.h"
+#include "stdarg.h"
+
+#include "test/usertest.h"
+
+#include "util/debug.h"
+#include "util/printf.h"
+
+typedef struct test_data
+{
+ int td_passed;
+ int td_failed;
+} test_data_t;
+
+static void _default_test_fail(const char *file, int line, const char *name,
+ const char *fmt, va_list args);
+
+static void _default_test_pass(int val, const char *file, int line,
+ const char *name, const char *fmt, va_list args);
+
+static test_data_t _test_data;
+static test_pass_func_t _pass_func = _default_test_pass;
+static test_fail_func_t _fail_func = _default_test_fail;
+
+void test_init(void)
+{
+ _test_data.td_passed = 0;
+ _test_data.td_failed = 0;
+}
+
+void test_fini(void)
+{
+ dbgq(DBG_TEST, "tests completed:\n");
+ dbgq(DBG_TEST, "\t\t%d passed\n", _test_data.td_passed);
+ dbgq(DBG_TEST, "\t\t%d failed\n", _test_data.td_failed);
+}
+
+const char *test_errstr(int err)
+{
+ switch (err)
+ {
+ case 1:
+ return "EPERM";
+ case 2:
+ return "ENOENT";
+ case 3:
+ return "ESRCH";
+ case 4:
+ return "EINTR";
+ case 5:
+ return "EIO";
+ case 6:
+ return "ENXIO";
+ case 7:
+ return "E2BIG";
+ case 8:
+ return "ENOEXEC";
+ case 9:
+ return "EBADF";
+ case 10:
+ return "ECHILD";
+ case 11:
+ return "EAGAIN";
+ case 12:
+ return "ENOMEM";
+ case 13:
+ return "EACCES";
+ case 14:
+ return "EFAULT";
+ case 15:
+ return "ENOTBLK";
+ case 16:
+ return "EBUSY";
+ case 17:
+ return "EEXIST";
+ case 18:
+ return "EXDEV";
+ case 19:
+ return "ENODEV";
+ case 20:
+ return "ENOTDIR";
+ case 21:
+ return "EISDIR";
+ case 22:
+ return "EINVAL";
+ case 23:
+ return "ENFILE";
+ case 24:
+ return "EMFILE";
+ case 25:
+ return "ENOTTY";
+ case 26:
+ return "ETXTBSY";
+ case 27:
+ return "EFBIG";
+ case 28:
+ return "ENOSPC";
+ case 29:
+ return "ESPIPE";
+ case 30:
+ return "EROFS";
+ case 31:
+ return "EMLINK";
+ case 32:
+ return "EPIPE";
+ case 33:
+ return "EDOM";
+ case 34:
+ return "ERANGE";
+ case 35:
+ return "EDEADLK";
+ case 36:
+ return "ENAMETOOLONG";
+ case 37:
+ return "ENOLCK";
+ case 38:
+ return "ENOSYS";
+ case 39:
+ return "ENOTEMPTY";
+ case 40:
+ return "ELOOP";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static void _default_test_fail(const char *file, int line, const char *name,
+ const char *fmt, va_list args)
+{
+ _test_data.td_failed++;
+ if (NULL == fmt)
+ {
+ dbgq(DBG_TEST, "FAILED: %s(%d): %s\n", file, line, name);
+ }
+ else
+ {
+ char buf[2048];
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ buf[2047] = '\0';
+ dbgq(DBG_TEST, "FAILED: %s(%d): %s: %s\n", file, line, name, buf);
+ }
+}
+
+static void _default_test_pass(int val, const char *file, int line,
+ const char *name, const char *fmt,
+ va_list args)
+{
+ _test_data.td_passed++;
+}
+
+int _test_assert(int val, const char *file, int line, const char *name,
+ const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+
+ if (0 == val)
+ {
+ if (NULL != _fail_func)
+ {
+ _fail_func(file, line, name, fmt, args);
+ }
+ }
+ else
+ {
+ if (NULL != _pass_func)
+ {
+ _pass_func(val, file, line, name, fmt, args);
+ }
+ }
+
+ va_end(args);
+ return val;
+}
diff --git a/kernel/test/vfstest/vfstest.c b/kernel/test/vfstest/vfstest.c
new file mode 100644
index 0000000..dba2ff4
--- /dev/null
+++ b/kernel/test/vfstest/vfstest.c
@@ -0,0 +1,1173 @@
+#ifdef __KERNEL__
+
+#include "config.h"
+#include "errno.h"
+#include "globals.h"
+#include "kernel.h"
+#include "limits.h"
+
+#include "util/debug.h"
+#include "util/printf.h"
+#include "util/string.h"
+
+#include "proc/kthread.h"
+#include "proc/proc.h"
+
+#include "fs/dirent.h"
+#include "fs/fcntl.h"
+#include "fs/lseek.h"
+#include "fs/stat.h"
+#include "fs/vfs_syscall.h"
+#include "mm/kmalloc.h"
+#include "mm/mman.h"
+
+#include "test/usertest.h"
+#include "test/vfstest/vfstest.h"
+
+#undef __VM__
+
+#else
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <weenix/syscall.h>
+
+#include <test/test.h>
+
+#endif
+
+/* Some helpful strings */
+#define LONGNAME "supercalifragilisticexpialidocious" /* Longer than NAME_LEN \
+ */
+
+#define TESTSTR \
+ "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do " \
+ "eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad " \
+ "minim " \
+ "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea " \
+ "commodo " \
+ "consequat. Duis aute irure dolor in reprehenderit in voluptate velit " \
+ "esse cillum " \
+ "dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non " \
+ "proident, " \
+ "sunt in culpa qui officia deserunt mollit anim id est laborum."
+
+#define SHORTSTR "Quidquid latine dictum, altum videtur"
+
+static char root_dir[64];
+
+static int makedirs(const char *dir)
+{
+ int ret = 0;
+ char *d, *p;
+
+ if (NULL == (d = malloc(strlen(dir) + 1)))
+ {
+ return ENOMEM;
+ }
+ strcpy(d, dir);
+
+ p = d;
+ while (NULL != (p = strchr(p + 1, '/')))
+ {
+ *p = '\0';
+ if (0 != mkdir(d, 0777) && EEXIST != errno)
+ {
+ ret = errno;
+ goto error;
+ }
+ *p = '/';
+ }
+ if (0 != mkdir(d, 0777) && EEXIST != errno)
+ {
+ ret = errno;
+ goto error;
+ }
+
+error:
+ free(d);
+ return ret;
+}
+
+static int getdent(const char *dir, dirent_t *dirent)
+{
+ int ret, fd = -1;
+
+ if (0 > (fd = open(dir, O_RDONLY, 0777)))
+ {
+ return -1;
+ }
+
+ ret = 1;
+ while (ret != 0)
+ {
+ if (0 > (ret = getdents(fd, dirent, sizeof(*dirent))))
+ {
+ return -1;
+ }
+ if (0 != strcmp(".", dirent->d_name) &&
+ 0 != strcmp("..", dirent->d_name))
+ {
+ close(fd);
+ return 1;
+ }
+ }
+
+ close(fd);
+ return 0;
+}
+
+static int removeall(const char *dir)
+{
+ int ret;
+ dirent_t dirent;
+ stat_t status;
+
+ if (0 > chdir(dir))
+ {
+ return errno;
+ }
+
+ ret = 1;
+ while (ret != 0)
+ {
+ if (0 > (ret = getdent(".", &dirent)))
+ {
+ return errno;
+ }
+ if (0 == ret)
+ {
+ break;
+ }
+
+ if (0 > stat(dirent.d_name, &status))
+ {
+ return errno;
+ }
+
+ if (S_ISDIR(status.st_mode))
+ {
+ if (0 > removeall(dirent.d_name))
+ {
+ return errno;
+ }
+ }
+ else
+ {
+ if (0 > unlink(dirent.d_name))
+ {
+ return errno;
+ }
+ }
+ }
+
+ if (0 > chdir(".."))
+ {
+ return errno;
+ }
+
+ if (0 > rmdir(dir))
+ {
+ return errno;
+ }
+
+ return 0;
+}
+
+static void vfstest_start(void)
+{
+ int err;
+
+ root_dir[0] = '\0';
+ do
+ {
+ snprintf(root_dir, sizeof(root_dir), "vfstest-%d", rand());
+ err = mkdir(root_dir, 0777);
+
+ if (errno == EEXIST)
+ {
+ break;
+ }
+
+ if (err && errno != EEXIST)
+ {
+ printf("Failed to make test root directory: %s\n", strerror(errno));
+ exit(errno);
+ }
+ } while (err != 0);
+ printf("Created test root directory: ./%s\n", root_dir);
+}
+
+/*
+ * Terminates the testing environment
+ */
+static void vfstest_term(void)
+{
+ if (0 != removeall(root_dir))
+ {
+ fprintf(stderr, "ERROR: could not remove testing root %s: %s\n",
+ root_dir, strerror(errno));
+ exit(-1);
+ }
+ printf("Removed test root directory: ./%s\n", root_dir);
+}
+
+#define paths_equal(p1, p2) \
+ do \
+ { \
+ int __r; \
+ stat_t __s1, __s2; \
+ if (__r = makedirs(p1), !test_assert(0 == __r, "makedirs(\"%s\"): %s", \
+ p1, test_errstr(__r))) \
+ break; \
+ if (__r = stat(p1, &__s1), !test_assert(0 == __r, "stat(\"%s\"): %s", \
+ p1, test_errstr(errno))) \
+ break; \
+ if (__r = stat(p2, &__s2), !test_assert(0 == __r, "stat(\"%s\"): %s", \
+ p2, test_errstr(errno))) \
+ break; \
+ test_assert(__s1.st_ino == __s2.st_ino, \
+ "paths_equals(\"%s\" (ino %d), \"%s\" (ino %d))", p1, \
+ __s1.st_ino, p2, __s2.st_ino); \
+ } while (0);
+
+#define syscall_fail(expr, err) \
+ (test_assert((errno = 0, -1 == (expr)), \
+ "\nunexpected success, wanted %s (%d)", test_errstr(err), \
+ err) \
+ ? test_assert((expr, errno == err), \
+ "\nexpected %s (%d)" \
+ "\ngot %s (%d)", \
+ test_errstr(err), err, test_errstr(errno), errno) \
+ : 0)
+
+#define syscall_success(expr) \
+ test_assert(0 <= (expr), "\nunexpected error: %s (%d)", \
+ test_errstr(errno), errno)
+
+#define create_file(file) \
+ do \
+ { \
+ int __fd; \
+ if (syscall_success(__fd = open((file), O_RDONLY | O_CREAT, 0777))) \
+ { \
+ syscall_success(close(__fd)); \
+ } \
+ } while (0);
+#define read_fd(fd, size, goal) \
+ do \
+ { \
+ char __buf[64]; \
+ test_assert((ssize_t)strlen(goal) == read(fd, __buf, size), \
+ "\nread unexpected number of bytes"); \
+ test_assert(0 == memcmp(__buf, goal, strlen(goal)), \
+ "\nread data incorrect"); \
+ } while (0);
+#define test_fpos(fd, exp) \
+ do \
+ { \
+ int __g, __e = (exp); \
+ syscall_success(__g = lseek(fd, 0, SEEK_CUR)); \
+ test_assert((__g == __e), "fd %d fpos at %d, expected %d", fd, __g, \
+ __e); \
+ } while (0);
+
+static void vfstest_notdir(void)
+{
+ int fd;
+ stat_t s;
+ syscall_success(mkdir("realdir", 0));
+ syscall_success(fd = open("realdir/file", O_RDWR | O_CREAT, 0));
+ syscall_success(close(fd));
+ syscall_success(fd = open("realdir/file2", O_RDWR | O_CREAT, 0));
+ syscall_success(close(fd));
+
+ syscall_fail(open("realdir/file/nope", O_CREAT | O_RDWR, 0), ENOTDIR);
+ syscall_fail(link("realdir/file2", "realdir/file/nope"), ENOTDIR);
+ syscall_fail(link("realdir/file/nope", "realdir/file3"), ENOTDIR);
+ syscall_fail(unlink("realdir/file/nope"), ENOTDIR);
+ syscall_fail(rmdir("realdir/file/nope"), ENOTDIR);
+ syscall_fail(stat("realdir/file/nope", &s), ENOTDIR);
+ syscall_fail(rename("realdir/file2", "realdir/file/nope"), ENOTDIR);
+ syscall_fail(rename("realdir/file/nope", "realdir/file3"), ENOTDIR);
+
+ /* Cleanup */
+ syscall_success(unlink("realdir/file"));
+ syscall_success(unlink("realdir/file2"));
+ syscall_success(rmdir("realdir"));
+}
+
+static void vfstest_stat(void)
+{
+ int fd;
+ stat_t s;
+
+ syscall_success(mkdir("stat", 0));
+ syscall_success(chdir("stat"));
+
+ syscall_success(stat(".", &s));
+ test_assert(S_ISDIR(s.st_mode), NULL);
+
+ create_file("file");
+ syscall_success(stat("file", &s));
+ test_assert(S_ISREG(s.st_mode), NULL);
+
+ /* file size is correct */
+ syscall_success(fd = open("file", O_RDWR, 0));
+ syscall_success(write(fd, "foobar", 6));
+ syscall_success(stat("file", &s));
+ test_assert(s.st_size == 6, "unexpected file size");
+ syscall_success(close(fd));
+
+ /* error cases */
+#ifdef __VM__
+ syscall_fail(stat(".", NULL), EFAULT);
+#endif
+ syscall_fail(stat("noent", &s), ENOENT);
+
+ syscall_success(chdir(".."));
+}
+
+static void vfstest_mkdir(void)
+{
+ syscall_success(mkdir("mkdir", 0777));
+ syscall_success(chdir("mkdir"));
+
+ /* mkdir an existing file or directory */
+ create_file("file");
+ syscall_fail(mkdir("file", 0777), EEXIST);
+ syscall_success(mkdir("dir", 0777));
+ syscall_fail(mkdir("dir", 0777), EEXIST);
+
+ /* mkdir an invalid path */
+ syscall_fail(mkdir(LONGNAME, 0777), ENAMETOOLONG);
+ syscall_fail(mkdir("file/dir", 0777), ENOTDIR);
+ syscall_fail(mkdir("noent/dir", 0777), ENOENT);
+ syscall_fail(rmdir("file/dir"), ENOTDIR);
+ syscall_fail(rmdir("noent/dir"), ENOENT);
+ syscall_fail(rmdir("noent"), ENOENT);
+ syscall_fail(rmdir("."), EINVAL);
+ syscall_fail(rmdir(".."), ENOTEMPTY);
+ syscall_fail(rmdir("dir/."), EINVAL);
+ syscall_fail(rmdir("dir/.."), ENOTEMPTY);
+ syscall_fail(rmdir("noent/."), ENOENT);
+ syscall_fail(rmdir("noent/.."), ENOENT);
+
+ /* unlink and rmdir the inappropriate types */
+ syscall_fail(rmdir("file"), ENOTDIR);
+ syscall_fail(unlink("dir"), EPERM);
+
+ /* remove non-empty directory */
+ create_file("dir/file");
+ syscall_fail(rmdir("dir"), ENOTEMPTY);
+
+ /* remove empty directory */
+ syscall_success(unlink("dir/file"));
+ syscall_success(rmdir("dir"));
+
+ syscall_success(chdir(".."));
+}
+
+static void vfstest_chdir(void)
+{
+#define CHDIR_TEST_DIR "chdir"
+
+ stat_t ssrc, sdest, sparent, sdir;
+ stat_t rsrc, rdir;
+
+ /* chdir back and forth to CHDIR_TEST_DIR */
+ syscall_success(mkdir(CHDIR_TEST_DIR, 0777));
+ syscall_success(stat(".", &ssrc));
+ syscall_success(stat(CHDIR_TEST_DIR, &sdir));
+
+ test_assert(ssrc.st_ino != sdir.st_ino, NULL);
+
+ syscall_success(chdir(CHDIR_TEST_DIR));
+ syscall_success(stat(".", &sdest));
+ syscall_success(stat("..", &sparent));
+
+ test_assert(sdest.st_ino == sdir.st_ino, NULL);
+ test_assert(ssrc.st_ino == sparent.st_ino, NULL);
+ test_assert(ssrc.st_ino != sdest.st_ino, NULL);
+
+ syscall_success(chdir(".."));
+ syscall_success(stat(".", &rsrc));
+ syscall_success(stat(CHDIR_TEST_DIR, &rdir));
+
+ test_assert(rsrc.st_ino == ssrc.st_ino, NULL);
+ test_assert(rdir.st_ino == sdir.st_ino, NULL);
+
+ /* can't chdir into non-directory */
+ syscall_success(chdir(CHDIR_TEST_DIR));
+ create_file("file");
+ syscall_fail(chdir("file"), ENOTDIR);
+ syscall_fail(chdir("noent"), ENOENT);
+ syscall_success(chdir(".."));
+}
+
+static void vfstest_paths(void)
+{
+#define PATHS_TEST_DIR "paths"
+
+ stat_t s;
+
+ syscall_success(mkdir(PATHS_TEST_DIR, 0777));
+ syscall_success(chdir(PATHS_TEST_DIR));
+
+ syscall_fail(stat("", &s), EINVAL);
+
+ paths_equal(".", ".");
+ paths_equal("1/2/3", "1/2/3");
+ paths_equal("4/5/6", "4/5/6");
+
+ /* root directory */
+ paths_equal("/", "/");
+ paths_equal("/", "/..");
+ paths_equal("/", "/../");
+ paths_equal("/", "/../.");
+
+ /* . and .. */
+ paths_equal(".", "./.");
+ paths_equal(".", "1/..");
+ paths_equal(".", "1/../");
+ paths_equal(".", "1/2/../..");
+ paths_equal(".", "1/2/../..");
+ paths_equal(".", "1/2/3/../../..");
+ paths_equal(".", "1/../1/..");
+ paths_equal(".", "1/../4/..");
+ paths_equal(".", "1/../1/..");
+ paths_equal(".", "1/2/3/../../../4/5/6/../../..");
+ paths_equal(".", "1/./2/./3/./.././.././.././4/./5/./6/./.././.././..");
+
+ /* extra slashes */
+ paths_equal("1/2/3", "1/2/3/");
+ paths_equal("1/2/3", "1//2/3");
+ paths_equal("1/2/3", "1/2//3");
+ paths_equal("1/2/3", "1//2//3");
+ paths_equal("1/2/3", "1//2//3/");
+ paths_equal("1/2/3", "1///2///3///");
+
+ /* strange names */
+ paths_equal("-", "-");
+ paths_equal(" ", " ");
+ paths_equal("\\", "\\");
+ paths_equal("0", "0");
+
+ stat_t st;
+
+ /* error cases */
+ syscall_fail(stat("asdf", &st), ENOENT);
+ syscall_fail(stat("1/asdf", &st), ENOENT);
+ syscall_fail(stat("1/../asdf", &st), ENOENT);
+ syscall_fail(stat("1/2/asdf", &st), ENOENT);
+
+ create_file("1/file");
+ syscall_fail(open("1/file/other", O_RDONLY, 0777), ENOTDIR);
+ syscall_fail(open("1/file/other", O_RDONLY | O_CREAT, 0777), ENOTDIR);
+
+ syscall_success(chdir(".."));
+}
+
+static void vfstest_fd(void)
+{
+#define FD_BUFSIZE 5
+#define BAD_FD 20
+#define HUGE_FD 9999
+
+ int fd1, fd2;
+ char buf[FD_BUFSIZE];
+ struct dirent d;
+
+ syscall_success(mkdir("fd", 0));
+ syscall_success(chdir("fd"));
+
+ /* read/write/close/getdents/dup nonexistent file descriptors */
+ syscall_fail(read(BAD_FD, buf, FD_BUFSIZE), EBADF);
+ syscall_fail(read(HUGE_FD, buf, FD_BUFSIZE), EBADF);
+ syscall_fail(read(-1, buf, FD_BUFSIZE), EBADF);
+
+ syscall_fail(write(BAD_FD, buf, FD_BUFSIZE), EBADF);
+ syscall_fail(write(HUGE_FD, buf, FD_BUFSIZE), EBADF);
+ syscall_fail(write(-1, buf, FD_BUFSIZE), EBADF);
+
+ syscall_fail(close(BAD_FD), EBADF);
+ syscall_fail(close(HUGE_FD), EBADF);
+ syscall_fail(close(-1), EBADF);
+
+ syscall_fail(lseek(BAD_FD, 0, SEEK_SET), EBADF);
+ syscall_fail(lseek(HUGE_FD, 0, SEEK_SET), EBADF);
+ syscall_fail(lseek(-1, 0, SEEK_SET), EBADF);
+
+ syscall_fail(getdents(BAD_FD, &d, sizeof(d)), EBADF);
+ syscall_fail(getdents(HUGE_FD, &d, sizeof(d)), EBADF);
+ syscall_fail(getdents(-1, &d, sizeof(d)), EBADF);
+
+ syscall_fail(dup(BAD_FD), EBADF);
+ syscall_fail(dup(HUGE_FD), EBADF);
+ syscall_fail(dup(-1), EBADF);
+
+ syscall_fail(dup2(BAD_FD, 25), EBADF);
+ syscall_fail(dup2(HUGE_FD, 25), EBADF);
+ syscall_fail(dup2(-1, 25), EBADF);
+
+ /* dup2 has some extra cases since it takes a second fd */
+ syscall_fail(dup2(0, HUGE_FD), EBADF);
+ syscall_fail(dup2(0, -1), EBADF);
+
+ /* if the fds are equal, but the first is invalid or out of the
+ * allowed range */
+ syscall_fail(dup2(BAD_FD, BAD_FD), EBADF);
+ syscall_fail(dup2(HUGE_FD, HUGE_FD), EBADF);
+ syscall_fail(dup2(-1, -1), EBADF);
+
+ /* dup works properly in normal usage */
+ create_file("file01");
+ syscall_success(fd1 = open("file01", O_RDWR, 0));
+ syscall_success(fd2 = dup(fd1));
+ test_assert(fd1 < fd2, "dup(%d) returned %d", fd1, fd2);
+ syscall_success(write(fd2, "hello", 5));
+ test_fpos(fd1, 5);
+ test_fpos(fd2, 5);
+ syscall_success(lseek(fd2, 0, SEEK_SET));
+ test_fpos(fd1, 0);
+ test_fpos(fd2, 0);
+ read_fd(fd1, 5, "hello");
+ test_fpos(fd1, 5);
+ test_fpos(fd2, 5);
+ syscall_success(close(fd2));
+
+ /* dup2 works properly in normal usage */
+ syscall_success(fd2 = dup2(fd1, 25));
+ test_assert(25 == fd2, "dup2(%d, 25) returned %d", fd1, fd2);
+ test_fpos(fd1, 5);
+ test_fpos(fd2, 5);
+ syscall_success(lseek(fd2, 0, SEEK_SET));
+ test_fpos(fd1, 0);
+ test_fpos(fd2, 0);
+ syscall_success(close(fd2));
+
+ /* dup2-ing a file to itself works */
+ syscall_success(fd2 = dup2(fd1, fd1));
+ test_assert(fd1 == fd2, "dup2(%d, %d) returned %d", fd1, fd1, fd2);
+
+ /* dup2 closes previous file */
+ int fd3;
+ create_file("file02");
+ syscall_success(fd3 = open("file02", O_RDWR, 0));
+ syscall_success(fd2 = dup2(fd1, fd3));
+ test_assert(fd2 == fd3, "dup2(%d, %d) returned %d", fd1, fd3, fd2);
+ test_fpos(fd1, 0);
+ test_fpos(fd2, 0);
+ syscall_success(lseek(fd2, 5, SEEK_SET));
+ test_fpos(fd1, 5);
+ test_fpos(fd2, 5);
+ syscall_success(close(fd2));
+ syscall_success(close(fd1));
+
+ syscall_success(chdir(".."));
+}
+
+static void vfstest_memdev(void)
+{
+ int res, fd;
+ char def = 'a';
+ char buf[4096];
+
+ res = 1;
+
+ memset(buf, def, sizeof(buf));
+
+ syscall_success(fd = open("/dev/null", O_RDWR, 0));
+ syscall_success(res = write(fd, buf, sizeof(buf)));
+ test_assert(sizeof(buf) == res, "write of %d bytes /dev/null returned %d",
+ sizeof(buf), res);
+ syscall_success(res = read(fd, buf, sizeof(buf)));
+ test_assert(0 == res, "read of %d bytes /dev/null returned %d", sizeof(buf),
+ res);
+ test_assert(buf[sizeof(buf) / 2] == def,
+ "read from /dev/null changed buffer");
+ syscall_success(close(fd));
+
+ memset(buf, def, sizeof(buf));
+
+ syscall_success(fd = open("/dev/zero", O_RDWR, 0));
+ syscall_success(res = write(fd, buf, sizeof(buf)));
+ test_assert(sizeof(buf) == res, "write of %d bytes /dev/zero returned %d",
+ sizeof(buf), res);
+ syscall_success(res = read(fd, buf, sizeof(buf)));
+ test_assert(sizeof(buf) == res, "read of %d bytes /dev/zero returned %d",
+ sizeof(buf), res);
+ test_assert(buf[sizeof(buf) / 2] == 0,
+ "read from /dev/zero doesn't zero buffer");
+ syscall_success(close(fd));
+}
+
+static void vfstest_write(void)
+{
+#define CHUNK_SIZE 25
+#define NUM_CHUNKS 4
+ int fd, i, res;
+ stat_t s;
+ const char *str = "hello world";
+
+ char chunk[CHUNK_SIZE];
+ memcpy(chunk, str, strlen(str));
+ memset(chunk + strlen(str), 0, 25 - strlen(str));
+
+ syscall_success(mkdir("write", 0));
+ syscall_success(chdir("write"));
+
+ create_file("file");
+ syscall_success(fd = open("file", O_RDWR, 0));
+ for (i = 0; i < NUM_CHUNKS * CHUNK_SIZE; i += CHUNK_SIZE)
+ {
+ syscall_success(lseek(fd, i, SEEK_SET));
+ syscall_success(res = write(fd, str, strlen(str)));
+ test_assert((int)strlen(str) == res, "write of %d bytes returned %d",
+ strlen(str), res);
+ }
+ syscall_success(lseek(fd, 0, SEEK_SET));
+ for (i = 0; i < NUM_CHUNKS - 1; ++i)
+ {
+ char __buf[64];
+ test_assert(CHUNK_SIZE == read(fd, __buf, CHUNK_SIZE),
+ "\nread unexpected number of bytes");
+ test_assert(0 == memcmp(__buf, chunk, CHUNK_SIZE),
+ "\nread data incorrect");
+ }
+ char __buf[64];
+ test_assert((int)strlen(str) == read(fd, __buf, strlen(str)),
+ "\nread unexpected number of bytes");
+ test_assert(0 == memcmp(__buf, chunk, strlen(str)),
+ "\nread data incorrect");
+
+ const char *new_str = "testing";
+ const int loc = 37;
+ // writing to middle of file
+ // make sure file size doesn't change and the write is done at the correct
+ // location
+ syscall_success(lseek(fd, loc, SEEK_SET));
+ syscall_success(res = write(fd, new_str, strlen(new_str)));
+ test_assert((int)strlen(new_str) == res, "write of %d bytes returned %d",
+ strlen(new_str), res);
+ syscall_success(lseek(fd, loc, SEEK_SET));
+ read_fd(fd, strlen(new_str), new_str);
+ test_assert(lseek(fd, 0, SEEK_END) ==
+ (NUM_CHUNKS - 1) * CHUNK_SIZE + (int)strlen(str),
+ "file is not the right size");
+
+ syscall_success(close(fd));
+ syscall_success(unlink("file"));
+
+ syscall_success(chdir(".."));
+ syscall_success(rmdir("write"));
+}
+
+/* These operations should run for a long time and halt when the file
+ * descriptor overflows. */
+static void vfstest_infinite(void)
+{
+ int res, fd;
+ char buf[4096];
+
+ res = 1;
+ syscall_success(fd = open("/dev/null", O_WRONLY, 0));
+ while (0 < res)
+ {
+ syscall_success(res = write(fd, buf, sizeof(buf)));
+ }
+ syscall_success(close(fd));
+
+ res = 1;
+ syscall_success(fd = open("/dev/zero", O_RDONLY, 0));
+ while (0 < res)
+ {
+ syscall_success(res = read(fd, buf, sizeof(buf)));
+ }
+ syscall_success(close(fd));
+}
+
+/*
+ * Tests open(), close(), and unlink()
+ * - Accepts only valid combinations of flags
+ * - Cannot open nonexistent file without O_CREAT
+ * - Cannot write to readonly file
+ * - Cannot read from writeonly file
+ * - Cannot close non-existent file descriptor
+ * - Lowest file descriptor is always selected
+ * - Cannot unlink a directory
+ * - Cannot unlink a non-existent file
+ * - Cannot open a directory for writing
+ * - File descriptors are correctly released when a proc exits
+ */
+static void vfstest_open(void)
+{
+#define OPEN_BUFSIZE 5
+
+ char buf[OPEN_BUFSIZE];
+ int fd, fd2;
+ stat_t s;
+
+ syscall_success(mkdir("open", 0777));
+ syscall_success(chdir("open"));
+
+ /* No invalid combinations of O_RDONLY, O_WRONLY, and O_RDWR. Since
+ * O_RDONLY is stupidly defined as 0, the only invalid possible
+ * combination is O_WRONLY|O_RDWR. */
+ syscall_fail(open("file01", O_WRONLY | O_RDWR | O_CREAT, 0), EINVAL);
+ syscall_fail(open("file01", O_RDONLY | O_RDWR | O_WRONLY | O_CREAT, 0),
+ EINVAL);
+
+ /* Cannot open nonexistent file without O_CREAT */
+ syscall_fail(open("file02", O_WRONLY, 0), ENOENT);
+ syscall_success(fd = open("file02", O_RDONLY | O_CREAT, 0));
+ syscall_success(close(fd));
+ syscall_success(unlink("file02"));
+ syscall_fail(stat("file02", &s), ENOENT);
+
+ /* Cannot create invalid files */
+ create_file("tmpfile");
+ syscall_fail(open("tmpfile/test", O_RDONLY | O_CREAT, 0), ENOTDIR);
+ syscall_fail(open("noent/test", O_RDONLY | O_CREAT, 0), ENOENT);
+ syscall_fail(open(LONGNAME, O_RDONLY | O_CREAT, 0), ENAMETOOLONG);
+
+ /* Cannot write to readonly file */
+ syscall_success(fd = open("file03", O_RDONLY | O_CREAT, 0));
+ syscall_fail(write(fd, "hello", 5), EBADF);
+ syscall_success(close(fd));
+
+ /* Cannot read from writeonly file. Note that we do not unlink() it
+ * from above, so we do not need O_CREAT set. */
+ syscall_success(fd = open("file03", O_WRONLY, 0));
+ syscall_fail(read(fd, buf, OPEN_BUFSIZE), EBADF);
+ syscall_success(close(fd));
+ syscall_success(unlink("file03"));
+ syscall_fail(stat("file03", &s), ENOENT);
+
+ /* Lowest file descriptor is always selected. */
+ syscall_success(fd = open("file04", O_RDONLY | O_CREAT, 0));
+ syscall_success(fd2 = open("file04", O_RDONLY, 0));
+ test_assert(fd2 > fd, "open() did not return lowest fd");
+ syscall_success(close(fd));
+ syscall_success(close(fd2));
+ syscall_success(fd2 = open("file04", O_WRONLY, 0));
+ test_assert(fd2 == fd, "open() did not return correct fd");
+ syscall_success(close(fd2));
+ syscall_success(unlink("file04"));
+ syscall_fail(stat("file04", &s), ENOENT);
+
+ /* Cannot open a directory for writing */
+ syscall_success(mkdir("file05", 0));
+ syscall_fail(open("file05", O_WRONLY, 0), EISDIR);
+ syscall_fail(open("file05", O_RDWR, 0), EISDIR);
+ syscall_success(rmdir("file05"));
+
+ /* Cannot unlink a directory */
+ syscall_success(mkdir("file06", 0));
+ syscall_fail(unlink("file06"), EPERM);
+ syscall_success(rmdir("file06"));
+ syscall_fail(unlink("."), EPERM);
+ syscall_fail(unlink(".."), EPERM);
+
+ /* Cannot unlink a non-existent file */
+ syscall_fail(unlink("file07"), ENOENT);
+
+ /* Cannot open a file as a directory */
+ create_file("file08");
+ syscall_fail(open("file08/", O_RDONLY, 0), ENOTDIR);
+ syscall_success(mkdir("dirA", 0777));
+ syscall_success(chdir("dirA"));
+ create_file("file09");
+ syscall_success(chdir(".."));
+ syscall_fail(open("dirA/file09/", O_RDONLY, 0), ENOTDIR);
+
+ /* Succeeds with trailing slash */
+ syscall_success(mkdir("dirB", 0777));
+ syscall_success(mkdir("dirB/dirC", 0777));
+ syscall_success(fd = open("dirB/", O_RDONLY, 0));
+ syscall_success(close(fd));
+ syscall_success(fd = open("dirB/dirC/", O_RDONLY, 0));
+ syscall_success(close(fd));
+
+ syscall_success(chdir(".."));
+}
+
+static void vfstest_read(void)
+{
+#define READ_BUFSIZE 256
+
+ int fd, ret;
+ char buf[READ_BUFSIZE];
+ stat_t s;
+
+ syscall_success(mkdir("read", 0777));
+ syscall_success(chdir("read"));
+
+ /* Can read and write to a file */
+ syscall_success(fd = open("file01", O_RDWR | O_CREAT, 0));
+ syscall_success(ret = write(fd, "hello", 5));
+ test_assert(5 == ret, "write(%d, \"hello\", 5) returned %d", fd, ret);
+ syscall_success(ret = lseek(fd, 0, SEEK_SET));
+ test_assert(0 == ret, "lseek(%d, 0, SEEK_SET) returned %d", fd, ret);
+ read_fd(fd, READ_BUFSIZE, "hello");
+ syscall_success(close(fd));
+
+ /* cannot read from a directory */
+ syscall_success(mkdir("dir01", 0));
+ syscall_success(fd = open("dir01", O_RDONLY, 0));
+ syscall_fail(read(fd, buf, READ_BUFSIZE), EISDIR);
+ syscall_success(close(fd));
+
+ /* Can seek to beginning, middle, and end of file */
+ syscall_success(fd = open("file02", O_RDWR | O_CREAT, 0));
+ syscall_success(write(fd, "hello", 5));
+
+#define test_lseek(expr, res) \
+ do \
+ { \
+ int __r = (expr); \
+ test_assert((res) == __r, #expr " returned %d, expected %d", __r, \
+ res); \
+ } while (0);
+
+ test_lseek(lseek(fd, 0, SEEK_CUR), 5);
+ read_fd(fd, 10, "");
+ test_lseek(lseek(fd, -1, SEEK_CUR), 4);
+ read_fd(fd, 10, "o");
+ test_lseek(lseek(fd, 2, SEEK_CUR), 7);
+ read_fd(fd, 10, "");
+ syscall_fail(lseek(fd, -8, SEEK_CUR), EINVAL);
+
+ test_lseek(lseek(fd, 0, SEEK_SET), 0);
+ read_fd(fd, 10, "hello");
+ test_lseek(lseek(fd, 3, SEEK_SET), 3);
+ read_fd(fd, 10, "lo");
+ test_lseek(lseek(fd, 7, SEEK_SET), 7);
+ read_fd(fd, 10, "");
+ syscall_fail(lseek(fd, -1, SEEK_SET), EINVAL);
+
+ test_lseek(lseek(fd, 0, SEEK_END), 5);
+ read_fd(fd, 10, "");
+ test_lseek(lseek(fd, -2, SEEK_END), 3);
+ read_fd(fd, 10, "lo");
+ test_lseek(lseek(fd, 3, SEEK_END), 8);
+ read_fd(fd, 10, "");
+ syscall_fail(lseek(fd, -8, SEEK_END), EINVAL);
+
+ syscall_fail(lseek(fd, 0, SEEK_SET + SEEK_CUR + SEEK_END), EINVAL);
+ syscall_success(close(fd));
+
+ /* O_APPEND works properly */
+ create_file("file03");
+ syscall_success(fd = open("file03", O_RDWR, 0));
+ test_fpos(fd, 0);
+ syscall_success(write(fd, "hello", 5));
+ test_fpos(fd, 5);
+ syscall_success(close(fd));
+
+ syscall_success(fd = open("file03", O_RDWR | O_APPEND, 0));
+ test_fpos(fd, 0);
+ syscall_success(write(fd, "hello", 5));
+ test_fpos(fd, 10);
+
+ syscall_success(lseek(fd, 0, SEEK_SET));
+ test_fpos(fd, 0);
+ read_fd(fd, 10, "hellohello");
+ syscall_success(lseek(fd, 5, SEEK_SET));
+ test_fpos(fd, 5);
+ syscall_success(write(fd, "again", 5));
+ test_fpos(fd, 15);
+ syscall_success(lseek(fd, 0, SEEK_SET));
+ test_fpos(fd, 0);
+ read_fd(fd, 15, "hellohelloagain");
+ syscall_success(close(fd));
+
+ /* seek and write beyond end of file */
+ create_file("file04");
+ syscall_success(fd = open("file04", O_RDWR, 0));
+ syscall_success(write(fd, "hello", 5));
+ test_fpos(fd, 5);
+ test_lseek(lseek(fd, 10, SEEK_SET), 10);
+ syscall_success(write(fd, "again", 5));
+ syscall_success(stat("file04", &s));
+ test_assert(s.st_size == 15, "actual size: %d", s.st_size);
+ test_lseek(lseek(fd, 0, SEEK_SET), 0);
+ test_assert(15 == read(fd, buf, READ_BUFSIZE),
+ "unexpected number of bytes read");
+ test_assert(0 == memcmp(buf, "hello\0\0\0\0\0again", 15),
+ "unexpected data read");
+ syscall_success(close(fd));
+
+ syscall_success(chdir(".."));
+}
+
+static void vfstest_getdents(void)
+{
+ int fd, ret;
+ dirent_t dirents[4];
+
+ syscall_success(mkdir("getdents", 0));
+ syscall_success(chdir("getdents"));
+
+ /* getdents works */
+ syscall_success(mkdir("dir01", 0));
+ syscall_success(mkdir("dir01/1", 0));
+ create_file("dir01/2");
+
+ syscall_success(fd = open("dir01", O_RDONLY, 0));
+ syscall_success(ret = getdents(fd, dirents, 4 * sizeof(dirent_t)));
+ test_assert(4 * sizeof(dirent_t) == ret, NULL);
+
+ syscall_success(ret = getdents(fd, dirents, sizeof(dirent_t)));
+ test_assert(0 == ret, NULL);
+
+ syscall_success(lseek(fd, 0, SEEK_SET));
+ test_fpos(fd, 0);
+ syscall_success(ret = getdents(fd, dirents, 2 * sizeof(dirent_t)));
+ test_assert(2 * sizeof(dirent_t) == ret, NULL);
+ syscall_success(ret = getdents(fd, dirents, 2 * sizeof(dirent_t)));
+ test_assert(2 * sizeof(dirent_t) == ret, NULL);
+ syscall_success(ret = getdents(fd, dirents, sizeof(dirent_t)));
+ test_assert(0 == ret, NULL);
+ syscall_success(close(fd));
+
+ /* Cannot call getdents on regular file */
+ create_file("file01");
+ syscall_success(fd = open("file01", O_RDONLY, 0));
+ syscall_fail(getdents(fd, dirents, 4 * sizeof(dirent_t)), ENOTDIR);
+ syscall_success(close(fd));
+
+ syscall_success(chdir(".."));
+}
+
+#ifdef __VM__
+/*
+ * Tests link(), rename(), and mmap() (and munmap, and brk).
+ * These functions are not supported on testfs, and not included in kernel-land
+ * vfs privtest (hence the name)
+ */
+
+static void vfstest_s5fs_vm(void)
+{
+ int fd, newfd, ret;
+ char buf[2048];
+ stat_t oldstatbuf, newstatbuf;
+ void *addr;
+ memset(&oldstatbuf, '\0', sizeof(stat_t));
+ memset(&newstatbuf, '\0', sizeof(stat_t));
+
+ syscall_success(mkdir("s5fs", 0));
+ syscall_success(chdir("s5fs"));
+
+ /* Open some stuff */
+ syscall_success(fd = open("oldchld", O_RDWR | O_CREAT, 0));
+ syscall_success(mkdir("parent", 0));
+
+ /* link/unlink tests */
+ syscall_success(link("oldchld", "newchld"));
+
+ /* Make sure stats match */
+ syscall_success(stat("oldchld", &oldstatbuf));
+ syscall_success(stat("newchld", &newstatbuf));
+ test_assert(0 == memcmp(&oldstatbuf, &newstatbuf, sizeof(stat_t)), NULL);
+
+ /* Make sure contents match */
+ syscall_success(newfd = open("newchld", O_RDWR, 0));
+ syscall_success(ret = write(fd, TESTSTR, strlen(TESTSTR)));
+ test_assert(ret == (int)strlen(TESTSTR), NULL);
+ syscall_success(ret = read(newfd, buf, strlen(TESTSTR)));
+ test_assert(ret == (int)strlen(TESTSTR), NULL);
+ test_assert(0 == strncmp(buf, TESTSTR, strlen(TESTSTR)),
+ "string is %.*s, expected %s", strlen(TESTSTR), buf, TESTSTR);
+
+ syscall_success(close(fd));
+ syscall_success(close(newfd));
+
+ /* Remove one, make sure the other remains */
+ syscall_success(unlink("oldchld"));
+ syscall_fail(mkdir("newchld", 0), EEXIST);
+ syscall_success(link("newchld", "oldchld"));
+
+ /* Link/unlink error cases */
+ syscall_fail(link("oldchld", "newchld"), EEXIST);
+ syscall_fail(link("oldchld", LONGNAME), ENAMETOOLONG);
+ syscall_fail(link("parent", "newchld"), EPERM);
+
+ /* only rename test */
+ /*syscall_success(rename("oldchld", "newchld"));*/
+
+ /* mmap/munmap tests */
+ syscall_success(fd = open("newchld", O_RDWR, 0));
+ test_assert(
+ MAP_FAILED != (addr = mmap(0, strlen(TESTSTR), PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd, 0)),
+ NULL);
+ /* Check contents of memory */
+ test_assert(0 == memcmp(addr, TESTSTR, strlen(TESTSTR)), NULL);
+
+ /* Write to it -> we shouldn't pagefault */
+ memcpy(addr, SHORTSTR, strlen(SHORTSTR));
+ test_assert(0 == memcmp(addr, SHORTSTR, strlen(SHORTSTR)), NULL);
+
+ /* mmap the same thing on top of it, but shared */
+ test_assert(
+ MAP_FAILED != mmap(addr, strlen(TESTSTR), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, 0),
+ NULL);
+ /* Make sure the old contents were restored (the mapping was private) */
+ test_assert(0 == memcmp(addr, TESTSTR, strlen(TESTSTR)), NULL);
+
+ /* Now change the contents */
+ memcpy(addr, SHORTSTR, strlen(SHORTSTR));
+ /* mmap it on, private, on top again */
+ test_assert(
+ MAP_FAILED != mmap(addr, strlen(TESTSTR), PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_FIXED, fd, 0),
+ NULL);
+ /* Make sure it changed */
+ test_assert(0 == memcmp(addr, SHORTSTR, strlen(SHORTSTR)), NULL);
+
+ /* Fork and try changing things */
+ if (!fork())
+ {
+ /* Child changes private mapping */
+ memcpy(addr, TESTSTR, strlen(TESTSTR));
+ exit(0);
+ }
+
+ /* Wait until child is done */
+ syscall_success(wait(0));
+
+ /* Make sure it's actually private */
+ test_assert(0 == memcmp(addr, SHORTSTR, strlen(SHORTSTR)), NULL);
+
+ /* Unmap it */
+ syscall_success(munmap(addr, 2048));
+
+ /* mmap errors */
+ test_assert(MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_PRIVATE, 12, 0),
+ NULL);
+ test_assert(MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_PRIVATE, -1, 0),
+ NULL);
+ test_assert(MAP_FAILED == mmap(0, 1024, PROT_READ, 0, fd, 0), NULL);
+ test_assert(MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_FIXED, fd, 0), NULL);
+ test_assert(
+ MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_FIXED | MAP_PRIVATE, fd, 0),
+ NULL);
+ test_assert(
+ MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_PRIVATE, fd, 0x12345), NULL);
+ test_assert(MAP_FAILED == mmap((void *)0x12345, 1024, PROT_READ,
+ MAP_PRIVATE | MAP_FIXED, fd, 0),
+ NULL);
+ test_assert(MAP_FAILED == mmap(0, 0, PROT_READ, MAP_PRIVATE, fd, 0), NULL);
+ test_assert(MAP_FAILED == mmap(0, -1, PROT_READ, MAP_PRIVATE, fd, 0), NULL);
+ test_assert(
+ MAP_FAILED == mmap(0, 1024, PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0),
+ NULL);
+ syscall_success(close(fd));
+
+ syscall_success(fd = open("newchld", O_RDONLY, 0));
+ test_assert(
+ MAP_FAILED == mmap(0, 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0),
+ NULL);
+ syscall_success(close(fd));
+
+ /* TODO ENODEV (mmap a terminal)
+ EOVERFLOW (mmap SO MUCH of /dev/zero that fpointer would overflow) */
+
+ /* Also should test opening too many file descriptors somewhere */
+
+ /* munmap errors */
+ syscall_fail(munmap((void *)0x12345, 15), EINVAL);
+ syscall_fail(munmap(0x0, 15), EINVAL);
+ syscall_fail(munmap(addr, 0), EINVAL);
+ syscall_fail(munmap(addr, -1), EINVAL);
+
+ /* brk tests */
+ /* Set the break, and use the memory in question */
+ test_assert((void *)-1 != (addr = sbrk(128)), NULL);
+ memcpy(addr, TESTSTR, 128);
+ test_assert(0 == memcmp(addr, TESTSTR, 128), NULL);
+
+ /* Make sure that the brk is being saved properly */
+ test_assert((void *)((unsigned long)addr + 128) == sbrk(0), NULL);
+ /* Knock the break back down */
+ syscall_success(brk(addr));
+
+ /* brk errors */
+ syscall_fail(brk((void *)(&"brk")), ENOMEM);
+ syscall_fail(brk((void *)1), ENOMEM);
+ syscall_fail(brk((void *)&addr), ENOMEM);
+
+ syscall_success(chdir(".."));
+}
+#endif
+
+#ifdef __KERNEL__
+extern uint64_t jiffies;
+#endif
+
+static void seed_randomness()
+{
+#ifdef __KERNEL__
+ srand(jiffies);
+#else
+ srand(time(NULL));
+#endif
+ rand();
+}
+
+/*
+ * Finally, the main function.
+ */
+#ifndef __KERNEL__
+
+int main(int argc, char **argv)
+#else
+int vfstest_main(int argc, char **argv)
+#endif
+{
+ if (argc != 1)
+ {
+ fprintf(stderr, "USAGE: vfstest\n");
+ return 1;
+ }
+
+ seed_randomness();
+
+ test_init();
+ vfstest_start();
+
+ syscall_success(chdir(root_dir));
+
+ vfstest_notdir();
+ vfstest_stat();
+ vfstest_chdir();
+ vfstest_mkdir();
+ vfstest_paths();
+ vfstest_fd();
+ vfstest_open();
+ vfstest_read();
+ vfstest_getdents();
+ vfstest_memdev();
+ vfstest_write();
+
+#ifdef __VM__
+ vfstest_s5fs_vm();
+#endif
+
+ syscall_success(chdir(".."));
+
+ vfstest_term();
+ test_fini();
+
+ return 0;
+}
diff --git a/kernel/test/vmtest.c b/kernel/test/vmtest.c
new file mode 100644
index 0000000..9ffa4c6
--- /dev/null
+++ b/kernel/test/vmtest.c
@@ -0,0 +1,74 @@
+#include "errno.h"
+#include "globals.h"
+
+#include "test/usertest.h"
+#include "test/proctest.h"
+
+#include "util/debug.h"
+#include "util/printf.h"
+#include "util/string.h"
+
+#include "mm/mm.h"
+#include "mm/page.h"
+#include "mm/slab.h"
+#include "mm/kmalloc.h"
+#include "vm/vmmap.h"
+
+long test_vmmap() {
+ vmmap_t *map = curproc->p_vmmap;
+
+ // Make sure we start out cleanly
+ KASSERT(vmmap_is_range_empty(map, ADDR_TO_PN(USER_MEM_LOW), ADDR_TO_PN(USER_MEM_HIGH - USER_MEM_LOW)));
+
+ // Go through the address space page by page, make sure we find nothing
+ for (size_t i = ADDR_TO_PN(USER_MEM_LOW); i < ADDR_TO_PN(USER_MEM_HIGH); i++) {
+ KASSERT(!vmmap_lookup(map, i));
+ }
+
+ // You can probably change this.
+ size_t num_vmareas = 5;
+ // Probably shouldn't change this to anything that's not a power of two.
+ size_t num_pages_per_vmarea = 16;
+
+ size_t prev_start = ADDR_TO_PN(USER_MEM_HIGH);
+ for (size_t i = 0; i < num_vmareas; i++) {
+ ssize_t start = vmmap_find_range(map, num_pages_per_vmarea, VMMAP_DIR_HILO);
+ test_assert(start + num_pages_per_vmarea == prev_start, "Incorrect return value from vmmap_find_range");
+
+ vmarea_t *vma = kmalloc(sizeof(vmarea_t));
+ KASSERT(vma && "Unable to alloc the vmarea");
+ memset(vma, 0, sizeof(vmarea_t));
+
+ vma->vma_start = start;
+ vma->vma_end = start + num_pages_per_vmarea;
+ vmmap_insert(map, vma);
+
+ prev_start = start;
+ }
+
+ // Now, our address space should look like:
+ // EMPTY EMPTY EMPTY [ ][ ][ ][ ][ ]
+ // The empty region begins at LP, and the occupied vmareas run from
+ // section_start up to HP, where:
+ // LP --> the lowest possible userland page number
+ // HP --> the highest possible userland page number
+ // section_start --> HP - (num_vmareas * num_pages_per_vmarea)
+
+ list_iterate(&map->vmm_list, vma, vmarea_t, vma_plink) {
+ list_remove(&vma->vma_plink);
+ kfree(vma);
+ }
+
+ return 0;
+}
+
+long vmtest_main(long arg1, void* arg2) {
+ test_init();
+ test_vmmap();
+
+ // Write your own tests here!
+
+ test_fini();
+ return 0;
+}
diff --git a/kernel/util/debug.c b/kernel/util/debug.c
new file mode 100644
index 0000000..47c8345
--- /dev/null
+++ b/kernel/util/debug.c
@@ -0,0 +1,237 @@
+#include "main/apic.h"
+#include "main/io.h"
+#include "util/printf.h"
+#include "util/string.h"
+
+/*
+ * Debug message behavior.
+ *
+ * To disable a dbg mode add ',-name' to this variable. To enable one add
+ * ',name'. For example to have everything except 'mm' and 'pagealloc' you would
+ * set DBG to 'all,-mm,-pagealloc'. To have only 'test', 'testpass', 'testfail'
+ * you would set DBG to '-all,test,testpass,testfail'.
+ *
+ * We generally recommend that you leave this set to 'all' with some of the
+ * less useful message types disabled. To see all available message types, and
+ * to potentially add to them see 'kernel/include/util/debug.h'
+ *
+ * Note that due to the way this is interpreted either 'all' or '-all' should
+ * always be the first thing in this variable. Note that this setting can be
+ * changed at runtime by modifying the dbg_modes global variable.
+ */
+#define INIT_DBG_MODES "-all"
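+
+/* For example (a sketch only; "-all" above is what this file actually uses),
+ * a more verbose setting that keeps everything except the memory-management
+ * channels mentioned in the comment above would be:
+ *
+ * #define INIT_DBG_MODES "all,-mm,-pagealloc"
+ */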
+
+/* Below is a truly terrible poll-driven serial driver that we use for
+ * debugging purposes. It outputs to COM1, but this can be easily changed.
+ * It does not use interrupts, and cannot read input. */
+/* This port is COM1 */
+#define PORT 0x3f8
+/* Corresponding interrupt vector */
+#define PORT_INTR 0x0d
+
+uint64_t dbg_modes;
+
+typedef struct dbg_mode
+{
+ const char *d_name;
+ uint64_t d_mode;
+ const char *d_color;
+} dbg_mode_t;
+
+void dbg_init()
+{
+ outb(PORT + 3, 0x80); /* Enable DLAB (set baud rate divisor) */
+ outb(PORT + 0, 0x03); /* Set divisor to 3 (lo byte) 38400 baud */
+ outb(PORT + 1, 0x00); /* (hi byte) */
+ outb(PORT + 3, 0x03); /* 8 bits, no parity, one stop bit */
+ outb(PORT + 2, 0xC7); /* Enable FIFO, clear them, with 14-byte threshold */
+
+ dbg_add_modes(INIT_DBG_MODES);
+}
+
+static dbg_mode_t dbg_tab[] = {DBG_TAB};
+
+const char *dbg_color(uint64_t d_mode)
+{
+ dbg_mode_t *mode;
+ for (mode = dbg_tab; mode->d_mode != 0UL; mode++)
+ {
+ if (mode->d_mode & d_mode)
+ {
+ return mode->d_color;
+ }
+ }
+ /* If we get here, something went seriously wrong */
+ panic("Unknown debug mode 0x%lx\n", d_mode);
+}
+
+static void dbg_puts(char *c)
+{
+ while (*c != '\0')
+ {
+ /* Wait until the port is free */
+ while (!(inb(PORT + 5) & 0x20))
+ ;
+ outb(PORT, (uint8_t)*c++);
+ }
+}
+
+#define BUFFER_SIZE 1024
+
+void dbg_print(char *fmt, ...)
+{
+ va_list args;
+ char buf[BUFFER_SIZE];
+ size_t count;
+
+ va_start(args, fmt);
+ count = (size_t)vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ if (count >= sizeof(buf))
+ {
+ dbg_puts(
+ "WARNING: The following message has been truncated due to "
+ "buffer size limitations.\n");
+ }
+ dbg_puts(buf);
+}
+
+void dbg_printinfo(dbg_infofunc_t func, const void *data)
+{
+ char buf[BUFFER_SIZE];
+ func(data, buf, BUFFER_SIZE);
+ dbg_puts(buf);
+}
+
+#ifndef NDEBUG
+/**
+ * searches for <code>name</code> in the list of known
+ * debugging modes specified above and, if it
+ * finds <code>name</code>, adds the corresponding
+ * debugging mode to a list
+ */
+void dbg_add_mode(const char *name)
+{
+ long cancel;
+ dbg_mode_t *mode;
+
+ if (*name == '-')
+ {
+ cancel = 1;
+ name++;
+ }
+ else
+ {
+ cancel = 0;
+ }
+
+ for (mode = dbg_tab; mode->d_name != NULL; mode++)
+ {
+ if (strcmp(name, mode->d_name) == 0)
+ {
+ break;
+ }
+ }
+ if (mode->d_name == NULL)
+ {
+ dbg_print("Warning: Unknown debug option: \"%s\"\n", name);
+ return;
+ }
+
+ if (cancel)
+ {
+ dbg_modes &= ~mode->d_mode;
+ }
+ else
+ {
+ dbg_modes |= mode->d_mode;
+ }
+}
+
+/**
+ * Cycles through each comma-delimited debugging option and
+ * adds it to the debugging modes by calling dbg_add_mode
+ */
+void dbg_add_modes(const char *modes)
+{
+ char env[256];
+ char *name;
+
+ strncpy(env, modes, sizeof(env));
+ /* Maybe it would be good if we did this without strtok, but I'm too lazy */
+ for (name = strtok(env, ","); name; name = strtok(NULL, ","))
+ {
+ dbg_add_mode(name);
+ }
+}
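+
+/* Illustrative usage (not from the original source): with the mode names used
+ * in the documentation above, a call such as
+ *
+ *     dbg_add_modes("-all,test,testpass,testfail");
+ *
+ * clears every mode and then enables only 'test', 'testpass', and 'testfail'.
+ * dbg_init() drives the same mechanism with INIT_DBG_MODES. */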
+
+size_t dbg_modes_info(const void *data, char *buf, size_t size)
+{
+ KASSERT(NULL == data);
+ KASSERT(0 < size);
+
+ size_t osize = size;
+
+ dbg_mode_t *mode;
+ for (mode = dbg_tab; mode->d_name != NULL; ++mode)
+ {
+ if (dbg_modes & mode->d_mode && mode->d_mode != DBG_ALL)
+ {
+ int len;
+ if ((len = snprintf(buf, size, "%s,", mode->d_name)) >= (int)size)
+ {
+ break;
+ }
+ else
+ {
+ buf += len;
+ size -= len;
+ }
+ }
+ }
+
+ if (size == osize)
+ {
+ buf[0] = '\0';
+ return 0;
+ }
+ else
+ {
+ /* remove trailing comma */
+ buf[-1] = '\0';
+ return osize - size + 1;
+ }
+}
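+
+/* For example (illustrative), if only the 'test' and 'testpass' modes were
+ * enabled, dbg_modes_info() would leave "test,testpass" in buf: the loop
+ * writes a trailing comma after each name and the final comma is then
+ * replaced with the terminating NUL. */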
+#endif
+
+/* This is meant as a good point to automatically set a breakpoint which will
+ * stop just after a panic has occurred and printed its message. */
+noreturn static void dbg_panic_halt()
+{
+ __asm__ volatile("cli; hlt");
+ __builtin_unreachable();
+}
+
+#define PANIC_BUFSIZE 2048
+
+noreturn void dbg_panic(const char *file, int line, const char *func,
+ const char *fmt, ...)
+{
+ char buf[PANIC_BUFSIZE];
+ va_list args;
+ va_start(args, fmt);
+
+ DEBUG_ENTER
+ dbg_print("C%ld P%ld panic in %s:%u %s(): ", curcore.kc_id,
+ curproc ? curproc->p_pid : -1L, file, line, func);
+ vsnprintf(buf, PANIC_BUFSIZE, fmt, args);
+ dbg_print("%s", buf);
+ dbg_print("\nC%ld Halting.\n\n", apic_current_id());
+ DEBUG_EXIT
+
+ va_end(args);
+
+ dbg_panic_halt();
+}
diff --git a/kernel/util/debug.py b/kernel/util/debug.py
new file mode 100644
index 0000000..7d1ce0d
--- /dev/null
+++ b/kernel/util/debug.py
@@ -0,0 +1,77 @@
+import gdb
+
+import weenix
+import weenix.info
+
+
+class InfoCommand(weenix.Command):
+ """usage: info <infofunc> [<data>]
+ <infofunc> the info function to be called
+ <data> the first argument to <infofunc>, if unspecified NULL is used
+ Prints the string generated by one of the kernel's info functions."""
+
+ def __init__(self):
+ weenix.Command.__init__(self, "info", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL)
+
+ def invoke(self, arg, tty):
+ args = gdb.string_to_argv(arg)
+ if len(args) < 1 or len(args) > 2:
+ gdb.write("{0}\n".format(self.__doc__))
+ raise gdb.GdbError("invalid arguments")
+ gdb.write(weenix.info.string(args[0], args[1] if (len(args) > 1) else None))
+
+
+InfoCommand()
+
+
+class DbgCommand(weenix.Command):
+ """usage: dbg [<modes>]
+    <modes> any number of whitespace-separated debug modes
+ When no arguments are given prints a list of all active debug
+ modes. If any debug modes are listed they are added to the
+ current debug modes. If a listed mode is prefixed with a
+ '-' it is removed instead of added."""
+
+ def __init__(self):
+ weenix.Command.__init__(self, "dbg", gdb.COMMAND_DATA)
+
+ def _modes(self):
+ i = 0
+ l = list()
+ while gdb.parse_and_eval("dbg_tab[{0}]".format(i))["d_name"] != 0:
+ mode = gdb.parse_and_eval("dbg_tab[{0}]".format(i))
+ i += 1
+ l.append(mode["d_name"].string())
+ return l
+
+ def invoke(self, arg, tty):
+ if len(arg.strip()) == 0:
+ info = weenix.info.string("dbg_modes_info")
+ if len(info) == 0:
+ gdb.write("No active modes.\n")
+ else:
+ gdb.write("{0}\n".format(weenix.info.string("dbg_modes_info")))
+ else:
+ modes = self._modes()
+ for mode in arg.split():
+ name = mode[1:] if (mode.startswith("-")) else mode
+                if name not in modes:
+                    gdb.write(
+                        'warning: skipping non-existent mode "{0}"\n'.format(name)
+ )
+ else:
+ weenix.eval_func("dbg_add_mode", '"{0}"'.format(mode))
+
+ def complete(self, line, word):
+ l = self._modes()
+        l = [x for x in l if x.startswith(word)]  # a list, so sort() below works in Python 3
+ for used in line.split():
+ if used.startswith("-"):
+ used = used[1:]
+            l = [x for x in l if x != used]
+ l.sort()
+
+ return l
+
+
+DbgCommand()
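+
+# Illustrative gdb session based on the docstring above (the mode names are
+# examples only):
+#
+#   (gdb) dbg                 # print the currently active debug modes
+#   (gdb) dbg test -mm        # enable 'test' and disable 'mm'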
diff --git a/kernel/util/init.c b/kernel/util/init.c
new file mode 100644
index 0000000..d1bc0d8
--- /dev/null
+++ b/kernel/util/init.c
@@ -0,0 +1,142 @@
+#include "kernel.h"
+
+#include "mm/kmalloc.h"
+
+#include "util/debug.h"
+#include "util/init.h"
+#include "util/list.h"
+#include "util/string.h"
+
+static int _init_search_count = 0;
+
+struct init_function
+{
+ init_func_t if_func;
+ const char *if_name;
+ list_link_t if_link;
+
+ int if_search;
+ int if_called;
+ list_t if_deps;
+};
+
+struct init_depends
+{
+ const char *id_name;
+ list_link_t id_link;
+};
+
+static void _init_call(list_t *funcs, struct init_function *func)
+{
+ list_iterate(&func->if_deps, dep, struct init_depends, id_link)
+ {
+ struct init_function *found = NULL;
+ list_iterate(funcs, f, struct init_function, if_link)
+ {
+ if (strcmp(dep->id_name, f->if_name) == 0)
+ {
+ found = f;
+ break;
+ }
+ }
+
+ if (!found)
+ {
+ panic("'%s' dependency for '%s' does not exist", dep->id_name,
+ func->if_name);
+ }
+
+ if (func->if_search == found->if_search)
+ {
+ panic("circular dependency between '%s' and '%s'", func->if_name,
+ found->if_name);
+ }
+
+ dbg(DBG_INIT, "'%s' depends on '%s': ", func->if_name, found->if_name);
+ if (!found->if_called)
+ {
+ dbgq(DBG_INIT, "calling\n");
+ found->if_search = func->if_search;
+ _init_call(funcs, found);
+ }
+ else
+ {
+ dbgq(DBG_INIT, "already called\n");
+ }
+ }
+
+ KASSERT(!func->if_called);
+
+ dbg(DBG_INIT, "Calling %s (0x%p)\n", func->if_name, func->if_func);
+ func->if_func();
+ func->if_called = 1;
+}
+
+void init_call_all()
+{
+ list_t funcs;
+ char *buf, *end;
+
+ list_init(&funcs);
+ buf = (char *)&kernel_start_init;
+ end = (char *)&kernel_end_init;
+
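+    /* The loop below assumes each record in the init section is laid out as a
+     * function pointer, followed by the function's NUL-terminated name,
+     * followed by zero or more dependency records, each of which is a NULL
+     * pointer followed by a NUL-terminated dependency name. */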
+ while (buf < end)
+ {
+ struct init_function *curr = kmalloc(sizeof(*curr));
+ KASSERT(NULL != curr);
+
+ list_insert_tail(&funcs, &curr->if_link);
+ list_init(&curr->if_deps);
+
+ KASSERT(NULL != *(uintptr_t *)buf);
+ curr->if_func = (init_func_t) * (uintptr_t *)buf;
+ curr->if_name = buf + sizeof(curr->if_func);
+ curr->if_search = 0;
+ curr->if_called = 0;
+
+ buf += sizeof(curr->if_func) + strlen(curr->if_name) + 1;
+
+ while ((NULL == *(uintptr_t *)buf) && (buf < end))
+ {
+ struct init_depends *dep = kmalloc(sizeof(*dep));
+ KASSERT(NULL != dep);
+
+ list_insert_tail(&curr->if_deps, &dep->id_link);
+
+ dep->id_name = buf + sizeof(curr->if_func);
+ buf += sizeof(curr->if_func) + strlen(dep->id_name) + 1;
+ }
+ }
+
+ KASSERT(buf == end);
+
+ dbg(DBG_INIT, "Initialization functions and dependencies:\n");
+ list_iterate(&funcs, func, struct init_function, if_link)
+ {
+ dbgq(DBG_INIT, "%s (0x%p): ", func->if_name, func->if_func);
+ list_iterate(&func->if_deps, dep, struct init_depends, id_link)
+ {
+ dbgq(DBG_INIT, "%s ", dep->id_name);
+ }
+ dbgq(DBG_INIT, "\n");
+ }
+
+ list_iterate(&funcs, func, struct init_function, if_link)
+ {
+ if (!func->if_called)
+ {
+ func->if_search = ++_init_search_count;
+ _init_call(&funcs, func);
+ }
+ }
+
+ list_iterate(&funcs, func, struct init_function, if_link)
+ {
+ list_iterate(&func->if_deps, dep, struct init_depends, id_link)
+ {
+ kfree(dep);
+ }
+ kfree(func);
+ }
+}
diff --git a/kernel/util/list.c b/kernel/util/list.c
new file mode 100644
index 0000000..81a1beb
--- /dev/null
+++ b/kernel/util/list.c
@@ -0,0 +1,53 @@
+
+#include <util/debug.h>
+#include <util/list.h>
+
+inline void list_init(list_t *list) { list->l_next = list->l_prev = list; }
+
+inline void list_link_init(list_link_t *link)
+{
+ link->l_next = link->l_prev = NULL;
+}
+
+inline long list_link_is_linked(const list_link_t *link)
+{
+ return link->l_next && link->l_prev;
+}
+
+inline long list_empty(const list_t *list) { return list->l_next == list; }
+
+inline void list_assert_sanity(const list_t *list)
+{
+ KASSERT(list->l_next && list->l_next->l_prev && list->l_prev &&
+ list->l_prev->l_next);
+}
+
+inline void list_insert_before(list_link_t *link, list_link_t *to_insert)
+{
+ list_link_t *prev = to_insert;
+ list_link_t *next = link;
+ prev->l_next = next;
+ prev->l_prev = next->l_prev;
+ next->l_prev->l_next = prev;
+ next->l_prev = prev;
+}
+
+inline void list_insert_head(list_t *list, list_link_t *link)
+{
+ list_insert_before((list)->l_next, link);
+}
+
+inline void list_insert_tail(list_t *list, list_link_t *link)
+{
+ list_insert_before(list, link);
+}
+
+inline void list_remove(list_link_t *link)
+{
+ list_link_t *ll = link;
+ list_link_t *prev = ll->l_prev;
+ list_link_t *next = ll->l_next;
+ prev->l_next = next;
+ next->l_prev = prev;
+ ll->l_next = ll->l_prev = NULL;
+}
diff --git a/kernel/util/list.py b/kernel/util/list.py
new file mode 100644
index 0000000..4eeed03
--- /dev/null
+++ b/kernel/util/list.py
@@ -0,0 +1,32 @@
+import gdb
+
+import weenix
+import weenix.list
+
+
+class ListCommand(weenix.Command):
+ """usage: list <list> [<type> <member>]
+ <list> the list_t to be printed
+ <type> the type of the values stored on the list
+ <member> type's list link member used to make the list
+ Prints all items on a list_t, if <type> and <member> are not given
+ then the addresses of the list links are printed, otherwise the items
+ are printed assuming that they have the given type."""
+
+ def __init__(self):
+ weenix.Command.__init__(self, "list", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL)
+
+ def invoke(self, arg, tty):
+ args = gdb.string_to_argv(arg)
+ if len(args) == 1:
+ for i, item in enumerate(weenix.list.load(args[0])):
+ gdb.write("{0:>3}: {1:8}\n".format(i, item.link_addr()))
+ elif len(args) == 3:
+ for i, item in enumerate(weenix.list.load(args[0], args[1], args[2])):
+ gdb.write("{0:>3}: {1}\n".format(i, item.item()))
+ else:
+ gdb.write("{0}\n".format(self.__doc__))
+ raise gdb.GdbError("invalid arguments")
+
+
+ListCommand()
diff --git a/kernel/util/math.c b/kernel/util/math.c
new file mode 100644
index 0000000..93900a2
--- /dev/null
+++ b/kernel/util/math.c
@@ -0,0 +1,411 @@
+// todo port to 64 bit
+#if 0
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: math.c
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes:
+ *
+ * Date: Aug 2003
+ *
+ * Environment: Xen Minimal OS
+ * Description: Library functions for 64bit arith and other
+ * from freebsd, files in sys/libkern/ (qdivrem.c, etc)
+ *
+ ****************************************************************************
+ * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
+ ****************************************************************************
+ *-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
+*/
+
+#include "kernel.h"
+#include "types.h"
+
+/*
+ * Depending on the desired operation, we view a `long long' (aka quad_t) in
+ * one or more of the following formats.
+ */
+union uu {
+ int64_t q; /* as a (signed) quad */
+ int64_t uq; /* as an unsigned quad */
+ long sl[2]; /* as two signed longs */
+ unsigned long ul[2]; /* as two unsigned longs */
+};
+/* XXX RN: Yuck hardcoded endianess :) */
+#define _QUAD_HIGHWORD 1
+#define _QUAD_LOWWORD 0
+/*
+ * Define high and low longwords.
+ */
+#define H _QUAD_HIGHWORD
+#define L _QUAD_LOWWORD
+
+/*
+ * Total number of bits in a quad_t and in the pieces that make it up.
+ * These are used for shifting, and also below for halfword extraction
+ * and assembly.
+ */
+#define CHAR_BIT 8 /* number of bits in a char */
+#define QUAD_BITS (sizeof(int64_t) * CHAR_BIT)
+#define LONG_BITS (sizeof(long) * CHAR_BIT)
+#define HALF_BITS (sizeof(long) * CHAR_BIT / 2)
+
+/*
+ * Extract high and low shortwords from longword, and move low shortword of
+ * longword to upper half of long, i.e., produce the upper longword of
+ * ((quad_t)(x) << (number_of_bits_in_long/2)). (`x' must actually be u_long.)
+ *
+ * These are used in the multiply code, to split a longword into upper
+ * and lower halves, and to reassemble a product as a quad_t, shifted left
+ * (sizeof(long)*CHAR_BIT/2).
+ */
+#define HHALF(x) ((x) >> HALF_BITS)
+#define LHALF(x) ((x) & ((1UL << HALF_BITS) - 1))
+#define LHUP(x) ((x) << HALF_BITS)
+
+/*
+ * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
+ * section 4.3.1, pp. 257--259.
+ */
+#define B (1UL << HALF_BITS) /* digit base */
+
+/* Combine two `digits' to make a single two-digit number. */
+#define COMBINE(a, b) (((unsigned long)(a) << HALF_BITS) | (b))
+
+/* select a type for digits in base B: use unsigned short if they fit */
+/* #if ULONG_MAX == 0xffffffff && USHORT_MAX >= 0xffff
+typedef unsigned short digit;
+#else */
+typedef unsigned long digit;
+/* #endif */
+
+
+/*
+ * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
+ * `fall out' the left (there never will be any such anyway).
+ * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
+ */
+static void
+shl(register digit *p, register int len, register int sh)
+{
+ register int i;
+
+ for (i = 0; i < len; i++)
+ p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
+ p[i] = LHALF(p[i] << sh);
+}
+
+/*
+ * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
+ *
+ * We do this in base 2-sup-HALF_BITS, so that all intermediate products
+ * fit within u_long. As a consequence, the maximum length dividend and
+ * divisor are 4 `digits' in this base (they are shorter if they have
+ * leading zeros).
+ */
+uint64_t
+__qdivrem(uint64_t uq, uint64_t vq, uint64_t *arq)
+{
+ union uu tmp;
+ digit *u, *v, *q;
+ register digit v1, v2;
+ unsigned long qhat, rhat, t;
+ int m, n, d, j, i;
+ digit uspace[5], vspace[5], qspace[5];
+
+ /*
+ * Take care of special cases: divide by zero, and u < v.
+ */
+ if (vq == 0) {
+ /* divide by zero. */
+ static volatile const unsigned int zero = 0;
+
+ tmp.ul[H] = tmp.ul[L] = 1 / zero;
+ if (arq)
+ *arq = uq;
+ return tmp.q;
+ }
+ if (uq < vq) {
+ if (arq)
+ *arq = uq;
+ return 0;
+ }
+ u = &uspace[0];
+ v = &vspace[0];
+ q = &qspace[0];
+
+ /*
+ * Break dividend and divisor into digits in base B, then
+ * count leading zeros to determine m and n. When done, we
+ * will have:
+ * u = (u[1]u[2]...u[m+n]) sub B
+ * v = (v[1]v[2]...v[n]) sub B
+ * v[1] != 0
+ * 1 < n <= 4 (if n = 1, we use a different division algorithm)
+ * m >= 0 (otherwise u < v, which we already checked)
+ * m + n = 4
+ * and thus
+ * m = 4 - n <= 2
+ */
+ tmp.uq = uq;
+ u[0] = 0;
+ u[1] = HHALF(tmp.ul[H]);
+ u[2] = LHALF(tmp.ul[H]);
+ u[3] = HHALF(tmp.ul[L]);
+ u[4] = LHALF(tmp.ul[L]);
+ tmp.uq = vq;
+ v[1] = HHALF(tmp.ul[H]);
+ v[2] = LHALF(tmp.ul[H]);
+ v[3] = HHALF(tmp.ul[L]);
+ v[4] = LHALF(tmp.ul[L]);
+ for (n = 4; v[1] == 0; v++) {
+ if (--n == 1) {
+ unsigned long rbj; /* r*B+u[j] (not root boy jim) */
+ digit q1, q2, q3, q4;
+
+ /*
+ * Change of plan, per exercise 16.
+ * r = 0;
+ * for j = 1..4:
+ * q[j] = floor((r*B + u[j]) / v),
+ * r = (r*B + u[j]) % v;
+ * We unroll this completely here.
+ */
+ t = v[2]; /* nonzero, by definition */
+ q1 = u[1] / t;
+ rbj = COMBINE(u[1] % t, u[2]);
+ q2 = rbj / t;
+ rbj = COMBINE(rbj % t, u[3]);
+ q3 = rbj / t;
+ rbj = COMBINE(rbj % t, u[4]);
+ q4 = rbj / t;
+ if (arq)
+ *arq = rbj % t;
+ tmp.ul[H] = COMBINE(q1, q2);
+ tmp.ul[L] = COMBINE(q3, q4);
+ return tmp.q;
+ }
+ }
+
+ /*
+ * By adjusting q once we determine m, we can guarantee that
+ * there is a complete four-digit quotient at &qspace[1] when
+ * we finally stop.
+ */
+ for (m = 4 - n; u[1] == 0; u++)
+ m--;
+ for (i = 4 - m; --i >= 0;)
+ q[i] = 0;
+ q += 4 - m;
+
+ /*
+ * Here we run Program D, translated from MIX to C and acquiring
+ * a few minor changes.
+ *
+ * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
+ */
+ d = 0;
+ for (t = v[1]; t < B / 2; t <<= 1)
+ d++;
+ if (d > 0) {
+ shl(&u[0], m + n, d); /* u <<= d */
+ shl(&v[1], n - 1, d); /* v <<= d */
+ }
+ /*
+ * D2: j = 0.
+ */
+ j = 0;
+ v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
+ v2 = v[2]; /* for D3 */
+ do {
+ register digit uj0, uj1, uj2;
+
+ /*
+ * D3: Calculate qhat (\^q, in TeX notation).
+ * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
+ * let rhat = (u[j]*B + u[j+1]) mod v[1].
+ * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
+ * decrement qhat and increase rhat correspondingly.
+ * Note that if rhat >= B, v[2]*qhat < rhat*B.
+ */
+ uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
+ uj1 = u[j + 1]; /* for D3 only */
+ uj2 = u[j + 2]; /* for D3 only */
+ if (uj0 == v1) {
+ qhat = B;
+ rhat = uj1;
+ goto qhat_too_big;
+ } else {
+ unsigned long nn = COMBINE(uj0, uj1);
+ qhat = nn / v1;
+ rhat = nn % v1;
+ }
+ while (v2 * qhat > COMBINE(rhat, uj2)) {
+qhat_too_big:
+ qhat--;
+ if ((rhat += v1) >= B)
+ break;
+ }
+ /*
+ * D4: Multiply and subtract.
+ * The variable `t' holds any borrows across the loop.
+ * We split this up so that we do not require v[0] = 0,
+ * and to eliminate a final special case.
+ */
+ for (t = 0, i = n; i > 0; i--) {
+ t = u[i + j] - v[i] * qhat - t;
+ u[i + j] = LHALF(t);
+ t = (B - HHALF(t)) & (B - 1);
+ }
+ t = u[j] - t;
+ u[j] = LHALF(t);
+ /*
+ * D5: test remainder.
+ * There is a borrow if and only if HHALF(t) is nonzero;
+ * in that (rare) case, qhat was too large (by exactly 1).
+ * Fix it by adding v[1..n] to u[j..j+n].
+ */
+ if (HHALF(t)) {
+ qhat--;
+ for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
+ t += u[i + j] + v[i];
+ u[i + j] = LHALF(t);
+ t = HHALF(t);
+ }
+ u[j] = LHALF(u[j] + t);
+ }
+ q[j] = qhat;
+ } while (++j <= m); /* D7: loop on j. */
+
+ /*
+ * If caller wants the remainder, we have to calculate it as
+ * u[m..m+n] >> d (this is at most n digits and thus fits in
+ * u[m+1..m+n], but we may need more source digits).
+ */
+ if (arq) {
+ if (d) {
+ for (i = m + n; i > m; --i)
+ u[i] = (u[i] >> d) |
+ LHALF(u[i - 1] << (HALF_BITS - d));
+ u[i] = 0;
+ }
+ tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
+ tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
+ *arq = tmp.q;
+ }
+
+ tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
+ tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
+ return tmp.q;
+}
+
+
+/*
+ * Divide two signed quads.
+ * ??? if -1/2 should produce -1 on this machine, this code is wrong
+ */
+int64_t __divdi3(int64_t a, int64_t b)
+{
+ uint64_t ua, ub, uq;
+ int neg;
+
+ if (a < 0)
+ ua = -(uint64_t)a, neg = 1;
+ else
+ ua = a, neg = 0;
+ if (b < 0)
+ ub = -(uint64_t)b, neg ^= 1;
+ else
+ ub = b;
+ uq = __qdivrem(ua, ub, (uint64_t *)0);
+ return (neg ? -uq : uq);
+}
+
+/*
+ * Divide two unsigned quads.
+ */
+uint64_t
+__udivdi3(uint64_t a, uint64_t b)
+{
+ return __qdivrem(a, b, (uint64_t *)0);
+}
+
+
+/*
+ * Return remainder after dividing two unsigned quads.
+ */
+uint64_t
+__umoddi3(uint64_t a, uint64_t b)
+{
+ uint64_t r;
+
+ (void)__qdivrem(a, b, &r);
+ return r;
+}
+
+/*
+ * Return ceil(log_2(x))
+ * We shift our input right until we get zero. The number of times we had to
+ * shift before getting zero gives us the ceiling of log2(x), except for powers
+ * of 2, in which case it gives us 1 + log2(x). Thus, we check whether it's a
+ * power of two, and special case that.
+ * author: dap
+ */
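+/* Worked example (illustrative): log2(8) shifts 8 -> 4 -> 2 -> 1 -> 0, four
+ * shifts, and since 8 is a power of two it returns 4 - 1 = 3; log2(5) shifts
+ * 5 -> 2 -> 1 -> 0, three shifts, and since 5 is not a power of two it
+ * returns 3 = ceil(log2(5)). */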
+int log2(int x)
+{
+ int current = x;
+ /* y keeps track of 2^(result) to see if our input was a power of 2 */
+ int y = 1;
+ int result = 0;
+ while (current) {
+ current >>= 1;
+ ++result;
+ y <<= 1;
+ }
+ y >>= 1;
+ if (y == x)
+ return result - 1;
+
+ return result;
+}
+
+#endif
diff --git a/kernel/util/printf.c b/kernel/util/printf.c
new file mode 100644
index 0000000..6daf8ce
--- /dev/null
+++ b/kernel/util/printf.c
@@ -0,0 +1,996 @@
+/*
+ ****************************************************************************
+ * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
+ ****************************************************************************
+ *
+ * File: printf.c
+ * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
+ * Changes: Grzegorz Milos (gm281@cam.ac.uk)
+ *
+ * Date: Aug 2003, Aug 2005
+ *
+ * Environment: Xen Minimal OS
+ * Description: Library functions for printing
+ * (freebsd port, mainly sys/subr_prf.c)
+ *
+ ****************************************************************************
+ *
+ *-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
+ */
+
+#include "ctype.h"
+#include "kernel.h"
+#include "limits.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+/**
+ * simple_strtoul - convert a string to an unsigned long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
+{
+ unsigned long result = 0, value;
+
+ if (!base)
+ {
+ base = 10;
+ if (*cp == '0')
+ {
+ base = 8;
+ cp++;
+ if ((*cp == 'x') && isxdigit(cp[1]))
+ {
+ cp++;
+ base = 16;
+ }
+ }
+ }
+ while (isxdigit(*cp) &&
+ (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) <
+ base)
+ {
+ result = result * base + value;
+ cp++;
+ }
+ if (endp)
+ {
+ *endp = (char *)cp;
+ }
+ return result;
+}
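+
+/* Example (illustrative): with base 0 the prefix selects the radix, so
+ * simple_strtoul("0x1a", NULL, 0) is parsed as hexadecimal and returns 26,
+ * while simple_strtoul("017", NULL, 0) is parsed as octal and returns 15. */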
+
+/**
+ * simple_strtol - convert a string to a signed long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long simple_strtol(const char *cp, char **endp, unsigned int base)
+{
+ if (*cp == '-')
+ {
+ return -simple_strtoul(cp + 1, endp, base);
+ }
+ return simple_strtoul(cp, endp, base);
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long long simple_strtoull(const char *cp, char **endp,
+ unsigned int base)
+{
+ unsigned long long result = 0, value;
+
+ if (!base)
+ {
+ base = 10;
+ if (*cp == '0')
+ {
+ base = 8;
+ cp++;
+ if ((*cp == 'x') && isxdigit(cp[1]))
+ {
+ cp++;
+ base = 16;
+ }
+ }
+ }
+ while (isxdigit(*cp) &&
+ (value = isdigit(*cp) ? *cp - '0'
+ : (islower(*cp) ? toupper(*cp) : *cp) - 'A' +
+ 10) < base)
+ {
+ result = result * base + value;
+ cp++;
+ }
+ if (endp)
+ {
+ *endp = (char *)cp;
+ }
+ return result;
+}
+
+/**
+ * simple_strtoll - convert a string to a signed long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long long simple_strtoll(const char *cp, char **endp, unsigned int base)
+{
+ if (*cp == '-')
+ {
+ return -simple_strtoull(cp + 1, endp, base);
+ }
+ return simple_strtoull(cp, endp, base);
+}
+
+static int skip_atoi(const char **s)
+{
+ int i = 0;
+
+ while (isdigit(**s))
+ i = i * 10 + *((*s)++) - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SPECIAL 32 /* 0x */
+#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+
+static char *number(char *buf, char *end, long long num, int base, int size,
+ int precision, int type)
+{
+ char c, sign, tmp[66];
+ const char *digits;
+ const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+ const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ int i;
+
+ digits = (type & LARGE) ? large_digits : small_digits;
+ if (type & LEFT)
+ {
+ type &= ~ZEROPAD;
+ }
+ if (base < 2 || base > 36)
+ {
+ return buf;
+ }
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN)
+ {
+ if (num < 0)
+ {
+ sign = '-';
+ num = -num;
+ size--;
+ }
+ else if (type & PLUS)
+ {
+ sign = '+';
+ size--;
+ }
+ else if (type & SPACE)
+ {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL)
+ {
+ if (base == 16)
+ {
+ size -= 2;
+ }
+ else if (base == 8)
+ {
+ size--;
+ }
+ }
+ i = 0;
+ if (num == 0)
+ {
+ tmp[i++] = '0';
+ }
+ else
+ {
+ /* XXX KAF: force unsigned mod and div. */
+ /* XXX kernel does not support long long division */
+ unsigned long long num2 = (unsigned long long)num;
+ unsigned int base2 = (unsigned int)base;
+ while (num2 != 0)
+ {
+ tmp[i++] = digits[num2 % base2];
+ num2 /= base2;
+ }
+ }
+ if (i > precision)
+ {
+ precision = i;
+ }
+ size -= precision;
+ if (!(type & (ZEROPAD + LEFT)))
+ {
+ while (size-- > 0)
+ {
+ if (buf <= end)
+ {
+ *buf = ' ';
+ }
+ ++buf;
+ }
+ }
+ if (sign)
+ {
+ if (buf <= end)
+ {
+ *buf = sign;
+ }
+ ++buf;
+ }
+ if (type & SPECIAL)
+ {
+ if (base == 8)
+ {
+ if (buf <= end)
+ {
+ *buf = '0';
+ }
+ ++buf;
+ }
+ else if (base == 16)
+ {
+ if (buf <= end)
+ {
+ *buf = '0';
+ }
+ ++buf;
+ if (buf <= end)
+ {
+ *buf = digits[33];
+ }
+ ++buf;
+ }
+ }
+ if (!(type & LEFT))
+ {
+ while (size-- > 0)
+ {
+ if (buf <= end)
+ {
+ *buf = c;
+ }
+ ++buf;
+ }
+ }
+ while (i < precision--)
+ {
+ if (buf <= end)
+ {
+ *buf = '0';
+ }
+ ++buf;
+ }
+ while (i-- > 0)
+ {
+ if (buf <= end)
+ {
+ *buf = tmp[i];
+ }
+ ++buf;
+ }
+ while (size-- > 0)
+ {
+ if (buf <= end)
+ {
+ *buf = ' ';
+ }
+ ++buf;
+ }
+ return buf;
+}
+
+/**
+ * vsnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want snprintf instead.
+ */
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int len;
+ unsigned long long num;
+ int i, base;
+ char *str, *end, c;
+ const char *s;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /* min. # of digits for integers; max
+ number of chars for from string */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+ /* 'z' support added 23/7/1999 S.H. */
+ /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+ str = buf;
+ end = buf + size - 1;
+
+ if (end < buf - 1)
+ {
+ end = ((void *)-1);
+ size = end - buf + 1;
+ }
+
+ for (; *fmt; ++fmt)
+ {
+ if (*fmt != '%')
+ {
+ if (str <= end)
+ {
+ *str = *fmt;
+ }
+ ++str;
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+ repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt)
+ {
+ case '-':
+ flags |= LEFT;
+ goto repeat;
+ case '+':
+ flags |= PLUS;
+ goto repeat;
+ case ' ':
+ flags |= SPACE;
+ goto repeat;
+ case '#':
+ flags |= SPECIAL;
+ goto repeat;
+ case '0':
+ flags |= ZEROPAD;
+ goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ {
+ field_width = skip_atoi(&fmt);
+ }
+ else if (*fmt == '*')
+ {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0)
+ {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.')
+ {
+ ++fmt;
+ if (isdigit(*fmt))
+ {
+ precision = skip_atoi(&fmt);
+ }
+ else if (*fmt == '*')
+ {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ {
+ precision = 0;
+ }
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z')
+ {
+ qualifier = *fmt;
+ ++fmt;
+ if (qualifier == 'l' && *fmt == 'l')
+ {
+ qualifier = 'L';
+ ++fmt;
+ }
+ }
+ if (*fmt == 'q')
+ {
+ qualifier = 'L';
+ ++fmt;
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt)
+ {
+ case 'c':
+ if (!(flags & LEFT))
+ {
+ while (--field_width > 0)
+ {
+ if (str <= end)
+ {
+ *str = ' ';
+ }
+ ++str;
+ }
+ }
+ c = (unsigned char)va_arg(args, int);
+ if (str <= end)
+ {
+ *str = c;
+ }
+ ++str;
+ while (--field_width > 0)
+ {
+ if (str <= end)
+ {
+ *str = ' ';
+ }
+ ++str;
+ }
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ if (!s)
+ {
+ s = "<NULL>";
+ }
+
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT))
+ {
+ while (len < field_width--)
+ {
+ if (str <= end)
+ {
+ *str = ' ';
+ }
+ ++str;
+ }
+ }
+ for (i = 0; i < len; ++i)
+ {
+ if (str <= end)
+ {
+ *str = *s;
+ }
+ ++str;
+ ++s;
+ }
+ while (len < field_width--)
+ {
+ if (str <= end)
+ {
+ *str = ' ';
+ }
+ ++str;
+ }
+ continue;
+
+ case 'p':
+ if (field_width == -1)
+ {
+ field_width = 2 * sizeof(void *);
+ flags |= ZEROPAD;
+ }
+ str = number(str, end, (unsigned long)va_arg(args, void *), 16,
+ field_width, precision, flags);
+ continue;
+
+ case 'n':
+ /* FIXME:
+ * What does C99 say about the overflow case here? */
+ if (qualifier == 'l')
+ {
+ long *ip = va_arg(args, long *);
+ *ip = (str - buf);
+ }
+ else if (qualifier == 'Z')
+ {
+ size_t *ip = va_arg(args, size_t *);
+ *ip = (str - buf);
+ }
+ else
+ {
+ int *ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ if (str <= end)
+ {
+ *str = '%';
+ }
+ ++str;
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'X':
+ flags |= LARGE;
+ base = 16;
+ break;
+ case 'x':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ if (str <= end)
+ {
+ *str = '%';
+ }
+ ++str;
+ if (*fmt)
+ {
+ if (str <= end)
+ {
+ *str = *fmt;
+ }
+ ++str;
+ }
+ else
+ {
+ --fmt;
+ }
+ continue;
+ }
+ if (qualifier == 'L')
+ {
+ num = va_arg(args, long long);
+ }
+ else if (qualifier == 'l')
+ {
+ num = va_arg(args, unsigned long);
+ if (flags & SIGN)
+ {
+ num = (signed long)num;
+ }
+ }
+ else if (qualifier == 'Z')
+ {
+ num = va_arg(args, size_t);
+ }
+ else if (qualifier == 'h')
+ {
+ num = (unsigned short)va_arg(args, int);
+ if (flags & SIGN)
+ {
+ num = (signed short)num;
+ }
+ }
+ else
+ {
+ num = va_arg(args, unsigned int);
+ if (flags & SIGN)
+ {
+ num = (signed int)num;
+ }
+ }
+
+ str = number(str, end, num, base, field_width, precision, flags);
+ }
+ if (str <= end)
+ {
+ *str = '\0';
+ }
+ else if (size > 0)
+ {
+ /* don't write out a null byte if the buf size is zero */
+ *end = '\0';
+ }
+ /* the trailing null byte doesn't count towards the total
+ * ++str;
+ */
+ return str - buf;
+}
+
+/**
+ * snprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ */
+int snprintf(char *buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+ return i;
+}
+
+size_t iprintf(char **str, size_t *size, char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ va_start(args, fmt);
+ len = vsnprintf(*str, *size, fmt, args);
+ va_end(args);
+
+ /* The way the "iprintf system" works, we're never going to catch
+ * an error anywhere else. The size of the buffer will appear to have
+ * increased, and it will appear to start farther to the left -> bad!
+ * (However, kernel vsnprintf should never fail...) */
+ KASSERT(len >= 0);
+
+ len = MIN(len, (int)(*size - 1));
+
+ *str += len;
+ *size -= len;
+
+ return *size - 1;
+}
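+
+/* Illustrative usage sketch (variable names are hypothetical, not from the
+ * original source): callers thread a cursor and the remaining size through
+ * successive calls, e.g.
+ *
+ *     char buf[128];
+ *     char *p = buf;
+ *     size_t rem = sizeof(buf);
+ *     iprintf(&p, &rem, "ticks = %lu", ticks);
+ *     iprintf(&p, &rem, " (%lu ms)", ms);
+ *
+ * after which p points at the terminating NUL and rem is the space left. */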
+
+/**
+ * vsscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: format of buffer
+ * @args: arguments
+ */
+int vsscanf(const char *buf, const char *fmt, va_list args)
+{
+ const char *str = buf;
+ char *next;
+ char digit;
+ int num = 0;
+ int qualifier;
+ int base;
+ int field_width;
+ int is_sign = 0;
+
+ while (*fmt && *str)
+ {
+ /* skip any white space in format */
+        /* white space in format matches any amount of
+ * white space, including none, in the input.
+ */
+ if (isspace(*fmt))
+ {
+ while (isspace(*fmt))
+ ++fmt;
+ while (isspace(*str))
+ ++str;
+ }
+
+ /* anything that is not a conversion must match exactly */
+ if (*fmt != '%' && *fmt)
+ {
+ if (*fmt++ != *str++)
+ {
+ break;
+ }
+ continue;
+ }
+
+ if (!*fmt)
+ {
+ break;
+ }
+ ++fmt;
+
+ /* skip this conversion.
+ * advance both strings to next white space
+ */
+ if (*fmt == '*')
+ {
+ while (!isspace(*fmt) && *fmt)
+ fmt++;
+ while (!isspace(*str) && *str)
+ str++;
+ continue;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ {
+ field_width = skip_atoi(&fmt);
+ }
+
+ /* get conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z' ||
+ *fmt == 'z')
+ {
+ qualifier = *fmt++;
+ if (qualifier == *fmt)
+ {
+ if (qualifier == 'h')
+ {
+ qualifier = 'H';
+ fmt++;
+ }
+ else if (qualifier == 'l')
+ {
+ qualifier = 'L';
+ fmt++;
+ }
+ }
+ }
+ base = 10;
+ is_sign = 0;
+
+ if (!*fmt || !*str)
+ {
+ break;
+ }
+
+ switch (*fmt++)
+ {
+ case 'c':
+ {
+ char *s = (char *)va_arg(args, char *);
+ if (field_width == -1)
+ {
+ field_width = 1;
+ }
+ do
+ {
+ *s++ = *str++;
+ } while (--field_width > 0 && *str);
+ num++;
+ }
+ continue;
+ case 's':
+ {
+ char *s = (char *)va_arg(args, char *);
+ if (field_width == -1)
+ {
+ field_width = INT_MAX;
+ }
+ /* first, skip leading white space in buffer */
+ while (isspace(*str))
+ str++;
+
+ /* now copy until next white space */
+ while (*str && !isspace(*str) && field_width--)
+ {
+ *s++ = *str++;
+ }
+ *s = '\0';
+ num++;
+ }
+ continue;
+ case 'n':
+ /* return number of characters read so far */
+ {
+ int *i = (int *)va_arg(args, int *);
+ *i = str - buf;
+ }
+ continue;
+ case 'o':
+ base = 8;
+ break;
+ case 'x':
+ case 'X':
+ base = 16;
+ break;
+ case 'i':
+ base = 0;
+ is_sign = 1;
+ break;
+ case 'd':
+ is_sign = 1;
+ break;
+ case 'u':
+ break;
+ case '%':
+ /* looking for '%' in str */
+ if (*str++ != '%')
+ {
+ return num;
+ }
+ continue;
+ default:
+ /* invalid format; stop here */
+ return num;
+ }
+
+ /* have some sort of integer conversion.
+ * first, skip white space in buffer.
+ */
+ while (isspace(*str))
+ str++;
+
+ digit = *str;
+ if (is_sign && digit == '-')
+ {
+ digit = *(str + 1);
+ }
+
+ if (!digit || (base == 16 && !isxdigit(digit)) ||
+ (base == 10 && !isdigit(digit)) ||
+ (base == 8 && (!isdigit(digit) || digit > '7')) ||
+ (base == 0 && !isdigit(digit)))
+ {
+ break;
+ }
+
+ switch (qualifier)
+ {
+ case 'H': /* that's 'hh' in format */
+ if (is_sign)
+ {
+ signed char *s = (signed char *)va_arg(args, signed char *);
+ *s = (signed char)simple_strtol(str, &next, base);
+ }
+ else
+ {
+ unsigned char *s =
+ (unsigned char *)va_arg(args, unsigned char *);
+ *s = (unsigned char)simple_strtoul(str, &next, base);
+ }
+ break;
+ case 'h':
+ if (is_sign)
+ {
+ short *s = (short *)va_arg(args, short *);
+ *s = (short)simple_strtol(str, &next, base);
+ }
+ else
+ {
+ unsigned short *s =
+ (unsigned short *)va_arg(args, unsigned short *);
+ *s = (unsigned short)simple_strtoul(str, &next, base);
+ }
+ break;
+ case 'l':
+ if (is_sign)
+ {
+ long *l = (long *)va_arg(args, long *);
+ *l = simple_strtol(str, &next, base);
+ }
+ else
+ {
+ unsigned long *l =
+ (unsigned long *)va_arg(args, unsigned long *);
+ *l = simple_strtoul(str, &next, base);
+ }
+ break;
+ case 'L':
+ if (is_sign)
+ {
+ long long *l = (long long *)va_arg(args, long long *);
+ *l = simple_strtoll(str, &next, base);
+ }
+ else
+ {
+ unsigned long long *l = (unsigned long long *)va_arg(
+ args, unsigned long long *);
+ *l = simple_strtoull(str, &next, base);
+ }
+ break;
+ case 'Z':
+ case 'z':
+ {
+ size_t *s = (size_t *)va_arg(args, size_t *);
+ *s = (size_t)simple_strtoul(str, &next, base);
+ }
+ break;
+ default:
+ if (is_sign)
+ {
+ int *i = (int *)va_arg(args, int *);
+ *i = (int)simple_strtol(str, &next, base);
+ }
+ else
+ {
+ unsigned int *i =
+ (unsigned int *)va_arg(args, unsigned int *);
+ *i = (unsigned int)simple_strtoul(str, &next, base);
+ }
+ break;
+ }
+ num++;
+
+ if (!next)
+ {
+ break;
+ }
+ str = next;
+ }
+ return num;
+}
+
+/**
+ * sscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: formatting of buffer
+ * @...: resulting arguments
+ */
+int sscanf(const char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsscanf(buf, fmt, args);
+ va_end(args);
+ return i;
+}
diff --git a/kernel/util/string.c b/kernel/util/string.c
new file mode 100644
index 0000000..2d47075
--- /dev/null
+++ b/kernel/util/string.c
@@ -0,0 +1,509 @@
+#include "ctype.h"
+#include "errno.h"
+
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+ int ret;
+ /* Compare bytes at %esi and %edi up to %ecx bytes OR until
+ * the bytes are not equal */
+ /* If not equal, set zf = 0 and stop */
+ /* Find out zf and sf and use them to return 0,1, or -1 */
+ __asm__ volatile(
+ "xor %%eax, %%eax\n\t" /* Zero output */
+ "cld\n\t" /* Make sure direction is forwards */
+ "repe\n\t"
+ "cmpsb\n\t"
+ "setnz %%al\n\t" /* If it is not zero, put 1 in low part */
+ "sets %%ah" /* If sign set (means 2nd arg larger),
+ * put 1 in high part */
+ : "=a"(ret)
+ : "S"(cs), "D"(ct), "c"(count)
+ : "cc" /* Overwrite flags */
+ );
+ return ((ret & 1) ? ((ret >> 8) ? -1 : 1) : 0);
+}
+
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ /* Move %ecx bytes from %esi to %edi */
+ __asm__ volatile(
+ "cld\n\t" /* Make sure direction is forwards */
+ "rep\n\t"
+ "movsb"
+ : /* No output */
+ : "S"(src), "D"(dest), "c"(count)
+ : "cc" /* We overwrite condition codes - i.e., flags */
+ );
+ return dest;
+}
+
+void *memset(void *s, int c, size_t count)
+{
+ /* Fill %ecx bytes at %edi with %eax (actually %al) */
+ __asm__ volatile(
+ "cld\n\t" /* Make sure direction is forwards */
+ "rep\n\t"
+ "stosb"
+ : /* No output */
+ : "a"(c), "D"(s), "c"(count)
+ : "cc" /* Overwrite flags */
+ );
+ return s;
+}
+
+int strncmp(const char *cs, const char *ct, size_t count)
+{
+ register signed char __res = 0;
+
+ while (count)
+ {
+ if ((__res = *cs - *ct++) != 0 || !*cs++)
+ {
+ break;
+ }
+ count--;
+ }
+
+ return __res;
+}
+
+int strcmp(const char *cs, const char *ct)
+{
+ register signed char __res;
+
+ while (1)
+ {
+ if ((__res = *cs - *ct++) != 0 || !*cs++)
+ {
+ break;
+ }
+ }
+
+ return __res;
+}
+
+char *strcpy(char *dest, const char *src)
+{
+ char *tmp = dest;
+
+ while ((*dest++ = *src++) != '\0') /* nothing */
+ ;
+ return tmp;
+}
+
+char *strncpy(char *dest, const char *src, size_t count)
+{
+ char *tmp = dest;
+
+ while (count)
+ {
+ if ((*dest = *src) != 0)
+ src++;
+ dest++;
+ count--;
+ }
+
+ return tmp;
+}
+
+size_t strnlen(const char *s, size_t count)
+{
+ const char *sc;
+
+ for (sc = s; count-- && *sc != '\0'; ++sc)
+ {
+        /* nothing */
+    }
+ return sc - s;
+}
+
+char *strcat(char *dest, const char *src)
+{
+ char *tmp = dest;
+
+ while (*dest)
+ dest++;
+
+ while ((*dest++ = *src++) != '\0')
+ ;
+
+ return tmp;
+}
+
+size_t strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ {
+        /* nothing */
+    }
+ return sc - s;
+}
+
+char *strchr(const char *s, int c)
+{
+ for (; *s != (char)c; ++s)
+ {
+ if (*s == '\0')
+ {
+ return NULL;
+ }
+ }
+ return (char *)s;
+}
+
+char *strrchr(const char *s, int c)
+{
+ char *r = NULL;
+ for (; *s; ++s)
+ {
+ if (*s == (char)c)
+ {
+ r = (char *)s;
+ }
+ }
+ return r;
+}
+
+char *strstr(const char *s1, const char *s2)
+{
+ int l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ {
+ return (char *)s1;
+ }
+ l1 = strlen(s1);
+ while (l1 >= l2)
+ {
+ l1--;
+ if (!memcmp(s1, s2, l2))
+ {
+ return (char *)s1;
+ }
+ s1++;
+ }
+ return NULL;
+}
+
+/*
+ * The following three functions were ripped out of OpenSolaris. Legally, they
+ * might have to be in a separate file. Leaving it here out of laziness.
+ * Got this from /onnv-gate/usr/src/common/uti/string.c.
+ */
+
+char *strpbrk(const char *string, const char *brkset)
+{
+ const char *p;
+
+ do
+ {
+ for (p = brkset; *p != '\0' && *p != *string; ++p)
+ ;
+ if (*p != '\0')
+ {
+ return (char *)string;
+ }
+ } while (*string++);
+
+ return NULL;
+}
+
+size_t strspn(const char *string, const char *charset)
+{
+ const char *p, *q;
+
+ for (q = string; *q != '\0'; ++q)
+ {
+ for (p = charset; *p != '\0' && *p != *q; ++p)
+ ;
+ if (*p == '\0')
+ {
+ break;
+ }
+ }
+
+ return q - string;
+}
+
+char *strtok(char *string, const char *sepset)
+{
+ char *p, *q, *r;
+ static char *savept;
+
+ /*
+ * Set `p' to our current location in the string.
+ */
+ p = (string == NULL) ? savept : string;
+ if (p == NULL)
+ {
+ return NULL;
+ }
+
+ /*
+ * Skip leading separators; bail if no tokens remain.
+ */
+ q = p + strspn(p, sepset);
+ if (*q == '\0')
+ {
+ return NULL;
+ }
+
+ /*
+ * Mark the end of the token and set `savept' for the next iteration.
+ */
+ if ((r = strpbrk(q, sepset)) == NULL)
+ {
+ savept = NULL;
+ }
+ else
+ {
+ *r = '\0';
+ savept = ++r;
+ }
+
+ return q;
+}
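+
+/* Example (illustrative): repeated calls of strtok(env, ",") followed by
+ * strtok(NULL, ",") on the string "all,-mm,-pagealloc" yield the tokens
+ * "all", "-mm", and "-pagealloc" in turn, then NULL; this is exactly how
+ * dbg_add_modes() in kernel/util/debug.c walks its mode list. */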
+
+/* created with the help of:
+ * perl -p -e 's/#define\s+(\w+)\s+\d+\s+\/\* ([^\t\*]+)\s*\*\/\s*$/case $1:
+ * return "$2";\n/' < /usr/include/sys/errno.h
+ */
+char *strerror(long errnum)
+{
+ switch (errnum)
+ {
+ case EPERM:
+ return "Not super-user";
+ case ENOENT:
+ return "No such file or directory";
+ case ESRCH:
+ return "No such process";
+ case EINTR:
+ return "interrupted system call";
+ case EIO:
+ return "I/O error";
+ case ENXIO:
+ return "No such device or address";
+ case E2BIG:
+ return "Arg list too long";
+ case ENOEXEC:
+ return "Exec format error";
+ case EBADF:
+ return "Bad file number";
+ case ECHILD:
+ return "No children";
+ case EAGAIN:
+ return "Resource temporarily unavailable";
+ case ENOMEM:
+ return "Not enough core";
+ case EACCES:
+ return "Permission denied";
+ case EFAULT:
+ return "Bad address";
+ case ENOTBLK:
+ return "Block device required";
+ case EBUSY:
+ return "Mount device busy";
+ case EEXIST:
+ return "File exists";
+ case EXDEV:
+ return "Cross-device link";
+ case ENODEV:
+ return "No such device";
+ case ENOTDIR:
+ return "Not a directory";
+ case EISDIR:
+ return "Is a directory";
+ case EINVAL:
+ return "Invalid argument";
+ case ENFILE:
+ return "File table overflow";
+ case EMFILE:
+ return "Too many open files";
+ case ENOTTY:
+ return "Inappropriate ioctl for device";
+ case ETXTBSY:
+ return "Text file busy";
+ case EFBIG:
+ return "File too large";
+ case ENOSPC:
+ return "No space left on device";
+ case ESPIPE:
+ return "Illegal seek";
+ case EROFS:
+ return "Read only file system";
+ case EMLINK:
+ return "Too many links";
+ case EPIPE:
+ return "Broken pipe";
+ case EDOM:
+ return "Math arg out of domain of func";
+ case ERANGE:
+ return "Math result not representable";
+ case ENOMSG:
+ return "No message of desired type";
+ case EIDRM:
+ return "Identifier removed";
+ case ECHRNG:
+ return "Channel number out of range";
+ case EL2NSYNC:
+ return "Level 2 not synchronized";
+ case EL3HLT:
+ return "Level 3 halted";
+ case EL3RST:
+ return "Level 3 reset";
+ case ELNRNG:
+ return "Link number out of range";
+ case EUNATCH:
+ return "Protocol driver not attached";
+ case ENOCSI:
+ return "No CSI structure available";
+ case EL2HLT:
+ return "Level 2 halted";
+ case EDEADLK:
+ return "Deadlock condition.";
+ case ENOLCK:
+ return "No record locks available.";
+ case ECANCELED:
+ return "Operation canceled";
+ case ENOTSUP:
+ return "Operation not supported";
+ case EDQUOT:
+ return "Disc quota exceeded";
+ case EBADE:
+ return "invalid exchange";
+ case EBADR:
+ return "invalid request descriptor";
+ case EXFULL:
+ return "exchange full";
+ case ENOANO:
+ return "no anode";
+ case EBADRQC:
+ return "invalid request code";
+ case EBADSLT:
+ return "invalid slot";
+ case EBFONT:
+ return "bad font file fmt";
+ case ENOSTR:
+ return "Device not a stream";
+ case ENODATA:
+ return "no data (for no delay io)";
+ case ETIME:
+ return "timer expired";
+ case ENOSR:
+ return "out of streams resources";
+ case ENONET:
+ return "Machine is not on the network";
+ case ENOPKG:
+ return "Package not installed";
+ case EREMOTE:
+ return "The object is remote";
+ case ENOLINK:
+ return "the link has been severed";
+ case EADV:
+ return "advertise error";
+ case ESRMNT:
+ return "srmount error";
+ case ECOMM:
+ return "Communication error on send";
+ case EPROTO:
+ return "Protocol error";
+ case EMULTIHOP:
+ return "multihop attempted";
+ case EBADMSG:
+ return "trying to read unreadable message";
+ case ENAMETOOLONG:
+ return "path name is too long";
+ case EOVERFLOW:
+ return "value too large to be stored in data type";
+ case ENOTUNIQ:
+ return "given log. name not unique";
+ case EBADFD:
+ return "f.d. invalid for this operation";
+ case EREMCHG:
+ return "Remote address changed";
+ case ELIBACC:
+ return "Can't access a needed shared lib.";
+ case ELIBBAD:
+ return "Accessing a corrupted shared lib.";
+ case ELIBSCN:
+ return ".lib section in a.out corrupted.";
+ case ELIBMAX:
+ return "Attempting to link in too many libs.";
+ case ELIBEXEC:
+ return "Attempting to exec a shared library.";
+ case EILSEQ:
+ return "Illegal byte sequence.";
+ case ENOSYS:
+ return "Unsupported file system operation";
+ case ELOOP:
+ return "Symbolic link loop";
+ case ERESTART:
+ return "Restartable system call";
+ case ESTRPIPE:
+ return "if pipe/FIFO, don't sleep in stream head";
+ case ENOTEMPTY:
+ return "directory not empty";
+ case EUSERS:
+ return "Too many users (for UFS)";
+ case ENOTSOCK:
+ return "Socket operation on non-socket";
+ case EDESTADDRREQ:
+ return "Destination address required";
+ case EMSGSIZE:
+ return "Message too long";
+ case EPROTOTYPE:
+ return "Protocol wrong type for socket";
+ case ENOPROTOOPT:
+ return "Protocol not available";
+ case EPROTONOSUPPORT:
+ return "Protocol not supported";
+ case ESOCKTNOSUPPORT:
+ return "Socket type not supported";
+ case EPFNOSUPPORT:
+ return "Protocol family not supported";
+ case EAFNOSUPPORT:
+ return "Address family not supported by protocol family";
+ case EADDRINUSE:
+ return "Address already in use";
+ case EADDRNOTAVAIL:
+ return "Can't assign requested address";
+ case ENETDOWN:
+ return "Network is down";
+ case ENETUNREACH:
+ return "Network is unreachable";
+ case ENETRESET:
+ return "Network dropped connection because of reset";
+ case ECONNABORTED:
+ return "Software caused connection abort";
+ case ECONNRESET:
+ return "Connection reset by peer";
+ case ENOBUFS:
+ return "No buffer space available";
+ case EISCONN:
+ return "Socket is already connected";
+ case ENOTCONN:
+ return "Socket is not connected";
+ case ESHUTDOWN:
+ return "Can't send after socket shutdown";
+ case ETOOMANYREFS:
+ return "Too many references: can't splice";
+ case ETIMEDOUT:
+ return "Connection timed out";
+ case ECONNREFUSED:
+ return "Connection refused";
+ case EHOSTDOWN:
+ return "Host is down";
+ case EHOSTUNREACH:
+ return "No route to host";
+ case EALREADY:
+ return "operation already in progress";
+ case EINPROGRESS:
+ return "operation now in progress";
+ case ESTALE:
+ return "Stale NFS file handle";
+ default:
+ return 0;
+ }
+}
diff --git a/kernel/util/time.c b/kernel/util/time.c
new file mode 100644
index 0000000..11ff8de
--- /dev/null
+++ b/kernel/util/time.c
@@ -0,0 +1,194 @@
+#include "util/time.h"
+#include "drivers/cmos.h"
+#include "main/apic.h"
+#include "proc/sched.h"
+#include "util/printf.h"
+#include "util/timer.h"
+#include <drivers/screen.h>
+
+#define TIME_APIC_TICK_FREQUENCY 16
+// this is pretty wrong...
+#define MICROSECONDS_PER_APIC_TICK (16 * 1000 / TIME_APIC_TICK_FREQUENCY)
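+/* With TIME_APIC_TICK_FREQUENCY == 16 this works out to 16 * 1000 / 16 = 1000
+ * microseconds per tick, i.e. roughly one tick per millisecond, which is what
+ * core_uptime() below relies on when converting ticks to milliseconds. */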
+
+volatile uint64_t jiffies;
+uint64_t timer_tickcount CORE_SPECIFIC_DATA;
+uint64_t kernel_preempted_count CORE_SPECIFIC_DATA;
+uint64_t user_preempted_count CORE_SPECIFIC_DATA;
+uint64_t not_preempted_count CORE_SPECIFIC_DATA;
+uint64_t idle_count CORE_SPECIFIC_DATA;
+
+// (freq / 16) interrupts per millisecond
+static long timer_tick_handler(regs_t *regs)
+{
+ timer_tickcount++;
+
+#ifdef __VGABUF__
+ if (timer_tickcount % 128 == 0)
+ screen_flush();
+#endif
+
+ if (curcore.kc_id == 0)
+ {
+ jiffies = timer_tickcount;
+ __timers_fire();
+ }
+
+#ifdef __KPREEMPT__ // if (preemption_enabled()) {
+ (regs->r_cs & 0x3) ? user_preempted_count++ : kernel_preempted_count++;
+ apic_eoi();
+ if (regs->r_cs & 0x3 && curthr->kt_cancelled)
+ kthread_exit((void *)-1);
+ sched_yield();
+ return 1;
+
+#endif
+#ifndef __KPREEMPT__ //} else {
+ curthr ? not_preempted_count++ : idle_count++;
+ return 0;
+#endif //}
+
+ return 0;
+}
+
+void time_init()
+{
+ timer_tickcount = 0;
+ intr_register(INTR_APICTIMER, timer_tick_handler);
+ apic_enable_periodic_timer(TIME_APIC_TICK_FREQUENCY);
+}
+
+void time_spin(uint64_t ms)
+{
+ uint64_t ticks_to_wait = ms * TIME_APIC_TICK_FREQUENCY / 16;
+ uint64_t target = timer_tickcount + ticks_to_wait;
+ dbg(DBG_SCHED, "spinning for %lu ms (%lu APIC ticks)\n", ms, ticks_to_wait);
+ while (timer_tickcount < target)
+ ;
+}
+
+void time_sleep(uint64_t ms)
+{
+ // TODO make curthr runnable and place on runqueue
+ time_spin(ms);
+}
+
+inline time_t core_uptime()
+{
+ return (MICROSECONDS_PER_APIC_TICK * timer_tickcount) / 1000;
+}
+
+static int mdays[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334};
+
+time_t do_time()
+{
+ rtc_time_t tm = rtc_get_time();
+ // dbg(DBG_SCHED, "rtc_time (Y-M-D:hh:mm:ss): %d-%d-%d:%d:%d:%d\n", tm.year,
+ // tm.month, tm.day, tm.hour, tm.minute, tm.second);
+
+ int yday = mdays[tm.month - 1] + tm.day - 1;
+ if (tm.month >= 3 && (((tm.year % 4 == 0) && (tm.year % 100 != 0)) ||
+ (tm.year % 400 == 0)))
+ {
+ yday += 1;
+ }
+ tm.year -= 1900;
+
+ /* oof */
+ time_t unix_time =
+ tm.second + tm.minute * 60 + tm.hour * 3600 + yday * 86400 +
+ (tm.year - 70) * 31536000 + ((tm.year - 69) / 4) * 86400 -
+ ((tm.year - 1) / 100) * 86400 + ((tm.year + 299) / 400) * 86400;
+
+ return unix_time;
+}
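+
+/* Sanity check (illustrative): for an RTC reading of 1970-01-01 00:00:00,
+ * yday is 0, tm.year becomes 70, and every term in the expression above
+ * evaluates to zero, so do_time() returns 0, the Unix epoch. */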
+
+static size_t human_readable_format(char *buf, size_t size, uint64_t ticks)
+{
+    /* Convert the tick count that was passed in (not just the core uptime),
+     * so callers such as the idle-time line in time_stats() report the right
+     * duration. */
+    uint64_t milliseconds = (MICROSECONDS_PER_APIC_TICK * ticks) / 1000;
+ uint64_t minutes = milliseconds / (60 * 1000);
+ milliseconds -= minutes * 60 * 1000;
+ uint64_t seconds = milliseconds / 1000;
+ milliseconds -= seconds * 1000;
+ return (size_t)snprintf(buf, size, "%lu min, %lu sec, %lu ms", minutes,
+ seconds, milliseconds);
+}
+
+static size_t percentage(char *buf, size_t size, uint64_t numerator,
+ uint64_t denominator)
+{
+ // 2 decimal points, no floats
+ uint64_t new_numerator = numerator * 10000;
+ if (new_numerator < numerator)
+ {
+ return (size_t)snprintf(buf, size, "N/A");
+ }
+ uint64_t result = denominator ? new_numerator / denominator : 0;
+ return (size_t)snprintf(buf, size, "%lu.%02lu%%", result / 100,
+ result % 100);
+}
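+
+/* Worked example (illustrative): percentage(buf, size, 1, 3) computes
+ * 1 * 10000 / 3 = 3333 and prints "33.33%"; the guard above falls back to
+ * "N/A" when numerator * 10000 would overflow and wrap around. */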
+
+size_t time_stats(char *buf, size_t len)
+{
+ size_t off = 0;
+ off += snprintf(buf + off, len - off, "core uptime:\t");
+ off += human_readable_format(buf + off, len - off, timer_tickcount);
+ off += snprintf(buf + off, len - off, "\nidle time:\t");
+ off += human_readable_format(buf + off, len - off, idle_count);
+ off += snprintf(buf + off, len - off, "\t");
+ off += percentage(buf + off, len - off, idle_count, timer_tickcount);
+
+ KASSERT(not_preempted_count + user_preempted_count +
+ kernel_preempted_count + idle_count - timer_tickcount <=
+ 2);
+
+ off += snprintf(buf + off, len - off, "\n\ntotal tick count = %lu",
+ timer_tickcount);
+ off += snprintf(buf + off, len - off, "\nidle count = %lu",
+ idle_count);
+ off += snprintf(buf + off, len - off, "\t");
+ off += percentage(buf + off, len - off, idle_count, timer_tickcount);
+ off += snprintf(buf + off, len - off, "\nkernel preempted count = %lu",
+ kernel_preempted_count);
+ off += snprintf(buf + off, len - off, "\t");
+ off += percentage(buf + off, len - off, kernel_preempted_count,
+ timer_tickcount);
+ off += snprintf(buf + off, len - off, "\nuser preempted count = %lu",
+ user_preempted_count);
+ off += snprintf(buf + off, len - off, "\t");
+ off +=
+ percentage(buf + off, len - off, user_preempted_count, timer_tickcount);
+ off += snprintf(buf + off, len - off, "\nnot preempted count = %lu",
+ not_preempted_count);
+ off += snprintf(buf + off, len - off, "\t");
+ off +=
+ percentage(buf + off, len - off, not_preempted_count, timer_tickcount);
+
+ return off;
+}
+
+static void do_wakeup(uint64_t arg)
+{
+ kthread_t *thr = (kthread_t *)arg;
+
+ if (thr->kt_wchan)
+ {
+ sched_broadcast_on(thr->kt_wchan);
+ }
+}
+
+long do_usleep(useconds_t usec)
+{
+ ktqueue_t waitq;
+ sched_queue_init(&waitq);
+
+ timer_t timer;
+ timer_init(&timer);
+ timer.function = do_wakeup;
+ timer.data = (uint64_t)curthr;
+ timer.expires = jiffies + (usec / MICROSECONDS_PER_APIC_TICK);
+
+ timer_add(&timer);
+ long ret = sched_cancellable_sleep_on(&waitq);
+ timer_del(&timer);
+ return ret;
+}
\ No newline at end of file
diff --git a/kernel/util/timer.c b/kernel/util/timer.c
new file mode 100644
index 0000000..f1be4a2
--- /dev/null
+++ b/kernel/util/timer.c
@@ -0,0 +1,121 @@
+#include "util/timer.h"
+#include "proc/spinlock.h"
+#include "util/time.h"
+
+static timer_t *timer_running = NULL;
+static uint64_t timer_next_expiry = -1;
+static list_t timers_primary = LIST_INITIALIZER(timers_primary);
+static list_t timers_secondary = LIST_INITIALIZER(timers_secondary);
+static int timers_firing = 0;
+
+void timer_init(timer_t *timer)
+{
+ timer->expires = -1;
+ list_link_init(&timer->link);
+}
+
+void timer_add(timer_t *timer) { timer_mod(timer, timer->expires); }
+
+int __timer_del(timer_t *timer)
+{
+ int ret = 0;
+ if (list_link_is_linked(&timer->link))
+ {
+ list_remove(&timer->link);
+ ret = 1;
+ }
+ return ret;
+}
+
+int timer_del(timer_t *timer)
+{
+ int ret = __timer_del(timer);
+
+ return ret;
+}
+
+void __timer_add(timer_t *timer)
+{
+ KASSERT(!list_link_is_linked(&timer->link));
+ list_t *list = timers_firing ? &timers_secondary : &timers_primary;
+ list_insert_head(list, &timer->link);
+}
+
+int timer_mod(timer_t *timer, int expires)
+{
+
+ timer->expires = expires;
+ int ret = __timer_del(timer);
+ __timer_add(timer);
+ timer_next_expiry = MIN(timer_next_expiry, timer->expires);
+
+ return ret;
+}
+
+int timer_pending(timer_t *timer)
+{
+ int ret = list_link_is_linked(&timer->link);
+ return ret;
+}
+
+int timer_del_sync(timer_t *timer)
+{
+ /* Not great performance wise... */
+ while (timer_running == timer)
+ {
+ sched_yield();
+ }
+
+ int ret = __timer_del(timer);
+
+ return ret;
+}
+
+/* Note: using a linked list rather than a priority queue is terribly
+ * inefficient; this implementation favors simplicity over performance.
+ */
+int ready = 0;
+void __timers_fire()
+{
+ if (curthr && !preemption_enabled())
+ {
+ return;
+ }
+
+ timers_firing = 1;
+
+ if (jiffies < timer_next_expiry)
+ {
+ timers_firing = 0;
+ return;
+ }
+
+ /* track the earliest expiry among the timers that remain pending */
+ uint64_t min_expiry = (uint64_t)-1;
+
+ list_iterate(&timers_primary, timer, timer_t, link)
+ {
+ if (jiffies >= timer->expires)
+ {
+ list_remove(&timer->link);
+ timer_running = timer;
+ timer->function(timer->data);
+ timer_running = NULL;
+ }
+ else
+ {
+ min_expiry = MIN(min_expiry, timer->expires);
+ }
+ }
+
+ /* migrate from the backup list to the primary list */
+ list_iterate(&timers_secondary, timer, timer_t, link)
+ {
+ min_expiry = MIN(min_expiry, timer->expires);
+ list_remove(&timer->link);
+ list_insert_head(&timers_primary, &timer->link);
+ }
+
+ timer_next_expiry = min_expiry;
+ timers_firing = 0;
+}
diff --git a/kernel/vm/anon.c b/kernel/vm/anon.c
new file mode 100644
index 0000000..a998d70
--- /dev/null
+++ b/kernel/vm/anon.c
@@ -0,0 +1,65 @@
+#include "mm/mobj.h"
+#include "mm/page.h"
+#include "mm/pframe.h"
+#include "mm/slab.h"
+
+#include "util/debug.h"
+#include "util/string.h"
+
+/* for debugging/verification purposes */
+int anon_count = 0;
+
+static slab_allocator_t *anon_allocator;
+
+static long anon_fill_pframe(mobj_t *o, pframe_t *pf);
+
+static long anon_flush_pframe(mobj_t *o, pframe_t *pf);
+
+static void anon_destructor(mobj_t *o);
+
+static mobj_ops_t anon_mobj_ops = {.get_pframe = NULL,
+ .fill_pframe = anon_fill_pframe,
+ .flush_pframe = anon_flush_pframe,
+ .destructor = anon_destructor};
+
+/*
+ * Initialize anon_allocator using the slab allocator.
+ */
+void anon_init()
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+}
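+
+/*
+ * Illustrative only (not part of the stencil): a minimal sketch of what
+ * anon_init() could look like, assuming the same slab API that vmmap_init()
+ * uses in vm/vmmap.c and that an anonymous object needs no state beyond the
+ * mobj_t itself.
+ */
+#if 0
+void anon_init()
+{
+ /* back anonymous objects with their own slab cache */
+ anon_allocator = slab_allocator_create("anon", sizeof(mobj_t));
+ KASSERT(anon_allocator && "failed to create anon allocator");
+}
+#endif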
+
+/*
+ * The mobj should be locked upon successful return. Use mobj_init and
+ * mobj_lock.
+ */
+mobj_t *anon_create()
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return NULL;
+}
+
+/*
+ * This function is not complicated -- think about what the pframe should look
+ * like for an anonymous object
+ */
+static long anon_fill_pframe(mobj_t *o, pframe_t *pf)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
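+
+/*
+ * Illustrative only: one way anon_fill_pframe() might look. It assumes the
+ * pframe exposes its backing page through a pf_addr field (an assumption;
+ * check mm/pframe.h). The point is simply that anonymous memory has no
+ * backing store, so a fresh page is zero-filled.
+ */
+#if 0
+static long anon_fill_pframe(mobj_t *o, pframe_t *pf)
+{
+ /* nothing to read from disk; hand back a page of zeroes */
+ memset(pf->pf_addr, 0, PAGE_SIZE);
+ return 0;
+}
+#endif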
+
+static long anon_flush_pframe(mobj_t *o, pframe_t *pf) { return 0; }
+
+/*
+ * Release all resources associated with an anonymous object.
+ *
+ * Hints:
+ * 1) Call mobj_default_destructor() to free pframes
+ * 2) Free the mobj
+ */
+static void anon_destructor(mobj_t *o)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+}
diff --git a/kernel/vm/brk.c b/kernel/vm/brk.c
new file mode 100644
index 0000000..5169a42
--- /dev/null
+++ b/kernel/vm/brk.c
@@ -0,0 +1,58 @@
+#include "errno.h"
+#include "globals.h"
+#include "mm/mm.h"
+#include "util/debug.h"
+
+#include "mm/mman.h"
+
+/*
+ * This function implements the brk(2) system call.
+ *
+ * This routine manages the calling process's "break" -- the ending address
+ * of the process's dynamic region (heap)
+ *
+ * Some important details on the range of values 'p_brk' can take:
+ * 1) 'p_brk' should not be set to a value lower than 'p_start_brk', since this
+ * could overwrite data in another memory region. But 'p_brk' can be equal to
+ * 'p_start_brk', which means the heap does not exist yet or is empty.
+ * 2) Growth of the 'p_brk' cannot overlap with/expand into an existing
+ * mapping. Use vmmap_is_range_empty() to help with this.
+ * 3) 'p_brk' cannot go beyond the region of the address space allocated for use by
+ * userland (USER_MEM_HIGH)
+ *
+ * Before setting 'p_brk' to 'addr', you must account for all scenarios by comparing
+ * the page numbers of addr, 'p_brk' and 'p_start_brk' as the vmarea that represents the heap
+ * has page granularity. Think about the following sub-cases (note that the heap
+ * should always be represented by at most one vmarea):
+ * 1) The heap needs to be created. What permissions and attributes does a process
+ * expect the heap to have?
+ * 2) The heap already exists, so you need to modify its end appropriately.
+ * 3) The heap needs to shrink.
+ *
+ * Beware of page alignment!:
+ * 1) The starting break is not necessarily page aligned. Since the loader sets
+ * 'p_start_brk' to be the end of the bss section, 'p_start_brk' should always be
+ * aligned up to start the dynamic region at the first page after bss_end.
+ * 2) vmareas only have page granularity, so you will need to take this
+ * into account when deciding how to set the mappings if p_brk or p_start_brk
+ * is not page aligned. The caller of do_brk() would be very disappointed if
+ * you give them less than they asked for!
+ *
+ * Some additional details:
+ * 1) You are guaranteed that the process data/bss region is non-empty.
+ * That is, if the starting brk is not page-aligned, its page has
+ * read/write permissions.
+ * 2) If 'addr' is NULL, you should return the current break. We use this to
+ * implement sbrk(0) without writing a separate syscall. Look in
+ * user/libc/syscall.c if you're curious.
+ * 3) Return 0 on success, -errno on failure. The 'ret' argument should be used to
+ * return the updated 'p_brk' on success.
+ *
+ * Error cases do_brk is responsible for generating:
+ * - ENOMEM: attempting to set p_brk beyond its valid range
+ */
+long do_brk(void *addr, void **ret)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
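+
+/*
+ * Illustrative only: a partial sketch of the argument handling described
+ * above (the sbrk(0) case and the ENOMEM range checks). The creation,
+ * growth, and shrinking of the heap vmarea are omitted, and the sketch
+ * assumes ret is non-NULL.
+ */
+#if 0
+long do_brk(void *addr, void **ret)
+{
+ if (!addr)
+ {
+ /* sbrk(0): report the current break without changing anything */
+ *ret = curproc->p_brk;
+ return 0;
+ }
+ if ((uintptr_t)addr < (uintptr_t)curproc->p_start_brk ||
+ (uintptr_t)addr > USER_MEM_HIGH)
+ {
+ return -ENOMEM;
+ }
+ /* ... create, grow, or shrink the heap vmarea as described above ... */
+ curproc->p_brk = addr;
+ *ret = addr;
+ return 0;
+}
+#endif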
diff --git a/kernel/vm/mmap.c b/kernel/vm/mmap.c
new file mode 100644
index 0000000..7eb2d89
--- /dev/null
+++ b/kernel/vm/mmap.c
@@ -0,0 +1,83 @@
+#include "vm/mmap.h"
+#include "errno.h"
+#include "fs/file.h"
+#include "fs/vfs.h"
+#include "fs/vnode.h"
+#include "globals.h"
+#include "mm/mm.h"
+#include "mm/mman.h"
+#include "mm/tlb.h"
+#include "util/debug.h"
+
+/*
+ * This function implements the mmap(2) syscall: Add a mapping to the current
+ * process's address space. Supports the following flags: MAP_SHARED,
+ * MAP_PRIVATE, MAP_FIXED, and MAP_ANON.
+ *
+ * ret - If provided, on success, *ret must point to the start of the mapped area
+ *
+ * Return 0 on success, or:
+ * - EACCES:
+ * - a file mapping was requested, but fd is not open for reading.
+ * - MAP_SHARED was requested and PROT_WRITE is set, but fd is
+ * not open in read/write (O_RDWR) mode.
+ * - PROT_WRITE is set, but the file has FMODE_APPEND specified.
+ * - EBADF:
+ * - fd is not a valid file descriptor and MAP_ANON was
+ * not set
+ * - EINVAL:
+ * - addr is not page aligned and MAP_FIXED is specified
+ * - addr is out of range of the user address space and MAP_FIXED is specified
+ * - off is not page aligned
+ * - len is <= 0 or off < 0
+ * - flags do not contain MAP_PRIVATE or MAP_SHARED
+ * - ENODEV:
+ * - The underlying filesystem of the specified file does not
+ * support memory mapping or in other words, the file's vnode's mmap
+ * operation doesn't exist
+ * - Propagate errors from vmmap_map()
+ *
+ * See the errors section of the mmap(2) man page for more details
+ *
+ * Hints:
+ * 1) A lot of error checking.
+ * 2) Call vmmap_map() to create the mapping.
+ * a) Use VMMAP_DIR_HILO as default, which will make other stencil code in
+ * Weenix happy.
+ * 3) Call tlb_flush_range() on the newly-mapped region. This is because the
+ * newly-mapped region could have been used by someone else, and you don't
+ * want to get stale mappings.
+ * 4) Don't forget to set ret if it was provided.
+ *
+ * If you are mapping less than a page, make sure that you are still allocating
+ * a full page.
+ */
+long do_mmap(void *addr, size_t len, int prot, int flags, int fd, off_t off,
+ void **ret)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
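+
+/*
+ * Illustrative only: a partial sketch of the EINVAL checks listed above,
+ * pulled into a hypothetical helper (mmap_args_ok is not part of the
+ * stencil). The fd/EACCES checks, the vmmap_map() call, and the TLB flush
+ * are omitted; page alignment is tested directly against PAGE_SIZE rather
+ * than through any helper macro.
+ */
+#if 0
+static long mmap_args_ok(void *addr, size_t len, int flags, off_t off)
+{
+ if (len == 0 || off < 0 || (off % PAGE_SIZE) != 0)
+ {
+ return -EINVAL;
+ }
+ if (!(flags & MAP_PRIVATE) && !(flags & MAP_SHARED))
+ {
+ return -EINVAL;
+ }
+ if ((flags & MAP_FIXED) && (((uintptr_t)addr % PAGE_SIZE) != 0 ||
+ (uintptr_t)addr > USER_MEM_HIGH))
+ {
+ return -EINVAL;
+ }
+ return 0;
+}
+#endif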
+
+/*
+ * This function implements the munmap(2) syscall.
+ *
+ * Return 0 on success, or:
+ * - EINVAL:
+ * - addr is not aligned on a page boundary
+ * - the region to unmap is out of range of the user address space
+ * - len is 0
+ * - Propagate errors from vmmap_remove()
+ *
+ * See the errors section of the munmap(2) man page for more details
+ *
+ * Hints:
+ * - Similar to do_mmap():
+ * 1) Perform error checking.
+ * 2) Call vmmap_remove().
+ */
+long do_munmap(void *addr, size_t len)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+} \ No newline at end of file
diff --git a/kernel/vm/pagefault.c b/kernel/vm/pagefault.c
new file mode 100644
index 0000000..11868e5
--- /dev/null
+++ b/kernel/vm/pagefault.c
@@ -0,0 +1,53 @@
+#include "vm/pagefault.h"
+#include "errno.h"
+#include "globals.h"
+#include "mm/mm.h"
+#include "mm/mman.h"
+#include "mm/mobj.h"
+#include "mm/pframe.h"
+#include "mm/tlb.h"
+#include "types.h"
+#include "util/debug.h"
+
+/*
+ * Respond to a user mode pagefault by setting up the desired page.
+ *
+ * vaddr - The virtual address that the user pagefaulted on
+ * cause - A combination of FAULT_ flags indicating the type of operation that
+ * caused the fault (see pagefault.h)
+ *
+ * Implementation details:
+ * 1) Find the vmarea that contains vaddr, if it exists.
+ * 2) Check the vmarea's protections (see the vmarea_t struct) against the 'cause' of
+ * the pagefault. For example, error out if the fault has cause write and we don't
+ * have write permission in the area. Keep in mind:
+ * a) You can assume that FAULT_USER is always specified.
+ * b) If neither FAULT_WRITE nor FAULT_EXEC is specified, you may assume the
+ * fault was due to an attempted read.
+ * 3) Obtain the corresponding pframe from the vmarea's mobj. Be careful about
+ * locking and error checking!
+ * 4) Finally, set up a call to pt_map to insert a new mapping into the
+ * appropriate pagetable:
+ * a) Use pt_virt_to_phys() to obtain the physical address of the actual
+ * data.
+ * b) You should not assume that vaddr is page-aligned, but you should
+ * provide a page-aligned address to the mapping.
+ * c) For pdflags, use PT_PRESENT | PT_WRITE | PT_USER.
+ * d) For ptflags, start with PT_PRESENT | PT_USER. Also supply PT_WRITE if
+ * the user can and wants to write to the page.
+ * 5) Flush the TLB.
+ *
+ * Tips:
+ * 1) This gets called by _pt_fault_handler() in mm/pagetable.c, which
+ * importantly checks that the fault did not occur in kernel mode. Think
+ * about why a kernel mode page fault would be bad in Weenix. Explore
+ * _pt_fault_handler() to get a sense of what's going on.
+ * 2) If you run into any errors, you should segfault by calling
+ * do_exit(EFAULT).
+ */
+void handle_pagefault(uintptr_t vaddr, uintptr_t cause)
+{
+ dbg(DBG_VM, "vaddr = 0x%p (0x%p), cause = %lu\n", (void *)vaddr,
+ PAGE_ALIGN_DOWN(vaddr), cause);
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+}
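+
+/*
+ * Illustrative only: a sketch of steps 1 and 2 above (vmarea lookup and the
+ * protection check), factored into a hypothetical helper; check_fault_perms
+ * and ADDR_TO_PN (address to page number) are assumptions, and the pframe
+ * lookup, pt_map() call, and TLB flush are omitted.
+ */
+#if 0
+static void check_fault_perms(uintptr_t vaddr, uintptr_t cause)
+{
+ vmarea_t *vma = vmmap_lookup(curproc->p_vmmap, ADDR_TO_PN(vaddr));
+ if (!vma)
+ {
+ do_exit(EFAULT); /* no mapping covers the faulting address */
+ }
+ /* map the fault cause onto the protection bit it requires */
+ if ((cause & FAULT_WRITE) && !(vma->vma_prot & PROT_WRITE))
+ {
+ do_exit(EFAULT);
+ }
+ if ((cause & FAULT_EXEC) && !(vma->vma_prot & PROT_EXEC))
+ {
+ do_exit(EFAULT);
+ }
+ /* neither write nor exec specified: treat the fault as a read */
+ if (!(cause & (FAULT_WRITE | FAULT_EXEC)) && !(vma->vma_prot & PROT_READ))
+ {
+ do_exit(EFAULT);
+ }
+}
+#endif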
diff --git a/kernel/vm/shadow.c b/kernel/vm/shadow.c
new file mode 100644
index 0000000..3b6f783
--- /dev/null
+++ b/kernel/vm/shadow.c
@@ -0,0 +1,173 @@
+#include "vm/shadow.h"
+#include "mm/page.h"
+#include "mm/pframe.h"
+#include "mm/slab.h"
+#include "util/debug.h"
+#include "util/string.h"
+
+#define SHADOW_SINGLETON_THRESHOLD 5
+
+typedef struct mobj_shadow
+{
+ // the mobj parts of this shadow object
+ mobj_t mobj;
+ // a reference to the mobj that is the data source for this shadow object.
+ // This is typically a shadow object belonging to some ancestor process, and
+ // it is used to traverse the shadow object chain.
+ mobj_t *shadowed;
+ // a reference to the mobj at the bottom of this shadow object's chain
+ // this should NEVER be a shadow object (i.e. it should have some type other
+ // than MOBJ_SHADOW)
+ mobj_t *bottom_mobj;
+} mobj_shadow_t;
+
+#define MOBJ_TO_SO(o) CONTAINER_OF(o, mobj_shadow_t, mobj)
+
+static slab_allocator_t *shadow_allocator;
+
+static long shadow_get_pframe(mobj_t *o, size_t pagenum, long forwrite,
+ pframe_t **pfp);
+static long shadow_fill_pframe(mobj_t *o, pframe_t *pf);
+static long shadow_flush_pframe(mobj_t *o, pframe_t *pf);
+static void shadow_destructor(mobj_t *o);
+
+static mobj_ops_t shadow_mobj_ops = {.get_pframe = shadow_get_pframe,
+ .fill_pframe = shadow_fill_pframe,
+ .flush_pframe = shadow_flush_pframe,
+ .destructor = shadow_destructor};
+
+/*
+ * Initialize shadow_allocator using the slab allocator.
+ */
+void shadow_init()
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+}
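+
+/*
+ * Illustrative only: a minimal sketch of shadow_init(), assuming the same
+ * slab API that vmmap_init() uses in vm/vmmap.c.
+ */
+#if 0
+void shadow_init()
+{
+ shadow_allocator = slab_allocator_create("shadow", sizeof(mobj_shadow_t));
+ KASSERT(shadow_allocator && "failed to create shadow allocator");
+}
+#endif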
+
+/*
+ * Create a shadow object that shadows the given mobj.
+ *
+ * Return a new, LOCKED shadow object on success, or NULL upon failure.
+ *
+ * Hints:
+ * 1) Create and initialize a mobj_shadow_t based on the given mobj.
+ * 2) Set up the bottom object of the shadow chain, which could have two cases:
+ * a) Either shadowed is a shadow object, and you can use its bottom_mobj
+ * b) Or shadowed is not a shadow object, in which case it is the bottom
+ * object of this chain.
+ *
+ * Make sure to manage the refcounts correctly.
+ */
+mobj_t *shadow_create(mobj_t *shadowed)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return NULL;
+}
+
+/*
+ * Given a shadow object o, collapse its shadow chain as far as you can.
+ *
+ * Hints:
+ * 1) You can only collapse if the shadowed object is a shadow object.
+ * 2) When collapsing, you must manually migrate pframes from o's shadowed
+ * object to o, skipping any frame for which o already has its own copy.
+ * 3) Be careful with refcounting! In particular, when you put away o's
+ * shadowed object, its refcount should drop to 0, initiating its
+ * destruction (shadow_destructor).
+ * 4) As a reminder, any refcounting done in shadow_collapse() must play nice
+ * with any refcounting done in shadow_destructor().
+ * 5) Pay attention to mobj and pframe locking.
+ */
+void shadow_collapse(mobj_t *o)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+}
+
+/*
+ * Obtain the desired pframe from the given mobj, traversing its shadow chain if
+ * necessary. This is where copy-on-write logic happens!
+ *
+ * Arguments:
+ * o - The object from which to obtain a pframe
+ * pagenum - Number of the desired page relative to the object
+ * forwrite - Set if the caller wants to write to the pframe's data, clear if
+ * only reading
+ * pfp - Upon success, pfp should point to the desired pframe.
+ *
+ * Return 0 on success, or:
+ * - Propagate errors from mobj_default_get_pframe() and mobj_get_pframe()
+ *
+ * Hints:
+ * 1) If forwrite is set, use mobj_default_get_pframe().
+ * 2) If forwrite is clear, check if o already contains the desired frame.
+ * a) If not, iterate through the shadow chain to find the nearest shadow
+ * mobj that has the frame. Do not recurse! If the shadow chain is long,
+ * you will overflow the kernel stack (e.g. under a forkbomb).
+ * b) If no shadow objects have the page, call mobj_get_pframe() to get the
+ * page from the bottom object and return what it returns.
+ *
+ * Pay attention to pframe locking.
+ */
+static long shadow_get_pframe(mobj_t *o, size_t pagenum, long forwrite,
+ pframe_t **pfp)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
+
+/*
+ * Use the given mobj's shadow chain to fill the given pframe.
+ *
+ * Return 0 on success, or:
+ * - Propagate errors from mobj_get_pframe()
+ *
+ * Hints:
+ * 1) Explore mobj_default_get_pframe(), which calls mobj_create_pframe(), to
+ * understand what state pf is in when this function is called, and how you
+ * can use it.
+ * 2) As you can see above, shadow_get_pframe() calls mobj_default_get_pframe()
+ * when forwrite is set, which creates the pframe and then fills it (this
+ * is when shadow_fill_pframe() gets called).
+ * 3) Traverse the shadow chain for a copy of the frame, starting at the given
+ * mobj's shadowed object. You can use mobj_find_pframe to look for the
+ * page frame. Pay attention to locking/unlocking, and be sure not to
+ * recurse when traversing.
+ * 4) If none of the shadow objects have a copy of the frame, use
+ * mobj_get_pframe on the bottom object to get it.
+ * 5) After obtaining the desired frame, simply copy its contents into pf.
+ */
+static long shadow_fill_pframe(mobj_t *o, pframe_t *pf)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
+
+/*
+ * Flush a shadow object's pframe to disk.
+ *
+ * Return 0 on success.
+ *
+ * Hint:
+ * - Are shadow objects backed to disk? Do you actually need to do anything
+ * here?
+ */
+static long shadow_flush_pframe(mobj_t *o, pframe_t *pf)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
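+
+/*
+ * Illustrative only: shadow objects live purely in memory, so a sketch of
+ * shadow_flush_pframe() mirrors anon_flush_pframe() in vm/anon.c and simply
+ * reports success.
+ */
+#if 0
+static long shadow_flush_pframe(mobj_t *o, pframe_t *pf) { return 0; }
+#endif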
+
+/*
+ * Clean up all resources associated with mobj o.
+ *
+ * Hints:
+ * - Check out mobj_put() to understand how this function gets called.
+ *
+ * 1) Call mobj_default_destructor() to flush o's pframes.
+ * 2) Put the shadowed and bottom_mobj members of the shadow object.
+ * 3) Free the mobj_shadow_t.
+ */
+static void shadow_destructor(mobj_t *o)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+}
diff --git a/kernel/vm/vmmap.c b/kernel/vm/vmmap.c
new file mode 100644
index 0000000..f683ca0
--- /dev/null
+++ b/kernel/vm/vmmap.c
@@ -0,0 +1,326 @@
+#include "globals.h"
+#include "kernel.h"
+#include <errno.h>
+
+#include "vm/anon.h"
+#include "vm/shadow.h"
+
+#include "util/debug.h"
+#include "util/printf.h"
+#include "util/string.h"
+
+#include "fs/file.h"
+#include "fs/vfs_syscall.h"
+#include "fs/vnode.h"
+
+#include "mm/mm.h"
+#include "mm/mman.h"
+#include "mm/slab.h"
+
+static slab_allocator_t *vmmap_allocator;
+static slab_allocator_t *vmarea_allocator;
+
+void vmmap_init(void)
+{
+ vmmap_allocator = slab_allocator_create("vmmap", sizeof(vmmap_t));
+ vmarea_allocator = slab_allocator_create("vmarea", sizeof(vmarea_t));
+ KASSERT(vmmap_allocator && vmarea_allocator);
+}
+
+/*
+ * Allocate and initialize a new vmarea using vmarea_allocator.
+ */
+vmarea_t *vmarea_alloc(void)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return NULL;
+}
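+
+/*
+ * Illustrative only: a sketch of vmarea_alloc(), assuming the slab interface
+ * provides slab_obj_alloc() (an assumption; see mm/slab.h). Zeroing the
+ * struct and initializing the list link keeps later list operations safe.
+ */
+#if 0
+vmarea_t *vmarea_alloc(void)
+{
+ vmarea_t *vma = slab_obj_alloc(vmarea_allocator);
+ if (!vma)
+ {
+ return NULL;
+ }
+ memset(vma, 0, sizeof(vmarea_t));
+ list_link_init(&vma->vma_plink);
+ return vma;
+}
+#endif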
+
+/*
+ * Free the vmarea by removing it from any lists it may be on, putting its
+ * vma_obj if it exists, and freeing the vmarea_t.
+ */
+void vmarea_free(vmarea_t *vma)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+}
+
+/*
+ * Create and initialize a new vmmap. Initialize all the fields of vmmap_t.
+ */
+vmmap_t *vmmap_create(void)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return NULL;
+}
+
+/*
+ * Destroy the map pointed to by mapp and set *mapp = NULL.
+ * Remember to free each vmarea in the map's list.
+ */
+void vmmap_destroy(vmmap_t **mapp)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+}
+
+/*
+ * Add a vmarea to an address space. Assumes (i.e. asserts to some extent) the
+ * vmarea is valid. Iterate through the list of vmareas, and add it
+ * accordingly.
+ */
+void vmmap_insert(vmmap_t *map, vmarea_t *new_vma)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+}
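+
+/*
+ * Illustrative only: a sketch of vmmap_insert() that keeps vmm_list sorted by
+ * vma_start. It assumes util/list.h provides list_insert_before(link, new)
+ * and list_insert_tail(list, new) (assumptions; only list_insert_head appears
+ * in this patch), and it omits any back-pointer bookkeeping from the vmarea
+ * to its map.
+ */
+#if 0
+void vmmap_insert(vmmap_t *map, vmarea_t *new_vma)
+{
+ KASSERT(new_vma->vma_start < new_vma->vma_end);
+ list_iterate(&map->vmm_list, vma, vmarea_t, vma_plink)
+ {
+ if (new_vma->vma_start < vma->vma_start)
+ {
+ /* first existing area that starts after the new one */
+ list_insert_before(&vma->vma_plink, &new_vma->vma_plink);
+ return;
+ }
+ }
+ /* nothing starts after it, so it goes at the end */
+ list_insert_tail(&map->vmm_list, &new_vma->vma_plink);
+}
+#endif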
+
+/*
+ * Find a contiguous range of free virtual pages of length npages in the given
+ * address space. Returns starting page number for the range, without altering the map.
+ * Return -1 if no such range exists.
+ *
+ * Your algorithm should be first fit.
+ * You should assert that dir is VMMAP_DIR_LOHI OR VMMAP_DIR_HILO.
+ * If dir is:
+ * - VMMAP_DIR_HILO: find a gap as high in the address space as possible,
+ * starting from USER_MEM_HIGH.
+ * - VMMAP_DIR_LOHI: find a gap as low in the address space as possible,
+ * starting from USER_MEM_LOW.
+ *
+ * Make sure you are converting between page numbers and addresses correctly!
+ */
+ssize_t vmmap_find_range(vmmap_t *map, size_t npages, int dir)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
+
+/*
+ * Return the vmarea that vfn (a page number) lies in. Scan the address space
+ * looking for a vmarea whose range covers vfn. If the page is unmapped, return
+ * NULL.
+ */
+vmarea_t *vmmap_lookup(vmmap_t *map, size_t vfn)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return NULL;
+}
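+
+/*
+ * Illustrative only: a sketch of vmmap_lookup() using the same list_iterate
+ * pattern as vmmap_collapse() below, treating [vma_start, vma_end) as a
+ * half-open page range (vma_end exclusive, as assumed throughout these
+ * sketches).
+ */
+#if 0
+vmarea_t *vmmap_lookup(vmmap_t *map, size_t vfn)
+{
+ list_iterate(&map->vmm_list, vma, vmarea_t, vma_plink)
+ {
+ if (vfn >= vma->vma_start && vfn < vma->vma_end)
+ {
+ return vma;
+ }
+ }
+ return NULL;
+}
+#endif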
+
+/*
+ * For each vmarea in the map, if it is a shadow object, call shadow_collapse.
+ */
+void vmmap_collapse(vmmap_t *map)
+{
+ list_iterate(&map->vmm_list, vma, vmarea_t, vma_plink)
+ {
+ if (vma->vma_obj->mo_type == MOBJ_SHADOW)
+ {
+ mobj_lock(vma->vma_obj);
+ shadow_collapse(vma->vma_obj);
+ mobj_unlock(vma->vma_obj);
+ }
+ }
+}
+
+/*
+ * This is where the magic of fork's copy-on-write gets set up.
+ *
+ * Upon successful return, the new vmmap should be a clone of map with all
+ * shadow objects properly set up.
+ *
+ * For each vmarea, clone its members.
+ * 1) If the vmarea is share-mapped, you don't need to do anything special.
+ * 2) If the vmarea is not share-mapped, it is time for shadow objects:
+ * a) Create two shadow objects, one for map and one for the new vmmap you
+ * are constructing, both of which shadow the current vma_obj of the
+ * vmarea being cloned.
+ * b) After creating the shadow objects, put the original vma_obj.
+ * c) Insert the shadow objects into their respective vmareas.
+ *
+ * Be sure to clean up in any error case, manage the reference counts correctly,
+ * and to lock/unlock properly.
+ */
+vmmap_t *vmmap_clone(vmmap_t *map)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return NULL;
+}
+
+/*
+ *
+ * Insert a mapping into the map starting at lopage for npages pages.
+ *
+ * file - If provided, the vnode of the file to be mapped in
+ * lopage - If provided, the desired start range of the mapping
+ * prot - See mman.h for possible values
+ * flags - See do_mmap()'s comments for possible values
+ * off - Offset in the file to start mapping at, in bytes
+ * dir - VMMAP_DIR_LOHI or VMMAP_DIR_HILO
+ * new_vma - If provided, on success, must point to the new vmarea_t
+ *
+ * Return 0 on success, or:
+ * - ENOMEM: On vmarea_alloc, anon_create, shadow_create or
+ * vmmap_find_range failure
+ * - Propagate errors from file->vn_ops->mmap and vmmap_remove
+ *
+ * Hints:
+ * - You can assume/assert that all input is valid. It may help to write
+ * this function and do_mmap() somewhat in tandem.
+ * - If file is NULL, create an anon object.
+ * - If file is non-NULL, use the vnode's mmap operation to get the mobj.
+ * Do not assume it is file->vn_obj (mostly relevant for special devices).
+ * - If lopage is 0, use vmmap_find_range() to get a valid range
+ * - If lopage is not 0, the direction flag (dir) is ignored.
+ * - If lopage is nonzero and MAP_FIXED is specified and
+ * the given range overlaps with any preexisting mappings,
+ * remove the preexisting mappings.
+ * - If MAP_PRIVATE is specified, set up a shadow object. Be careful with
+ * refcounts!
+ * - Be careful: off is in bytes (albeit should be page-aligned), but
+ * vma->vma_off is in pages.
+ * - Be careful with the order of operations. Hold off on any irreversible
+ * work until there is no more chance of failure.
+ */
+long vmmap_map(vmmap_t *map, vnode_t *file, size_t lopage, size_t npages,
+ int prot, int flags, off_t off, int dir, vmarea_t **new_vma)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
+
+/*
+ * Iterate over the mapping's vmm_list and make sure that the specified range
+ * is completely empty. You will have to handle the following cases:
+ *
+ * Key: [ ] = existing vmarea_t
+ * ******* = region to be unmapped
+ *
+ * Case 1: [ ******* ]
+ * The region to be unmapped lies completely inside the vmarea. We need to
+ * split the old vmarea into two vmareas. Be sure to increment the refcount of
+ * the object associated with the vmarea.
+ *
+ * Case 2: [ *******]**
+ * The region overlaps the end of the vmarea. Just shorten the length of
+ * the mapping.
+ *
+ * Case 3: *[***** ]
+ * The region overlaps the beginning of the vmarea. Move the beginning of
+ * the mapping (remember to update vma_off), and shorten its length.
+ *
+ * Case 4: *[*************]**
+ * The region completely contains the vmarea. Remove the vmarea from the
+ * list.
+ *
+ * Return 0 on success, or:
+ * - ENOMEM: Failed to allocate a new vmarea when splitting a vmarea (case 1).
+ *
+ * Hints:
+ * - Whenever you shorten/remove any mappings, be sure to call pt_unmap_range()
+ * and tlb_flush_range() to clean up your pagetables and TLB.
+ */
+long vmmap_remove(vmmap_t *map, size_t lopage, size_t npages)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return -1;
+}
+
+/*
+ * Returns 1 if the given address space has no mappings for the given range,
+ * 0 otherwise.
+ */
+long vmmap_is_range_empty(vmmap_t *map, size_t startvfn, size_t npages)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
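+
+/*
+ * Illustrative only: a sketch of vmmap_is_range_empty() that checks the
+ * half-open page range [startvfn, startvfn + npages) against every existing
+ * vmarea (again treating vma_end as exclusive).
+ */
+#if 0
+long vmmap_is_range_empty(vmmap_t *map, size_t startvfn, size_t npages)
+{
+ size_t endvfn = startvfn + npages;
+ list_iterate(&map->vmm_list, vma, vmarea_t, vma_plink)
+ {
+ /* two half-open ranges overlap iff each starts before the other ends */
+ if (vma->vma_start < endvfn && startvfn < vma->vma_end)
+ {
+ return 0;
+ }
+ }
+ return 1;
+}
+#endif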
+
+/*
+ * Read into 'buf' from the virtual address space of 'map'. Start at 'vaddr'
+ * for size 'count'. 'vaddr' is not necessarily page-aligned. count is in bytes.
+ *
+ * Hints:
+ * 1) Find the vmareas that correspond to the region to read from.
+ * 2) Find the pframes within those vmareas corresponding to the virtual
+ * addresses you want to read.
+ * 3) Read from those page frames and copy it into `buf`.
+ * 4) You will not need to check the permissions of the area.
+ * 5) You may assume/assert that all areas exist.
+ *
+ * Return 0 on success, -errno on error (propagate from the routines called).
+ * This routine will be used within copy_from_user().
+ */
+long vmmap_read(vmmap_t *map, const void *vaddr, void *buf, size_t count)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
+
+/*
+ * Write from 'buf' into the virtual address space of 'map' starting at
+ * 'vaddr' for size 'count'.
+ *
+ * Hints:
+ * 1) Find the vmareas to write to.
+ * 2) Find the correct pframes within those areas that contain the virtual addresses
+ * that you want to write data to.
+ * 3) Write to the pframes, copying data from buf.
+ * 4) You do not need to check the permissions of the areas you use.
+ * 5) Assume/assert that all areas exist.
+ * 6) Remember to dirty the pages that you write to.
+ *
+ * Returns 0 on success, -errno on error (propagate from the routines called).
+ * This routine will be used within copy_to_user().
+ */
+long vmmap_write(vmmap_t *map, void *vaddr, const void *buf, size_t count)
+{
+ NOT_YET_IMPLEMENTED("VM: ***none***");
+ return 0;
+}
+
+size_t vmmap_mapping_info(const void *vmmap, char *buf, size_t osize)
+{
+ return vmmap_mapping_info_helper(vmmap, buf, osize, "");
+}
+
+size_t vmmap_mapping_info_helper(const void *vmmap, char *buf, size_t osize,
+ char *prompt)
+{
+ KASSERT(0 < osize);
+ KASSERT(NULL != buf);
+ KASSERT(NULL != vmmap);
+
+ vmmap_t *map = (vmmap_t *)vmmap;
+ char *start = buf; /* keep a handle on the caller's buffer for termination */
+ ssize_t size = (ssize_t)osize;
+
+ int len =
+ snprintf(buf, (size_t)size, "%s%37s %5s %7s %18s %11s %23s\n", prompt,
+ "VADDR RANGE", "PROT", "FLAGS", "MOBJ", "OFFSET", "VFN RANGE");
+
+ list_iterate(&map->vmm_list, vma, vmarea_t, vma_plink)
+ {
+ size -= len;
+ buf += len;
+ if (0 >= size)
+ {
+ goto end;
+ }
+
+ len =
+ snprintf(buf, (size_t)size,
+ "%s0x%p-0x%p %c%c%c %7s 0x%p %#.9lx %#.9lx-%#.9lx\n",
+ prompt, (void *)(vma->vma_start << PAGE_SHIFT),
+ (void *)(vma->vma_end << PAGE_SHIFT),
+ (vma->vma_prot & PROT_READ ? 'r' : '-'),
+ (vma->vma_prot & PROT_WRITE ? 'w' : '-'),
+ (vma->vma_prot & PROT_EXEC ? 'x' : '-'),
+ (vma->vma_flags & MAP_SHARED ? " SHARED" : "PRIVATE"),
+ vma->vma_obj, vma->vma_off, vma->vma_start, vma->vma_end);
+ }
+
+end:
+ if (size <= 0)
+ {
+ size = osize;
+ start[osize - 1] = '\0'; /* buf has advanced; terminate the original buffer */
+ }
+ return osize - size;
+}
diff --git a/kernel/vm/vmmap.gdb b/kernel/vm/vmmap.gdb
new file mode 100644
index 0000000..528dd1d
--- /dev/null
+++ b/kernel/vm/vmmap.gdb
@@ -0,0 +1,24 @@
+define vmmap
+ if $argc > 0
+ set $proc = proc_lookup($arg0)
+ if $proc != NULL
+ printf "Process %i (%s):\n", $proc->p_pid, $proc->p_name
+ set $vmmap = $proc->p_vmmap
+ else
+ printf "No process with PID %i exists\n", $arg0
+ set $vmmap = NULL
+ end
+ else
+ printf "Current process %i (%s):\n", curproc->p_pid, curproc->p_name
+ set $vmmap = curproc->p_vmmap
+ end
+
+ if $vmmap != NULL
+ kinfo vmmap_mapping_info $vmmap
+ end
+end
+document vmmap
+Without arguments displays current mappings. Takes an optional integer
+argument to specify the PID of a process whose mappings should be
+printed instead.
+end
diff --git a/kernel/weenix.dbg b/kernel/weenix.dbg
new file mode 100644
index 0000000..f342d92
--- /dev/null
+++ b/kernel/weenix.dbg
Binary files differ