Index: arch/makefile ================================================================== --- arch/makefile +++ arch/makefile @@ -1,5 +1,12 @@ -linux-headers = /usr/include/asm/ -calls.x86.lin.32.s: $(linux-headers)/unistd_32.h - grep "#define __NR_" $< | sed "s;^#define __NR_;%define sys.;" > $@ -calls.x86.lin.64.s: $(linux-headers)/unistd_64.h - grep "#define __NR_" $< | sed "s;^#define __NR_;%define sys.;" > $@ +lin-headers = /usr/include/asm +fbsd-headers = /usr/include/sys + +${TMP}: + mkdir -p ${TMP} + +${TMP}/calls.x86.lin.32.s: $(lin-headers)/unistd_32.h ${TMP} + grep "#define __NR_" $< | sed 's;^#define __NR_;%define sys.;' > $@ +${TMP}/calls.x86.lin.64.s: $(lin-headers)/unistd_64.h ${TMP} + grep "#define __NR_" $< | sed 's;^#define __NR_;%define sys.;' > $@ +${TMP}/calls.x86.fbsd.%.s: $(fbsd-headers)/syscall.h ${TMP} + grep "#define SYS_" $< | sed 's;^#define SYS_;%define sys.;' > $@ Index: arch/x86.lin.64.s ================================================================== --- arch/x86.lin.64.s +++ arch/x86.lin.64.s @@ -2,87 +2,8 @@ ; vim: ft=nasm ; syscall64 numbers - syscall table must be created first! %include "calls.x86.lin.64.s" -; syscall ops -%define sys.call syscall +; linux uses the common x86-64 ABI +%include "x86.syscall.64.s" -; register order for syscall convention -%define sys.reg.n 7 -%define sys.reg.ret rax -%define sys.reg.0 rax -%define sys.reg.1 rdi -%define sys.reg.2 rsi -%define sys.reg.3 rdx -%define sys.reg.4 r10 -%define sys.reg.5 r8 -%define sys.reg.6 r9 - -; register order for ccall convention -%define ccall.reg.ct 6 -%define ccall.reg.ret rdi -%define ccall.reg.0 rdi -%define ccall.reg.1 rsi -%define ccall.reg.2 rdx -%define ccall.reg.3 rcx -%define ccall.reg.4 r8 -%define ccall.reg.5 r9 - -%macro sys 1-8 -; syscall64 wrapper, ex. `sys sys.write, 1, msg, msg.len` - %assign i 0 - %rep %0 - mov sys.reg. 
%+ i, %1 ; i'm actually shocked this worked - %rotate 1 - %assign i i+1 - %endrep - syscall -%endmacro - -%macro sys.prep 1-8 - ; for when we need to modify parameters before we - ; make the actual call. - %assign i 0 - %rep %0 - mov sys.reg. %+ i, %1 - %rotate 1 - %assign i i+1 - %endrep -%endmacro - -%macro ccall 1-* - %if %0 > ccall.reg.ct - %assign ct ccall.reg.ct - %else - %assign ct %0-1 - %endif - %assign i 0 - %rotate 1 - %rep ct - ; if the function is well-behaved, all its arguments fit - ; in registers. if not, things get ugly. see below. - mov ccall.reg. %+ i, %1 - %assign i i+1 - %rotate 1 - %endrep - %if %0 > ccall.reg.ct - ; if there are more parameters to a C function than the - ; number of permitted registers, they must be pushed in - ; reverse order to the stack. - ; keep your function signatures under control, people. - %assign ct (%0-ct)-1 - %rotate ct - %rep ct - %rotate -1 - push %1 - %endrep - %rotate ct - push rsp ; it's our responsibility to preserve the stack - %endif - call %1 - %if %0 > ccall.reg.ct - ; the extra arguments are still on the stack; time to - ; dump them back into the Garbage Zone - pop rsp - %endif -%endmacro Index: kcore/boot.c ================================================================== --- kcore/boot.c +++ kcore/boot.c @@ -1,8 +1,14 @@ -#include "core.h" +#include extern stat entry(kenv); stat _boot(unsigned int argc, char** argv) { - kenv e = { argc, argv }; + kenv e = { + // todo: determine terminal class and set term vs ansi correctly! 
+ { {kiostream_term, 0}, {kiostream_term, 1} }, // chan std + { {kiostream_closed}, {kiostream_term, 2} }, // chan err + argc, argv, + null // no environment yet + }; return entry(e); } Index: kcore/core.h ================================================================== --- kcore/core.h +++ kcore/core.h @@ -1,12 +1,22 @@ #ifndef KIcore #define KIcore +#include +#include +#include + +static void* const null = (void*)0; -typedef unsigned long long sz; -typedef unsigned char stat; +typedef struct kvar { + ksraw name; + ksraw val; + char* platform; +} kvar; typedef struct kenv { - sz argc; - char** argv; + kiochan std; + kiochan err; + sz argc; char** argv; + kvar* vars; } kenv; #endif Index: kcore/kcore.md ================================================================== --- kcore/kcore.md +++ kcore/kcore.md @@ -3,11 +3,11 @@ ## entry when using libk, your program's entry point will not be the `int main(int,char**)` function that libc opens into. libk will call the function `stat entry(kenv)` instead. like libc, the value returned by `entry` will be returned to the host platform. ## types -kcore contains fixed-width integer types. note that the available of each depends on your platform; compilation will fail if e.g. you try to use a u64 or a u128 on a 32-bit platform, so where exact lengths are not required, you may wish to use the built-in C types instead. +kcore contains fixed-width integer types (in `<k/type.h>`). note that the availability of each depends on your platform; compilation will fail if e.g. you try to use a u64 or a u128 on a 32-bit platform, so where exact lengths are not required, you may wish to use the built-in C types instead. * `u8` - an unsigned 8-bit integer * `s8` - a signed 8-bit integer * `u16` - an unsigned 16-bit integer * `s16` - a signed 16-bit integer @@ -23,12 +23,12 @@ ### struct kenv `kenv` is a struct that encompasses the environment the program was launched in. 
* `kiochan std` - a stereo IO channel for reading and writing to and from stdout. * `kiochan err` - a mono IO channel for writing to stderr. - * `kvar* env` - a pointer into the program's environment + * `kvar* vars` - a pointer into the program's environment ### struct kvar `kvar` is a struct that abstracts over platform environment variables. * `kstr name` - the name of an environment variable * `kstr val` - the value of an environment variable * `char* platform` - a pointer into the platform's underlying representation Index: kcore/makefile ================================================================== --- kcore/makefile +++ kcore/makefile @@ -1,1 +1,37 @@ +## kcore/makefile +# kcore has to include, among other things, a replacement +# for stddef.h, and that can't be written in portable C, +# so we're generating it at build time. +# +# look, imma just be straight with you. the mechanism we're +# using to generate these headers is unbelievably heinous. +# it's inelegant, it's gross, and it's horrible. in the long +# term this NEEDS to be replaced with a bespoke solution +# instead of makefile gibberish. hopefully tho this will be +# enough in the short term for libk to get going, enough that +# someone more competent than me will someday be interested +# in fixing this horrorshow. +# +# until then: i'm sorry. 
+# very sincerely yours, lexi hale + +gen-headers = type.h + include ../modmake + +${OUT}/k/type.h: ${TMP}/type.${TARGET}.i + cp $< $@ + +# generating C source in make… yaaay +define arch = +${TMP}/type.$(1).%.$(2).i: type.$(1).$(2).i def.%.i ${TMP} + echo '#ifndef KItype' > $$@ + echo '#define KItype' >> $$@ + cat def.$$*.i >> $$@ + cat $$< >> $$@ + echo '#endif' >> $$@ +endef + +$(eval $(call arch,x86,32)) +$(eval $(call arch,x86,64)) + Index: kgraft/exe.attach.c ================================================================== --- kgraft/exe.attach.c +++ kgraft/exe.attach.c @@ -0,0 +1,4 @@ +#include +stat entry(kenv e) { + return 0; +} Index: kio/io.h ================================================================== --- kio/io.h +++ kio/io.h @@ -1,4 +1,53 @@ #ifndef KIio #define KIio +/* + * ~ lexi hale + * this header declares IO primitive functions and + * structures. it is the same for all platforms. + * platform-specific code is found in the *.platform.h + * files. + */ + +#include + +typedef enum kiostream_kind { + kiostream_closed, + // this kiostream cannot be written to + kiostream_file, + // this kiostream represents a file + kiostream_sock, + // this kiostream is attached to a socket, + // UNIX, IP, or otherwise + kiostream_term, + // this socket is being used to communicate + // directly with a human being + kiostream_ansi, + // like kiostream_term, but can also understand + // ANSI control codes + kiostream_other + // no fuckin idea +} kiostream_kind; + +typedef struct kiostream { + kiostream_kind kind; + #include "kiostream.platform.h" +} kiostream; + +typedef struct kiochan { + kiostream in; + // text can be read from this stream + kiostream out; + // text can be written to this stream +} kiochan; + +unsigned long long kiosend(kiochan); // send data to a channel +unsigned long long kiorecv(kiochan); // receive data from a channel + +typedef enum kiocond { + kiocond_ok, + // success + kiocond_fail, + // action failed +} kiocond; #endif Index: 
kio/makefile ================================================================== --- kio/makefile +++ kio/makefile @@ -1,1 +1,12 @@ +gen-headers = kiostream.platform.h + include ../modmake + +ifeq (${POSIX},yes) +api = posix +else +api = ${OS} +endif + +${OUT}/k/kiostream.platform.h: kiostream.$(api).i + cp $< $@ Index: kmem/kmem.md ================================================================== --- kmem/kmem.md +++ kmem/kmem.md @@ -52,12 +52,12 @@ ### kmcell `kmcell` is a stub struct used to disambiguate between source types.a "source" is an object that can hold an allocated object, such as the heap, a memory pool, a fixed-length array on stack, or a fixed-length global array. all values produced by a kmem allocation function point to within a `kmcell`. - * `kmptr_kind kind` - kind of cell - * `size_t sz` - kind of cell (data plus all fields) + * `kmkind kind` - kind of cell + * `sz size` - size of cell (data plus all fields) * `kmshred shred` - shredding flag ### kmref `kmref` is a struct that constitutes the in-memory representation of a reference-counted cell. Index: kmem/mem.h ================================================================== --- kmem/mem.h +++ kmem/mem.h @@ -1,4 +1,34 @@ #ifndef KImem #define KImem +#include + +typedef enum kmkind { + kmkind_none, + kmkind_heap, + kmkind_pool, + kmkind_ref, + kmkind_tree +} kmkind; + +typedef enum kmshred { + kmshred_yes, + kmshred_no +} kmshred; + +typedef struct kmcell { + kmkind kind; + sz size; + kmshred shred; + sz refs; + struct kmcell* src; + char data[]; +} kmcell; + +typedef struct kmptr { + kmkind kind; + kmshred shred; + void* ref; + kmcell* cell; +} kmptr; #endif Index: kstr/kstr.md ================================================================== --- kstr/kstr.md +++ kstr/kstr.md @@ -4,21 +4,21 @@ ## types ### struct kstr `struct kstr` is a structure for holding pascal strings (length-prefixed strings). it is the basic libk string type. 
**note:** if `ptr.ref` ≠ NULL and `sz` = 0, the string's length is unknown and should be calculated by any function that operates on a kstr, storing the result in the object if possible. - * `size_t sz` - length of string, excluding any null terminator + * `sz size` - length of string, excluding any null terminator * `kmptr ptr` - pointer to string in memory ### struct ksraw `struct ksraw` is like `kstr` except it uses raw `char` pointers instead of a `kmptr`. - * `size_t sz` - length of string, excluding any null terminator + * `sz size` - length of string, excluding any null terminator * `char* ptr` - pointer to string in memory ### struct ksbuf `struct ksbuf` is a structure used to hold buffers. - * `size_t sz` - maximum size of buffer, including any null terminator + * `sz size` - maximum size of buffer, including any null terminator * `char* buf` - region of memory to store buffer in * `ksalloc strat` - allocation strategy * `kmkind rule` - kind of allocator to use. only needs to be set if `where` is NULL. see [kmem](../kmem/kmem.md). * `kmcell* where` - where to allocate the object, in case of pool or tree allocation. @@ -25,12 +25,12 @@ ### struct kschain `struct kschain` is a structure used for string accumulators that works by aggregating pointers to strings, instead of copying the strings themselves. 
* `kschain_kind kind` - kind of chain * `kmkind rule` - kind of allocation to use if `kind` ≠ `kschain_kind_linked` * `pstr* ptrs` - pointer to pointer list - * `size_t ptrc` - number of pointers - * `size_t sz` - total amount of space in `ptrs` + * `sz ptrc` - number of pointers + * `sz size` - total amount of space in `ptrs` #### enum kschain_kind * `kschain_kind_block` - occupies a single block of memory * `kschain_kind_linked` - uses a linked list, allocated and deallocated as necessary @@ -63,13 +63,13 @@ kstr chain[] = { Kstr(""), ksref(text), Kstr("") }; - char* html = kscomp([Kmsz(chain)](../kmem/kmem.md), chain, &buf); + char* html = kscomp(Kmsz(chain), chain, &buf); kscomp will only calculate the length of individual strings if they are not already known. when it needs to calculate the length of a string, it will store that length in the original array so repeated calls can be made without needing to repeatedly calculate the lengths. this is not always desirable, so the variant `kscompc` exists, which is exactly the same as `kscomp` in every respect except that `chain` is not altered in any way. ### macros if `KFclean` is not set when is included, the following macros are defined. * `Kstr(string)` - the compile-time equivalent to `kstr()`. `Kstr` takes a literal string and inserts the text `{ sizeof (string), string }` into the document, suitable for initializing a kstr. Index: kstr/str.h ================================================================== --- kstr/str.h +++ kstr/str.h @@ -1,4 +1,15 @@ #ifndef KIstr #define KIstr +#include + +typedef struct kstr { + sz size; + kmptr ptr; +} kstr; + +typedef struct ksraw { + sz size; + char* ptr; +} ksraw; #endif Index: libk.md ================================================================== --- libk.md +++ libk.md @@ -19,13 +19,17 @@ libk's goals are far-reaching, and suggestions are welcome. note however that libk is *not* intended to be a kitchen-sink library like libiberty. 
it's meant to do one thing, and to it well: to provide an easy- and pleasant-to-use foundation for modern open source projects. below is a list of some of the project's major goals. 1. **IO.** libc's basic input/output mechanisms are dreadful, built at entirely the wrong level of abstraction. libk is intended to make many more primitives available to the user, and offer a sliding scale of abstraction so libk is suitable for a wide range of needs. 2. **file manipulation.** libc's file manipulation primitives are a relic of a bygone age and in dire need of upgrading. 3. **terminal manipulation.** libc has no provision for simple output formatting, a task that requires a combination of ANSI codes and in some cases pty manipulation with POSIX APIs, both of which are somewhat dark wizardry. this situation forces many innocent coders to drag in the entire unholy bulk of the aptly named library `ncurses`, much of whose code has been utterly obsolete for the last twenty years and whose API is one of the most singularly hateful ones in existence. libk therefore should offer a simple, straightforward way to do gracefully-degrading terminal sorcery. - 0. **tooling.** libk is intended as more than just a library. it's also intended to work with some basic tooling to automate tasks that current binary tooling is inadequate for -- for instance, embedding binary data into a program binary. (see module [kgraft](kgraft)) - 0. **modularity.** libk is not part of the C specification and it isn't always going to be practical for developers to expect the entire library to be present on the end-user's computer. so libk is designed to be usable in many different ways -- as a traditional library, as a static library, in full form or with only components needed by the developer, to be distributed either on its own or as part of a binary. - 0. **compatibility.** code that links against libk should be able to compile and run on any operating system. 
in the ideal case (Linux or FreeBSD) it will be able to do so without touching any other system libraries; for less ideal environments like Windows, libk will when necessary abstract over system libraries or libc itself. + 4. **memory management.** the single memory management function `malloc()` provided by libc is absolutely pitiful. this is 2019. modern applications have much more exotic allocation needs, and a standard library should offer a range of allocators and management techniques, as well as abstract pointer objects so that pointers to objects of different allocation types (including static or stack allocation!) can be mixed freely and safely. + 5. **intrinsic reentrancy.** because *jesus christ,* libc. + 6. **interprocess communication.** libc offers no useful IPC abstractions over the paltry array of tools POSIX &co. give us to work with. we can do better. + 7. **tooling.** libk is intended as more than just a library. it's also intended to work with some basic tooling to automate tasks that current binary tooling is inadequate for -- for instance, embedding binary data into a program binary. (see module [kgraft](kgraft)) + 8. **modularity.** libk is not part of the C specification and it isn't always going to be practical for developers to expect the entire library to be present on the end-user's computer. so libk is designed to be usable in many different ways -- as a traditional library, as a static library, in full form or with only components needed by the developer, to be distributed either on its own or as part of a binary. + 9. **compatibility.** code that links against libk should be able to compile and run on any operating system. in the ideal case (Linux or FreeBSD) it will be able to do so without touching any other system libraries; for less ideal environments like Windows, libk will when necessary abstract over system libraries or libc itself. + 10. **sane error-handling.** every time you type `errno` god murders a puppy. 
## naming conventions one of the most frustrating things about libc is its complete and total *lack* of a naming convention. in C, every function and global is injected into a single global namespace, including macros. this means that every libc header you include scatters words all over that namespace, potentially clobbering your function with a macro! @@ -64,10 +68,11 @@ * OpenBSD: `obsd` * Darwin/Mac OS X/iOS: `dar` * MS-DOS: `dos` * FreeDOS: `fdos` * Windows: `win` + * Windows MinGW: `mgw` #### file extensions * C function implementations: `*.c` * C module headers: `*.h` @@ -91,10 +96,12 @@ other assemblers will probably be necessary for the more exotic targets, however. ## repository structure libk uses a strict directory structure for code, and deviations from this structure will not be tolerated without extremely good reason. + +total segregation is maintained between source code, temporary files, and output objects. source is found in module directories (`k*/`). the destinations for temporary files and output objects are retargetable via the `make` parameters `TMP= OUT=`, but default to `tmp/` and `out/`, which are excluded from the repo with fossil's `ignore-glob` setting. all libk code is dispersed into modules: `kcore` for internals, `kio` for I/O, `kgraft` for binary packing, etc. each module has a folder in the root directory. (libk does not have submodules.) inside each module's directory should be a header with the same name as the module (see **naming conventions** above). each function should be kept in a separate file within its module's directory. when OS or architecture-specific code is needed, the file's name should be a list of one or more of the fields [arch, OS, bits, format] separated by a `.` -- for instance, the 32-bit x86 haiku version of a function called `write` defined in assembly would be named `write.x86.hai.32.s`. 
however, if a function has an extraordinarily large number of versions, they may instead be stored in a folder with the same name as the function. @@ -156,11 +163,11 @@ **libk.a** will build the statically linked form of libk, according to the build variables set **tool** will build the executables used for modules such as `kgraft`. -there is no **clean** target. to clean the repository, simply delete the directory `out/`. +**clean** will delete the `tmp` and `out` trees. ## authors so far, this is a one-woman show. contributions are welcome however. Index: makefile ================================================================== --- makefile +++ makefile @@ -1,28 +1,39 @@ export OUT = $(PWD)/out export ARCH = x86 export OS = lin export BITS = 64 +export TMP = $(PWD)/tmp export TARGET = $(ARCH).$(OS).$(BITS) moddirs = $(wildcard k*) -modules = $(moddirs:k%=%) -headers = $(moddirs:k%=$(OUT)/k/%.h) -objects = $(modules:%=$(OUT)/k%.o) -makefiles = $(moddirs:%=%/makefile) - binaries = $(wildcard */exe.*.c) binmods = $(sort $(dir $(binaries))) -all: obj defs tool +posix-oses = lin fbsd dar and hai mgw + +ifeq ($(findstring $(OS),$(posix-oses)),$(OS)) +export POSIX = yes +else +export POSIX = no +endif + +# include libgcc.a in gcc builds, just in case +ifeq ($(CC),gcc) +export COMPLIB = -lgcc +endif + +all: defs obj tool obj: $(moddirs:%=%.obj) -defs: $(headers) -tool: $(binmods:%=%.tool) +defs: $(moddirs:%=%.def) +tool: $(OUT)/libk.a $(binmods:%=%.tool) +clean: + rm -rf $(TMP) $(OUT) -lists = moddirs modules headers objects makefiles binaries binmods +lists = moddirs objects binaries binmods POSIX dbg: @echo -e lists: $(foreach var, $(lists), "\\n - \\e[1m$(var)\\e[m = $($(var))") %.obj: %/makefile ${TARGET}.calls $(OUT) cd $* && $(MAKE) obj @@ -30,23 +41,23 @@ %.tool: %/makefile $(OUT) cd $* && $(MAKE) tool %.dbg: %/makefile $(OUT) cd $* && $(MAKE) dbg + +%.def: %/makefile $(OUT) $(OUT)/k + cd $* && $(MAKE) def %.calls: arch/makefile - cd arch && make calls.$*.s + cd 
arch && $(MAKE) $(TMP)/calls.$*.s -$(OUT)/libk.so: mods $(OUT) - $(CC) -shared -o $@ $(objects) +$(OUT)/libk.so: obj $(OUT) + $(CC) -shared -nostdlib $(COMPLIB) -o $@ $(OUT)/*.o -$(OUT)/libk.a: mods $(OUT) - # using `ar c` and ranlib here instead of - # `ar cs` in case `ar` isn't the GNU version - ar c $@ $(objects) +$(OUT)/libk.a: obj $(OUT) + # using `ar rc` and ranlib here instead of + # `ar rcs` in case `ar` isn't the GNU version + ar rc $@ $(OUT)/*.o ranlib $@ -$(OUT)/k/%.h: k%/makefile $(OUT)/k - cd k$* && $(MAKE) $@ - $(OUT) $(OUT)/k: mkdir -p $@ Index: modmake ================================================================== --- modmake +++ modmake @@ -4,35 +4,45 @@ # vim: ft=make mod = $(notdir $(PWD)) src = $(wildcard *.c) $(wildcard *.s) bare = $(mod:k%=%) +headers = $(wildcard *.h) $(gen-headers) tools = $(filter exe.%.c, $(src)) nontools = $(filter-out exe.%.c, $(src)) cobjects = $(filter %.c, $(nontools)) sobjects = $(filter %.${TARGET}.s, $(nontools)) +cflags = -isystem ${OUT} -nostdlib ${COMPLIB} -L${OUT} -lk + obj: $(cobjects:%.c=${OUT}/$(mod).%.o) \ $(sobjects:%.s=${OUT}/$(mod).%.o) -tool: $(tools:exe.%.c=${OUT}/$(mod).%) +tool: $(tools:exe.%.c=${OUT}/$(mod).%) \ + ${OUT}/libk.a + +def: $(headers:%=${OUT}/k/%) dbg: @echo tools = $(tools) @echo TARGET = ${TARGET} @echo cobjects = $(cobjects) @echo sobjects = $(sobjects) + @echo headers = $(headers) @echo mod = $(mod) ${OUT}/$(mod).%.o: %.c - $(CC) -c $< -o $@ + $(CC) $(cflags) -c $< -o $@ -${OUT}/k/$(bare).h: $(bare).h +${OUT}/k/%.h: %.h cp $< $@ ${OUT}/$(mod).%: exe.%.c - $(CC) $< -o $@ + $(CC) $(cflags) $< -o $@ + +${TMP}: + mkdir -p ${TMP} #- assembly # compiling the assembly code will be faster but a lot more # complex, given the nature of assembly and the large number of # platforms targeted. we need to add build rules for every @@ -45,23 +55,27 @@ # % = function name # $(1) = arch tuple arch = ${OUT}/$(mod).%.$(1).o: %.$(1).s # invoke with $(call arch,tuple). 
do not # put spaces between either term though! + +ifeq ($(debug),yes) +yasm-flags = -gdwarf2 +endif #-- linux # linux uses the ELF{32,64} binary format, and generating these # from yasm is trivial. linux only supports one ABI per format, # at least with ELF, so that's all we need to do. #${OUT}/$(mod).%.x86.lin.32.o: %.x86.lin.32.s $(call arch,x86.lin.32) - yasm -gdwarf2 -felf32 $< -o $@ + yasm $(yasm-flags) -felf32 -i${TMP} $< -o $@ #${OUT}/$(mod).%.x86.lin.64.o: %.x86.lin.64.s $(call arch,x86.lin.64) - yasm -gdwarf2 -felf64 $< -o $@ + yasm $(yasm-flags) -felf64 -i${TMP} $< -o $@ #-- freebsd # the freebsd ABI is different, so it will require different code # (though there might be ways to minimize that). freebsd uses the # same binary format as Linux (though it also supports a.out and