libk  Check-in [f5b7fa5762]

Overview
Comment:updates
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: f5b7fa57628ccb9c61d9729b95f3861eaa0f432ed935140731689c2eff8fba7c
User & Date: lexi on 2019-06-27 05:52:40
Other Links: manifest | tags
Context
2019-06-27
09:57
add in mechanism to generate syscall tables for x86 linux check-in: 860229e8ce user: lexi tags: trunk
05:52
updates check-in: f5b7fa5762 user: lexi tags: trunk
2019-06-26
13:59
fix typo check-in: 4eb81c9227 user: lexi tags: trunk
Changes

Deleted arch/x86.lin.32.inc version [32148439cc].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
; vim: ft=nasm
%define lin.call.exit 1
%define lin.call.fork 2
%define lin.call.read 3
%define lin.call.write 4
%define lin.call.open 5
%define lin.call.close 6

%define lin.call.chdir 12

%define lin.reg.n 6
%define lin.reg.0 eax
%define lin.reg.1 ebx
%define lin.reg.2 ecx
%define lin.reg.3 edx
%define lin.reg.4 esi
%define lin.reg.5 edi

%define lin.call int 0x80 ; sysenter is allegedly the
  ; politically correct option but it does not actually
  ; appear to work without a whole lot of extra bullshit

; todo: learn vdsos
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<














































Added arch/x86.lin.32.s version [7c5909e17f].













































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
; vim: ft=nasm
; syscall abi definitions for 32-bit x86 linux.
; syscall numbers
%define sys.exit 1
%define sys.fork 2
%define sys.read 3
%define sys.write 4
%define sys.open 5
%define sys.close 6
%define sys.chdir 12

; registers used by the syscall convention, in order.
; sys.reg.0 receives the syscall number; the registers after
; it receive the arguments. sys.reg.n is the number of
; registers listed here.
%define sys.reg.n 6
%define sys.reg.0 eax
%define sys.reg.1 ebx
%define sys.reg.2 ecx
%define sys.reg.3 edx
%define sys.reg.4 esi
%define sys.reg.5 edi

; instruction sequence that enters the kernel
%define sys.call int 0x80 ; sysenter is reportedly the
  ; preferred mechanism, but it did not appear to work
  ; here without a considerable amount of extra setup

; todo: learn vdsos

Deleted arch/x86.lin.64.inc version [abfe39789c].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
; vim: ft=nasm
%define lin.call.exit 60
%define lin.call.fork 57
%define lin.call.read 0
%define lin.call.write 1
%define lin.call.open 2
%define lin.call.close 3

%define lin.call.chdir 80

%define lin.reg.n 6
%define lin.reg.0 rax
%define lin.reg.1 rdi
%define lin.reg.2 rsi
%define lin.reg.3 rdx
%define lin.reg.4 r10
%define lin.reg.6 r8
%define lin.reg.7 r9

%define lin.c.0 rdi
%define lin.c.1 rsi
%define lin.c.2 rdx
%define lin.c.3 rcx
%define lin.c.4 r8
%define lin.c.5 r9

%define lin.call syscall
; todo: learn vdsos
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
























































Added arch/x86.lin.64.s version [fcbc285104].

















































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
;; abi definition file
;; macros:
;; * sys: syscall64 wrapper
;; * ccall: automatically generate code to call a C function
;;          with any number of arguments

; syscall ops
; sys.call is the instruction that enters the kernel
%define sys.call syscall
; syscall numbers (the value loaded into sys.reg.0)
%define sys.write 1
%define sys.brk 12
%define sys.exit 60

; register order for syscall convention
; sys.reg.0 receives the syscall number; sys.reg.1 through
; sys.reg.6 receive the arguments in order.
%define sys.reg.0 rax
%define sys.reg.1 rdi
%define sys.reg.2 rsi
%define sys.reg.3 rdx
%define sys.reg.4 r10
%define sys.reg.5 r8
%define sys.reg.6 r9

; register order for ccall convention
; ccall.reg.N receives the Nth (zero-based) argument of a C
; function. ccall.reg.ct is the number of arguments that fit
; in registers; the ccall macro passes any further arguments
; on the stack.
%define ccall.reg.ct 6
%define ccall.reg.0 rdi
%define ccall.reg.1 rsi
%define ccall.reg.2 rdx
%define ccall.reg.3 rcx
%define ccall.reg.4 r8
%define ccall.reg.5 r9

%macro sys 1-7
	; sys number, arg1, arg2, ...
	; loads the syscall number and up to six arguments into
	; the syscall registers, in order, then enters the kernel.
	; the parameter count is capped at seven because only
	; sys.reg.0 through sys.reg.6 are defined; an eighth
	; parameter would expand to the undefined name sys.reg.7.
	; note: parameters are loaded left to right, so a later
	; parameter must not name a register that an earlier
	; mov has already overwritten.
	%assign i 0
	%rep %0
		; `sys.reg. %+ i` pastes the counter onto the prefix,
		; yielding sys.reg.0, sys.reg.1, ... which then expand
		; to the real register names.
		mov sys.reg. %+ i, %1
		%rotate 1
		%assign i i+1
	%endrep
	sys.call
%endmacro

%macro sys.prep 1-7
	; sys.prep number, arg1, arg2, ...
	; for when we need to modify parameters before we
	; make the actual call: loads the syscall registers in
	; order exactly like `sys`, but does not enter the
	; kernel. follow it with sys.call once the registers
	; hold their final values.
	; capped at seven parameters because only sys.reg.0
	; through sys.reg.6 are defined.
	%assign i 0
	%rep %0
		mov sys.reg. %+ i, %1
		%rotate 1
		%assign i i+1
	%endrep
%endmacro

%macro ccall 1-*
	; ccall fn, arg0, arg1, ...
	; emits code to call the C function `fn`. the first
	; ccall.reg.ct arguments are moved into ccall.reg.0
	; onward; any remaining arguments are pushed on the
	; stack in reverse order and removed again after the
	; call returns.
	;
	; caveats:
	; * arguments are loaded left to right, so a later
	;   argument must not name a register that an earlier
	;   mov has already overwritten.
	; * stack arguments must not be rsp-relative memory
	;   operands, since rsp moves while they are pushed.
	; * the macro leaves rsp mod 16 unchanged at the call
	;   site; the caller is still responsible for having
	;   the stack 16-byte aligned before invoking it.

	; %0 counts the function name too, so the number of
	; arguments is %0-1. ct = arguments passed in registers.
	%if %0 > ccall.reg.ct
		%assign ct ccall.reg.ct
	%else
		%assign ct %0-1
	%endif
	%assign i 0
	%rotate 1 ; skip the function name; %1 is now arg0
	%rep ct
		; if the function is well-behaved, all its arguments fit
		; in registers. if not, things get ugly. see below.
		mov ccall.reg. %+ i, %1
		%assign i i+1
		%rotate 1
	%endrep
	%if %0 > ccall.reg.ct
		; if there are more parameters to a C function than the
		; number of permitted registers, they must be pushed in
		; reverse order to the stack.
		; from here on ct = number of stack arguments.
		%assign ct (%0-ct)-1
		%if ct & 1
			; an odd number of 8-byte pushes would change
			; rsp mod 16; pad so alignment is preserved.
			sub rsp, 8
		%endif
		%rotate ct ; step past the stack arguments
		%rep ct
			%rotate -1 ; walk backwards: last argument first
			push %1
		%endrep
		%rotate ct ; bring the function name back to %1
	%endif
	; the callee expects its first stack argument directly
	; above the return address, so nothing else may be pushed
	; between the arguments and the call.
	call %1
	%if %0 > ccall.reg.ct
		%if ct > 0
			; caller cleanup: drop the stack arguments and
			; any alignment padding added above.
			add rsp, (ct + (ct & 1)) * 8
		%endif
	%endif
%endmacro

Added kfile/file.h version [7ec69a3e9e].









>
>
>
>
1
2
3
4
#ifndef KIfile
#define KIfile

#endif

Added kfile/kfile.md version [194f78820a].



>
1
# kfile

Added kfile/makefile version [f0df06fe05].



>
1
include ../modmake

Added kgraft/exe.attach.c version [a7ffc6f8bf].

Added kgraft/graft.h version [510bcb01f3].









>
>
>
>
1
2
3
4
#ifndef KIgraft
#define KIgraft

#endif

Added kgraft/kgraft.md version [74aca3ba50].



>
1
# kgraft

Added kgraft/makefile version [f0df06fe05].



>
1
include ../modmake

Added kio/io.h version [247977844e].









>
>
>
>
1
2
3
4
#ifndef KIio
#define KIio

#endif

Added kio/kio.md version [947bccad05].



>
1
# kio

Modified kio/kio_posix_fd_write.x86.lin.32.s from [c48aae0264] to [7b77d86bab].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
bits 32
global kio_posix_fd_write

%include "../arch/x86.lin.32.inc"
; vim: ft=nasm

kio_posix_fd_write:
	mov lin.reg.0, lin.call.write
	mov lin.reg.1, [esp + 4] ; holy god but this took the most
	mov lin.reg.2, [esp + 8] ; stupidly long time to fucking
	mov lin.reg.3, [esp + 12]; figure out
	lin.call
	ret




|



|
|
|
|
|


1
2
3
4
5
6
7
8
9
10
11
12
13
14
bits 32
global kio_posix_fd_write

%include "../arch/x86.lin.32.s"
; vim: ft=nasm

; kio_posix_fd_write: forwards its three stack-passed arguments
; to the linux write syscall and returns with whatever the
; kernel left in sys.reg.0.
; NOTE(review): the arguments are presumably (fd, buffer,
; length), matching write(2) - confirm against the C prototype.
kio_posix_fd_write:
	; sys.reg.1 is ebx, which the C calling convention requires
	; us to preserve for our caller, so save it first.
	push sys.reg.1
	; the syscall number is named sys.write in x86.lin.32.s;
	; there is no sys.call.write.
	mov sys.reg.0, sys.write
	; the push above moved esp down by 4, so the arguments now
	; sit at esp+8 onward (saved ebx, return address, then args).
	mov sys.reg.1, [esp + 8]
	mov sys.reg.2, [esp + 12]
	mov sys.reg.3, [esp + 16]
	sys.call
	pop sys.reg.1
	ret

Modified kio/kio_posix_fd_write.x86.lin.64.s from [4a36b27408] to [b72b3eff18].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
bits 64
global kio_posix_fd_write

%include "../arch/x86.lin.64.inc"
; vim: ft=nasm

kio_posix_fd_write:
	mov lin.reg.0, lin.call.write
	; mov lin.reg.1, lin.c.0 - nop
	; mov lin.reg.2, lin.c.1 - nop
	; mov lin.reg.3, lin.c.2 - nop
	lin.call
	ret




|



|
|
|
|
|


1
2
3
4
5
6
7
8
9
10
11
12
13
14
bits 64
global kio_posix_fd_write

%include "../arch/x86.lin.64.s"
; vim: ft=nasm

kio_posix_fd_write:
	; issues the write syscall using the arguments exactly as
	; they arrived from the C caller. only the syscall number
	; needs loading: ccall.reg.0-2 are the same registers as
	; sys.reg.1-3, so the moves below would be no-ops and are
	; kept only as documentation.
	mov sys.reg.0, sys.write
	; mov sys.reg.1, ccall.reg.0 - nop
	; mov sys.reg.2, ccall.reg.1 - nop
	; mov sys.reg.3, ccall.reg.2 - nop
	sys.call
	ret

Modified kio/makefile from [d6a8fa167d] to [f0df06fe05].

1
2
3
4
5
6
7
8
9
10
kio: posix

posix: ${OUT}/kio_posix_fd_write.${TARGET}.o

${OUT}/%.lin.32.o: %.lin.32.s
	yasm -felf32 $< -o $@

${OUT}/%.lin.64.o: %.lin.64.s
	yasm -felf64 $< -o $@

|
<
<
<
<
<
<
<
<
<
1









include ../modmake









Modified kmem/kmem.md from [e0850f6b30] to [9a7772ee1a].

4
5
6
7
8
9
10
11
12
13
14
15
16
17

18
19
20
21
22
23
24
25
26
27











28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
...
151
152
153
154
155
156
157






## module functions

**kmem** supplies two module-level functions, used to interact with the `kmptr` container type.

 * `kmfree(kmptr) → void` - free, downref, or ignore the pasted object as appropriate
 * `kmshred(kmptr) → void` - free, downref, or ignore the pasted object as appropriate. if deallocating, zero its contents
 * `kmstat(void*) → kmptr` - convenience function to wrap a pointer to a non-managed object in a `kmptr` struct, so it can be passed to functions that accept arbitrary objects. `kmptr p = kmstat(raw)` is equivalent to `kmptr p = { kmptr_kind_static, raw, NULL }`.
 * `kmtaint(&kmptr) → void` - "taints" a `kmptr` object by setting it to be shredded when freed. this may be desirable if the object pointed to contains privileged information.

## types

kmem defines the following types:
 

 * `struct kmptr` - abstract pointer object
	* `enum kmptr_kind`
 * `struct kmcell` - abstract memory cell
	* `enum kmcell_kind`
 * `struct kmref` - a reference-counted cell
 * `struct kmnode` - a node in an allocation tree
 * `struct kmpool` - a memory pool

`kmptr` and `kmcell` are both very similar. the difference is that a kmptr points to a region in memory and can be passed around freely. a `kmcell` is the actual in-memory representation of an allocation cell. a `kmcell` cannot be usefully instantiated; rather, it is downcast from an actual cell type (e.g. `kmnode n; kmcell* s = (kmcell*)(&n)`)












### kmptr

kmem functions can operate on both raw pointers and the `kmptr` struct type. `kmptr` is a generic struct that can contain any kind of pointer. this is useful if you wish to allocate different objects in different manners, but pass them on into a single interface.

memory pointed at by `kmptr` pointers can be freed either with the usual specialized function, or by passing the `kmptr` structure itself to the generic function `kmfree`, which will handle it appropriately, even if it's a pointer to a garbage-collected object or to a static region of memory.

a `kmptr` has the following layout:

 * `kmptr_kind kind` - codes the type of pointer
 * `kmshred shred` - an enum. if `kmshred_yes`, the value will be zeroed or otherwise made unreadable on free. if no, `kmfree` will consult `src` for shred policy if it is not NULL.
 * `void* ref` - the raw pointer enclosed by `cell`
 * `kmcell* cell` - a pointer to an object enclosure, typically either a memory pool or a referencing-counting object. NULL if not needed.
 
the convenience function `kmstat(void*) → kmptr` wraps a pointer to a static object in a `kmptr` struct.

#### kmptr_kind

`kmptr_kind` is an enum with one of the following values.

 * `kmptr_kind_none` - not a valid pointer
 * `kmptr_kind_static` - points to a static region of space. `kmptr` instances with this kind will be ignored by `kmfree`.
 * `kmptr_kind_heap` - a traditional heap pointer.
 * `kmptr_kind_pool` - points to a region stored in a memory pool.
 * `kmptr_kind_ref` - points to a reference-counted object.
 * `kmptr_kind_node` - points to a reference-counted object.

### kmcell

`kmcell` is a stub struct used to disambiguate between source types.a "source" is an object that can hold an allocated object, such as the heap, a memory pool, a fixed-length array on stack, or a fixed-length global array. all values produced by a kmem allocation function point to within a `kmcell`.

 * `kmptr_kind kind` - kind of cell
 * `size_t sz` - kind of cell (data plus all fields)
 * `kmshred shred` - shredding flag

### kmref

`kmref` is a struct that constitutes the in-memory representation of a reference-counted cell.

 * `kmcell_kind kind = ref` - kind of cell
 * `size_t sz` - size of cell (data plus all fields)
 * `kmshred shred` - shredding flag
 * `size_t refs` - number of active references 
 * `kmcell* src` - source, if any
 * `char data[]` - content of cell

### kmnode

`kmnode` is a struct that constitutes the in-memory representation of a tree node.

 * `kmcell_kind kind = node` - kind of cell
 * `size_t sz` - size of cell (data plus all fields)
 * `kmshred shred` - shredding flag
 * `kmnode* parent` - parent node
 * `kmnode* child` - first child node
 * `kmnode* lastchild` - last child node
 * `kmnode* prev` - previous sibling, NULL if first
 * `kmnode* next` - next sibling, NULL if last
 * `char data[]` - content of cell

### kmpool

 * `kmcell_kind kind = pool` - indicates the kind of source
 * `size_t sz` - size of cell (data plus all fields)
 * `kmshred shred` - shredding flag
 * `size_t cellsz` - size of individual pool cells
 * `kmpoolcell* top` - pointer to most recently allocated pool cell
 * `kmpoolcell* bottom` - pointer to most recently freed pool cell
 * `kmpoolcell data[]` - content of cell

### kmpoolcell

 * `kmpoolcell* last` - pointer to last element allocated before this one
 * `char data[]` - pool data

### kmshred

`kmshred` is an enum used to indicate whether an object should be "shredded" (written over) in memory when it's deleted. this is a useful means to ensure that privileged information is not accidentally left in memory after use. if the shredding mechanism is not useful, compile libk with the flag `KFmem_noshred` to exclude its functions and fields.
................................................................................
 * `tree` [af] - uses a node-child strategy. when a node is freed, all of its children are automatically freed as well.
   * `kmtreea(kmcell* src, void* parent, size_t) → void*` - create a tree node. if `parent` is NULL, the node will the top of a new tree. if src is null, allocate on-heap.
   * `kmtreez(kmcell* src, void* parent, size_t) → void*` - like `kmtreea` but zeroed 
   * `kmtreeao(kmcell* src, void* parent, size_t) → kmptr` - like `kmtreea` but returns a `kmptr` 
   * `kmtreezo(kmcell* src, void* parent, size_t) → kmptr` - like `kmtreez` but returns a `kmptr` 
   * `kmtreef(void*) → kmptr` - frees a node and all its children













|






>

<

<






>
>
>
>
>
>
>
>
>
>
>








|






<
<
<
<
<
<
<
<
<
<
<












|










|











|







|







 







>
>
>
>
>
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19

20

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52











53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
...
150
151
152
153
154
155
156
157
158
159
160
161

## module functions

**kmem** supplies the following module-level functions, used to interact with the `kmptr` container type.

 * `kmfree(kmptr) → void` - free, downref, or ignore the passed object as appropriate
 * `kmshred(kmptr) → void` - free, downref, or ignore the passed object as appropriate. if deallocating, zero its contents
 * `kmstat(void*) → kmptr` - convenience function to wrap a pointer to a non-managed object in a `kmptr` struct, so it can be passed to functions that accept arbitrary objects. `kmptr p = kmstat(raw)` is equivalent to `kmptr p = { kmkind_none, raw, NULL }`.
 * `kmtaint(&kmptr) → void` - "taints" a `kmptr` object by setting it to be shredded when freed. this may be desirable if the object pointed to contains privileged information.

## types

kmem defines the following types:
 
 * `enum kmkind` - enumerates allocation strategies
 * `struct kmptr` - abstract pointer object

 * `struct kmcell` - abstract memory cell

 * `struct kmref` - a reference-counted cell
 * `struct kmnode` - a node in an allocation tree
 * `struct kmpool` - a memory pool

`kmptr` and `kmcell` are both very similar. the difference is that a kmptr points to a region in memory and can be passed around freely. a `kmcell` is the actual in-memory representation of an allocation cell. a `kmcell` cannot be usefully instantiated; rather, it is downcast from an actual cell type (e.g. `kmnode n; kmcell* s = (kmcell*)(&n)`)


### kmkind

`kmkind` is an enum that specifies an allocation function.
 
 * `kmkind_none` - no allocation
 * `kmkind_heap` - heap allocation
 * `kmkind_pool` - pool allocation
 * `kmkind_ref` - reference-counting allocation
 * `kmkind_tree` - tree allocation

### kmptr

kmem functions can operate on both raw pointers and the `kmptr` struct type. `kmptr` is a generic struct that can contain any kind of pointer. this is useful if you wish to allocate different objects in different manners, but pass them on into a single interface.

memory pointed at by `kmptr` pointers can be freed either with the usual specialized function, or by passing the `kmptr` structure itself to the generic function `kmfree`, which will handle it appropriately, even if it's a pointer to a garbage-collected object or to a static region of memory.

a `kmptr` has the following layout:

 * `kmkind kind` - codes the type of pointer; `kmkind_none` indicates a non-allocated pointer to a static (global or on-stack) object.
 * `kmshred shred` - an enum. if `kmshred_yes`, the value will be zeroed or otherwise made unreadable on free. if no, `kmfree` will consult `cell` for shred policy if it is not NULL.
 * `void* ref` - the raw pointer enclosed by `cell`
 * `kmcell* cell` - a pointer to an object enclosure, typically either a memory pool or a reference-counting object. NULL if not needed.
 
the convenience function `kmstat(void*) → kmptr` wraps a pointer to a static object in a `kmptr` struct.












### kmcell

`kmcell` is a stub struct used to disambiguate between source types. a "source" is an object that can hold an allocated object, such as the heap, a memory pool, a fixed-length array on stack, or a fixed-length global array. all values produced by a kmem allocation function point to within a `kmcell`.

 * `kmkind kind` - kind of cell
 * `size_t sz` - size of cell (data plus all fields)
 * `kmshred shred` - shredding flag

### kmref

`kmref` is a struct that constitutes the in-memory representation of a reference-counted cell.

 * `kmkind kind = kmkind_ref` - kind of cell
 * `size_t sz` - size of cell (data plus all fields)
 * `kmshred shred` - shredding flag
 * `size_t refs` - number of active references 
 * `kmcell* src` - source, if any
 * `char data[]` - content of cell

### kmnode

`kmnode` is a struct that constitutes the in-memory representation of a tree node.

 * `kmkind kind = kmkind_tree` - kind of cell
 * `size_t sz` - size of cell (data plus all fields)
 * `kmshred shred` - shredding flag
 * `kmnode* parent` - parent node
 * `kmnode* child` - first child node
 * `kmnode* lastchild` - last child node
 * `kmnode* prev` - previous sibling, NULL if first
 * `kmnode* next` - next sibling, NULL if last
 * `char data[]` - content of cell

### kmpool

 * `kmkind kind = kmkind_pool` - indicates the kind of source
 * `size_t sz` - size of cell (data plus all fields)
 * `kmshred shred` - shredding flag
 * `size_t cellsz` - size of individual pool cells
 * `kmpoolcell* top` - pointer to most recently allocated pool cell
 * `kmpoolcell* bottom` - pointer to most recently freed pool cell
 * `kmpoolcell data[]` - content of cell

#### kmpoolcell

 * `kmpoolcell* last` - pointer to last element allocated before this one
 * `char data[]` - pool data

### kmshred

`kmshred` is an enum used to indicate whether an object should be "shredded" (written over) in memory when it's deleted. this is a useful means to ensure that privileged information is not accidentally left in memory after use. if the shredding mechanism is not useful, compile libk with the flag `KFmem_noshred` to exclude its functions and fields.
................................................................................
 * `tree` [af] - uses a node-child strategy. when a node is freed, all of its children are automatically freed as well.
   * `kmtreea(kmcell* src, void* parent, size_t) → void*` - create a tree node. if `parent` is NULL, the node will be the top of a new tree. if `src` is NULL, allocate on-heap.
   * `kmtreez(kmcell* src, void* parent, size_t) → void*` - like `kmtreea` but zeroed 
   * `kmtreeao(kmcell* src, void* parent, size_t) → kmptr` - like `kmtreea` but returns a `kmptr` 
   * `kmtreezo(kmcell* src, void* parent, size_t) → kmptr` - like `kmtreez` but returns a `kmptr` 
   * `kmtreef(void*) → kmptr` - frees a node and all its children

## macros

kmem defines the following macros.

 * `Kmsz(array)` - a convenience macro to return the number of elements in a static array. inserts the text `( sizeof (array) / sizeof (array) [0] )`

Added kmem/makefile version [f0df06fe05].



>
1
include ../modmake

Added kmem/mem.h version [dc631dc568].









>
>
>
>
1
2
3
4
#ifndef KImem
#define KImem

#endif

Added kmsg/kmsg.md version [1bf7781e05].



>
1
# kmsg

Added kmsg/makefile version [f0df06fe05].



>
1
include ../modmake

Added kmsg/msg.h version [b69837b00c].









>
>
>
>
1
2
3
4
#ifndef KImsg
#define KImsg

#endif

Added knet/knet.md version [caaca9de66].



>
1
# knet

Added knet/makefile version [f0df06fe05].



>
1
include ../modmake

Added knet/net.h version [a2f498298b].









>
>
>
>
1
2
3
4
#ifndef KInet
#define KInet

#endif

Added kproc/kproc.md version [ca8b9fab82].



>
1
# kproc

Added kproc/makefile version [f0df06fe05].



>
1
include ../modmake

Added kproc/proc.h version [8fdd57f87b].









>
>
>
>
1
2
3
4
#ifndef KIproc
#define KIproc

#endif

Added kstr/kstr.md version [8ba3da6088].























































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# kstr

**kstr** is the libk string library. it uses the **short** naming convention with the glyph `s`. **kstr** implies `#include <k/mem.h>`.

## types

### struct kstr
`struct kstr` is a structure for holding pascal strings (length-prefixed strings). it is the basic libk string type. **note:** if `ptr.ref` ≠ NULL and `sz` = 0, the string's length is unknown and should be calculated by any function that operates on a kstr, storing the result in the object if possible.
 * `size_t sz` - length of string, excluding any null terminator
 * `kmptr ptr` - pointer to string in memory

### struct ksraw
`struct ksraw` is like `kstr` except it uses raw `char` pointers instead of a `kmptr`.
 * `size_t sz` - length of string, excluding any null terminator
 * `char* ptr` - pointer to string in memory

### struct ksbuf
`struct ksbuf` is a structure used to hold buffers.
 * `size_t sz` - maximum size of buffer, including any null terminator
 * `char* buf` - region of memory to store buffer in
 * `ksalloc strat` - allocation strategy
 * `kmkind rule` - kind of allocator to use. only needs to be set if `where` is NULL. see [kmem](../kmem/kmem.md).
 * `kmcell* where` - where to allocate the object, in case of pool or tree allocation.

### struct kschain
`struct kschain` is a structure used for string accumulators that works by aggregating pointers to strings, instead of copying the strings themselves.
 * `kschain_kind kind` - kind of chain
 * `kmkind rule` - kind of allocation to use if `kind` ≠ `kschain_kind_linked`
 * `pstr* ptrs` - pointer to pointer list
 * `size_t ptrc` - number of pointers
 * `size_t sz` - total amount of space in `ptrs`

#### enum kschain_kind
 * `kschain_kind_block` - occupies a single block of memory
 * `kschain_kind_linked` - uses a linked list, allocated and deallocated as necessary

### enum ksalloc
`enum ksalloc` is an enumerator that tells libk what strategy to use when filling a `ksbuf` or `kschain` struct.
 * `ksalloc_static` - do not allocate memory, fill an already-allocated, statically-sized array.
 * `ksalloc_alloc` - allocate a string in memory using the specified kind of allocator.
 * `ksalloc_dynamic` - fill an already-allocated array if possible, allocate a string in memory if the string length exceeds available space.

## functions

### kssz
`size_t kssz(char* str, size_t max)` returns the number of characters in a C string, **including** the final null. will count at most `max` characters if `max` > 0.

### kstr
`kstr kstr(char* str, size_t max)` takes a C string and returns a P-string, calculating the length of `str` and storing it in the return value. `max` works as in `kssz`.

### ksref
`ksraw ksref(kstr)` is a simple convenience function that returns the `ksraw` form of a `kstr`.

### kscomp
`char* kscomp(size_t ct, ksraw struct[], kmbuf* buf)` is a **string composition** function. it serves as an efficient, generalized replacement for functions like `strcat` and `strdup`.

to use kscomp, create an array of `kstr` and fill it with the strings you wish to concatenate. for example, to programmatically generate an HTML link tag, you might use the following code.

	char mem[512];
	kmptr text = <...>;
	char* src = <...>;
	kmbuf buf = { sizeof mem, &mem, kmkind_none };
    kstr chain[] = {
		Kstr("<a href=\""), { 0, src }, Kstr("\">"),
			ksref(text),
		Kstr("</a>")
	};
	char* html = kscomp(Kmsz(chain), chain, &buf);

the `Kmsz` macro is documented in [kmem](../kmem/kmem.md).

kscomp will only calculate the length of individual strings if they are not already known. when it needs to calculate the length of a string, it will store that length in the original array so repeated calls can be made without needing to repeatedly calculate the lengths. this is not always desirable, so the variant `kscompc` exists, which is exactly the same as `kscomp` in every respect except that `chain` is not altered in any way.

### macros
if `KFclean` is not set when `<k/str.h>` is included, the following macros are defined.

 * `Kstr(string)` - the compile-time equivalent to `kstr()`. `Kstr` takes a literal string and inserts the text `{ sizeof (string), string }` into the document, suitable for initializing a kstr.

Added kstr/makefile version [f0df06fe05].



>
1
include ../modmake

Added kstr/str.h version [21d6720beb].









>
>
>
>
1
2
3
4
#ifndef KIstr
#define KIstr

#endif

Added kterm/kterm.md version [5df0db1632].



>
1
# kterm

Added kterm/makefile version [f0df06fe05].



>
1
include ../modmake

Added kterm/term.h version [891027d45b].









>
>
>
>
1
2
3
4
/* include guard for the kterm module header: `KI` followed by the
 * bare module name. nothing is declared inside the guard yet. */
#ifndef KIterm
#define KIterm

#endif

Modified libk.md from [4cea7e271a] to [d44feffbdd].

31
32
33
34
35
36
37
38
39
40
41
42
43







44
45
46
47
48
49
50
..
71
72
73
74
75
76
77


78
79
80
81
82
83
84
..
90
91
92
93
94
95
96


97
98
99
100
101
102
103
104
105
106
107
108
109




























110
111
112
113
114
115
116
117
118
119
120
121

libk is designed to fix this (in hindsight) glaring error.

however, a common problem with libraries is the proliferation of inordinately long and hard-to-type function names such as `SuperWidget_Widget_Label_Font_Size_Set()`. this may be tolerable in IDEs with robust auto-complete or when referencing a highly-specific, sparsely-used library; it is however completely intolerable in the case of a core library with heavily used functionality.

therefore, libk uses two slightly different naming conventions: the **short** convention, for core functions the user will call frequently, and the **full** convention, for less-commonly used functions. the inconvenience of remembering which is which will hopefully be outweighed by the keystrokes (and bytes) saved.

in the **full** convention, a function's name is prefixed with its module name followed by an underscore. thus, `kfile/open.c` will be invoked as `kfile_open()`.

in the **short** convention, the function name is prefixed by the letter `k` followed by the module's "glyph" -- a one- or two-letter sequence that represents the module, usually the first one or two characters. therefore, `kio/write.c` is invoked as `kiowrite`.

which naming convention a module uses should be specified at the top of its documentation. if it uses the short convention, its glyph should be specified as well








### atoms

libk uses the concept of "atoms" (small, regular strings of text) to standardize common references, such as operating systems or processor architectures.

#### operating systems

these atoms will be used to reference operating systems.
................................................................................

these atoms will be used to reference particular system architectures. these will mostly be used in the filenames of assembly code.

## macros

libk will not in any circumstance use macros to encode magic numbers, instead using typedef'd enums. all libk macros begin with the uppercase letter `K` -- e.g. `Kmacro`. macros that can be defined by the user to alter the behavior of the api should begin with `KF` if they are on/off flags, or `KV` otherwise. **macros should only be defined by the libk headers if the flag `KFclean` is *not* defined at the time of inclusion.**



## languages

libk uses only three languages: C (\*.c, \*.h), yasm (\*.s), and make (makefile).

other assemblers will probably be necessary for the more exotic targets, however.

## repository structure
................................................................................
each function should be kept in a separate file within its module's directory. when OS or architecture-specific code is needed, the file's name should be a list of one or more of the fields [arch, OS, bits, format] separated by a `.` -- for instance, the 32-bit x86 haiku version of a function called `write` defined in assembly would be named `write.x86.haiku.32.s`. however, if a function has an extraordinarily large number of versions, they may instead be stored in a folder with the same name as the function.

each module should have a header named the same thing as the module except without the `k` prefix. (e.g. the header for `kio` is `kio/io.h`) located in its folder. this is the header that the end-user will be importing, and should handle any user-defined flags to present the API the user has selected.

each module directory should contain a makefile that can build that module. see **makefiles** below. all makefiles should be named `makefile` (**not** `Makefile`).

each module should contain a markdown file. this file's name should be the name of the parent directory suffixed with `.md`; for instance, `kterm` should contain the file `kterm/kterm.md`. this file should document the module as thoroughly as possible 



the repository root and each module may also contain the directory `misc`. this directory may be used to store miscellaneous data such as ABI references, developer discussions, and roadmaps. if the `misc` directory is deleted, this must not affect the library or build system's function in any way - that is, nothing outside a `misc` folder may reference a `misc` folder or anything inside it, including documentation. the `misc` directory should be removed when its contents are no longer needed. in most cases, the repository wiki and forum should be used instead of the `misc` folder.

the folder `arch` in the root of the repository contains syscall tables and ABI implementations for various architectures.

## makefiles

libk uses `make` as its build system. makefiles should be handwritten. there will be one global makefile in the root of the repository, and one makefile for each module.

each rule should be prefixed with ${OUT}, to allow retargeting of the build-dir with the OUT environment variable. this is particularly important since the makefiles chain.

the rest is TBD.





























## build process

libk has a number of targets. all files generated by a `make` invocation will be stored in the folder "out" at the root of the repository. this directory may be deleted entirely to clean the repository.

**defs** will create the directory `out/k/` and populate it with module header files. the `k/` directory shall be suitable to copy to `/usr/include` or similar. these header files will copied by building the `defs` target of each module's makefile.

**libk.so** will build the dynamically linked form of libk, according to the build variables set

**libk.a** will build the statically linked form of libk, according to the build variables set

**tool** will build the executables used for modules such as `kgraft`.








|

|



>
>
>
>
>
>
>







 







>
>







 







>
>













>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




|







31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
..
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
..
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160

libk is designed to fix this (in hindsight) glaring error.

however, a common problem with libraries is the proliferation of inordinately long and hard-to-type function names such as `SuperWidget_Widget_Label_Font_Size_Set()`. this may be tolerable in IDEs with robust auto-complete or when referencing a highly-specific, sparsely-used library; it is however completely intolerable in the case of a core library with heavily used functionality.

therefore, libk uses two slightly different naming conventions: the **short** convention, for core functions the user will call frequently, and the **full** convention, for less-commonly used functions. the inconvenience of remembering which is which will hopefully be outweighed by the keystrokes (and bytes) saved.

in the **full** convention, an identifier's name is prefixed with its module name followed by an underscore. thus, `kgraft/list.c` is invoked as `kgraft_list()`.

in the **short** convention, identifiers are prefixed by the letter `k` followed by the module's "glyph" -- a one- or two-letter sequence that represents the module, usually the first one or two characters. therefore, `kfile/open.c` is invoked as `kfopen`.

which naming convention a module uses should be specified at the top of its documentation. if it uses the short convention, its glyph should be specified as well.

in both naming conventions, the following rules apply:

 1. the possible values of enumeration types are always preceded by the name of the enumeration type and an underscore. for instance, the enum `kschain_kind` has a value named `kschain_kind_block`. **exception:** an enum named `<S>_kind`, where `<S>` is a struct type, may simply use the prefix `<S>_`.
 2. macros begin with the uppercase letter `K` -- e.g. `Kmacro`. macros that can be defined by the user to alter the behavior of the api should begin with `KF` if they are on/off flags, or `KV` otherwise.
 3. capital letters are only used in macro prefixes.
 4. low-level function names are prefixed with the API they call into. for example, the function that performs the POSIX syscall `write` is named `kio_posix_fd_write`. a wrapper around the Windows function `CreateProcess()` might be called `kproc_win_createprocess`.

### atoms

libk uses the concept of "atoms" (small, regular strings of text) to standardize common references, such as operating systems or processor architectures.

#### operating systems

these atoms will be used to reference operating systems.
................................................................................

these atoms will be used to reference particular system architectures. these will mostly be used in the filenames of assembly code.

## macros

libk will not in any circumstance use macros to encode magic numbers, instead using typedef'd enums. all libk macros begin with the uppercase letter `K` -- e.g. `Kmacro`. macros that can be defined by the user to alter the behavior of the api should begin with `KF` if they are on/off flags, or `KV` otherwise. **macros should only be defined by the libk headers if the flag `KFclean` is *not* defined at the time of inclusion.**

include guards take the form of the bare module name prefixed by `KI`. so to test if `k/term.h` has been included, you could write `#ifdef KIterm`.

## languages

libk uses only three languages: C (\*.c, \*.h), yasm (\*.s), and make (makefile).

other assemblers will probably be necessary for the more exotic targets, however.

## repository structure
................................................................................
each function should be kept in a separate file within its module's directory. when OS or architecture-specific code is needed, the file's name should be a list of one or more of the fields [arch, OS, bits, format] separated by a `.` -- for instance, the 32-bit x86 haiku version of a function called `write` defined in assembly would be named `write.x86.haiku.32.s`. however, if a function has an extraordinarily large number of versions, they may instead be stored in a folder with the same name as the function.

each module should have a header named the same thing as the module except without the `k` prefix. (e.g. the header for `kio` is `kio/io.h`) located in its folder. this is the header that the end-user will be importing, and should handle any user-defined flags to present the API the user has selected.

each module directory should contain a makefile that can build that module. see **makefiles** below. all makefiles should be named `makefile` (**not** `Makefile`).

each module should contain a markdown file. this file's name should be the name of the parent directory suffixed with `.md`; for instance, `kterm` should contain the file `kterm/kterm.md`. this file should document the module as thoroughly as possible.

each module may contain any number of files of the name `exe.*.c`. these files will be treated as *tools* by the build system and compiled as executables, rather than libraries. they should be compiled to `out/$module.$tool`.

the repository root and each module may also contain the directory `misc`. this directory may be used to store miscellaneous data such as ABI references, developer discussions, and roadmaps. if the `misc` directory is deleted, this must not affect the library or build system's function in any way - that is, nothing outside a `misc` folder may reference a `misc` folder or anything inside it, including documentation. the `misc` directory should be removed when its contents are no longer needed. in most cases, the repository wiki and forum should be used instead of the `misc` folder.

the folder `arch` in the root of the repository contains syscall tables and ABI implementations for various architectures.

## makefiles

libk uses `make` as its build system. makefiles should be handwritten. there will be one global makefile in the root of the repository, and one makefile for each module.

each rule should be prefixed with ${OUT}, to allow retargeting of the build-dir with the OUT environment variable. this is particularly important since the makefiles chain.

the rest is TBD.

## design principles

there are four overriding principles that guide the design of libk.

 1. it should be easy to write code that uses it.
 2. it should be easy to read code that uses it.
 3. the simple, obvious way of using libk should produce the most optimal code.
 4. code that uses libk should be idiomatic C.

for these reasons, the codebase follows a number of strict rules.
 
### booleans are banned
there are a number of reasons for this.

the first is simply that the boolean type in C is a bit messy and libk headers are intended to import as few extra files as possible.

the second is that boolean-using code can be hard to read. consider a struct declaration of the form `rule r = { 10, buf, true, false, true }`: the meaning of this declaration is opaque unless you've memorized the structure's definition.

instead, libk uses enums liberally. so the above might be rewritten as e.g.:

    rule r = { 10, buf,
		rule_kind_undialectical,
		rule_action_expropriate,
		rule_target_bourgeoisie
	};

this makes code much more legible and has the added benefit of making the definitions easier to expand at a later date if new functionality is needed, without breaking the API or ABI.
 
## build process

libk has a number of targets. all files generated by a `make` invocation will be stored in the folder "out" at the root of the repository. this directory may be deleted entirely to clean the repository.

**defs** will create the directory `out/k/` and populate it with module header files. the `k/` directory shall be suitable to copy to `/usr/include` or similar. these header files will be copied by building the `${OUT}/k/$(bare).h` target of each module's makefile.

**libk.so** will build the dynamically linked form of libk, according to the build variables set

**libk.a** will build the statically linked form of libk, according to the build variables set

**tool** will build the executables used for modules such as `kgraft`.

Modified makefile from [75ffd11290] to [e2aa0ec204].

1





2
3









4



5

























6
7

export OUT=$(PWD)/out





export TARGET=x86.lin.64










all: kio





























$(OUT)/kio.o:
	cd kio && make kio


>
>
>
>
>
|

>
>
>
>
>
>
>
>
>
|
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

49
# build directory, exported so the module makefiles see it.
# `?=` lets a caller retarget the build with the OUT environment
# variable; a plain `=` here would silently override the environment.
# OUT should be an absolute path, because module makefiles run from
# their own directories. $(CURDIR) is used instead of $(PWD) because
# make maintains it itself, so it stays correct under `make -C <dir>`.
export OUT ?= $(CURDIR)/out

# target platform atoms. override on the command line, e.g.
# `make ARCH=x86 OS=fbsd BITS=32`.
export ARCH = x86
export OS = lin
export BITS = 64

# arch.OS.bits tuple; modmake uses it to select the matching *.s files.
export TARGET = $(ARCH).$(OS).$(BITS)

# module discovery. everything in the repository root whose name
# starts with `k` is treated as a module directory. `:=` evaluates
# the wildcards once instead of on every reference.
moddirs   := $(wildcard k*)
# bare module names (kio -> io)
modules   := $(moddirs:k%=%)
# exported headers, e.g. $(OUT)/k/io.h
headers   := $(moddirs:k%=$(OUT)/k/%.h)
# NOTE(review): modmake writes objects as $(OUT)/<module>.<function>.o;
# no visible rule builds $(OUT)/k<module>.o -- confirm before using.
objects   := $(modules:%=$(OUT)/k%.o)
makefiles := $(moddirs:%=%/makefile)

# tool sources (exe.*.c) and the modules that contain them.
# $(dir ...) leaves a trailing slash (`kgraft/`), which would make
# the `tool` goal ask for `kgraft/.tool`; strip it so the goal is
# `kgraft.tool`, the same shape as `kgraft.obj`.
binaries := $(wildcard */exe.*.c)
binmods  := $(sort $(patsubst %/,%,$(dir $(binaries))))

# top-level goals. none of them names a real file, so they are
# declared phony; otherwise a stray file or directory called `obj`,
# `defs` or `tool` would make the goal look permanently up to date.
#   all  - everything below
#   obj  - object files of every module
#   defs - public headers copied to $(OUT)/k
#   tool - executables built from exe.*.c sources
.PHONY: all obj defs tool
all: obj defs tool
obj: $(moddirs:%=%.obj)
defs: $(headers)
tool: $(binmods:%=%.tool)

# dbg: print each variable named in $(lists) together with its value.
# printf is used instead of `echo -e`, which is not portable: make
# runs recipes with /bin/sh, and shells such as dash print a literal
# `-e`. \033[1m / \033[m switch bold on and off.
lists = moddirs modules headers objects makefiles binaries binmods
.PHONY: dbg
dbg:
	@printf 'lists:'; $(foreach var,$(lists),printf '\n - \033[1m%s\033[m = %s' '$(var)' '$($(var))';) printf '\n'

# per-module delegation: `<dir>.obj`, `<dir>.tool` and `<dir>.dbg` run
# the matching goal of <dir>/makefile. $* is the module directory.
# the recipes `cd` into the module (rather than using `$(MAKE) -C`)
# because modmake derives the module name from $(PWD), which only the
# shell's `cd` updates.
# $(OUT) is an order-only prerequisite: it must exist, but its
# timestamp is irrelevant.
%.obj: %/makefile | $(OUT)
	cd $* && $(MAKE) obj

%.tool: %/makefile | $(OUT)
	cd $* && $(MAKE) tool

%.dbg: %/makefile | $(OUT)
	cd $* && $(MAKE) dbg

# libraries. both depend on the `obj` goal (there is no `mods` goal),
# so every module's objects are built first.
# the module makefiles name their objects $(OUT)/<module>.<function>.o,
# so the link collects them with a shell glob that is expanded when the
# recipe runs, i.e. after the objects exist. objects left over from a
# build for a different TARGET would also match: delete $(OUT) when
# switching targets.

# dynamically linked libk.
$(OUT)/libk.so: obj | $(OUT)
	$(CC) -shared -o $@ $(OUT)/k*.*.o

# statically linked libk. `ar rc` followed by ranlib is used instead
# of `ar rcs` in case `ar` isn't the GNU version. `r` is the required
# operation key; `c` on its own only suppresses the creation message.
# the old archive is removed first so stale members cannot survive.
$(OUT)/libk.a: obj | $(OUT)
	$(RM) $@
	$(AR) rc $@ $(OUT)/k*.*.o
	ranlib $@

# export a module's public header to $(OUT)/k/<bare>.h by asking the
# module makefile to build that exact path.
# the source header is not visible as a prerequisite here, so the rule
# is forced to run every time; the module makefile then decides
# whether a copy is needed. without FORCE an edited header would never
# be re-exported once $(OUT)/k/<bare>.h exists.
# $(OUT)/k is order-only: it must exist, but its mtime must not matter.
$(OUT)/k/%.h: k%/makefile FORCE | $(OUT)/k
	cd k$* && $(MAKE) $@

# always-out-of-date helper used to force delegation to sub-makes.
.PHONY: FORCE
FORCE:

# create the build directories on demand.
$(OUT) $(OUT)/k:
	mkdir -p $@

Added modmake version [247b2f15bf].































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#- modmake
# this is the master makefile that controls the building of each
# libk module. it is included from each k*/makefile.
# vim: ft=make

# mod  = module name, taken from the directory make is running in
#        (e.g. kio). $(CURDIR) is maintained by make itself, whereas
#        $(PWD) comes from the environment and is stale when the
#        module is built with `make -C <module>`.
# src  = every C and assembly source in the module
# bare = module name without the leading k (kio -> io)
mod  := $(notdir $(CURDIR))
src  := $(wildcard *.c) $(wildcard *.s)
bare := $(mod:k%=%)

# tools    = exe.*.c sources, compiled to executables
# nontools = everything else
# cobjects = C sources that become library objects
# sobjects = assembly sources written for the current ${TARGET} only
tools    := $(filter     exe.%.c,   $(src))
nontools := $(filter-out exe.%.c,   $(src))
cobjects := $(filter     %.c,       $(nontools))
sobjects := $(filter %.${TARGET}.s, $(nontools))

# module goals. none of them is a file, so they are declared phony:
# a stray file or directory named obj, tool or dbg would otherwise
# stop the goal from ever running. `obj` stays first so it remains
# the default goal.
.PHONY: obj tool dbg

# obj: one object per library source, named ${OUT}/<module>.<source>.o
obj: $(cobjects:%.c=${OUT}/$(mod).%.o) \
     $(sobjects:%.s=${OUT}/$(mod).%.o)
# tool: one executable per exe.<tool>.c, named ${OUT}/<module>.<tool>
tool: $(tools:exe.%.c=${OUT}/$(mod).%)

# dbg: print the variables that drive this module's build
dbg:
	@echo tools = $(tools)
	@echo TARGET = ${TARGET}
	@echo cobjects = $(cobjects)
	@echo sobjects = $(sobjects)
	@echo mod = $(mod)

# C library objects: <function>.c -> ${OUT}/<module>.<function>.o
# the conventional CPPFLAGS/CFLAGS are honoured so callers can pass
# options (for example -fPIC, needed when the objects are linked with
# -shared). unset variables expand to nothing, so the default command
# is unchanged.
${OUT}/$(mod).%.o: %.c
	$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

# public header: <bare>.h -> ${OUT}/k/<bare>.h
# the ${OUT}/k directory is expected to exist already.
${OUT}/k/$(bare).h: $(bare).h
	cp $< $@

# tools: exe.<tool>.c -> ${OUT}/<module>.<tool>
${OUT}/$(mod).%: exe.%.c
	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -o $@

#- assembly
# building the assembly sources will be faster than building C, but
# the rules are much more involved: assembly is inherently platform
# specific and libk targets a large number of platforms. a build rule
# is needed for every arch.OS[.bits] tuple. writing each one by hand
# is repetitive and ugly, so a function generates the rule header.
#
# pieces of the generated pattern rule:
#   $(OUT) = ultimate build directory
#   $(mod) = module name
#        % = function name
#       $1 = arch tuple
arch = $(OUT)/$(mod).%.$1.o: %.$1.s
# usage: $(call arch,tuple) on a line of its own, followed by the
# recipe. do not put a space after the comma -- it would become part
# of the tuple.

#-- linux
# linux object files are ELF32 or ELF64, which yasm can emit directly.
# with ELF, linux only supports one ABI per format, so no further
# processing is required.

# rule header: ${OUT}/$(mod).%.x86.lin.32.o: %.x86.lin.32.s
$(call arch,x86.lin.32)
	yasm -felf32 -o $@ $<

# rule header: ${OUT}/$(mod).%.x86.lin.64.o: %.x86.lin.64.s
$(call arch,x86.lin.64)
	yasm -felf64 -o $@ $<

#-- freebsd
# the freebsd ABI is different, so it will require different code
# (though there might be ways to minimize that). freebsd uses the
# same binary format as Linux (though it also supports a.out and
# COFF), but because freebsd can interpret multiple different ABIs
# the object files need to be "branded" with the correct one using
# the tool brandelf (`brandelf -t [ABI]`).
#
# the object is assembled and branded under a temporary name and only
# then moved to $@. if brandelf fails, no unbranded object is left
# behind at $@ to be mistaken for an up-to-date target on the next run.

$(call arch,x86.fbsd.32)
	yasm -felf32 $< -o $@.tmp
	brandelf -t FreeBSD $@.tmp
	mv -f $@.tmp $@

$(call arch,x86.fbsd.64)
	yasm -felf64 $< -o $@.tmp
	brandelf -t FreeBSD $@.tmp
	mv -f $@.tmp $@