summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile4
-rw-r--r--doc/x86_64.pdfbin0 -> 11185300 bytes
-rw-r--r--flake.lock6
-rw-r--r--notes/flowchart.pngbin0 -> 88718 bytes
-rw-r--r--notes/notes.org85
-rw-r--r--src/library.c225
-rw-r--r--src/main.c47
-rw-r--r--test.lisp7
-rw-r--r--test.pit30
-rw-r--r--x86.pit539
10 files changed, 808 insertions, 135 deletions
diff --git a/Makefile b/Makefile
index d7dcaf1..81d4494 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@ CC ?= gcc
AR ?= ar
CHK_SOURCES ?= $(SRCS)
CPPFLAGS ?= -MMD -MP
-CFLAGS ?= -flto -ffat-lto-objects -march=native --std=c89 -g -Ideps/ -Isrc/ -Iinclude/ -Wall -Wextra -Wpedantic -Wconversion -Wformat-security -Wshadow -Wpointer-arith -Wstrict-prototypes -Wmissing-prototypes -Wnull-dereference -Wfloat-equal -Wundef -Wpointer-arith -Wbad-function-cast -Wmissing-braces -Wcast-align -Wstrict-overflow=5 -ftrapv
-LDFLAGS ?= -flto -g -static -lcolonq-pit -lcolonq-elf
+CFLAGS ?= --std=c89 -g -Ideps/ -Isrc/ -Iinclude/ -Wall -Wextra -Wpedantic -Wconversion -Wformat-security -Wshadow -Wpointer-arith -Wstrict-prototypes -Wmissing-prototypes -Wnull-dereference -Wfloat-equal -Wundef -Wpointer-arith -Wbad-function-cast -Wmissing-braces -Wcast-align -Wstrict-overflow=5 -ftrapv
+LDFLAGS ?= -g -static -lcolonq-pit -lcolonq-elf
BUILD = build_$(CC)
diff --git a/doc/x86_64.pdf b/doc/x86_64.pdf
new file mode 100644
index 0000000..24fa420
--- /dev/null
+++ b/doc/x86_64.pdf
Binary files differ
diff --git a/flake.lock b/flake.lock
index f655d49..3a6136e 100644
--- a/flake.lock
+++ b/flake.lock
@@ -99,11 +99,11 @@
]
},
"locked": {
- "lastModified": 1766198852,
- "narHash": "sha256-DZ0nqM4BwZwm0KzzJt74YVJmQYJDFR1gxuD+GIWDNQk=",
+ "lastModified": 1776209507,
+ "narHash": "sha256-PWtSumsuLypzhqawkGuIXh5LpD7/RtX5Lms5+Jy9RFU=",
"owner": "lcolonq",
"repo": "pit",
- "rev": "e6329f2ce1df83fd729e79f7e92e55fe96a2e826",
+ "rev": "951871e363307203aee0752c9b51cf79f3fd887d",
"type": "github"
},
"original": {
diff --git a/notes/flowchart.png b/notes/flowchart.png
new file mode 100644
index 0000000..5281da0
--- /dev/null
+++ b/notes/flowchart.png
Binary files differ
diff --git a/notes/notes.org b/notes/notes.org
new file mode 100644
index 0000000..d1afd0a
--- /dev/null
+++ b/notes/notes.org
@@ -0,0 +1,85 @@
+* x86_64
+[[./flowchart.png]]
+(optional) legacy prefixes
+-> (optional) REX prefix
+-> primary opcode
+-> (optional) ModRM
+-> (optional) SIB
+-> (optional) displacement
+-> (optional) immediate
+** Legacy prefixes (up to 5)
+*** 0x66 - operand-size override
+Makes operand size 16-bit instead of default 32-bit.
+(REX.W has higher priority and makes operand size 64-bit.)
+*** 0x67 - address-size override
+Makes address size 32-bit instead of default 64-bit.
+** REX
+0x40 through 0x4f
+Low bits:
+3 2 1 0
+W R X B
+
+When W = 1, the operand size is 64-bits (RAX instead of EAX, etc.)
+R is used as an extra high bit for the ModRM reg field (to indicate regs R8 through R15)
+X is used as an extra high bit for the SIB index register
+B is used as an extra high bit for the SIB base register, ModRM r/m field, or an opcode-specific reg field
+** primary opcode map
+Operand notation
+*** Location
+E - general purpose register or memory specified by ModRM.r/m / SIB
+F - rFLAGS register
+G - general purpose register specified by ModRM.reg
+I - immediate value encoded in the immediate field
+J - instruction encoding includes a relative offset added to rIP
+M - memory specified by mod and r/m in ModRM. ModRM.mod /= 0b11
+O - offset of an operand is encoded in the instruction. no ModRM
+R - general purpose register specified by ModRM.r/m. ModRM.mod == 0b11
+*** Type
+b - a byte
+c - a byte or 2 bytes, depending on effective operand size
+d - 4 bytes
+i - a 16-bit integer
+j - a 32-bit integer
+m - a bit mask of size equal to source
+mn - where n = 2,4,8, or 16: a bit mask of size n
+q - 8 bytes
+v - 2 bytes, 4 bytes, or 8 bytes, depending on effective operand size
+w - 2 bytes
+y - 4 bytes or 8 bytes depending on effective operand size
+z - 2 bytes if the effective operand size is 16 bits, or 4 bytes if the effective operand size is 32 or 64 bits
+** ModRM
+Used to specify either 2 register operands or 1 register and 1 memory operand.
+Three fields: MOD, REG, and R/M
+When REX prefix is present, REX.R is used as a high bit on REG and REX.B is used as a high bit on R/M
+7 6 5 4 3 2 1 0
+MOD REG-- R/M--
+
+When MOD is 0b11, both REG and R/M denote registers as follows
+| 000 | rAX, XMM0, etc. |
+| 001 | rCX, XMM1, etc. |
+| 010 | rDX, XMM2, etc. |
+| 011 | rBX, XMM3, etc. |
+| 100 | AH, rSP, XMM4, etc. |
+| 101 | CH, rBP, XMM5, etc. |
+| 110 | DH, rSI, XMM6, etc. |
+| 111 | BH, rDI, XMM7, etc. |
+(When REX prefix is specified, REX.R is an extra high bit that allows access to registers 8 through 15)
+
+When MOD is not 0b11, R/M denotes a base register for memory access
+| 000 | [rAX] |
+| 001 | [rCX] |
+| 010 | [rDX] |
+| 011 | [rBX] |
+| 100 | see SIB byte |
+| 101 | [rBP] or [RIP] if MOD = 0b00 (even if REX.B is 1!!!) |
+| 110 | [rSI] |
+| 111 | [rDI] |
+The offset from the base register is determined by the displacement bytes (these follow SIB)
+If MOD is 01 there is a 1-byte displacement, if MOD is 10 there is a 4-byte displacement
+** SIB
+Occurs only after ModRM
+** Displacement
+A displacement is a signed offset from a base used to indicate a memory address.
+Either 1 or 4 bytes depending on MOD, sign-extended to 64 bits during address calculation.
+** Immediate
+Either 1, 2, 4, or 8 (only for MOV into GPR) bytes.
diff --git a/src/library.c b/src/library.c
index 3e60077..dedc1c4 100644
--- a/src/library.c
+++ b/src/library.c
@@ -22,25 +22,9 @@ static void builder_ensure_size(struct elf_builder *e, i64 sz) {
e->ctx.buf = realloc(e->ctx.buf, (size_t) cap);
memset(e->ctx.buf + e->cap, 0, (size_t) (cap - e->cap));
e->ctx.len = sz;
+ e->cap = cap;
}
-static pit_value impl_elf_new(pit_runtime *rt, pit_value kwargs) {
- /*
- i64 vclass = pit_as_integer(rt, pit_plist_get(rt, pit_intern_cstr(rt, ":class"), kwargs));
- i64 vendianness = pit_as_integer(rt, pit_plist_get(rt, pit_intern_cstr(rt, ":endianness"), kwargs));
- */
- i64 vclass = -1;
- i64 vendianness = -1;
- struct elf_builder *ret = malloc(sizeof(struct elf_builder));
- i64 len = 256;
- u8 *buf = calloc((size_t) len, 1);
- ret->cap = len;
- ret->ctx = elf_ctx_new(buf, len,
- vclass > 0 ? vclass : ELF_CLASS_64,
- vendianness > 0 ? vendianness : ELF_ENDIANNESS_LITTLE
- );
- return pit_nativedata_new(rt, pit_intern_cstr(rt, "elf"), (void *) ret);
-}
static i64 get_kwargs_i64(pit_runtime *rt, i64 def, pit_value kwargs, char *kw) {
pit_value v = pit_plist_get(rt, pit_intern_cstr(rt, kw), kwargs);
if (v != PIT_NIL && pit_is_integer(rt, v)) {
@@ -49,6 +33,16 @@ static i64 get_kwargs_i64(pit_runtime *rt, i64 def, pit_value kwargs, char *kw)
return def;
}
}
+static pit_value impl_elf_new(pit_runtime *rt, pit_value kwargs) {
+ i64 vclass = get_kwargs_i64(rt, ELF_CLASS_64, kwargs, ":class");
+ i64 vendianness = get_kwargs_i64(rt, ELF_ENDIANNESS_LITTLE, kwargs, ":endianness");
+ struct elf_builder *ret = malloc(sizeof(struct elf_builder));
+ i64 len = 256;
+ u8 *buf = calloc((size_t) len, 1);
+ ret->cap = len;
+ ret->ctx = elf_ctx_new(buf, len, vclass, vendianness);
+ return pit_nativedata_new(rt, pit_intern_cstr(rt, "elf"), (void *) ret);
+}
static pit_value impl_elf_write_header(pit_runtime *rt, pit_value args) {
pit_value velf = pit_car(rt, args);
pit_value kwargs = pit_cdr(rt, args);
@@ -134,6 +128,61 @@ static pit_value impl_elf_write_program_header(pit_runtime *rt, pit_value args)
elf_write_program_header(&h, &e->ctx, (u64) off);
return PIT_NIL;
}
+static pit_value impl_elf_write_bytes(pit_runtime *rt, pit_value args) {
+ pit_value velf = pit_car(rt, args);
+ pit_value voff = pit_car(rt, pit_cdr(rt, args));
+ pit_value vbytes = pit_car(rt, pit_cdr(rt, pit_cdr(rt, args)));
+ pit_value_heavy *hbytes = NULL;
+ i64 off = 0;
+ struct elf_builder *e = pit_nativedata_get(rt, pit_intern_cstr(rt, "elf"), velf);
+ if (!e) return PIT_NIL;
+ if (!pit_is_integer(rt, voff)) {
+ pit_error(rt, "offset is not integer");
+ return PIT_NIL;
+ }
+ off = pit_as_integer(rt, voff);
+ if (off < 0) {
+ pit_error(rt, "negative offset");
+ return PIT_NIL;
+ }
+ if (pit_is_bytes(rt, vbytes)) {
+ if (pit_value_sort(vbytes) != PIT_VALUE_SORT_REF) {
+ pit_error(rt, "bytes are not a reference");
+ return PIT_NIL;
+ }
+ hbytes = pit_deref(rt, vbytes);
+ if (!hbytes) { pit_error(rt, "bad ref"); return PIT_NIL; }
+ if (hbytes->hsort != PIT_VALUE_HEAVY_SORT_BYTES) {
+ pit_error(rt, "invalid use of value as bytes");
+ return PIT_NIL;
+ }
+ builder_ensure_size(e, off + hbytes->in.bytes.len);
+ elf_write_bytes(&e->ctx, (u64 *) &off, hbytes->in.bytes.data, hbytes->in.bytes.len);
+ } else if (pit_is_cons(rt, vbytes)) {
+ while (vbytes != PIT_NIL) {
+ pit_value x = pit_car(rt, vbytes);
+ i64 v = 0;
+ u8 byte = 0;
+ if (!pit_is_integer(rt, x)) {
+ pit_error(rt, "byte is not integer");
+ return PIT_NIL;
+ }
+ v = pit_as_integer(rt, x);
+ if (v < 0 || v > 255) {
+ pit_error(rt, "byte out of range");
+ return PIT_NIL;
+ }
+ byte = (u8) v;
+ builder_ensure_size(e, off + 1);
+ if (elf_write_bytes(&e->ctx, (u64 *) &off, (u8 *) &byte, 1) < 0) {
+ pit_error(rt, "failed to write byte at offset: %ld\n", off);
+ return PIT_NIL;
+ }
+ vbytes = pit_cdr(rt, vbytes);
+ }
+ }
+ return PIT_T;
+}
static pit_value impl_elf_spit(pit_runtime *rt, pit_value args) {
char pathbuf[1024] = {0};
i64 len = 0, actual = 0;
@@ -160,76 +209,88 @@ static pit_value impl_elf_spit(pit_runtime *rt, pit_value args) {
return velf;
}
void rj_install_library(pit_runtime *rt) {
- /* constants */
- pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_INVALID"), pit_integer_new(rt, 0));
- pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_32"), pit_integer_new(rt, 1));
- pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_64"), pit_integer_new(rt, 2));
- pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_INVALID"), pit_integer_new(rt, 0));
- pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_LITTLE"), pit_integer_new(rt, 1));
- pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_BIG"), pit_integer_new(rt, 2));
- pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_NONE"), pit_integer_new(rt, 0));
- pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_REL"), pit_integer_new(rt, 1));
- pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_EXEC"), pit_integer_new(rt, 2));
- pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_DYN"), pit_integer_new(rt, 3));
- pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_CORE"), pit_integer_new(rt, 4));
- pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_NONE"), pit_integer_new(rt, 0));
- pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_X86"), pit_integer_new(rt, 3));
- pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_AMD64"), pit_integer_new(rt, 62));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NULL"), pit_integer_new(rt, 0));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_PROGBITS"), pit_integer_new(rt, 1));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SYMTAB"), pit_integer_new(rt, 2));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_STRTAB"), pit_integer_new(rt, 3));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_RELA"), pit_integer_new(rt, 4));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_HASH"), pit_integer_new(rt, 5));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_DYNAMIC"), pit_integer_new(rt, 6));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NOTE"), pit_integer_new(rt, 7));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NOBITS"), pit_integer_new(rt, 8));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_REL"), pit_integer_new(rt, 9));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SHLIB"), pit_integer_new(rt, 10));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_DYNSYM"), pit_integer_new(rt, 11));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_INIT_ARRAY"), pit_integer_new(rt, 14));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_FINI_ARRAY"), pit_integer_new(rt, 15));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_PREINIT_ARRAY"), pit_integer_new(rt, 16));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_GROUP"), pit_integer_new(rt, 17));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SYMTAB_SHNDX"), pit_integer_new(rt, 1));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_WRITE"), pit_integer_new(rt, 0x1));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_ALLOC"), pit_integer_new(rt, 0x2));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_EXECINSTR"), pit_integer_new(rt, 0x4));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_MERGE"), pit_integer_new(rt, 0x10));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_STRINGS"), pit_integer_new(rt, 0x20));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_INFO_LINK"), pit_integer_new(rt, 0x40));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_LINK_ORDER"), pit_integer_new(rt, 0x80));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_OS_NONCOMFORMING"), pit_integer_new(rt, 0x100));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_GROUP"), pit_integer_new(rt, 0x200));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_TLS"), pit_integer_new(rt, 0x400));
- pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_COMPRESSED"), pit_integer_new(rt, 0x80));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_LOCAL"), pit_integer_new(rt, 0));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_GLOBAL"), pit_integer_new(rt, 1));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_WEAK"), pit_integer_new(rt, 2));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_NOTYPE"), pit_integer_new(rt, 0));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_OBJECT"), pit_integer_new(rt, 1));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_FUNC"), pit_integer_new(rt, 2));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_SECTION"), pit_integer_new(rt, 3));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_FILE"), pit_integer_new(rt, 4));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_COMMON"), pit_integer_new(rt, 5));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_TLS"), pit_integer_new(rt, 6));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_DEFAULT"), pit_integer_new(rt, 0));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_INTERNAL"), pit_integer_new(rt, 1));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_HIDDEN"), pit_integer_new(rt, 2));
- pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_PROTECTED"), pit_integer_new(rt, 3));
- pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_NULL"), pit_integer_new(rt, 0));
- pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_LOAD"), pit_integer_new(rt, 1));
- pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_DYNAMIC"), pit_integer_new(rt, 2));
- pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_INTERP"), pit_integer_new(rt, 3));
- pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_NOTE"), pit_integer_new(rt, 4));
- pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_SHLIB"), pit_integer_new(rt, 5));
- pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_PHDR"), pit_integer_new(rt, 6));
- pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_TLS"), pit_integer_new(rt, 7));
+ /* sizes */
+ pit_set(rt, pit_intern_cstr(rt, "elf/HEADER_IDENT_SIZE"), pit_integer_new(rt, ELF_HEADER_IDENT_SIZE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/32_HEADER_SIZE"), pit_integer_new(rt, ELF32_HEADER_SIZE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/64_HEADER_SIZE"), pit_integer_new(rt, ELF64_HEADER_SIZE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/32_SECTION_HEADER_SIZE"), pit_integer_new(rt, ELF32_SECTION_HEADER_SIZE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/64_SECTION_HEADER_SIZE"), pit_integer_new(rt, ELF64_SECTION_HEADER_SIZE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/32_PROGRAM_HEADER_SIZE"), pit_integer_new(rt, ELF32_PROGRAM_HEADER_SIZE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/64_PROGRAM_HEADER_SIZE"), pit_integer_new(rt, ELF64_PROGRAM_HEADER_SIZE));
+ /* enums */
+ pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_INVALID"), pit_integer_new(rt, ELF_CLASS_INVALID));
+ pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_32"), pit_integer_new(rt, ELF_CLASS_32));
+ pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_64"), pit_integer_new(rt, ELF_CLASS_64));
+ pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_INVALID"), pit_integer_new(rt, ELF_ENDIANNESS_INVALID));
+ pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_LITTLE"), pit_integer_new(rt, ELF_ENDIANNESS_LITTLE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_BIG"), pit_integer_new(rt, ELF_ENDIANNESS_BIG));
+ pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_NONE"), pit_integer_new(rt, ELF_TYPE_NONE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_REL"), pit_integer_new(rt, ELF_TYPE_REL));
+ pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_EXEC"), pit_integer_new(rt, ELF_TYPE_EXEC));
+ pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_DYN"), pit_integer_new(rt, ELF_TYPE_DYN));
+ pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_CORE"), pit_integer_new(rt, ELF_TYPE_CORE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_NONE"), pit_integer_new(rt, ELF_MACHINE_NONE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_X86"), pit_integer_new(rt, ELF_MACHINE_X86));
+ pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_AMD64"), pit_integer_new(rt, ELF_MACHINE_AMD64));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NULL"), pit_integer_new(rt, ELF_SECTION_TYPE_NULL));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_PROGBITS"), pit_integer_new(rt, ELF_SECTION_TYPE_PROGBITS));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SYMTAB"), pit_integer_new(rt, ELF_SECTION_TYPE_SYMTAB));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_STRTAB"), pit_integer_new(rt, ELF_SECTION_TYPE_STRTAB));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_RELA"), pit_integer_new(rt, ELF_SECTION_TYPE_RELA));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_HASH"), pit_integer_new(rt, ELF_SECTION_TYPE_HASH));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_DYNAMIC"), pit_integer_new(rt, ELF_SECTION_TYPE_DYNAMIC));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NOTE"), pit_integer_new(rt, ELF_SECTION_TYPE_NOTE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NOBITS"), pit_integer_new(rt, ELF_SECTION_TYPE_NOBITS));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_REL"), pit_integer_new(rt, ELF_SECTION_TYPE_REL));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SHLIB"), pit_integer_new(rt, ELF_SECTION_TYPE_SHLIB));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_DYNSYM"), pit_integer_new(rt, ELF_SECTION_TYPE_DYNSYM));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_INIT_ARRAY"), pit_integer_new(rt, ELF_SECTION_TYPE_INIT_ARRAY));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_FINI_ARRAY"), pit_integer_new(rt, ELF_SECTION_TYPE_FINI_ARRAY));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_PREINIT_ARRAY"), pit_integer_new(rt, ELF_SECTION_TYPE_PREINIT_ARRAY));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_GROUP"), pit_integer_new(rt, ELF_SECTION_TYPE_GROUP));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SYMTAB_SHNDX"), pit_integer_new(rt, ELF_SECTION_TYPE_SYMTAB_SHNDX));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_WRITE"), pit_integer_new(rt, ELF_SECTION_FLAG_WRITE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_ALLOC"), pit_integer_new(rt, ELF_SECTION_FLAG_ALLOC));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_EXECINSTR"), pit_integer_new(rt, ELF_SECTION_FLAG_EXECINSTR));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_MERGE"), pit_integer_new(rt, ELF_SECTION_FLAG_MERGE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_STRINGS"), pit_integer_new(rt, ELF_SECTION_FLAG_STRINGS));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_INFO_LINK"), pit_integer_new(rt, ELF_SECTION_FLAG_INFO_LINK));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_LINK_ORDER"), pit_integer_new(rt, ELF_SECTION_FLAG_LINK_ORDER));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_OS_NONCOMFORMING"), pit_integer_new(rt, ELF_SECTION_FLAG_OS_NONCOMFORMING));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_GROUP"), pit_integer_new(rt, ELF_SECTION_FLAG_GROUP));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_TLS"), pit_integer_new(rt, ELF_SECTION_FLAG_TLS));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_COMPRESSED"), pit_integer_new(rt, ELF_SECTION_FLAG_COMPRESSED));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_LOCAL"), pit_integer_new(rt, ELF_SYMBOL_BINDING_LOCAL));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_GLOBAL"), pit_integer_new(rt, ELF_SYMBOL_BINDING_GLOBAL));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_WEAK"), pit_integer_new(rt, ELF_SYMBOL_BINDING_WEAK));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_NOTYPE"), pit_integer_new(rt, ELF_SYMBOL_TYPE_NOTYPE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_OBJECT"), pit_integer_new(rt, ELF_SYMBOL_TYPE_OBJECT));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_FUNC"), pit_integer_new(rt, ELF_SYMBOL_TYPE_FUNC));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_SECTION"), pit_integer_new(rt, ELF_SYMBOL_TYPE_SECTION));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_FILE"), pit_integer_new(rt, ELF_SYMBOL_TYPE_FILE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_COMMON"), pit_integer_new(rt, ELF_SYMBOL_TYPE_COMMON));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_TLS"), pit_integer_new(rt, ELF_SYMBOL_TYPE_TLS));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_DEFAULT"), pit_integer_new(rt, ELF_SYMBOL_VISIBILITY_DEFAULT));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_INTERNAL"), pit_integer_new(rt, ELF_SYMBOL_VISIBILITY_INTERNAL));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_HIDDEN"), pit_integer_new(rt, ELF_SYMBOL_VISIBILITY_HIDDEN));
+ pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_PROTECTED"), pit_integer_new(rt, ELF_SYMBOL_VISIBILITY_PROTECTED));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_NULL"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_NULL));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_LOAD"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_LOAD));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_DYNAMIC"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_DYNAMIC));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_INTERP"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_INTERP));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_NOTE"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_NOTE));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_SHLIB"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_SHLIB));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_PHDR"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_PHDR));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_TLS"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_TLS));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_FLAG_X"), pit_integer_new(rt, ELF_PROGRAM_HEADER_FLAG_X));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_FLAG_W"), pit_integer_new(rt, ELF_PROGRAM_HEADER_FLAG_W));
+ pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_FLAG_R"), pit_integer_new(rt, ELF_PROGRAM_HEADER_FLAG_R));
pit_fset(rt, pit_intern_cstr(rt, "elf/new!"), pit_nativefunc_new(rt, impl_elf_new));
pit_fset(rt, pit_intern_cstr(rt, "elf/write-header!"), pit_nativefunc_new(rt, impl_elf_write_header));
pit_fset(rt, pit_intern_cstr(rt, "elf/write-section-header!"), pit_nativefunc_new(rt, impl_elf_write_section_header));
pit_fset(rt, pit_intern_cstr(rt, "elf/write-symbol!"), pit_nativefunc_new(rt, impl_elf_write_symbol));
pit_fset(rt, pit_intern_cstr(rt, "elf/write-program-header!"), pit_nativefunc_new(rt, impl_elf_write_program_header));
+ pit_fset(rt, pit_intern_cstr(rt, "elf/write-bytes!"), pit_nativefunc_new(rt, impl_elf_write_bytes));
pit_fset(rt, pit_intern_cstr(rt, "elf/spit!"), pit_nativefunc_new(rt, impl_elf_spit));
}
diff --git a/src/main.c b/src/main.c
index 5ca07f2..4728f11 100644
--- a/src/main.c
+++ b/src/main.c
@@ -14,49 +14,14 @@ int main(int argc, char **argv) {
pit_install_library_essential(rt);
pit_install_library_io(rt);
pit_install_library_plist(rt);
+ pit_install_library_alist(rt);
pit_install_library_bytestring(rt);
rj_install_library(rt);
- if (argc < 2) { /* run repl */
- char buf[1024] = {0};
- i64 len = 0;
- pit_runtime_freeze(rt);
- if (pit_runtime_print_error(rt)) { exit(1); }
- setbuf(stdout, NULL);
- printf("> ");
- while (len < (i64) sizeof(buf) && (buf[len++] = (char) getchar()) != EOF) {
- if (buf[len - 1] == '\n') {
- pit_value bs, prog, res;
- buf[len - 1] = 0;
- bs = pit_bytes_new_cstr(rt, buf);
- prog = pit_read_bytes(rt, bs);
- res = pit_eval(rt, prog);
- if (pit_runtime_print_error(rt)) {
- rt->error = PIT_NIL;
- printf("> ");
- } else {
- char dumpbuf[1024] = {0};
- pit_dump(rt, dumpbuf, sizeof(dumpbuf) - 1, res, true);
- printf("%s\n> ", dumpbuf);
- }
- len = 0;
- }
- }
- } else { /* run file */
- pit_value bs = pit_bytes_new_file(rt, argv[1]);
- pit_lexer lex;
- pit_parser parse;
- bool eof = false;
- pit_value p = PIT_NIL;
- if (!pit_lexer_from_bytes(rt, &lex, bs)) {
- pit_error(rt, "failed to initialize lexer");
- }
- pit_parser_from_lexer(&parse, &lex);
- while (p = pit_parse(rt, &parse, &eof), !eof) {
- pit_eval(rt, p);
- if (pit_runtime_print_error(rt)) {
- exit(1);
- }
- }
+ if (argc < 2) {
+ pit_repl(rt);
+ } else {
+ pit_load_file(rt, argv[1]);
+ if (pit_runtime_print_error(rt)) return -1;
}
return 0;
}
diff --git a/test.lisp b/test.lisp
deleted file mode 100644
index 4353ef3..0000000
--- a/test.lisp
+++ /dev/null
@@ -1,7 +0,0 @@
-(let ((elf (elf/new!)))
- (print! elf)
- (elf/write-header! elf
- :type elf/TYPE_EXEC
- :machine elf/MACHINE_AMD64)
- (elf/spit! "test.elf" elf)
- )
diff --git a/test.pit b/test.pit
new file mode 100644
index 0000000..c67246f
--- /dev/null
+++ b/test.pit
@@ -0,0 +1,30 @@
+(let ( (elf (elf/new!))
+ (entrypoint 0x696000)
+ (toff 0x1000)
+ (phdroff 0x2000)
+ )
+ (elf/write-bytes! elf toff
+ '( 0x48 0xc7 0xc0 0x3c 0x00 0x00 0x00
+ 0x48 0xc7 0xc7 0x2a 0x00 0x00 0x00
+ 0x0f 0x05 ))
+ (elf/write-program-header! elf (+ phdroff elf/64_PROGRAM_HEADER_SIZE)
+ :type elf/PROGRAM_HEADER_TYPE_LOAD
+ :offset toff
+ :virtual-addr entrypoint
+ :file-size 0x100
+ :mem-size 0x100
+ :flags (bitwise/or elf/PROGRAM_HEADER_FLAG_R elf/PROGRAM_HEADER_FLAG_X)
+ )
+ (elf/write-header! elf
+ :type elf/TYPE_EXEC
+ :machine elf/MACHINE_AMD64
+ :program-header-offset phdroff
+ :program-header-entry-size elf/64_PROGRAM_HEADER_SIZE
+ :program-header-entries 2
+ :section-header-offset 0
+ :section-header-entry-size elf/64_SECTION_HEADER_SIZE
+ :section-header-entries 0
+ :entry entrypoint
+ )
+ (elf/spit! "test.elf" elf)
+ )
diff --git a/x86.pit b/x86.pit
new file mode 100644
index 0000000..f73e6a4
--- /dev/null
+++ b/x86.pit
@@ -0,0 +1,539 @@
+(defun! x86/split16le (w)
+ "Split the 16-bit W into a little-endian list of 8-bit integers."
+ (list
+ (bitwise/and 0xff w)
+ (bitwise/and 0xff (bitwise/rshift w 8))))
+
+(defun! x86/split32le (w)
+ "Split the 32-bit W into a little-endian list of 8-bit integers."
+ (list
+ (bitwise/and 0xff w)
+ (bitwise/and 0xff (bitwise/rshift w 8))
+ (bitwise/and 0xff (bitwise/rshift w 16))
+ (bitwise/and 0xff (bitwise/rshift w 24))))
+
+(defun! x86/split64le (w)
+ "Split the 64-bit W into a little-endian list of 8-bit integers."
+ (list
+ (bitwise/and 0xff w)
+ (bitwise/and 0xff (bitwise/rshift w 8))
+ (bitwise/and 0xff (bitwise/rshift w 16))
+ (bitwise/and 0xff (bitwise/rshift w 24))
+ (bitwise/and 0xff (bitwise/rshift w 32))
+ (bitwise/and 0xff (bitwise/rshift w 40))
+ (bitwise/and 0xff (bitwise/rshift w 48))
+ (bitwise/and 0xff (bitwise/rshift w 56))))
+
+(defun! x86/register-1byte? (r)
+ "Return the register index for 1-byte register R."
+ (case r
+ (al 0) (cl 1) (dl 2) (bl 3)
+ (ah 4) (ch 5) (dh 6) (bh 7)
+ (r8b 8) (r9b 9) (r10b 10) (r11b 11)
+ (r12b 12) (r13b 13) (r14b 14) (r15b 15)))
+
+(defun! x86/register-2byte? (r)
+ "Return the register index for 2-byte register R."
+ (case r
+ (ax 0) (cx 1) (dx 2) (bx 3)
+ (sp 4) (bp 5) (si 6) (di 7)
+ (r8w 8) (r9w 9) (r10w 10) (r11w 11)
+ (r12w 12) (r13w 13) (r14w 14) (r15w 15)))
+
+(defun! x86/register-4byte? (r)
+ "Return the register index for 4-byte register R."
+ (case r
+ (eax 0) (ecx 1) (edx 2) (ebx 3)
+ (esp 4) (ebp 5) (esi 6) (edi 7)
+ (r8d 8) (r9d 9) (r10d 10) (r11d 11)
+ (r12d 12) (r13d 13) (r14d 14) (r15d 15)))
+
+(defun! x86/register-8byte? (r)
+ "Return the register index for 8-byte register R."
+ (case r
+ (rax 0) (rcx 1) (rdx 2) (rbx 3)
+ (rsp 4) (rbp 5) (rsi 6) (rdi 7)
+ (r8 8) (r9 9) (r10 10) (r11 11)
+ (r12 12) (r13 13) (r14 14) (r15 15)))
+
+(defun! x86/register? (r)
+ "Return the register index of R."
+ (or
+ (x86/register-1byte? r)
+ (x86/register-2byte? r)
+ (x86/register-4byte? r)
+ (x86/register-8byte? r)))
+
+(defun! x86/register-extended? (r)
+ "Return non-nil if R is an extended register."
+ (list/contains? r
+ '( r8b r9b r10b r11b r12b r13b r14b r15b
+ r8w r9w r10w r11w r12w r13w r14w r15w
+ r8d r9d r10d r11d r12d r13d r14d r15d
+ r8 r9 r10 r11 r12 r13 r14 r15)))
+
+(defun! x86/integer-fits-in-bits? (bits x)
+ "Determine if X fits in BITS."
+ (if (integer? x)
+ (let ((leftover (bitwise/rshift x bits)))
+ (eq? leftover 0))))
+(defun! x86/operand-immediate-fits? (sz x)
+ "Determine if immediate operand X fits in SZ."
+ (let
+ ((bits
+ (or
+ (case sz
+ ("b" 8) ("c" 16) ("d" 32) ("i" 16)
+ ("j" 32) ("q" 64) ("v" 64) ("w" 16)
+ ("y" 64) ("z" 32))
+ (error! "unknown operand pattern size"))))
+ (x86/integer-fits-in-bits? bits x)))
+
+(defun! x86/operand-register-fits? (sz r)
+ "Determine if register operand R fits in SZ."
+ (case sz
+ ("b" (x86/register-1byte? r))
+ ("c" (or (x86/register-1byte? r) (x86/register-2byte? r)))
+ ("d" (x86/register-4byte? r))
+ ("i" (x86/register-2byte? r))
+ ("j" (x86/register-4byte? r))
+ ("q" (x86/register-8byte? r))
+ ("v" (or (x86/register-2byte? r) (x86/register-4byte? r) (x86/register-8byte? r)))
+ ("w" (x86/register-2byte? r))
+ ("y" (or (x86/register-4byte? r) (x86/register-8byte? r)))
+ ("z" (or (x86/register-2byte? r) (x86/register-4byte? r)))))
+
+(defun! x86/memory-operand-base (m)
+ (and
+ (eq? (car m) 'mem)
+ (car (cdr m))))
+(defun! x86/memory-operand-off (m)
+ (and
+ (eq? (car m) 'mem)
+ (or (car (cdr (cdr m))) 0)))
+
+(defun! x86/operand-memory-location? (op)
+ "Return non-nil if OP represents a memory location."
+ (let ( (base (x86/memory-operand-base op))
+ (off (x86/memory-operand-off op)))
+ (and
+ (or (x86/register-4byte? base) (x86/register-8byte? base))
+ (integer? off))))
+
+(defun! x86/operand-match? (pat op)
+ "Determine if operand OP matches PAT."
+ (cond
+ ((symbol? pat) (eq? pat op))
+ ((cons? pat) (list/contains? op pat))
+ ((bytes? pat)
+ (let ( (loc (bytes/range 0 1 pat))
+ (sz (bytes/range 1 (bytes/len pat) pat)))
+ (cond
+ ((or (equal? loc "I") (equal? loc "J")) (x86/operand-immediate-fits? sz op))
+ ((or (equal? loc "G") (equal? loc "R")) (x86/operand-register-fits? sz op))
+ ((equal? loc "M") (x86/operand-memory-location? op))
+ ((equal? loc "E")
+ (or (x86/operand-register-fits? sz op) (x86/operand-memory-location? op)))
+ (t (error! "unknown operand pattern location")))))))
+
+(defun! x86/operand-size (op)
+ "Return the minimum power-of-2 size in bytes that contains OP."
+ (cond
+ ((symbol? op)
+ (cond
+ ((x86/register-1byte? op) 1)
+ ((x86/register-2byte? op) 2)
+ ((x86/register-4byte? op) 4)
+ ((x86/register-8byte? op) 8)
+ (t (error! "attempted to take size of unknown register"))))
+ ((integer? op)
+ (cond
+ ((x86/integer-fits-in-bits? 8 op) 1)
+ ((x86/integer-fits-in-bits? 16 op) 2)
+ ((x86/integer-fits-in-bits? 32 op) 4)
+ ((x86/integer-fits-in-bits? 64 op) 8)
+ (t (error! "attempted to take size of too-large immediate"))))
+ ((x86/operand-memory-location? op) 1)
+ (t (error! "attempted to take size of unknown operand"))))
+
+(defstruct! x86/ins
+ operand-size-prefix
+ address-size-prefix
+ rex-w
+ rex-r
+ rex-x
+ rex-b
+ opcode
+ modrm-mod
+ modrm-reg
+ modrm-rm
+ disp ;; pair of size and value
+ imm ;; pair of size and value
+ )
+
+(defun! x86/ins-bytes (ins)
+ "Return a list of bytes encoding INS."
+ (let ( (opcode (x86/ins/get-opcode ins))
+ (rex-w (x86/ins/get-rex-w ins))
+ (rex-r (x86/ins/get-rex-r ins))
+ (rex-x (x86/ins/get-rex-x ins))
+ (rex-b (x86/ins/get-rex-b ins))
+ (modrm-mod (x86/ins/get-modrm-mod ins))
+ (modrm-reg (x86/ins/get-modrm-reg ins))
+ (modrm-rm (x86/ins/get-modrm-rm ins))
+ (disp (x86/ins/get-disp ins))
+ (imm (x86/ins/get-imm ins)))
+ (list/append
+ (if (x86/ins/get-operand-size-prefix ins) '(0x66))
+ (if (x86/ins/get-address-size-prefix ins) '(0x67))
+ (if (or rex-w rex-r rex-x rex-b)
+ (list
+ (bitwise/or
+ 0x40
+ (if rex-w 0b1000 0)
+ (if rex-r 0b0100 0)
+ (if rex-x 0b0010 0)
+ (if rex-b 0b0001 0))))
+ (cond
+ ((not opcode) (error! "no opcode for instruction"))
+ ((cons? opcode) opcode)
+ ((integer? opcode) (list opcode))
+ (t (error! "malformed opcode for instruction")))
+ (if (or modrm-mod modrm-reg modrm-rm)
+ (list
+ (bitwise/or
+ (bitwise/lshift (or modrm-mod 0) 6)
+ (bitwise/lshift (or modrm-reg 0) 3)
+ (or modrm-rm 0))))
+ (if disp
+ (cond
+ ((eq? (car disp) 1) (list (cdr disp)))
+ ((eq? (car disp) 4) (x86/split32le (cdr disp)))
+ (t (error! "malformed displacement for instruction"))))
+ (if imm
+ (cond
+ ((eq? (car imm) 1) (list (cdr imm)))
+ ((eq? (car imm) 2) (x86/split16le (cdr imm)))
+ ((eq? (car imm) 4) (x86/split32le (cdr imm)))
+ ((eq? (car imm) 8) (x86/split64le (cdr imm)))
+ (t (error! "malformed immediate for instruction")))))))
+
+(defun! x86/instruction-update-sizes (ins ops default-size)
+ "Update INS to account for the sizes of OPS.
+DEFAULT-SIZE is the default operand size."
+ (let ((defsz (or default-size 4)))
+ (if (> (list/len ops) 0)
+ (let ((regs (list/uniq (list/map 'x86/operand-size (list/filter 'x86/register? ops)))))
+ (if (> (list/len regs) 1)
+ (error! "invalid mix of register sizes in operands"))
+ (let ((sz (if (eq? (list/len regs) 0) defsz (car regs))))
+ (cond
+ ((eq? sz 1) nil)
+ ((eq? defsz sz) nil)
+ ((and (not (eq? defsz 2)) (eq? sz 2)) (x86/ins/set-operand-size-prefix! ins t))
+ ((and (not (eq? defsz 8)) (eq? sz 8)) (x86/ins/set-rex-w! ins t))
+ (t (error! "unable to encode operands with default size")))
+ sz)))))
+
+(defun! x86/instruction-update-operand (esz ins pat op)
+ "Update INS to account for an operand OP according to PAT.
+The effective operand size is ESZ."
+ (cond
+ ((bytes? pat)
+ (let ((loc (bytes/range 0 1 pat)))
+ (cond
+ ((equal? loc "I")
+ (let ((immsz (if (>= esz 4) 4 esz)))
+ (if (not (x86/integer-fits-in-bits? (* 8 immsz) op))
+ (error! "Immediate too large" op))
+ (x86/ins/set-imm! ins (cons immsz op))))
+ ((equal? loc "J")
+ (let ((immsz (if (eq? esz 1) 1 4)))
+ (if (not (x86/integer-fits-in-bits? (* 8 immsz) op))
+ (error! "jump displacement too large"))
+ (x86/ins/set-disp! ins (cons immsz op))))
+ ((equal? loc "G")
+ (x86/ins/set-modrm-reg! ins
+ (or (x86/register? op) (error "Invalid register: %s" op))))
+ ((or (equal? loc "R") (and (equal? loc "E") (x86/register? op)))
+ (x86/ins/set-modrm-mod! ins 0b11)
+ (x86/ins/set-modrm-rm! ins
+ (or (x86/register? op) (error "Invalid register: %s" op))))
+ ((or (equal? loc "M") (and (equal? loc "E") (x86/operand-memory-location? op)))
+ (let ( (base (x86/memory-operand-base op))
+ (off (x86/memory-operand-off op)))
+ (cond
+ ((eq? base 'eip)
+ (x86/ins/set-modrm-rm! ins 0b101)
+ (x86/ins/set-modrm-mod! ins 0b00)
+ (x86/ins/set-disp! ins (cons 4 off))
+ (x86/ins/set-address-size-prefix! ins t))
+ ((eq? base 'rip)
+ (x86/ins/set-modrm-rm! ins 0b101)
+ (x86/ins/set-modrm-mod! ins 0b00)
+ (x86/ins/set-disp! ins (cons 4 off)))
+ (t
+ (x86/ins/set-modrm-rm! ins
+ (or
+ (x86/register-4byte? base)
+ (x86/register-8byte? base)
+ (error! "invalid base register")))
+ (if (x86/register-4byte? base)
+ (x86/ins/set-address-size-prefix! ins t))
+ (cond
+ ((x86/integer-fits-in-bits? 8 off)
+ (x86/ins/set-disp! ins (cons 1 off))
+ (x86/ins/set-modrm-mod! ins 0b01))
+ ((x86/integer-fits-in-bits? 32 off)
+ (x86/ins/set-disp! ins (cons 4 off))
+ (x86/ins/set-modrm-mod! ins 0b10))
+ (t (error! "invalid offset")))))))
+ (t (error! "invalid operand location code")))))))
+
+(defun! x86/default-instruction-handler (opcode & kwargs)
+ "Return an instruction handler for OPCODE.
+The instruction handler will run POSTHOOK on the instruction at the end.
+DEFAULT-SIZE is the default operand size."
+ (let ( (posthook (plist/get :posthook kwargs))
+ (default-size (plist/get :default-size kwargs)))
+ (lambda (pats ops)
+ (let ((ret (x86/ins/new :opcode opcode)))
+ (let ((esz
+ (or (x86/instruction-update-sizes ret ops default-size)
+ (error! "malformed size for operands"))))
+ (list/zip-with
+ (lambda (it other)
+ (x86/instruction-update-operand esz ret it other))
+ pats
+ ops))
+ (if posthook
+ (funcall posthook ret ops))
+ ret))))
+
+(defun! x86/instruction-handler-jcc (opcode immsz)
+ "Return an instruction handler for a Jcc instruction at OPCODE.
+IMMSZ is the size of the displacement from RIP."
+ (lambda (_pats ops)
+ (let ((ret (x86/ins/new :opcode opcode)))
+ (x86/ins/set-disp! ret (cons immsz (car ops)))
+ ret)))
+
+(defun! x86/generate-handlers-arith (opbase group1reg)
+ "Return handlers for an arithmetic mnemonic starting at OPBASE.
+The REG value in ModR/M is indicated by GROUP1REG."
+ (list
+ (cons '("Eb" "Gb") (x86/default-instruction-handler (+ opbase 0)))
+ (cons '("Ev" "Gv") (x86/default-instruction-handler (+ opbase 1)))
+ (cons '("Gb" "Eb") (x86/default-instruction-handler (+ opbase 2)))
+ (cons '("Gv" "Ev") (x86/default-instruction-handler (+ opbase 3)))
+ (cons '(al "Ib") (x86/default-instruction-handler (+ opbase 4)))
+ (cons '((ax eax rax) "Iz") (x86/default-instruction-handler (+ opbase 5)))
+ (cons '("Eb" "Ib")
+ (x86/default-instruction-handler 0x80
+ :posthook (lambda (ins _) (x86/ins/set-modrm-reg! ins group1reg))))
+ (cons '("Ev" "Iz")
+ (x86/default-instruction-handler 0x81
+ :posthook (lambda (ins _) (x86/ins/set-modrm-reg! ins group1reg))))
+ (cons '("Ev" "Ib")
+ (x86/default-instruction-handler 0x83
+ :posthook (lambda (ins _) (setf (x86/ins-modrm-reg ins) group1reg))))))
+
+(setq! x86/registers-+reg-base
+ '( (+rb . (al cl dl bl ah ch dh bh))
+ (+rw . (ax cx dx bx sp bp si di))
+ (+rd . (eax ecx edx ebx esp ebp esi edi))
+ (+rq . (rax rcx rdx rbx rsp rbp rsi rdi))))
+(setq! x86/registers-+reg-extended
+ '( (+rb . (r8b r9b r10b r11b r12b r13b r14b r15b))
+ (+rw . (r8w r9w r10w r11w r12w r13w r14w r15w))
+ (+rd . (r8d r9d r10d r11d r12d r13d r14d r15d))
+ (+rq . (r8 r9 r10 r11 r12 r13 r14 r15))))
+(defun! x86/generate-handlers-opcode-+reg (opbase extraops addends & args)
+ "Generate handlers for a family of opcodes that uses the +reg encoding.
+OPBASE is the base opcode.
+EXTRAOPS are additional operands after the register operand.
+ADDENDS is a list of symbols like +rw, +rq etc. that denote allowed registers.
+ARGS are passed verbatim to `u/x86/default-instruction-handler."
+ (list/map
+ (lambda (it)
+ (let ( (abase (list/map (lambda (a) (list/nth it (alist/get a x86/registers-+reg-base))) addends))
+ (aext (list/map (lambda (a) (list/nth it (alist/get a x86/registers-+reg-extended))) addends)))
+ (cons
+ (cons (list/append abase aext) extraops)
+ (apply 'x86/default-instruction-handler (cons (+ opbase it) args)))))
+ (list/iota 8)))
+
+(setq!
+ x86/mnemonic-table
+ (list
+ (cons 'add (x86/generate-handlers-arith 0x00 0))
+ (cons 'or (x86/generate-handlers-arith 0x08 1))
+ (cons 'adc (x86/generate-handlers-arith 0x10 2))
+ (cons 'sbb (x86/generate-handlers-arith 0x18 3))
+ (cons 'and (x86/generate-handlers-arith 0x20 4))
+ (cons 'sub (x86/generate-handlers-arith 0x28 5))
+ (cons 'xor (x86/generate-handlers-arith 0x30 6))
+ (cons 'cmp (x86/generate-handlers-arith 0x38 7))
+ (cons 'push
+ (x86/generate-handlers-opcode-+reg 0x50 '() '(+rw +rq)
+ :default-size 8
+ :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops))))))
+ (cons 'pop
+ (x86/generate-handlers-opcode-+reg 0x58 '() '(+rw +rq)
+ :default-size 8
+ :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops))))))
+ (list 'jo
+ (cons '("Jb") (x86/instruction-handler-jcc 0x70 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x80) 4)))
+ (list 'jno
+ (cons '("Jb") (x86/instruction-handler-jcc 0x71 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x81) 4)))
+ (list 'jb
+ (cons '("Jb") (x86/instruction-handler-jcc 0x72 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x82) 4)))
+ (list 'jnb
+ (cons '("Jb") (x86/instruction-handler-jcc 0x73 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x83) 4)))
+ (list 'jz
+ (cons '("Jb") (x86/instruction-handler-jcc 0x74 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x84) 4)))
+ (list 'jnz
+ (cons '("Jb") (x86/instruction-handler-jcc 0x75 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x85) 4)))
+ (list 'jbe
+ (cons '("Jb") (x86/instruction-handler-jcc 0x76 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x86) 4)))
+ (list 'jnbe
+ (cons '("Jb") (x86/instruction-handler-jcc 0x77 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x87) 4)))
+ (list 'js
+ (cons '("Jb") (x86/instruction-handler-jcc 0x78 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x88) 4)))
+ (list 'jns
+ (cons '("Jb") (x86/instruction-handler-jcc 0x79 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x89) 4)))
+ (list 'jp
+ (cons '("Jb") (x86/instruction-handler-jcc 0x7a 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8a) 4)))
+ (list 'jnp
+ (cons '("Jb") (x86/instruction-handler-jcc 0x7b 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8b) 4)))
+ (list 'jl
+ (cons '("Jb") (x86/instruction-handler-jcc 0x7c 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8c) 4)))
+ (list 'jnl
+ (cons '("Jb") (x86/instruction-handler-jcc 0x7d 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8d) 4)))
+ (list 'jle
+ (cons '("Jb") (x86/instruction-handler-jcc 0x7e 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8e) 4)))
+ (list 'jnle
+ (cons '("Jb") (x86/instruction-handler-jcc 0x7f 1))
+ (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8f) 4)))
+ (cons 'mov
+ (list/append
+ (x86/generate-handlers-opcode-+reg 0xb0 '("Ib") '(+rb)
+ :posthook
+ (lambda (ins ops)
+ (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))
+ (x86/generate-handlers-opcode-+reg 0xb8 '("Iv") '(+rw +rd +rq)
+ :posthook
+ (lambda (ins ops)
+ (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))
+ (let ((imm (x86/ins/get-imm ins)))
+ (setcar! imm (if (x86/ins/get-rex-w ins) 8 (car imm))))))))
+ (list 'jmp
+ (cons '("Ev")
+ (x86/default-instruction-handler 0xff
+ :default-size 8
+ :posthook (lambda (ins _) (setf (u/x86/ins-modrm-reg ins) 4)))))
+ (list 'syscall
+ (cons '() (lambda (_ _) (x86/ins/new :opcode '(0x0f 0x05)))))
+ ))
+
+(defun! x86/asm (op)
+ "Assemble OP to an instruction."
+ (let ((mnem (car op)) (operands (cdr op)))
+ (let ((variants (or (alist/get mnem x86/mnemonic-table) (error! "unknown mnemonic"))))
+ (print! variants)
+ (let ((v
+ (list/find
+ (lambda (it)
+ (and (eq? (list/len (car it)) (list/len operands))
+ (list/all? (lambda (x) x) (list/zip-with 'x86/operand-match? (car it) operands))))
+ variants)))
+ (if (and v (function? (cdr v)))
+ (funcall (cdr v) (car v) operands)
+ (error! "could not identify instruction"))))))
+
+(setq! test-ins
+ (list/concat
+ (list/map (lambda (i) (x86/ins-bytes (x86/asm i)))
+ '( (mov rax 0x01)
+ (mov rdi 0x01)
+ (mov rsi 0x696100)
+ (mov rdx 15)
+ (syscall)
+ (mov rax 0x3c)
+ (mov rdi 0)
+ (syscall)
+ ))))
+(print! test-ins)
+
+(let ( (elf (elf/new!))
+ (entrypoint 0x696000)
+ (toff 0x1000)
+ (phdroff 0x2000)
+ (shstrtaboff 0x3000)
+ (shdroff 0x4000)
+ )
+ (elf/write-header! elf
+ :type elf/TYPE_EXEC
+ :machine elf/MACHINE_AMD64
+ :program-header-offset phdroff
+ :program-header-entry-size elf/64_PROGRAM_HEADER_SIZE
+ :program-header-entries 2
+ :section-header-offset shdroff
+ :section-header-entry-size elf/64_SECTION_HEADER_SIZE
+ :section-header-entries 3
+ :entry entrypoint
+ )
+ (elf/write-bytes! elf toff test-ins)
+ (elf/write-bytes! elf (+ toff 0x100) "hello computer
+")
+ (elf/write-program-header! elf (+ phdroff elf/64_PROGRAM_HEADER_SIZE)
+ :type elf/PROGRAM_HEADER_TYPE_LOAD
+ :offset toff
+ :virtual-addr entrypoint
+ :file-size 0x200
+ :mem-size 0x200
+ :flags (bitwise/or elf/PROGRAM_HEADER_FLAG_R elf/PROGRAM_HEADER_FLAG_X)
+ )
+ (elf/write-bytes! elf shstrtaboff
+ '(0 46 115 104 115 116 114 116 97 98 0 46 116 101 120 116 0))
+ (elf/write-section-header! elf shdroff :type elf/SECTION_TYPE_NULL)
+ (setq! shdroff (+ shdroff elf/64_SECTION_HEADER_SIZE))
+ (elf/write-section-header! elf shdroff
+ :name-index 1
+ :type elf/SECTION_TYPE_STRTAB
+ :offset shstrtaboff
+ :size 0x1000)
+ (setq! shdroff (+ shdroff elf/64_SECTION_HEADER_SIZE))
+ (elf/write-section-header! elf shdroff
+ :name-index 11
+ :type elf/SECTION_TYPE_PROGBITS
+ :flags (bitwise/or elf/SECTION_FLAG_ALLOC elf/SECTION_FLAG_EXECINSTR)
+ :addr entrypoint
+ :offset toff
+ :size 0x200)
+ (elf/spit! "test.elf" elf)
+ )
+
+;; (elf/write! "test.elf"
+;; :sections
+;; (list
+;; (elf/section
+;; :name ".text"
+;; :type elf/SECTION_TYPE_PROGBITS
+;; :flags (bitwise/or elf/SECTION_FLAG_ALLOC elf/SECTION_FLAG_EXECINSTR)
+;; :contents (get-the-contents-bytes))))