diff options
| author | LLLL Colonq <llll@colonq> | 2026-04-26 22:55:20 -0400 |
|---|---|---|
| committer | LLLL Colonq <llll@colonq> | 2026-04-26 22:55:20 -0400 |
| commit | 63051c6f1cf59e468cb4175deea83c82888d18c2 (patch) | |
| tree | 8b43f64dd8a1932aa886283a63d49688b149b5a6 | |
| parent | e38591e34fb46b433485e8decbe0ea3ec964fdbe (diff) | |
| -rw-r--r-- | Makefile | 4 | ||||
| -rw-r--r-- | doc/x86_64.pdf | bin | 0 -> 11185300 bytes | |||
| -rw-r--r-- | flake.lock | 6 | ||||
| -rw-r--r-- | notes/flowchart.png | bin | 0 -> 88718 bytes | |||
| -rw-r--r-- | notes/notes.org | 85 | ||||
| -rw-r--r-- | src/library.c | 225 | ||||
| -rw-r--r-- | src/main.c | 47 | ||||
| -rw-r--r-- | test.lisp | 7 | ||||
| -rw-r--r-- | test.pit | 30 | ||||
| -rw-r--r-- | x86.pit | 539 |
10 files changed, 808 insertions, 135 deletions
@@ -2,8 +2,8 @@ CC ?= gcc AR ?= ar CHK_SOURCES ?= $(SRCS) CPPFLAGS ?= -MMD -MP -CFLAGS ?= -flto -ffat-lto-objects -march=native --std=c89 -g -Ideps/ -Isrc/ -Iinclude/ -Wall -Wextra -Wpedantic -Wconversion -Wformat-security -Wshadow -Wpointer-arith -Wstrict-prototypes -Wmissing-prototypes -Wnull-dereference -Wfloat-equal -Wundef -Wpointer-arith -Wbad-function-cast -Wmissing-braces -Wcast-align -Wstrict-overflow=5 -ftrapv -LDFLAGS ?= -flto -g -static -lcolonq-pit -lcolonq-elf +CFLAGS ?= --std=c89 -g -Ideps/ -Isrc/ -Iinclude/ -Wall -Wextra -Wpedantic -Wconversion -Wformat-security -Wshadow -Wpointer-arith -Wstrict-prototypes -Wmissing-prototypes -Wnull-dereference -Wfloat-equal -Wundef -Wpointer-arith -Wbad-function-cast -Wmissing-braces -Wcast-align -Wstrict-overflow=5 -ftrapv +LDFLAGS ?= -g -static -lcolonq-pit -lcolonq-elf BUILD = build_$(CC) diff --git a/doc/x86_64.pdf b/doc/x86_64.pdf Binary files differnew file mode 100644 index 0000000..24fa420 --- /dev/null +++ b/doc/x86_64.pdf @@ -99,11 +99,11 @@ ] }, "locked": { - "lastModified": 1766198852, - "narHash": "sha256-DZ0nqM4BwZwm0KzzJt74YVJmQYJDFR1gxuD+GIWDNQk=", + "lastModified": 1776209507, + "narHash": "sha256-PWtSumsuLypzhqawkGuIXh5LpD7/RtX5Lms5+Jy9RFU=", "owner": "lcolonq", "repo": "pit", - "rev": "e6329f2ce1df83fd729e79f7e92e55fe96a2e826", + "rev": "951871e363307203aee0752c9b51cf79f3fd887d", "type": "github" }, "original": { diff --git a/notes/flowchart.png b/notes/flowchart.png Binary files differnew file mode 100644 index 0000000..5281da0 --- /dev/null +++ b/notes/flowchart.png diff --git a/notes/notes.org b/notes/notes.org new file mode 100644 index 0000000..d1afd0a --- /dev/null +++ b/notes/notes.org @@ -0,0 +1,85 @@ +* x86_64 +[[./flowchart.png]] +(optional) legacy prefixes +-> (optional) REX prefix +-> primary opcode +-> (optional) ModRM +-> (optional) SIB +-> (optional) displacement +-> (optional) immediate +** Legacy prefixes (up to 5) +*** 0x66 - operand-size override +Makes operand size 16-bit instead of default 32-bit. +(REX.W has higher priority and makes operand size 64-bit.) +*** 0x67 - address-size override +Makes address size 32-bit instead of default 64-bit. +** REX +0x40 through 0x4f +Low bits: +3 2 1 0 +W R X B + +When W = 1, the operand size is 64-bits (RAX instead of EAX, etc.) +R is used as an extra high bit for the ModRM reg field (to indicate regs R8 through R15) +X is used as an extra high bit for the SIB index register +B is used as an extra high bit for the SIB base register, ModRM r/m field, or an opcode-specific reg field +** primary opcode map +Operand notation +*** Location +E - general purpose register or memory specified by ModRM.r/m / SIB +F - rFLAGS register +G - general purpose register specified by ModRM.reg +I - immediate value encoded in the immediate field +J - instruction encoding includes a relative offset added to rIP +M - memory specified by mod and r/m in ModRM. ModRM.mod /= 0b11 +O - offset of an operand is encoded in the instruction. no ModRM +R - general purpose register specified by ModRM.r/m. ModRM.mod == 0b11 +*** Type +b - a byte +c - a byte or 2 bytes, depending on effective operand size +d - 4 bytes +i - a 16-bit integer +j - a 32-bit integer +m - a bit mask of size equal to source +mn - where n = 2,4,8, or 16: a bit mask of size n +q - 8 bytes +v - 2 bytes, 4 bytes, or 8 bytes, depending on effective operand size +w - 2 bytes +y - 4 bytes or 8 bytes depending on effective operand size +z - 2 bytes if the effective operand size is 16 bits, or 4 bytes if the effective operand size is 32 or 64 bits +** ModRM +Used to specify either 2 register operands or 1 register and 1 memory operand. +Three fields: MOD, REG, and R/M +When REX prefix is present, REX.R is used as a high bit on REG and REX.B is used as a high bit on R/M +7 6 5 4 3 2 1 0 +MOD REG-- R/M-- + +When MOD is 0b11, both REG and R/M denote registers as follows +| 000 | rAX, XMM0, etc. | +| 001 | rCX, XMM1, etc. | +| 010 | rDX, XMM2, etc. | +| 011 | rBX, XMM3, etc. | +| 100 | AH, rSP, XMM4, etc. | +| 101 | CH, rBP, XMM5, etc. | +| 110 | DH, rSI, XMM6, etc. | +| 111 | BH, rDI, XMM7, etc. | +(When REX prefix is specified, REX.R is an extra high bit that allows access to registers 8 through 15) + +When MOD is not 0b11, R/M denotes a base register for memory access +| 000 | [rAX] | +| 001 | [rCX] | +| 010 | [rDX] | +| 011 | [rBX] | +| 100 | see SIB byte | +| 101 | [rBP] or [RIP] if MOD = 0b00 (even if REX.B is 1!!!) | +| 110 | [rSI] | +| 111 | [rDI] | +The offset from the base register is determined by the displacement bytes (these follow SIB) +If MOD is 01 there is a 1-byte displacement, if MOD is 10 there is a 4-byte displacement +** SIB +Occurs only after ModRM +** Displacement +A displacement is a signed offset from a base used to indicate a memory address. +Either 1 or 4 bytes depending on MOD, sign-extended to 64 bits during address calculation. +** Immediate +Either 1, 2, 4, or 8 (only for MOV into GPR) bytes. diff --git a/src/library.c b/src/library.c index 3e60077..dedc1c4 100644 --- a/src/library.c +++ b/src/library.c @@ -22,25 +22,9 @@ static void builder_ensure_size(struct elf_builder *e, i64 sz) { e->ctx.buf = realloc(e->ctx.buf, (size_t) cap); memset(e->ctx.buf + e->cap, 0, (size_t) (cap - e->cap)); e->ctx.len = sz; + e->cap = cap; } -static pit_value impl_elf_new(pit_runtime *rt, pit_value kwargs) { - /* - i64 vclass = pit_as_integer(rt, pit_plist_get(rt, pit_intern_cstr(rt, ":class"), kwargs)); - i64 vendianness = pit_as_integer(rt, pit_plist_get(rt, pit_intern_cstr(rt, ":endianness"), kwargs)); - */ - i64 vclass = -1; - i64 vendianness = -1; - struct elf_builder *ret = malloc(sizeof(struct elf_builder)); - i64 len = 256; - u8 *buf = calloc((size_t) len, 1); - ret->cap = len; - ret->ctx = elf_ctx_new(buf, len, - vclass > 0 ? vclass : ELF_CLASS_64, - vendianness > 0 ? vendianness : ELF_ENDIANNESS_LITTLE - ); - return pit_nativedata_new(rt, pit_intern_cstr(rt, "elf"), (void *) ret); -} static i64 get_kwargs_i64(pit_runtime *rt, i64 def, pit_value kwargs, char *kw) { pit_value v = pit_plist_get(rt, pit_intern_cstr(rt, kw), kwargs); if (v != PIT_NIL && pit_is_integer(rt, v)) { @@ -49,6 +33,16 @@ static i64 get_kwargs_i64(pit_runtime *rt, i64 def, pit_value kwargs, char *kw) return def; } } +static pit_value impl_elf_new(pit_runtime *rt, pit_value kwargs) { + i64 vclass = get_kwargs_i64(rt, ELF_CLASS_64, kwargs, ":class"); + i64 vendianness = get_kwargs_i64(rt, ELF_ENDIANNESS_LITTLE, kwargs, ":endianness"); + struct elf_builder *ret = malloc(sizeof(struct elf_builder)); + i64 len = 256; + u8 *buf = calloc((size_t) len, 1); + ret->cap = len; + ret->ctx = elf_ctx_new(buf, len, vclass, vendianness); + return pit_nativedata_new(rt, pit_intern_cstr(rt, "elf"), (void *) ret); +} static pit_value impl_elf_write_header(pit_runtime *rt, pit_value args) { pit_value velf = pit_car(rt, args); pit_value kwargs = pit_cdr(rt, args); @@ -134,6 +128,61 @@ static pit_value impl_elf_write_program_header(pit_runtime *rt, pit_value args) elf_write_program_header(&h, &e->ctx, (u64) off); return PIT_NIL; } +static pit_value impl_elf_write_bytes(pit_runtime *rt, pit_value args) { + pit_value velf = pit_car(rt, args); + pit_value voff = pit_car(rt, pit_cdr(rt, args)); + pit_value vbytes = pit_car(rt, pit_cdr(rt, pit_cdr(rt, args))); + pit_value_heavy *hbytes = NULL; + i64 off = 0; + struct elf_builder *e = pit_nativedata_get(rt, pit_intern_cstr(rt, "elf"), velf); + if (!e) return PIT_NIL; + if (!pit_is_integer(rt, voff)) { + pit_error(rt, "offset is not integer"); + return PIT_NIL; + } + off = pit_as_integer(rt, voff); + if (off < 0) { + pit_error(rt, "negative offset"); + return PIT_NIL; + } + if (pit_is_bytes(rt, vbytes)) { + if (pit_value_sort(vbytes) != PIT_VALUE_SORT_REF) { + pit_error(rt, "bytes are not a reference"); + return PIT_NIL; + } + hbytes = pit_deref(rt, vbytes); + if (!hbytes) { pit_error(rt, "bad ref"); return PIT_NIL; } + if (hbytes->hsort != PIT_VALUE_HEAVY_SORT_BYTES) { + pit_error(rt, "invalid use of value as bytes"); + return PIT_NIL; + } + builder_ensure_size(e, off + hbytes->in.bytes.len); + elf_write_bytes(&e->ctx, (u64 *) &off, hbytes->in.bytes.data, hbytes->in.bytes.len); + } else if (pit_is_cons(rt, vbytes)) { + while (vbytes != PIT_NIL) { + pit_value x = pit_car(rt, vbytes); + i64 v = 0; + u8 byte = 0; + if (!pit_is_integer(rt, x)) { + pit_error(rt, "byte is not integer"); + return PIT_NIL; + } + v = pit_as_integer(rt, x); + if (v < 0 || v > 255) { + pit_error(rt, "byte out of range"); + return PIT_NIL; + } + byte = (u8) v; + builder_ensure_size(e, off + 1); + if (elf_write_bytes(&e->ctx, (u64 *) &off, (u8 *) &byte, 1) < 0) { + pit_error(rt, "failed to write byte at offset: %ld\n", off); + return PIT_NIL; + } + vbytes = pit_cdr(rt, vbytes); + } + } + return PIT_T; +} static pit_value impl_elf_spit(pit_runtime *rt, pit_value args) { char pathbuf[1024] = {0}; i64 len = 0, actual = 0; @@ -160,76 +209,88 @@ static pit_value impl_elf_spit(pit_runtime *rt, pit_value args) { return velf; } void rj_install_library(pit_runtime *rt) { - /* constants */ - pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_INVALID"), pit_integer_new(rt, 0)); - pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_32"), pit_integer_new(rt, 1)); - pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_64"), pit_integer_new(rt, 2)); - pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_INVALID"), pit_integer_new(rt, 0)); - pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_LITTLE"), pit_integer_new(rt, 1)); - pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_BIG"), pit_integer_new(rt, 2)); - pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_NONE"), pit_integer_new(rt, 0)); - pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_REL"), pit_integer_new(rt, 1)); - pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_EXEC"), pit_integer_new(rt, 2)); - pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_DYN"), pit_integer_new(rt, 3)); - pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_CORE"), pit_integer_new(rt, 4)); - pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_NONE"), pit_integer_new(rt, 0)); - pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_X86"), pit_integer_new(rt, 3)); - pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_AMD64"), pit_integer_new(rt, 62)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NULL"), pit_integer_new(rt, 0)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_PROGBITS"), pit_integer_new(rt, 1)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SYMTAB"), pit_integer_new(rt, 2)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_STRTAB"), pit_integer_new(rt, 3)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_RELA"), pit_integer_new(rt, 4)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_HASH"), pit_integer_new(rt, 5)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_DYNAMIC"), pit_integer_new(rt, 6)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NOTE"), pit_integer_new(rt, 7)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NOBITS"), pit_integer_new(rt, 8)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_REL"), pit_integer_new(rt, 9)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SHLIB"), pit_integer_new(rt, 10)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_DYNSYM"), pit_integer_new(rt, 11)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_INIT_ARRAY"), pit_integer_new(rt, 14)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_FINI_ARRAY"), pit_integer_new(rt, 15)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_PREINIT_ARRAY"), pit_integer_new(rt, 16)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_GROUP"), pit_integer_new(rt, 17)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SYMTAB_SHNDX"), pit_integer_new(rt, 1)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_WRITE"), pit_integer_new(rt, 0x1)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_ALLOC"), pit_integer_new(rt, 0x2)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_EXECINSTR"), pit_integer_new(rt, 0x4)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_MERGE"), pit_integer_new(rt, 0x10)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_STRINGS"), pit_integer_new(rt, 0x20)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_INFO_LINK"), pit_integer_new(rt, 0x40)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_LINK_ORDER"), pit_integer_new(rt, 0x80)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_OS_NONCOMFORMING"), pit_integer_new(rt, 0x100)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_GROUP"), pit_integer_new(rt, 0x200)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_TLS"), pit_integer_new(rt, 0x400)); - pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_COMPRESSED"), pit_integer_new(rt, 0x80)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_LOCAL"), pit_integer_new(rt, 0)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_GLOBAL"), pit_integer_new(rt, 1)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_WEAK"), pit_integer_new(rt, 2)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_NOTYPE"), pit_integer_new(rt, 0)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_OBJECT"), pit_integer_new(rt, 1)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_FUNC"), pit_integer_new(rt, 2)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_SECTION"), pit_integer_new(rt, 3)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_FILE"), pit_integer_new(rt, 4)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_COMMON"), pit_integer_new(rt, 5)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_TLS"), pit_integer_new(rt, 6)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_DEFAULT"), pit_integer_new(rt, 0)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_INTERNAL"), pit_integer_new(rt, 1)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_HIDDEN"), pit_integer_new(rt, 2)); - pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_PROTECTED"), pit_integer_new(rt, 3)); - pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_NULL"), pit_integer_new(rt, 0)); - pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_LOAD"), pit_integer_new(rt, 1)); - pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_DYNAMIC"), pit_integer_new(rt, 2)); - pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_INTERP"), pit_integer_new(rt, 3)); - pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_NOTE"), pit_integer_new(rt, 4)); - pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_SHLIB"), pit_integer_new(rt, 5)); - pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_PHDR"), pit_integer_new(rt, 6)); - pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_TLS"), pit_integer_new(rt, 7)); + /* sizes */ + pit_set(rt, pit_intern_cstr(rt, "elf/HEADER_IDENT_SIZE"), pit_integer_new(rt, ELF_HEADER_IDENT_SIZE)); + pit_set(rt, pit_intern_cstr(rt, "elf/32_HEADER_SIZE"), pit_integer_new(rt, ELF32_HEADER_SIZE)); + pit_set(rt, pit_intern_cstr(rt, "elf/64_HEADER_SIZE"), pit_integer_new(rt, ELF64_HEADER_SIZE)); + pit_set(rt, pit_intern_cstr(rt, "elf/32_SECTION_HEADER_SIZE"), pit_integer_new(rt, ELF32_SECTION_HEADER_SIZE)); + pit_set(rt, pit_intern_cstr(rt, "elf/64_SECTION_HEADER_SIZE"), pit_integer_new(rt, ELF64_SECTION_HEADER_SIZE)); + pit_set(rt, pit_intern_cstr(rt, "elf/32_PROGRAM_HEADER_SIZE"), pit_integer_new(rt, ELF32_PROGRAM_HEADER_SIZE)); + pit_set(rt, pit_intern_cstr(rt, "elf/64_PROGRAM_HEADER_SIZE"), pit_integer_new(rt, ELF64_PROGRAM_HEADER_SIZE)); + /* enums */ + pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_INVALID"), pit_integer_new(rt, ELF_CLASS_INVALID)); + pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_32"), pit_integer_new(rt, ELF_CLASS_32)); + pit_set(rt, pit_intern_cstr(rt, "elf/CLASS_64"), pit_integer_new(rt, ELF_CLASS_64)); + pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_INVALID"), pit_integer_new(rt, ELF_ENDIANNESS_INVALID)); + pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_LITTLE"), pit_integer_new(rt, ELF_ENDIANNESS_LITTLE)); + pit_set(rt, pit_intern_cstr(rt, "elf/ENDIANNESS_BIG"), pit_integer_new(rt, ELF_ENDIANNESS_BIG)); + pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_NONE"), pit_integer_new(rt, ELF_TYPE_NONE)); + pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_REL"), pit_integer_new(rt, ELF_TYPE_REL)); + pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_EXEC"), pit_integer_new(rt, ELF_TYPE_EXEC)); + pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_DYN"), pit_integer_new(rt, ELF_TYPE_DYN)); + pit_set(rt, pit_intern_cstr(rt, "elf/TYPE_CORE"), pit_integer_new(rt, ELF_TYPE_CORE)); + pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_NONE"), pit_integer_new(rt, ELF_MACHINE_NONE)); + pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_X86"), pit_integer_new(rt, ELF_MACHINE_X86)); + pit_set(rt, pit_intern_cstr(rt, "elf/MACHINE_AMD64"), pit_integer_new(rt, ELF_MACHINE_AMD64)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NULL"), pit_integer_new(rt, ELF_SECTION_TYPE_NULL)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_PROGBITS"), pit_integer_new(rt, ELF_SECTION_TYPE_PROGBITS)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SYMTAB"), pit_integer_new(rt, ELF_SECTION_TYPE_SYMTAB)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_STRTAB"), pit_integer_new(rt, ELF_SECTION_TYPE_STRTAB)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_RELA"), pit_integer_new(rt, ELF_SECTION_TYPE_RELA)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_HASH"), pit_integer_new(rt, ELF_SECTION_TYPE_HASH)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_DYNAMIC"), pit_integer_new(rt, ELF_SECTION_TYPE_DYNAMIC)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NOTE"), pit_integer_new(rt, ELF_SECTION_TYPE_NOTE)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_NOBITS"), pit_integer_new(rt, ELF_SECTION_TYPE_NOBITS)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_REL"), pit_integer_new(rt, ELF_SECTION_TYPE_REL)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SHLIB"), pit_integer_new(rt, ELF_SECTION_TYPE_SHLIB)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_DYNSYM"), pit_integer_new(rt, ELF_SECTION_TYPE_DYNSYM)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_INIT_ARRAY"), pit_integer_new(rt, ELF_SECTION_TYPE_INIT_ARRAY)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_FINI_ARRAY"), pit_integer_new(rt, ELF_SECTION_TYPE_FINI_ARRAY)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_PREINIT_ARRAY"), pit_integer_new(rt, ELF_SECTION_TYPE_PREINIT_ARRAY)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_GROUP"), pit_integer_new(rt, ELF_SECTION_TYPE_GROUP)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_TYPE_SYMTAB_SHNDX"), pit_integer_new(rt, ELF_SECTION_TYPE_SYMTAB_SHNDX)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_WRITE"), pit_integer_new(rt, ELF_SECTION_FLAG_WRITE)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_ALLOC"), pit_integer_new(rt, ELF_SECTION_FLAG_ALLOC)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_EXECINSTR"), pit_integer_new(rt, ELF_SECTION_FLAG_EXECINSTR)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_MERGE"), pit_integer_new(rt, ELF_SECTION_FLAG_MERGE)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_STRINGS"), pit_integer_new(rt, ELF_SECTION_FLAG_STRINGS)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_INFO_LINK"), pit_integer_new(rt, ELF_SECTION_FLAG_INFO_LINK)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_LINK_ORDER"), pit_integer_new(rt, ELF_SECTION_FLAG_LINK_ORDER)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_OS_NONCOMFORMING"), pit_integer_new(rt, ELF_SECTION_FLAG_OS_NONCOMFORMING)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_GROUP"), pit_integer_new(rt, ELF_SECTION_FLAG_GROUP)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_TLS"), pit_integer_new(rt, ELF_SECTION_FLAG_TLS)); + pit_set(rt, pit_intern_cstr(rt, "elf/SECTION_FLAG_COMPRESSED"), pit_integer_new(rt, ELF_SECTION_FLAG_COMPRESSED)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_LOCAL"), pit_integer_new(rt, ELF_SYMBOL_BINDING_LOCAL)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_GLOBAL"), pit_integer_new(rt, ELF_SYMBOL_BINDING_GLOBAL)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_BINDING_WEAK"), pit_integer_new(rt, ELF_SYMBOL_BINDING_WEAK)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_NOTYPE"), pit_integer_new(rt, ELF_SYMBOL_TYPE_NOTYPE)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_OBJECT"), pit_integer_new(rt, ELF_SYMBOL_TYPE_OBJECT)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_FUNC"), pit_integer_new(rt, ELF_SYMBOL_TYPE_FUNC)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_SECTION"), pit_integer_new(rt, ELF_SYMBOL_TYPE_SECTION)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_FILE"), pit_integer_new(rt, ELF_SYMBOL_TYPE_FILE)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_COMMON"), pit_integer_new(rt, ELF_SYMBOL_TYPE_COMMON)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_TYPE_TLS"), pit_integer_new(rt, ELF_SYMBOL_TYPE_TLS)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_DEFAULT"), pit_integer_new(rt, ELF_SYMBOL_VISIBILITY_DEFAULT)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_INTERNAL"), pit_integer_new(rt, ELF_SYMBOL_VISIBILITY_INTERNAL)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_HIDDEN"), pit_integer_new(rt, ELF_SYMBOL_VISIBILITY_HIDDEN)); + pit_set(rt, pit_intern_cstr(rt, "elf/SYMBOL_VISIBILITY_PROTECTED"), pit_integer_new(rt, ELF_SYMBOL_VISIBILITY_PROTECTED)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_NULL"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_NULL)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_LOAD"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_LOAD)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_DYNAMIC"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_DYNAMIC)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_INTERP"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_INTERP)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_NOTE"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_NOTE)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_SHLIB"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_SHLIB)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_PHDR"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_PHDR)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_TYPE_TLS"), pit_integer_new(rt, ELF_PROGRAM_HEADER_TYPE_TLS)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_FLAG_X"), pit_integer_new(rt, ELF_PROGRAM_HEADER_FLAG_X)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_FLAG_W"), pit_integer_new(rt, ELF_PROGRAM_HEADER_FLAG_W)); + pit_set(rt, pit_intern_cstr(rt, "elf/PROGRAM_HEADER_FLAG_R"), pit_integer_new(rt, ELF_PROGRAM_HEADER_FLAG_R)); pit_fset(rt, pit_intern_cstr(rt, "elf/new!"), pit_nativefunc_new(rt, impl_elf_new)); pit_fset(rt, pit_intern_cstr(rt, "elf/write-header!"), pit_nativefunc_new(rt, impl_elf_write_header)); pit_fset(rt, pit_intern_cstr(rt, "elf/write-section-header!"), pit_nativefunc_new(rt, impl_elf_write_section_header)); pit_fset(rt, pit_intern_cstr(rt, "elf/write-symbol!"), pit_nativefunc_new(rt, impl_elf_write_symbol)); pit_fset(rt, pit_intern_cstr(rt, "elf/write-program-header!"), pit_nativefunc_new(rt, impl_elf_write_program_header)); + pit_fset(rt, pit_intern_cstr(rt, "elf/write-bytes!"), pit_nativefunc_new(rt, impl_elf_write_bytes)); pit_fset(rt, pit_intern_cstr(rt, "elf/spit!"), pit_nativefunc_new(rt, impl_elf_spit)); } @@ -14,49 +14,14 @@ int main(int argc, char **argv) { pit_install_library_essential(rt); pit_install_library_io(rt); pit_install_library_plist(rt); + pit_install_library_alist(rt); pit_install_library_bytestring(rt); rj_install_library(rt); - if (argc < 2) { /* run repl */ - char buf[1024] = {0}; - i64 len = 0; - pit_runtime_freeze(rt); - if (pit_runtime_print_error(rt)) { exit(1); } - setbuf(stdout, NULL); - printf("> "); - while (len < (i64) sizeof(buf) && (buf[len++] = (char) getchar()) != EOF) { - if (buf[len - 1] == '\n') { - pit_value bs, prog, res; - buf[len - 1] = 0; - bs = pit_bytes_new_cstr(rt, buf); - prog = pit_read_bytes(rt, bs); - res = pit_eval(rt, prog); - if (pit_runtime_print_error(rt)) { - rt->error = PIT_NIL; - printf("> "); - } else { - char dumpbuf[1024] = {0}; - pit_dump(rt, dumpbuf, sizeof(dumpbuf) - 1, res, true); - printf("%s\n> ", dumpbuf); - } - len = 0; - } - } - } else { /* run file */ - pit_value bs = pit_bytes_new_file(rt, argv[1]); - pit_lexer lex; - pit_parser parse; - bool eof = false; - pit_value p = PIT_NIL; - if (!pit_lexer_from_bytes(rt, &lex, bs)) { - pit_error(rt, "failed to initialize lexer"); - } - pit_parser_from_lexer(&parse, &lex); - while (p = pit_parse(rt, &parse, &eof), !eof) { - pit_eval(rt, p); - if (pit_runtime_print_error(rt)) { - exit(1); - } - } + if (argc < 2) { + pit_repl(rt); + } else { + pit_load_file(rt, argv[1]); + if (pit_runtime_print_error(rt)) return -1; } return 0; } diff --git a/test.lisp b/test.lisp deleted file mode 100644 index 4353ef3..0000000 --- a/test.lisp +++ /dev/null @@ -1,7 +0,0 @@ -(let ((elf (elf/new!))) - (print! elf) - (elf/write-header! elf - :type elf/TYPE_EXEC - :machine elf/MACHINE_AMD64) - (elf/spit! "test.elf" elf) - ) diff --git a/test.pit b/test.pit new file mode 100644 index 0000000..c67246f --- /dev/null +++ b/test.pit @@ -0,0 +1,30 @@ +(let ( (elf (elf/new!)) + (entrypoint 0x696000) + (toff 0x1000) + (phdroff 0x2000) + ) + (elf/write-bytes! elf toff + '( 0x48 0xc7 0xc0 0x3c 0x00 0x00 0x00 + 0x48 0xc7 0xc7 0x2a 0x00 0x00 0x00 + 0x0f 0x05 )) + (elf/write-program-header! elf (+ phdroff elf/64_PROGRAM_HEADER_SIZE) + :type elf/PROGRAM_HEADER_TYPE_LOAD + :offset toff + :virtual-addr entrypoint + :file-size 0x100 + :mem-size 0x100 + :flags (bitwise/or elf/PROGRAM_HEADER_FLAG_R elf/PROGRAM_HEADER_FLAG_X) + ) + (elf/write-header! elf + :type elf/TYPE_EXEC + :machine elf/MACHINE_AMD64 + :program-header-offset phdroff + :program-header-entry-size elf/64_PROGRAM_HEADER_SIZE + :program-header-entries 2 + :section-header-offset 0 + :section-header-entry-size elf/64_SECTION_HEADER_SIZE + :section-header-entries 0 + :entry entrypoint + ) + (elf/spit! "test.elf" elf) + ) @@ -0,0 +1,539 @@ +(defun! x86/split16le (w) + "Split the 16-bit W into a little-endian list of 8-bit integers." + (list + (bitwise/and 0xff w) + (bitwise/and 0xff (bitwise/rshift w 8)))) + +(defun! x86/split32le (w) + "Split the 32-bit W into a little-endian list of 8-bit integers." + (list + (bitwise/and 0xff w) + (bitwise/and 0xff (bitwise/rshift w 8)) + (bitwise/and 0xff (bitwise/rshift w 16)) + (bitwise/and 0xff (bitwise/rshift w 24)))) + +(defun! x86/split64le (w) + "Split the 64-bit W into a little-endian list of 8-bit integers." + (list + (bitwise/and 0xff w) + (bitwise/and 0xff (bitwise/rshift w 8)) + (bitwise/and 0xff (bitwise/rshift w 16)) + (bitwise/and 0xff (bitwise/rshift w 24)) + (bitwise/and 0xff (bitwise/rshift w 32)) + (bitwise/and 0xff (bitwise/rshift w 40)) + (bitwise/and 0xff (bitwise/rshift w 48)) + (bitwise/and 0xff (bitwise/rshift w 56)))) + +(defun! x86/register-1byte? (r) + "Return the register index for 1-byte register R." + (case r + (al 0) (cl 1) (dl 2) (bl 3) + (ah 4) (ch 5) (dh 6) (bh 7) + (r8b 8) (r9b 9) (r10b 10) (r11b 11) + (r12b 12) (r13b 13) (r14b 14) (r15b 15))) + +(defun! x86/register-2byte? (r) + "Return the register index for 2-byte register R." + (case r + (ax 0) (cx 1) (dx 2) (bx 3) + (sp 4) (bp 5) (si 6) (di 7) + (r8w 8) (r9w 9) (r10w 10) (r11w 11) + (r12w 12) (r13w 13) (r14w 14) (r15w 15))) + +(defun! x86/register-4byte? (r) + "Return the register index for 4-byte register R." + (case r + (eax 0) (ecx 1) (edx 2) (ebx 3) + (esp 4) (ebp 5) (esi 6) (edi 7) + (r8d 8) (r9d 9) (r10d 10) (r11d 11) + (r12d 12) (r13d 13) (r14d 14) (r15d 15))) + +(defun! x86/register-8byte? (r) + "Return the register index for 8-byte register R." + (case r + (rax 0) (rcx 1) (rdx 2) (rbx 3) + (rsp 4) (rbp 5) (rsi 6) (rdi 7) + (r8 8) (r9 9) (r10 10) (r11 11) + (r12 12) (r13 13) (r14 14) (r15 15))) + +(defun! x86/register? (r) + "Return the register index of R." + (or + (x86/register-1byte? r) + (x86/register-2byte? r) + (x86/register-4byte? r) + (x86/register-8byte? r))) + +(defun! x86/register-extended? (r) + "Return non-nil if R is an extended register." + (list/contains? r + '( r8b r9b r10b r11b r12b r13b r14b r15b + r8w r9w r10w r11w r12w r13w r14w r15w + r8d r9d r10d r11d r12d r13d r14d r15d + r8 r9 r10 r11 r12 r13 r14 r15))) + +(defun! x86/integer-fits-in-bits? (bits x) + "Determine if X fits in BITS." + (if (integer? x) + (let ((leftover (bitwise/rshift x bits))) + (eq? leftover 0)))) +(defun! x86/operand-immediate-fits? (sz x) + "Determine if immediate operand X fits in SZ." + (let + ((bits + (or + (case sz + ("b" 8) ("c" 16) ("d" 32) ("i" 16) + ("j" 32) ("q" 64) ("v" 64) ("w" 16) + ("y" 64) ("z" 32)) + (error! "unknown operand pattern size")))) + (x86/integer-fits-in-bits? bits x))) + +(defun! x86/operand-register-fits? (sz r) + "Determine if register operand R fits in SZ." + (case sz + ("b" (x86/register-1byte? r)) + ("c" (or (x86/register-1byte? r) (x86/register-2byte? r))) + ("d" (x86/register-4byte? r)) + ("i" (x86/register-2byte? r)) + ("j" (x86/register-4byte? r)) + ("q" (x86/register-8byte? r)) + ("v" (or (x86/register-2byte? r) (x86/register-4byte? r) (x86/register-8byte? r))) + ("w" (x86/register-2byte? r)) + ("y" (or (x86/register-4byte? r) (x86/register-8byte? r))) + ("z" (or (x86/register-2byte? r) (x86/register-4byte? r))))) + +(defun! x86/memory-operand-base (m) + (and + (eq? (car m) 'mem) + (car (cdr m)))) +(defun! x86/memory-operand-off (m) + (and + (eq? (car m) 'mem) + (or (car (cdr (cdr m))) 0))) + +(defun! x86/operand-memory-location? (op) + "Return non-nil if OP represents a memory location." + (let ( (base (x86/memory-operand-base op)) + (off (x86/memory-operand-off op))) + (and + (or (x86/register-4byte? base) (x86/register-8byte? base)) + (integer? off)))) + +(defun! x86/operand-match? (pat op) + "Determine if operand OP matches PAT." + (cond + ((symbol? pat) (eq? pat op)) + ((cons? pat) (list/contains? op pat)) + ((bytes? pat) + (let ( (loc (bytes/range 0 1 pat)) + (sz (bytes/range 1 (bytes/len pat) pat))) + (cond + ((or (equal? loc "I") (equal? loc "J")) (x86/operand-immediate-fits? sz op)) + ((or (equal? loc "G") (equal? loc "R")) (x86/operand-register-fits? sz op)) + ((equal? loc "M") (x86/operand-memory-location? op)) + ((equal? loc "E") + (or (x86/operand-register-fits? sz op) (x86/operand-memory-location? op))) + (t (error! "unknown operand pattern location"))))))) + +(defun! x86/operand-size (op) + "Return the minimum power-of-2 size in bytes that contains OP." + (cond + ((symbol? op) + (cond + ((x86/register-1byte? op) 1) + ((x86/register-2byte? op) 2) + ((x86/register-4byte? op) 4) + ((x86/register-8byte? op) 8) + (t (error! "attempted to take size of unknown register")))) + ((integer? op) + (cond + ((x86/integer-fits-in-bits? 8 op) 1) + ((x86/integer-fits-in-bits? 16 op) 2) + ((x86/integer-fits-in-bits? 32 op) 4) + ((x86/integer-fits-in-bits? 64 op) 8) + (t (error! "attempted to take size of too-large immediate")))) + ((x86/operand-memory-location? op) 1) + (t (error! "attempted to take size of unknown operand")))) + +(defstruct! x86/ins + operand-size-prefix + address-size-prefix + rex-w + rex-r + rex-x + rex-b + opcode + modrm-mod + modrm-reg + modrm-rm + disp ;; pair of size and value + imm ;; pair of size and value + ) + +(defun! x86/ins-bytes (ins) + "Return a list of bytes encoding INS." + (let ( (opcode (x86/ins/get-opcode ins)) + (rex-w (x86/ins/get-rex-w ins)) + (rex-r (x86/ins/get-rex-r ins)) + (rex-x (x86/ins/get-rex-x ins)) + (rex-b (x86/ins/get-rex-b ins)) + (modrm-mod (x86/ins/get-modrm-mod ins)) + (modrm-reg (x86/ins/get-modrm-reg ins)) + (modrm-rm (x86/ins/get-modrm-rm ins)) + (disp (x86/ins/get-disp ins)) + (imm (x86/ins/get-imm ins))) + (list/append + (if (x86/ins/get-operand-size-prefix ins) '(0x66)) + (if (x86/ins/get-address-size-prefix ins) '(0x67)) + (if (or rex-w rex-r rex-x rex-b) + (list + (bitwise/or + 0x40 + (if rex-w 0b1000 0) + (if rex-r 0b0100 0) + (if rex-x 0b0010 0) + (if rex-b 0b0001 0)))) + (cond + ((not opcode) (error! "no opcode for instruction")) + ((cons? opcode) opcode) + ((integer? opcode) (list opcode)) + (t (error! "malformed opcode for instruction"))) + (if (or modrm-mod modrm-reg modrm-rm) + (list + (bitwise/or + (bitwise/lshift (or modrm-mod 0) 6) + (bitwise/lshift (or modrm-reg 0) 3) + (or modrm-rm 0)))) + (if disp + (cond + ((eq? (car disp) 1) (list (cdr disp))) + ((eq? (car disp) 4) (x86/split32le (cdr disp))) + (t (error! "malformed displacement for instruction")))) + (if imm + (cond + ((eq? (car imm) 1) (list (cdr imm))) + ((eq? (car imm) 2) (x86/split16le (cdr imm))) + ((eq? (car imm) 4) (x86/split32le (cdr imm))) + ((eq? (car imm) 8) (x86/split64le (cdr imm))) + (t (error! "malformed immediate for instruction"))))))) + +(defun! x86/instruction-update-sizes (ins ops default-size) + "Update INS to account for the sizes of OPS. +DEFAULT-SIZE is the default operand size." + (let ((defsz (or default-size 4))) + (if (> (list/len ops) 0) + (let ((regs (list/uniq (list/map 'x86/operand-size (list/filter 'x86/register? ops))))) + (if (> (list/len regs) 1) + (error! "invalid mix of register sizes in operands")) + (let ((sz (if (eq? (list/len regs) 0) defsz (car regs)))) + (cond + ((eq? sz 1) nil) + ((eq? defsz sz) nil) + ((and (not (eq? defsz 2)) (eq? sz 2)) (x86/ins/set-operand-size-prefix! ins t)) + ((and (not (eq? defsz 8)) (eq? sz 8)) (x86/ins/set-rex-w! ins t)) + (t (error! "unable to encode operands with default size"))) + sz))))) + +(defun! x86/instruction-update-operand (esz ins pat op) + "Update INS to account for an operand OP according to PAT. +The effective operand size is ESZ." + (cond + ((bytes? pat) + (let ((loc (bytes/range 0 1 pat))) + (cond + ((equal? loc "I") + (let ((immsz (if (>= esz 4) 4 esz))) + (if (not (x86/integer-fits-in-bits? (* 8 immsz) op)) + (error! "Immediate too large" op)) + (x86/ins/set-imm! ins (cons immsz op)))) + ((equal? loc "J") + (let ((immsz (if (eq? esz 1) 1 4))) + (if (not (x86/integer-fits-in-bits? (* 8 immsz) op)) + (error! "jump displacement too large")) + (x86/ins/set-disp! ins (cons immsz op)))) + ((equal? loc "G") + (x86/ins/set-modrm-reg! ins + (or (x86/register? op) (error "Invalid register: %s" op)))) + ((or (equal? loc "R") (and (equal? loc "E") (x86/register? op))) + (x86/ins/set-modrm-mod! ins 0b11) + (x86/ins/set-modrm-rm! ins + (or (x86/register? op) (error "Invalid register: %s" op)))) + ((or (equal? loc "M") (and (equal? loc "E") (x86/operand-memory-location? op))) + (let ( (base (x86/memory-operand-base op)) + (off (x86/memory-operand-off op))) + (cond + ((eq? base 'eip) + (x86/ins/set-modrm-rm! ins 0b101) + (x86/ins/set-modrm-mod! ins 0b00) + (x86/ins/set-disp! ins (cons 4 off)) + (x86/ins/set-address-size-prefix! ins t)) + ((eq? base 'rip) + (x86/ins/set-modrm-rm! ins 0b101) + (x86/ins/set-modrm-mod! ins 0b00) + (x86/ins/set-disp! ins (cons 4 off))) + (t + (x86/ins/set-modrm-rm! ins + (or + (x86/register-4byte? base) + (x86/register-8byte? base) + (error! "invalid base register"))) + (if (x86/register-4byte? base) + (x86/ins/set-address-size-prefix! ins t)) + (cond + ((x86/integer-fits-in-bits? 8 off) + (x86/ins/set-disp! ins (cons 1 off)) + (x86/ins/set-modrm-mod! ins 0b01)) + ((x86/integer-fits-in-bits? 32 off) + (x86/ins/set-disp! ins (cons 4 off)) + (x86/ins/set-modrm-mod! ins 0b10)) + (t (error! "invalid offset"))))))) + (t (error! "invalid operand location code"))))))) + +(defun! x86/default-instruction-handler (opcode & kwargs) + "Return an instruction handler for OPCODE. +The instruction handler will run POSTHOOK on the instruction at the end. +DEFAULT-SIZE is the default operand size." + (let ( (posthook (plist/get :posthook kwargs)) + (default-size (plist/get :default-size kwargs))) + (lambda (pats ops) + (let ((ret (x86/ins/new :opcode opcode))) + (let ((esz + (or (x86/instruction-update-sizes ret ops default-size) + (error! "malformed size for operands")))) + (list/zip-with + (lambda (it other) + (x86/instruction-update-operand esz ret it other)) + pats + ops)) + (if posthook + (funcall posthook ret ops)) + ret)))) + +(defun! x86/instruction-handler-jcc (opcode immsz) + "Return an instruction handler for a Jcc instruction at OPCODE. +IMMSZ is the size of the displacement from RIP." + (lambda (_pats ops) + (let ((ret (x86/ins/new :opcode opcode))) + (x86/ins/set-disp! ret (cons immsz (car ops))) + ret))) + +(defun! x86/generate-handlers-arith (opbase group1reg) + "Return handlers for an arithmetic mnemonic starting at OPBASE. +The REG value in ModR/M is indicated by GROUP1REG." + (list + (cons '("Eb" "Gb") (x86/default-instruction-handler (+ opbase 0))) + (cons '("Ev" "Gv") (x86/default-instruction-handler (+ opbase 1))) + (cons '("Gb" "Eb") (x86/default-instruction-handler (+ opbase 2))) + (cons '("Gv" "Ev") (x86/default-instruction-handler (+ opbase 3))) + (cons '(al "Ib") (x86/default-instruction-handler (+ opbase 4))) + (cons '((ax eax rax) "Iz") (x86/default-instruction-handler (+ opbase 5))) + (cons '("Eb" "Ib") + (x86/default-instruction-handler 0x80 + :posthook (lambda (ins _) (x86/ins/set-modrm-reg! ins group1reg)))) + (cons '("Ev" "Iz") + (x86/default-instruction-handler 0x81 + :posthook (lambda (ins _) (x86/ins/set-modrm-reg! ins group1reg)))) + (cons '("Ev" "Ib") + (x86/default-instruction-handler 0x83 + :posthook (lambda (ins _) (setf (x86/ins-modrm-reg ins) group1reg)))))) + +(setq! x86/registers-+reg-base + '( (+rb . (al cl dl bl ah ch dh bh)) + (+rw . (ax cx dx bx sp bp si di)) + (+rd . (eax ecx edx ebx esp ebp esi edi)) + (+rq . (rax rcx rdx rbx rsp rbp rsi rdi)))) +(setq! x86/registers-+reg-extended + '( (+rb . (r8b r9b r10b r11b r12b r13b r14b r15b)) + (+rw . (r8w r9w r10w r11w r12w r13w r14w r15w)) + (+rd . (r8d r9d r10d r11d r12d r13d r14d r15d)) + (+rq . (r8 r9 r10 r11 r12 r13 r14 r15)))) +(defun! x86/generate-handlers-opcode-+reg (opbase extraops addends & args) + "Generate handlers for a family of opcodes that uses the +reg encoding. +OPBASE is the base opcode. +EXTRAOPS are additional operands after the register operand. +ADDENDS is a list of symbols like +rw, +rq etc. that denote allowed registers. +ARGS are passed verbatim to `u/x86/default-instruction-handler." + (list/map + (lambda (it) + (let ( (abase (list/map (lambda (a) (list/nth it (alist/get a x86/registers-+reg-base))) addends)) + (aext (list/map (lambda (a) (list/nth it (alist/get a x86/registers-+reg-extended))) addends))) + (cons + (cons (list/append abase aext) extraops) + (apply 'x86/default-instruction-handler (cons (+ opbase it) args))))) + (list/iota 8))) + +(setq! + x86/mnemonic-table + (list + (cons 'add (x86/generate-handlers-arith 0x00 0)) + (cons 'or (x86/generate-handlers-arith 0x08 1)) + (cons 'adc (x86/generate-handlers-arith 0x10 2)) + (cons 'sbb (x86/generate-handlers-arith 0x18 3)) + (cons 'and (x86/generate-handlers-arith 0x20 4)) + (cons 'sub (x86/generate-handlers-arith 0x28 5)) + (cons 'xor (x86/generate-handlers-arith 0x30 6)) + (cons 'cmp (x86/generate-handlers-arith 0x38 7)) + (cons 'push + (x86/generate-handlers-opcode-+reg 0x50 '() '(+rw +rq) + :default-size 8 + :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))) + (cons 'pop + (x86/generate-handlers-opcode-+reg 0x58 '() '(+rw +rq) + :default-size 8 + :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))) + (list 'jo + (cons '("Jb") (x86/instruction-handler-jcc 0x70 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x80) 4))) + (list 'jno + (cons '("Jb") (x86/instruction-handler-jcc 0x71 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x81) 4))) + (list 'jb + (cons '("Jb") (x86/instruction-handler-jcc 0x72 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x82) 4))) + (list 'jnb + (cons '("Jb") (x86/instruction-handler-jcc 0x73 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x83) 4))) + (list 'jz + (cons '("Jb") (x86/instruction-handler-jcc 0x74 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x84) 4))) + (list 'jnz + (cons '("Jb") (x86/instruction-handler-jcc 0x75 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x85) 4))) + (list 'jbe + (cons '("Jb") (x86/instruction-handler-jcc 0x76 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x86) 4))) + (list 'jnbe + (cons '("Jb") (x86/instruction-handler-jcc 0x77 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x87) 4))) + (list 'js + (cons '("Jb") (x86/instruction-handler-jcc 0x78 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x88) 4))) + (list 'jns + (cons '("Jb") (x86/instruction-handler-jcc 0x79 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x89) 4))) + (list 'jp + (cons '("Jb") (x86/instruction-handler-jcc 0x7a 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8a) 4))) + (list 'jnp + (cons '("Jb") (x86/instruction-handler-jcc 0x7b 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8b) 4))) + (list 'jl + (cons '("Jb") (x86/instruction-handler-jcc 0x7c 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8c) 4))) + (list 'jnl + (cons '("Jb") (x86/instruction-handler-jcc 0x7d 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8d) 4))) + (list 'jle + (cons '("Jb") (x86/instruction-handler-jcc 0x7e 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8e) 4))) + (list 'jnle + (cons '("Jb") (x86/instruction-handler-jcc 0x7f 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8f) 4))) + (cons 'mov + (list/append + (x86/generate-handlers-opcode-+reg 0xb0 '("Ib") '(+rb) + :posthook + (lambda (ins ops) + (x86/ins/set-rex-b! ins (x86/register-extended? (car ops))))) + (x86/generate-handlers-opcode-+reg 0xb8 '("Iv") '(+rw +rd +rq) + :posthook + (lambda (ins ops) + (x86/ins/set-rex-b! ins (x86/register-extended? (car ops))) + (let ((imm (x86/ins/get-imm ins))) + (setcar! imm (if (x86/ins/get-rex-w ins) 8 (car imm)))))))) + (list 'jmp + (cons '("Ev") + (x86/default-instruction-handler 0xff + :default-size 8 + :posthook (lambda (ins _) (setf (u/x86/ins-modrm-reg ins) 4))))) + (list 'syscall + (cons '() (lambda (_ _) (x86/ins/new :opcode '(0x0f 0x05))))) + )) + +(defun! x86/asm (op) + "Assemble OP to an instruction." + (let ((mnem (car op)) (operands (cdr op))) + (let ((variants (or (alist/get mnem x86/mnemonic-table) (error! "unknown mnemonic")))) + (print! variants) + (let ((v + (list/find + (lambda (it) + (and (eq? (list/len (car it)) (list/len operands)) + (list/all? (lambda (x) x) (list/zip-with 'x86/operand-match? (car it) operands)))) + variants))) + (if (and v (function? (cdr v))) + (funcall (cdr v) (car v) operands) + (error! "could not identify instruction")))))) + +(setq! test-ins + (list/concat + (list/map (lambda (i) (x86/ins-bytes (x86/asm i))) + '( (mov rax 0x01) + (mov rdi 0x01) + (mov rsi 0x696100) + (mov rdx 15) + (syscall) + (mov rax 0x3c) + (mov rdi 0) + (syscall) + )))) +(print! test-ins) + +(let ( (elf (elf/new!)) + (entrypoint 0x696000) + (toff 0x1000) + (phdroff 0x2000) + (shstrtaboff 0x3000) + (shdroff 0x4000) + ) + (elf/write-header! elf + :type elf/TYPE_EXEC + :machine elf/MACHINE_AMD64 + :program-header-offset phdroff + :program-header-entry-size elf/64_PROGRAM_HEADER_SIZE + :program-header-entries 2 + :section-header-offset shdroff + :section-header-entry-size elf/64_SECTION_HEADER_SIZE + :section-header-entries 3 + :entry entrypoint + ) + (elf/write-bytes! elf toff test-ins) + (elf/write-bytes! elf (+ toff 0x100) "hello computer +") + (elf/write-program-header! elf (+ phdroff elf/64_PROGRAM_HEADER_SIZE) + :type elf/PROGRAM_HEADER_TYPE_LOAD + :offset toff + :virtual-addr entrypoint + :file-size 0x200 + :mem-size 0x200 + :flags (bitwise/or elf/PROGRAM_HEADER_FLAG_R elf/PROGRAM_HEADER_FLAG_X) + ) + (elf/write-bytes! elf shstrtaboff + '(0 46 115 104 115 116 114 116 97 98 0 46 116 101 120 116 0)) + (elf/write-section-header! elf shdroff :type elf/SECTION_TYPE_NULL) + (setq! shdroff (+ shdroff elf/64_SECTION_HEADER_SIZE)) + (elf/write-section-header! elf shdroff + :name-index 1 + :type elf/SECTION_TYPE_STRTAB + :offset shstrtaboff + :size 0x1000) + (setq! shdroff (+ shdroff elf/64_SECTION_HEADER_SIZE)) + (elf/write-section-header! elf shdroff + :name-index 11 + :type elf/SECTION_TYPE_PROGBITS + :flags (bitwise/or elf/SECTION_FLAG_ALLOC elf/SECTION_FLAG_EXECINSTR) + :addr entrypoint + :offset toff + :size 0x200) + (elf/spit! "test.elf" elf) + ) + +;; (elf/write! "test.elf" +;; :sections +;; (list +;; (elf/section +;; :name ".text" +;; :type elf/SECTION_TYPE_PROGBITS +;; :flags (bitwise/or elf/SECTION_FLAG_ALLOC elf/SECTION_FLAG_EXECINSTR) +;; :contents (get-the-contents-bytes)))) |
