From 83d7d3cf960ecb7b1e20b691d4018a67d407cf47 Mon Sep 17 00:00:00 2001 From: LLLL Colonq Date: Fri, 27 Feb 2026 20:59:37 -0500 Subject: Update for RealNaesten :) --- emacs/pit.el | 86 +++++++++ include/lcq/pit/runtime.h | 66 ++----- src/main.c | 6 +- src/parser.c | 6 +- src/runtime.c | 211 +++++++++++++++++++--- x86.lisp | 451 ---------------------------------------------- x86.pit | 451 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 745 insertions(+), 532 deletions(-) create mode 100644 emacs/pit.el delete mode 100644 x86.lisp create mode 100644 x86.pit diff --git a/emacs/pit.el b/emacs/pit.el new file mode 100644 index 0000000..cce2694 --- /dev/null +++ b/emacs/pit.el @@ -0,0 +1,86 @@ +;;; pit --- support for pit -*- lexical-binding: t; -*- +;;; Commentary: +;;; Code: + +(require 'dash) +(require 's) +(require 'cl-lib) +(require 'rx) +(require 'hydra) +(require 'comint) + +(defcustom pit/repl-buffer-name "*pit-repl*" + "Name of the pit REPL buffer." + :type '(string) + :group 'pit) + +(defcustom pit/interpreter-path "~/src/libcolonq/pit/pit" + "Path to the pit interpreter." + :type '(string) + :group 'pit) + +(define-derived-mode pit/mode lisp-mode "pit" + "Major mode for pit source code." + ) +(add-to-list 'auto-mode-alist `(,(rx ".pit" eos) . pit/mode)) + +(defun pit/repl-buffer () + "Ensure the REPL is running and return its buffer." + (make-comint-in-buffer "pit" pit/repl-buffer-name pit/interpreter-path nil) + (get-buffer pit/repl-buffer-name)) + +(defun pit/repl-process () + "Return the Comint process for the REPL." + (get-buffer-process (pit/repl-buffer))) + +(defun pit/send-string (s) + "Send string S to the REPL." + (comint-send-string (pit/repl-process) (s-concat s "\n"))) + +(defun pit/eval-region (start end) + "Send the region from START to END to the REPL." + (interactive "r") + (comint-send-region (pit/repl-process) start end) + (comint-send-string (pit/repl-process) "\n")) + +(defun pit/eval-defun () + "Send the defun under point to the REPL." + (interactive) + (save-excursion + (end-of-defun) + (beginning-of-defun) + (let ((start (point))) + (forward-sexp) + (pit/eval-region start (point))))) + +(defun pit/eval-buffer () + "Send the current buffer to the REPL." + (interactive) + (pit/send-string (format "(progn %s 'done)" (buffer-string)))) + +(defun pit/restart () + "Restart the pit REPL." + (interactive) + (kill-buffer pit/repl-buffer-name) + (pit/repl)) + +(defun pit/repl () + "Launch the pit REPL." + (interactive) + (switch-to-buffer (pit/repl-buffer))) + +;;;; configuration +(defhydra pit/ide (:color teal :hint nil) + "Dispatcher > pit IDE." + ("" keyboard-escape-quit) + ("S" pit/restart "start") + ("e" pit/eval-defun "eval") + ("i" pit/eval-buffer "buffer") + ("r" pit/repl "repl")) +(defun pit/setup () + "Configuration for `pit/mode'." + (setq-local c/contextual-ide 'pit/ide/body)) +(add-hook 'pit/mode-hook #'pit/setup) + +(provide 'pit) +;;; pit.el ends here diff --git a/include/lcq/pit/runtime.h b/include/lcq/pit/runtime.h index 02a2249..3ff8c90 100644 --- a/include/lcq/pit/runtime.h +++ b/include/lcq/pit/runtime.h @@ -13,10 +13,9 @@ typedef struct { u8 data[]; } pit_arena; pit_arena *pit_arena_new(i64 capacity, i64 elem_size); -i32 pit_arena_next_idx(pit_arena *a); -i32 pit_arena_alloc_idx(pit_arena *a); -i32 pit_arena_alloc_bulk_idx(pit_arena *a, i64 num); -void *pit_arena_idx(pit_arena *a, i32 idx); +i64 pit_arena_alloc_idx(pit_arena *a); +i64 pit_arena_alloc_bulk_idx(pit_arena *a, i64 num); +void *pit_arena_idx(pit_arena *a, i64 idx); void *pit_arena_alloc(pit_arena *a); void *pit_arena_alloc_bulk(pit_arena *a, i64 num); @@ -30,8 +29,8 @@ enum pit_value_sort { PIT_VALUE_SORT_SYMBOL = 2, /* 0b10 - NaN-boxed index into symbol table */ PIT_VALUE_SORT_REF = 3 /* 0b11 - NaN-boxed index into "heavy object" arena */ }; -typedef i32 pit_symbol; /* a symbol at runtime is an index into the runtime's symbol table */ -typedef i32 pit_ref; /* a reference is an index into the runtime's arena */ +typedef i64 pit_symbol; /* a symbol at runtime is an index into the runtime's symbol table */ +typedef i64 pit_ref; /* a reference is an index into the runtime's arena */ typedef u64 pit_value; enum pit_value_sort pit_value_sort(pit_value v); u64 pit_value_data(pit_value v); @@ -53,7 +52,8 @@ typedef struct { /* "heavy" values, the targets of refs */ PIT_VALUE_HEAVY_SORT_BYTES, /* bytestring */ PIT_VALUE_HEAVY_SORT_FUNC, /* Lisp closure */ PIT_VALUE_HEAVY_SORT_NATIVEFUNC, /* native function */ - PIT_VALUE_HEAVY_SORT_NATIVEDATA /* native data (C pointer) */ + PIT_VALUE_HEAVY_SORT_NATIVEDATA, /* native data (C pointer) */ + PIT_VALUE_HEAVY_SORT_FORWARDING_POINTER /* forwarding pointer to to-space (during GC) */ } hsort; union { pit_value cell; @@ -63,6 +63,7 @@ typedef struct { /* "heavy" values, the targets of refs */ struct { pit_value env; pit_value args; pit_value arg_rest_nm; pit_value body; } func; pit_nativefunc nativefunc; struct { pit_value tag; void *data; } nativedata; + i64 forwarding_pointer; } in; } pit_value_heavy; @@ -95,6 +96,7 @@ void pit_runtime_eval_program_push_apply(struct pit_runtime *rt, pit_runtime_eva typedef struct pit_runtime { /* interpreter state */ pit_arena *values; /* all heavy values - effectively an array of pit_value_heavy - MUTABLE! */ + pit_arena *values_backbuffer; /* other values buffer (used by GC) */ pit_arena *arrays; /* all arrays - MUTABLE! */ pit_arena *bytes; /* all bytestrings (including symbol names) - immutable */ pit_arena *symtab; i64 symtab_len; /* all symbols - effectively an array of pit_symtab_entry - MUTABLE! */ @@ -150,6 +152,7 @@ bool pit_is_bytes(pit_runtime *rt, pit_value a); bool pit_is_func(pit_runtime *rt, pit_value a); bool pit_is_nativefunc(pit_runtime *rt, pit_value a); bool pit_is_nativedata(pit_runtime *rt, pit_value a); +bool pit_is_forwarding_pointer(pit_runtime *rt, pit_value a); bool pit_eq(pit_value a, pit_value b); bool pit_equal(pit_runtime *rt, pit_value a, pit_value b); @@ -224,50 +227,11 @@ void *pit_nativedata_get(pit_runtime *rt, pit_value tag, pit_value v); pit_value pit_expand_macros(pit_runtime *rt, pit_value top); pit_value pit_eval(pit_runtime *rt, pit_value e); +/* garbage collection */ +void pit_collect_garbage(pit_runtime *rt); + /* repl / file loading */ -#define PIT_DEFAULT_MAIN(rt) \ - if (argc < 2) { /* run repl */ \ - char buf[1024] = {0}; \ - i64 len = 0; \ - pit_runtime_freeze(rt); \ - if (pit_runtime_print_error(rt)) { exit(1); } \ - setbuf(stdout, NULL); \ - printf("> "); \ - while (len < (i64) sizeof(buf) && (buf[len++] = (char) getchar()) != EOF) { \ - if (buf[len - 1] == '\n') { \ - pit_value bs, prog, res; \ - buf[len - 1] = 0; \ - bs = pit_bytes_new_cstr(rt, buf); \ - prog = pit_read_bytes(rt, bs); \ - res = pit_eval(rt, prog); \ - if (pit_runtime_print_error(rt)) { \ - rt->error = PIT_NIL; \ - printf("> "); \ - } else { \ - char dumpbuf[1024] = {0}; \ - pit_dump(rt, dumpbuf, sizeof(dumpbuf) - 1, res, true); \ - printf("%s\n> ", dumpbuf); \ - } \ - len = 0; \ - } \ - } \ - } else { /* run file */ \ - pit_value bs = pit_bytes_new_file(rt, argv[1]); \ - pit_lexer lex; \ - pit_parser parse; \ - bool eof = false; \ - pit_value p = PIT_NIL; \ - if (!pit_lexer_from_bytes(rt, &lex, bs)) { \ - pit_error(rt, "failed to initialize lexer"); \ - } \ - pit_parser_from_lexer(&parse, &lex); \ - while (p = pit_parse(rt, &parse, &eof), !eof) { \ - if (pit_runtime_print_error(rt)) exit(1); \ - pit_eval(rt, p); \ - if (pit_runtime_print_error(rt)) exit(1); \ - } \ - if (pit_runtime_print_error(rt)) exit(1); \ - } \ - return 0; +void pit_run_file(pit_runtime *rt, char *path); +void pit_repl(pit_runtime *rt); #endif diff --git a/src/main.c b/src/main.c index 086f647..b6b2a2c 100644 --- a/src/main.c +++ b/src/main.c @@ -14,5 +14,9 @@ int main(int argc, char **argv) { pit_install_library_plist(rt); pit_install_library_alist(rt); pit_install_library_bytestring(rt); - PIT_DEFAULT_MAIN(rt); + if (argc < 2) { + pit_repl(rt); + } else { + pit_run_file(rt, argv[1]); + } } diff --git a/src/parser.c b/src/parser.c index b2d0f31..1f540d6 100644 --- a/src/parser.c +++ b/src/parser.c @@ -61,7 +61,6 @@ void pit_parser_from_lexer(pit_parser *ret, pit_lexer *lex) { /* parse a single expression */ pit_value pit_parse(pit_runtime *rt, pit_parser *st, bool *eof) { - char buf[256] = {0}; if (rt == NULL || st == NULL) return PIT_NIL; pit_lex_token t = advance(st); rt->source_line = st->cur.line; @@ -153,6 +152,7 @@ pit_value pit_parse(pit_runtime *rt, pit_parser *st, bool *eof) { return pit_integer_new(rt, total); } case PIT_LEX_TOKEN_STRING_LITERAL: { + char buf[256] = {0}; get_token_string(st, buf, sizeof(buf)); i64 len = (i64) strlen(buf); i64 cur = 0; @@ -163,9 +163,11 @@ pit_value pit_parse(pit_runtime *rt, pit_parser *st, bool *eof) { } return pit_bytes_new(rt, (u8 *) buf, cur); } - case PIT_LEX_TOKEN_SYMBOL: + case PIT_LEX_TOKEN_SYMBOL: { + char buf[256] = {0}; get_token_string(st, buf, sizeof(buf)); return pit_intern_cstr(rt, buf); + } default: pit_error(rt, "unexpected token: %s", pit_lex_token_name(t)); return PIT_NIL; diff --git a/src/runtime.c b/src/runtime.c index 668ad4b..1c0e6e1 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -18,38 +18,41 @@ pit_arena *pit_arena_new(i64 capacity, i64 elem_size) { a->next = 0; return a; } -i32 pit_arena_next_idx(pit_arena *a) { - i32 byte_idx = 0; pit_mul(&byte_idx, a->elem_size, a->next); +static i64 pit_arena_byte_idx(pit_arena *a, i64 idx) { + i64 byte_idx = 0; pit_mul(&byte_idx, a->elem_size, idx); return byte_idx; } -i32 pit_arena_alloc_idx(pit_arena *a) { - i32 byte_idx = pit_arena_next_idx(a); +i64 pit_arena_alloc_idx(pit_arena *a) { + i64 ret = a->next; + i64 byte_idx = pit_arena_byte_idx(a, ret); if (byte_idx >= a->capacity) { return -1; } a->next += 1; - return byte_idx; + return ret; } -i32 pit_arena_alloc_bulk_idx(pit_arena *a, i64 num) { - i32 byte_idx = pit_arena_next_idx(a); - i32 byte_len = 0; pit_mul(&byte_len, a->elem_size, num); +i64 pit_arena_alloc_bulk_idx(pit_arena *a, i64 num) { + i64 ret = a->next; + i64 byte_idx = pit_arena_byte_idx(a, ret); + i64 byte_len = 0; pit_mul(&byte_len, a->elem_size, num); if (byte_idx + byte_len > a->capacity) { return -1; } a->next += num; - return byte_idx; + return ret; } -void *pit_arena_idx(pit_arena *a, i32 idx) { - if (idx < 0 || idx >= a->capacity) { +void *pit_arena_idx(pit_arena *a, i64 idx) { + i64 byte_idx = pit_arena_byte_idx(a, idx); + if (byte_idx < 0 || byte_idx >= a->capacity) { return NULL; } - return &a->data[idx]; + return &a->data[byte_idx]; } void *pit_arena_alloc(pit_arena *a) { - i32 byte_idx = pit_arena_alloc_idx(a); - return pit_arena_idx(a, byte_idx); + i64 idx = pit_arena_alloc_idx(a); + return pit_arena_idx(a, idx); } void *pit_arena_alloc_bulk(pit_arena *a, i64 num) { - i32 byte_idx = pit_arena_alloc_bulk_idx(a, num); - return pit_arena_idx(a, byte_idx); + i64 idx = pit_arena_alloc_bulk_idx(a, num); + return pit_arena_idx(a, idx); } enum pit_value_sort pit_value_sort(pit_value v) { @@ -73,6 +76,7 @@ u64 pit_value_data(pit_value v) { pit_runtime *pit_runtime_new() { pit_runtime *ret = malloc(sizeof(*ret)); ret->values = pit_arena_new(64 * 1024 * 1024, sizeof(pit_value_heavy)); + ret->values_backbuffer = pit_arena_new(64 * 1024 * 1024, sizeof(pit_value_heavy)); ret->arrays = pit_arena_new(1024 * 1024, sizeof(pit_value)); ret->bytes = pit_arena_new(1024 * 1024, sizeof(u8)); ret->symtab = pit_arena_new(1024 * 1024, sizeof(pit_symtab_entry)); @@ -98,10 +102,10 @@ pit_runtime *pit_runtime_new() { } void pit_runtime_freeze(pit_runtime *rt) { - rt->frozen_values = pit_arena_next_idx(rt->values); - rt->frozen_arrays = pit_arena_next_idx(rt->arrays); - rt->frozen_bytes = pit_arena_next_idx(rt->bytes); - rt->frozen_symtab = pit_arena_next_idx(rt->symtab); + rt->frozen_values = rt->values->next; + rt->frozen_arrays = rt->arrays->next; + rt->frozen_bytes = rt->bytes->next; + rt->frozen_symtab = rt->symtab->next; } void pit_runtime_reset(pit_runtime *rt) { rt->values->next = rt->frozen_values; @@ -142,7 +146,7 @@ i64 pit_dump(pit_runtime *rt, char *buf, i64 len, pit_value v, bool readable) { } return i; } else { - return snprintf(buf, (size_t) len, "", pit_as_symbol(rt, v)); + return snprintf(buf, (size_t) len, "", pit_as_symbol(rt, v)); } } case PIT_VALUE_SORT_REF: { @@ -150,7 +154,7 @@ i64 pit_dump(pit_runtime *rt, char *buf, i64 len, pit_value v, bool readable) { char *end = buf + len; char *start = buf; h = pit_deref(rt, r); - if (!h) snprintf(buf, (size_t) len, "", r); + if (!h) snprintf(buf, (size_t) len, "", r); else { switch (h->hsort) { case PIT_VALUE_HEAVY_SORT_CELL: { @@ -206,7 +210,7 @@ i64 pit_dump(pit_runtime *rt, char *buf, i64 len, pit_value v, bool readable) { return i; } default: - return snprintf(buf, (size_t) len, "", r); + return snprintf(buf, (size_t) len, "", r); } } break; @@ -306,7 +310,7 @@ pit_value pit_ref_new(pit_runtime *rt, pit_ref r) { } pit_value pit_heavy_new(pit_runtime *rt) { - i32 idx = pit_arena_alloc_idx(rt->values); + i64 idx = pit_arena_alloc_idx(rt->values); if (idx < 0) { pit_error(rt, "failed to allocate space for heavy value"); return PIT_NIL; @@ -363,6 +367,9 @@ bool pit_is_nativefunc(pit_runtime *rt, pit_value a) { bool pit_is_nativedata(pit_runtime *rt, pit_value a) { return pit_is_value_heavy_sort(rt, a, PIT_VALUE_HEAVY_SORT_NATIVEDATA); } +bool pit_is_forwarding_pointer(pit_runtime *rt, pit_value a) { + return pit_is_value_heavy_sort(rt, a, PIT_VALUE_HEAVY_SORT_FORWARDING_POINTER); +} bool pit_eq(pit_value a, pit_value b) { return a == b; } @@ -410,6 +417,8 @@ bool pit_equal(pit_runtime *rt, pit_value a, pit_value b) { return pit_eq(ha->in.nativedata.tag, hb->in.nativedata.tag) && ha->in.nativedata.data == hb->in.nativedata.data; + case PIT_VALUE_HEAVY_SORT_FORWARDING_POINTER: + return ha->in.forwarding_pointer == hb->in.forwarding_pointer; } } } @@ -507,13 +516,12 @@ pit_value pit_read_bytes(pit_runtime *rt, pit_value v) { /* read a single lisp f pit_value pit_intern(pit_runtime *rt, u8 *nm, i64 len) { if (rt->error != PIT_NIL) return PIT_NIL; - for (i64 i = 0; i < rt->symtab_len; ++i) { - pit_symbol sidx = (pit_symbol) (i * (i64) sizeof(pit_symtab_entry)); + for (i64 sidx = 0; sidx < rt->symtab_len; ++sidx) { pit_symtab_entry *sent = pit_arena_idx(rt->symtab, sidx); if (!sent) { pit_error(rt, "corrupted symbol table"); return PIT_NIL; } if (pit_bytes_match(rt, sent->name, nm, len)) return pit_symbol_new(rt, sidx); } - i32 idx = pit_arena_alloc_idx(rt->symtab); + i64 idx = pit_arena_alloc_idx(rt->symtab); pit_symtab_entry *ent = pit_arena_idx(rt->symtab, idx); if (!ent) { pit_error(rt, "failed to allocate symtab entry"); return PIT_NIL; } ent->name = pit_bytes_new(rt, nm, len); @@ -1216,3 +1224,152 @@ end: { return ret; } } + +static i64 gc_copy(pit_arena *tospace, pit_value_heavy *h) { + if (h->hsort == PIT_VALUE_HEAVY_SORT_FORWARDING_POINTER) { + return h->in.forwarding_pointer; + } else { + i64 ret = tospace->next; + pit_value_heavy *g = pit_arena_alloc(tospace); + *g = *h; + h->hsort = PIT_VALUE_HEAVY_SORT_FORWARDING_POINTER; + h->in.forwarding_pointer = ret; + return ret; + } +} +static pit_value gc_copy_value(pit_runtime *rt, pit_arena *tospace, pit_value v) { + if (pit_value_sort(v) == PIT_VALUE_SORT_REF) { + pit_value_heavy *h = pit_deref(rt, pit_as_ref(rt, v)); + i64 new = gc_copy(tospace, h); + return pit_ref_new(rt, new); + } else { + return v; + } +} +void pit_collect_garbage(pit_runtime *rt) { + pit_arena *fromspace = rt->values; + pit_arena *tospace = rt->values_backbuffer; + tospace->next = 0; + /* populate tospace with immediately reachable values */ + for (i64 i = 0; i < rt->symtab_len; ++i) { + pit_symtab_entry *ent = pit_arena_idx(rt->symtab, i); + ent->name = gc_copy_value(rt, tospace, ent->name); + ent->value = gc_copy_value(rt, tospace, ent->value); + ent->function = gc_copy_value(rt, tospace, ent->function); + } + for (i64 i = 0; i < rt->saved_bindings->top; ++i) { + pit_value *v = &rt->saved_bindings->data[i]; + *v = gc_copy_value(rt, tospace, *v); + } + for (i64 scan = 0; scan < tospace->next; ++scan) { + pit_value_heavy *h = pit_arena_idx(tospace, scan); + switch (h->hsort) { + case PIT_VALUE_HEAVY_SORT_CELL: + h->in.cell = gc_copy_value(rt, tospace, h->in.cell); + break; + case PIT_VALUE_HEAVY_SORT_CONS: + h->in.cons.car = gc_copy_value(rt, tospace, h->in.cons.car); + h->in.cons.cdr = gc_copy_value(rt, tospace, h->in.cons.cdr); + break; + case PIT_VALUE_HEAVY_SORT_ARRAY: + for (i64 i = 0; i < h->in.array.len; ++i) { + h->in.array.data[i] = gc_copy_value(rt, tospace, h->in.array.data[i]); + } + break; + case PIT_VALUE_HEAVY_SORT_BYTES: break; + case PIT_VALUE_HEAVY_SORT_FUNC: + h->in.func.env = gc_copy_value(rt, tospace, h->in.func.env); + h->in.func.args = gc_copy_value(rt, tospace, h->in.func.args); + h->in.func.arg_rest_nm = gc_copy_value(rt, tospace, h->in.func.arg_rest_nm); + h->in.func.body = gc_copy_value(rt, tospace, h->in.func.body); + break; + case PIT_VALUE_HEAVY_SORT_NATIVEFUNC: break; + case PIT_VALUE_HEAVY_SORT_NATIVEDATA: + h->in.nativedata.tag = gc_copy_value(rt, tospace, h->in.nativedata.tag); + break; + case PIT_VALUE_HEAVY_SORT_FORWARDING_POINTER: + pit_error(rt, "garbage collection broken! encountered forwarding pointer in to-space"); + break; + } + } + rt->values = tospace; + rt->values_backbuffer = fromspace; +} + +void pit_run_file(pit_runtime *rt, char *path) { + pit_value bs = pit_bytes_new_file(rt, path); + pit_lexer lex; + pit_parser parse; + bool eof = false; + pit_value p = PIT_NIL; + if (!pit_lexer_from_bytes(rt, &lex, bs)) { + pit_error(rt, "failed to initialize lexer"); + } + pit_parser_from_lexer(&parse, &lex); + while (p = pit_parse(rt, &parse, &eof), !eof) { + if (pit_runtime_print_error(rt)) exit(1); + pit_eval(rt, p); + if (pit_runtime_print_error(rt)) exit(1); + // pit_collect_garbage(rt); + if (pit_runtime_print_error(rt)) exit(1); + } + if (pit_runtime_print_error(rt)) exit(1); + fprintf(stderr, "value allocations: %ld\n", rt->values->next); +} + +void pit_repl(pit_runtime *rt) { + size_t bufcap = 8; + char *buf = malloc(bufcap); + i64 len = 0; + pit_runtime_freeze(rt); + if (pit_runtime_print_error(rt)) { exit(1); } + setbuf(stdout, NULL); + printf("> "); + while ((buf[len++] = (char) getchar()) != EOF) { + if (len >= (i64) bufcap) { + bufcap *= 2; + buf = realloc(buf, bufcap); + } + pit_value res; + pit_lexer lex; + pit_parser parse; + bool eof = false; + pit_value p = PIT_NIL; + i64 depth = 0; + bool lex_error = false; + pit_lex_token tok = PIT_LEX_TOKEN_EOF; + if (buf[len - 1] != '\n') continue; + pit_lex_bytes(&lex, buf, len); + while (!lex_error && (tok = pit_lex_next(&lex)) != PIT_LEX_TOKEN_EOF) { + switch (tok) { + case PIT_LEX_TOKEN_ERROR: lex_error = true; break; + case PIT_LEX_TOKEN_LPAREN: depth += 1; break; + case PIT_LEX_TOKEN_RPAREN: depth -= 1; break; + default: break; + } + } + if (lex_error || depth > 0) continue; + buf[len - 1] = 0; + pit_lex_bytes(&lex, buf, len); + pit_parser_from_lexer(&parse, &lex); + while (p = pit_parse(rt, &parse, &eof), !eof) { + res = pit_eval(rt, p); + } + if (pit_runtime_print_error(rt)) { + rt->error = PIT_NIL; + printf("> "); + } else { + char dumpbuf[1024] = {0}; + pit_dump(rt, dumpbuf, sizeof(dumpbuf) - 1, res, true); + pit_collect_garbage(rt); + printf("%s\n> ", dumpbuf); + } + len = 0; + } + if (len >= (i64) sizeof(buf)) { + fprintf(stderr, "expression exceeded REPL buffer size\n"); + } else { + printf("bye!\n"); + } + free(buf); +} diff --git a/x86.lisp b/x86.lisp deleted file mode 100644 index 2aa8d89..0000000 --- a/x86.lisp +++ /dev/null @@ -1,451 +0,0 @@ -(defun! x86/split16le (w) - "Split the 16-bit W16 into a little-endian list of 8-bit integers." - (list - (bitwise/and 0xff w) - (bitwise/and 0xff (bitwise/rshift w 8)))) - -(defun! x86/split32le (w) - "Split the 32-bit W32 into a little-endian list of 8-bit integers." - (list - (bitwise/and 0xff w) - (bitwise/and 0xff (bitwise/rshift w 8)) - (bitwise/and 0xff (bitwise/rshift w 16)) - (bitwise/and 0xff (bitwise/rshift w 24)))) - -(defun! x86/register-1byte? (r) - "Return the register index for 1-byte register R." - (case r - (al 0) (cl 1) (dl 2) (bl 3) - (ah 4) (ch 5) (dh 6) (bh 7) - (r8b 8) (r9b 9) (r10b 10) (r11b 11) - (r12b 12) (r13b 13) (r14b 14) (r15b 15))) - -(defun! x86/register-2byte? (r) - "Return the register index for 2-byte register R." - (case r - (ax 0) (cx 1) (dx 2) (bx 3) - (sp 4) (bp 5) (si 6) (di 7) - (r8w 8) (r9w 9) (r10w 10) (r11w 11) - (r12w 12) (r13w 13) (r14w 14) (r15w 15))) - -(defun! x86/register-4byte? (r) - "Return the register index for 4-byte register R." - (case r - (eax 0) (ecx 1) (edx 2) (ebx 3) - (esp 4) (ebp 5) (esi 6) (edi 7) - (r8d 8) (r9d 9) (r10d 10) (r11d 11) - (r12d 12) (r13d 13) (r14d 14) (r15d 15))) - -(defun! x86/register-8byte? (r) - "Return the register index for 8-byte register R." - (case r - (rax 0) (rcx 1) (rdx 2) (rbx 3) - (rsp 4) (rbp 5) (rsi 6) (rdi 7) - (r8 8) (r9 9) (r10 10) (r11 11) - (r12 12) (r13 13) (r14 14) (r15 15))) - -(defun! x86/register? (r) - "Return the register index of R." - (or - (x86/register-1byte? r) - (x86/register-2byte? r) - (x86/register-4byte? r) - (x86/register-8byte? r))) - -(defun! x86/register-extended? (r) - "Return non-nil if R is an extended register." - (list/contains? r - '( r8b r9b r10b r11b r12b r13b r14b r15b - r8w r9w r10w r11w r12w r13w r14w r15w - r8d r9d r10d r11d r12d r13d r14d r15d - r8 r9 r10 r11 r12 r13 r14 r15))) - -(defun! x86/integer-fits-in-bits? (bits x) - "Determine if X fits in BITS." - (if (integer? x) - (let ((leftover (bitwise/rshift x bits))) - (eq? leftover 0)))) -(defun! x86/operand-immediate-fits? (sz x) - "Determine if immediate operand X fits in SZ." - (let - ((bits - (or - (case sz - ("b" 8) ("c" 16) ("d" 32) ("i" 16) - ("j" 32) ("q" 64) ("v" 64) ("w" 16) - ("y" 64) ("z" 32)) - (error! "unknown operand pattern size")))) - (x86/integer-fits-in-bits? bits x))) - -(defun! x86/operand-register-fits? (sz r) - "Determine if register operand R fits in SZ." - (case sz - ("b" (x86/register-1byte? r)) - ("c" (or (x86/register-1byte? r) (x86/register-2byte? r))) - ("d" (x86/register-4byte? r)) - ("i" (x86/register-2byte? r)) - ("j" (x86/register-4byte? r)) - ("q" (x86/register-8byte? r)) - ("v" (or (x86/register-2byte? r) (x86/register-4byte? r) (x86/register-8byte? r))) - ("w" (x86/register-2byte? r)) - ("y" (or (x86/register-4byte? r) (x86/register-8byte? r))) - ("z" (or (x86/register-2byte? r) (x86/register-4byte? r))))) - -(defun! x86/memory-operand-base (m) - (and - (eq? (car m) 'mem) - (car (cdr m)))) -(defun! x86/memory-operand-off (m) - (and - (eq? (car m) 'mem) - (or (car (cdr (cdr m))) 0))) - -(defun! x86/operand-memory-location? (op) - "Return non-nil if OP represents a memory location." - (let ( (base (x86/memory-operand-base op)) - (off (x86/memory-operand-off op))) - (and - (or (x86/register-4byte? base) (x86/register-8byte? base)) - (integer? off)))) - -(defun! x86/operand-match? (pat op) - "Determine if operand OP matches PAT." - (cond - ((symbol? pat) (eq? pat op)) - ((cons? pat) (list/contains? op pat)) - ((bytes? pat) - (let ( (loc (bytes/range 0 1 pat)) - (sz (bytes/range 1 (bytes/len pat) pat))) - (cond - ((or (equal? loc "I") (equal? loc "J")) (x86/operand-immediate-fits? sz op)) - ((or (equal? loc "G") (equal? loc "R")) (x86/operand-register-fits? sz op)) - ((equal? loc "M") (x86/operand-memory-location? op)) - ((equal? loc "E") - (or (x86/operand-register-fits? sz op) (x86/operand-memory-location? op))) - (t (error! "unknown operand pattern location"))))))) - -(defun! x86/operand-size (op) - "Return the minimum power-of-2 size in bytes that contains OP." - (cond - ((symbol? op) - (cond - ((x86/register-1byte? op) 1) - ((x86/register-2byte? op) 2) - ((x86/register-4byte? op) 4) - ((x86/register-8byte? op) 8) - (t (error! "attempted to take size of unknown register")))) - ((integer? op) - (cond - ((x86/integer-fits-in-bits? 8 op) 1) - ((x86/integer-fits-in-bits? 16 op) 2) - ((x86/integer-fits-in-bits? 32 op) 4) - ((x86/integer-fits-in-bits? 64 op) 8) - (t (error! "attempted to take size of too-large immediate")))) - ((x86/operand-memory-location? op) 1) - (t (error! "attempted to take size of unknown operand")))) - -(defstruct! x86/ins - operand-size-prefix - address-size-prefix - rex-w - rex-r - rex-x - rex-b - opcode - modrm-mod - modrm-reg - modrm-rm - disp ;; pair of size and value - imm ;; pair of size and value - ) - -(defun! x86/ins-bytes (ins) - "Return a list of bytes encoding INS." - (let ( (opcode (x86/ins/get-opcode ins)) - (rex-w (x86/ins/get-rex-w ins)) - (rex-r (x86/ins/get-rex-r ins)) - (rex-x (x86/ins/get-rex-x ins)) - (rex-b (x86/ins/get-rex-b ins)) - (modrm-mod (x86/ins/get-modrm-mod ins)) - (modrm-reg (x86/ins/get-modrm-reg ins)) - (modrm-rm (x86/ins/get-modrm-rm ins)) - (disp (x86/ins/get-disp ins)) - (imm (x86/ins/get-imm ins))) - (list/append - (if (x86/ins/get-operand-size-prefix ins) '(0x66)) - (if (x86/ins/get-address-size-prefix ins) '(0x67)) - (if (or rex-w rex-r rex-x rex-b) - (list - (bitwise/or - 0x40 - (if rex-w 0b1000 0) - (if rex-r 0b0100 0) - (if rex-x 0b0010 0) - (if rex-b 0b0001 0)))) - (cond - ((not opcode) (error! "no opcode for instruction")) - ((cons? opcode) opcode) - ((integer? opcode) (list opcode)) - (t (error! "malformed opcode for instruction"))) - (if (or modrm-mod modrm-reg modrm-rm) - (list - (bitwise/or - (bitwise/lshift (or modrm-mod 0) 6) - (bitwise/lshift (or modrm-reg 0) 3) - (or modrm-rm 0)))) - (if disp - (cond - ((eq? (car disp) 1) (list (cdr disp))) - ((eq? (car disp) 4) (x86/split32le (cdr disp))) - (t (error! "malformed displacement for instruction")))) - (if imm - (cond - ((eq? (car imm) 1) (list (cdr imm))) - ((eq? (car imm) 2) (x86/split16le (cdr imm))) - ((eq? (car imm) 4) (x86/split32le (cdr imm))) - (t (error! "malformed immediate for instruction"))))))) - -(defun! x86/instruction-update-sizes (ins ops default-size) - "Update INS to account for the sizes of OPS. -DEFAULT-SIZE is the default operand size." - (let ((defsz (or default-size 4))) - (if (> (list/len ops) 0) - (let ((regs (list/uniq (list/map 'x86/operand-size (list/filter 'x86/register? ops))))) - (if (> (list/len regs) 1) - (error! "invalid mix of register sizes in operands")) - (let ((sz (if (eq? (list/len regs) 0) defsz (car regs)))) - (cond - ((eq? sz 1) nil) - ((eq? defsz sz) nil) - ((and (not (eq? defsz 2)) (eq? sz 2)) (x86/ins/set-operand-size-prefix! ins t)) - ((and (not (eq? defsz 8)) (eq? sz 8)) (x86/ins/set-rex-w! ins t)) - (t (error! "unable to encode operands with default size"))) - sz))))) - -(defun! x86/instruction-update-operand (esz ins pat op) - "Update INS to account for an operand OP according to PAT. -The effective operand size is ESZ." - (cond - ((bytes? pat) - (let ((loc (bytes/range 0 1 pat))) - (cond - ((equal? loc "I") - (let ((immsz (if (>= esz 4) 4 esz))) - (if (not (x86/integer-fits-in-bits? (* 8 immsz) op)) - (error! "Immediate too large" op)) - (x86/ins/set-imm! ins (cons immsz op)))) - ((equal? loc "J") - (let ((immsz (if (eq? esz 1) 1 4))) - (if (not (x86/integer-fits-in-bits? (* 8 immsz) op)) - (error! "jump displacement too large")) - (x86/ins/set-disp! ins (cons immsz op)))) - ((equal? loc "G") - (x86/ins/set-modrm-reg! ins - (or (x86/register? op) (error "Invalid register: %s" op)))) - ((or (equal? loc "R") (and (equal? loc "E") (x86/register? op))) - (x86/ins/set-modrm-mod! ins 0b11) - (x86/ins/set-modrm-rm! ins - (or (x86/register? op) (error "Invalid register: %s" op)))) - ((or (equal? loc "M") (and (equal? loc "E") (x86/operand-memory-location? op))) - (let ( (base (x86/memory-operand-base op)) - (off (x86/memory-operand-off op))) - (cond - ((eq? base 'eip) - (x86/ins/set-modrm-rm! ins 0b101) - (x86/ins/set-modrm-mod! ins 0b00) - (x86/ins/set-disp! ins (cons 4 off)) - (x86/ins/set-address-size-prefix! ins t)) - ((eq? base 'rip) - (x86/ins/set-modrm-rm! ins 0b101) - (x86/ins/set-modrm-mod! ins 0b00) - (x86/ins/set-disp! ins (cons 4 off))) - (t - (x86/ins/set-modrm-rm! ins - (or - (x86/register-4byte? base) - (x86/register-8byte? base) - (error! "invalid base register"))) - (if (x86/register-4byte? base) - (x86/ins/set-address-size-prefix! ins t)) - (cond - ((x86/integer-fits-in-bits? 8 off) - (x86/ins/set-disp! ins (cons 1 off)) - (x86/ins/set-modrm-mod! ins 0b01)) - ((x86/integer-fits-in-bits? 32 off) - (x86/ins/set-disp! ins (cons 4 off)) - (x86/ins/set-modrm-mod! ins 0b10)) - (t (error! "invalid offset"))))))) - (t (error! "invalid operand location code"))))))) - -(defun! x86/default-instruction-handler (opcode & kwargs) - "Return an instruction handler for OPCODE. -The instruction handler will run POSTHOOK on the instruction at the end. -DEFAULT-SIZE is the default operand size." - (let ( (posthook (plist/get :posthook kwargs)) - (default-size (plist/get :default-size kwargs))) - (lambda (pats ops) - (let ((ret (x86/ins/new :opcode opcode))) - (let ((esz - (or (x86/instruction-update-sizes ret ops default-size) - (error! "malformed size for operands")))) - (list/zip-with - (lambda (it other) - (x86/instruction-update-operand esz ret it other)) - pats - ops)) - (if posthook - (funcall posthook ret ops)) - ret)))) - -(defun! x86/instruction-handler-jcc (opcode immsz) - "Return an instruction handler for a Jcc instruction at OPCODE. -IMMSZ is the size of the displacement from RIP." - (lambda (_pats ops) - (let ((ret (x86/ins/new :opcode opcode))) - (x86/ins/set-disp! ret (cons immsz (car ops))) - ret))) - -(defun! x86/generate-handlers-arith (opbase group1reg) - "Return handlers for an arithmetic mnemonic starting at OPBASE. -The REG value in ModR/M is indicated by GROUP1REG." - (list - (cons '("Eb" "Gb") (x86/default-instruction-handler (+ opbase 0))) - (cons '("Ev" "Gv") (x86/default-instruction-handler (+ opbase 1))) - (cons '("Gb" "Eb") (x86/default-instruction-handler (+ opbase 2))) - (cons '("Gv" "Ev") (x86/default-instruction-handler (+ opbase 3))) - (cons '(al "Ib") (x86/default-instruction-handler (+ opbase 4))) - (cons '((ax eax rax) "Iz") (x86/default-instruction-handler (+ opbase 5))) - (cons '("Eb" "Ib") - (x86/default-instruction-handler 0x80 - :posthook (lambda (ins _) (x86/ins/set-modrm-reg! ins group1reg)))) - (cons '("Ev" "Iz") - (x86/default-instruction-handler 0x81 - :posthook (lambda (ins _) (x86/ins/set-modrm-reg! ins group1reg)))) - (cons '("Ev" "Ib") - (x86/default-instruction-handler 0x83 - :posthook (lambda (ins _) (setf (x86/ins-modrm-reg ins) group1reg)))))) - -(setq! x86/registers-+reg-base - '( (+rb . (al cl dl bl ah ch dh bh)) - (+rw . (ax cx dx bx sp bp si di)) - (+rd . (eax ecx edx ebx esp ebp esi edi)) - (+rq . (rax rcx rdx rbx rsp rbp rsi rdi)))) -(setq! x86/registers-+reg-extended - '( (+rw . (r8b r9b r10b r11b r12b r13b r14b r15b)) - (+rw . (r8w r9w r10w r11w r12w r13w r14w r15w)) - (+rd . (r8d r9d r10d r11d r12d r13d r14d r15d)) - (+rq . (r8 r9 r10 r11 r12 r13 r14 r15)))) -(defun! x86/generate-handlers-opcode-+reg (opbase extraops addends & args) - "Generate handlers for a family of opcodes that uses the +reg encoding. -OPBASE is the base opcode. -EXTRAOPS are additional operands after the register operand. -ADDENDS is a list of symbols like +rw, +rq etc. that denote allowed registers. -ARGS are passed verbatim to `u/x86/default-instruction-handler." - (list/map - (lambda (it) - (let ( (abase (list/map (lambda (a) (list/nth it (alist/get a x86/registers-+reg-base))) addends)) - (aext (list/map (lambda (a) (list/nth it (alist/get a x86/registers-+reg-extended))) addends))) - (cons - (cons (list/append abase aext) extraops) - (apply 'x86/default-instruction-handler (cons (+ opbase it) args))))) - (list/iota 8))) - -(setq! - x86/mnemonic-table - (list - (cons 'add (x86/generate-handlers-arith 0x00 0)) - (cons 'or (x86/generate-handlers-arith 0x08 1)) - (cons 'adc (x86/generate-handlers-arith 0x10 2)) - (cons 'sbb (x86/generate-handlers-arith 0x18 3)) - (cons 'and (x86/generate-handlers-arith 0x20 4)) - (cons 'sub (x86/generate-handlers-arith 0x28 5)) - (cons 'xor (x86/generate-handlers-arith 0x30 6)) - (cons 'cmp (x86/generate-handlers-arith 0x38 7)) - (cons 'push - (x86/generate-handlers-opcode-+reg 0x50 '() '(+rw +rq) - :default-size 8 - :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))) - (cons 'pop - (x86/generate-handlers-opcode-+reg 0x58 '() '(+rw +rq) - :default-size 8 - :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))) - (list 'jo - (cons '("Jb") (x86/instruction-handler-jcc 0x70 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x80) 4))) - (list 'jno - (cons '("Jb") (x86/instruction-handler-jcc 0x71 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x81) 4))) - (list 'jb - (cons '("Jb") (x86/instruction-handler-jcc 0x72 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x82) 4))) - (list 'jnb - (cons '("Jb") (x86/instruction-handler-jcc 0x73 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x83) 4))) - (list 'jz - (cons '("Jb") (x86/instruction-handler-jcc 0x74 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x84) 4))) - (list 'jnz - (cons '("Jb") (x86/instruction-handler-jcc 0x75 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x85) 4))) - (list 'jbe - (cons '("Jb") (x86/instruction-handler-jcc 0x76 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x86) 4))) - (list 'jnbe - (cons '("Jb") (x86/instruction-handler-jcc 0x77 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x87) 4))) - (list 'js - (cons '("Jb") (x86/instruction-handler-jcc 0x78 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x88) 4))) - (list 'jns - (cons '("Jb") (x86/instruction-handler-jcc 0x79 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x89) 4))) - (list 'jp - (cons '("Jb") (x86/instruction-handler-jcc 0x7a 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8a) 4))) - (list 'jnp - (cons '("Jb") (x86/instruction-handler-jcc 0x7b 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8b) 4))) - (list 'jl - (cons '("Jb") (x86/instruction-handler-jcc 0x7c 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8c) 4))) - (list 'jnl - (cons '("Jb") (x86/instruction-handler-jcc 0x7d 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8d) 4))) - (list 'jle - (cons '("Jb") (x86/instruction-handler-jcc 0x7e 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8e) 4))) - (list 'jnle - (cons '("Jb") (x86/instruction-handler-jcc 0x7f 1)) - (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8f) 4))) - (cons 'mov - (x86/generate-handlers-opcode-+reg 0xb0 '("Ib") '(+rb) - :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))) - (cons 'mov - (x86/generate-handlers-opcode-+reg 0xb8 '("Iv") '(+rw +rd +rq) - :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))) - (list 'jmp - (cons '("Ev") - (x86/default-instruction-handler 0xff - :default-size 8 - :posthook (lambda (ins _) (setf (u/x86/ins-modrm-reg ins) 4))))) - (list 'syscall - (cons '() (lambda (_ _) (x86/ins/new :opcode '(0x0f 0x05))))) - )) - -(defun! x86/asm (op) - "Assemble OP to an instruction." - (let ((mnem (car op)) (operands (cdr op))) - (let ((variants (or (alist/get mnem x86/mnemonic-table) (error! "unknown mnemonic")))) - (let ((v - (list/find - (lambda (it) - (and (eq? (list/len (car it)) (list/len operands)) - (list/all? (lambda (x) x) (list/zip-with 'x86/operand-match? (car it) operands)))) - variants))) - (if (and v (function? (cdr v))) - (funcall (cdr v) (car v) operands) - (error! "could not identify instruction")))))) - -(setq! test-ins (x86/asm '(syscall))) -(print! test-ins) -(print! (x86/ins-bytes test-ins)) diff --git a/x86.pit b/x86.pit new file mode 100644 index 0000000..2aa8d89 --- /dev/null +++ b/x86.pit @@ -0,0 +1,451 @@ +(defun! x86/split16le (w) + "Split the 16-bit W16 into a little-endian list of 8-bit integers." + (list + (bitwise/and 0xff w) + (bitwise/and 0xff (bitwise/rshift w 8)))) + +(defun! x86/split32le (w) + "Split the 32-bit W32 into a little-endian list of 8-bit integers." + (list + (bitwise/and 0xff w) + (bitwise/and 0xff (bitwise/rshift w 8)) + (bitwise/and 0xff (bitwise/rshift w 16)) + (bitwise/and 0xff (bitwise/rshift w 24)))) + +(defun! x86/register-1byte? (r) + "Return the register index for 1-byte register R." + (case r + (al 0) (cl 1) (dl 2) (bl 3) + (ah 4) (ch 5) (dh 6) (bh 7) + (r8b 8) (r9b 9) (r10b 10) (r11b 11) + (r12b 12) (r13b 13) (r14b 14) (r15b 15))) + +(defun! x86/register-2byte? (r) + "Return the register index for 2-byte register R." + (case r + (ax 0) (cx 1) (dx 2) (bx 3) + (sp 4) (bp 5) (si 6) (di 7) + (r8w 8) (r9w 9) (r10w 10) (r11w 11) + (r12w 12) (r13w 13) (r14w 14) (r15w 15))) + +(defun! x86/register-4byte? (r) + "Return the register index for 4-byte register R." + (case r + (eax 0) (ecx 1) (edx 2) (ebx 3) + (esp 4) (ebp 5) (esi 6) (edi 7) + (r8d 8) (r9d 9) (r10d 10) (r11d 11) + (r12d 12) (r13d 13) (r14d 14) (r15d 15))) + +(defun! x86/register-8byte? (r) + "Return the register index for 8-byte register R." + (case r + (rax 0) (rcx 1) (rdx 2) (rbx 3) + (rsp 4) (rbp 5) (rsi 6) (rdi 7) + (r8 8) (r9 9) (r10 10) (r11 11) + (r12 12) (r13 13) (r14 14) (r15 15))) + +(defun! x86/register? (r) + "Return the register index of R." + (or + (x86/register-1byte? r) + (x86/register-2byte? r) + (x86/register-4byte? r) + (x86/register-8byte? r))) + +(defun! x86/register-extended? (r) + "Return non-nil if R is an extended register." + (list/contains? r + '( r8b r9b r10b r11b r12b r13b r14b r15b + r8w r9w r10w r11w r12w r13w r14w r15w + r8d r9d r10d r11d r12d r13d r14d r15d + r8 r9 r10 r11 r12 r13 r14 r15))) + +(defun! x86/integer-fits-in-bits? (bits x) + "Determine if X fits in BITS." + (if (integer? x) + (let ((leftover (bitwise/rshift x bits))) + (eq? leftover 0)))) +(defun! x86/operand-immediate-fits? (sz x) + "Determine if immediate operand X fits in SZ." + (let + ((bits + (or + (case sz + ("b" 8) ("c" 16) ("d" 32) ("i" 16) + ("j" 32) ("q" 64) ("v" 64) ("w" 16) + ("y" 64) ("z" 32)) + (error! "unknown operand pattern size")))) + (x86/integer-fits-in-bits? bits x))) + +(defun! x86/operand-register-fits? (sz r) + "Determine if register operand R fits in SZ." + (case sz + ("b" (x86/register-1byte? r)) + ("c" (or (x86/register-1byte? r) (x86/register-2byte? r))) + ("d" (x86/register-4byte? r)) + ("i" (x86/register-2byte? r)) + ("j" (x86/register-4byte? r)) + ("q" (x86/register-8byte? r)) + ("v" (or (x86/register-2byte? r) (x86/register-4byte? r) (x86/register-8byte? r))) + ("w" (x86/register-2byte? r)) + ("y" (or (x86/register-4byte? r) (x86/register-8byte? r))) + ("z" (or (x86/register-2byte? r) (x86/register-4byte? r))))) + +(defun! x86/memory-operand-base (m) + (and + (eq? (car m) 'mem) + (car (cdr m)))) +(defun! x86/memory-operand-off (m) + (and + (eq? (car m) 'mem) + (or (car (cdr (cdr m))) 0))) + +(defun! x86/operand-memory-location? (op) + "Return non-nil if OP represents a memory location." + (let ( (base (x86/memory-operand-base op)) + (off (x86/memory-operand-off op))) + (and + (or (x86/register-4byte? base) (x86/register-8byte? base)) + (integer? off)))) + +(defun! x86/operand-match? (pat op) + "Determine if operand OP matches PAT." + (cond + ((symbol? pat) (eq? pat op)) + ((cons? pat) (list/contains? op pat)) + ((bytes? pat) + (let ( (loc (bytes/range 0 1 pat)) + (sz (bytes/range 1 (bytes/len pat) pat))) + (cond + ((or (equal? loc "I") (equal? loc "J")) (x86/operand-immediate-fits? sz op)) + ((or (equal? loc "G") (equal? loc "R")) (x86/operand-register-fits? sz op)) + ((equal? loc "M") (x86/operand-memory-location? op)) + ((equal? loc "E") + (or (x86/operand-register-fits? sz op) (x86/operand-memory-location? op))) + (t (error! "unknown operand pattern location"))))))) + +(defun! x86/operand-size (op) + "Return the minimum power-of-2 size in bytes that contains OP." + (cond + ((symbol? op) + (cond + ((x86/register-1byte? op) 1) + ((x86/register-2byte? op) 2) + ((x86/register-4byte? op) 4) + ((x86/register-8byte? op) 8) + (t (error! "attempted to take size of unknown register")))) + ((integer? op) + (cond + ((x86/integer-fits-in-bits? 8 op) 1) + ((x86/integer-fits-in-bits? 16 op) 2) + ((x86/integer-fits-in-bits? 32 op) 4) + ((x86/integer-fits-in-bits? 64 op) 8) + (t (error! "attempted to take size of too-large immediate")))) + ((x86/operand-memory-location? op) 1) + (t (error! "attempted to take size of unknown operand")))) + +(defstruct! x86/ins + operand-size-prefix + address-size-prefix + rex-w + rex-r + rex-x + rex-b + opcode + modrm-mod + modrm-reg + modrm-rm + disp ;; pair of size and value + imm ;; pair of size and value + ) + +(defun! x86/ins-bytes (ins) + "Return a list of bytes encoding INS." + (let ( (opcode (x86/ins/get-opcode ins)) + (rex-w (x86/ins/get-rex-w ins)) + (rex-r (x86/ins/get-rex-r ins)) + (rex-x (x86/ins/get-rex-x ins)) + (rex-b (x86/ins/get-rex-b ins)) + (modrm-mod (x86/ins/get-modrm-mod ins)) + (modrm-reg (x86/ins/get-modrm-reg ins)) + (modrm-rm (x86/ins/get-modrm-rm ins)) + (disp (x86/ins/get-disp ins)) + (imm (x86/ins/get-imm ins))) + (list/append + (if (x86/ins/get-operand-size-prefix ins) '(0x66)) + (if (x86/ins/get-address-size-prefix ins) '(0x67)) + (if (or rex-w rex-r rex-x rex-b) + (list + (bitwise/or + 0x40 + (if rex-w 0b1000 0) + (if rex-r 0b0100 0) + (if rex-x 0b0010 0) + (if rex-b 0b0001 0)))) + (cond + ((not opcode) (error! "no opcode for instruction")) + ((cons? opcode) opcode) + ((integer? opcode) (list opcode)) + (t (error! "malformed opcode for instruction"))) + (if (or modrm-mod modrm-reg modrm-rm) + (list + (bitwise/or + (bitwise/lshift (or modrm-mod 0) 6) + (bitwise/lshift (or modrm-reg 0) 3) + (or modrm-rm 0)))) + (if disp + (cond + ((eq? (car disp) 1) (list (cdr disp))) + ((eq? (car disp) 4) (x86/split32le (cdr disp))) + (t (error! "malformed displacement for instruction")))) + (if imm + (cond + ((eq? (car imm) 1) (list (cdr imm))) + ((eq? (car imm) 2) (x86/split16le (cdr imm))) + ((eq? (car imm) 4) (x86/split32le (cdr imm))) + (t (error! "malformed immediate for instruction"))))))) + +(defun! x86/instruction-update-sizes (ins ops default-size) + "Update INS to account for the sizes of OPS. +DEFAULT-SIZE is the default operand size." + (let ((defsz (or default-size 4))) + (if (> (list/len ops) 0) + (let ((regs (list/uniq (list/map 'x86/operand-size (list/filter 'x86/register? ops))))) + (if (> (list/len regs) 1) + (error! "invalid mix of register sizes in operands")) + (let ((sz (if (eq? (list/len regs) 0) defsz (car regs)))) + (cond + ((eq? sz 1) nil) + ((eq? defsz sz) nil) + ((and (not (eq? defsz 2)) (eq? sz 2)) (x86/ins/set-operand-size-prefix! ins t)) + ((and (not (eq? defsz 8)) (eq? sz 8)) (x86/ins/set-rex-w! ins t)) + (t (error! "unable to encode operands with default size"))) + sz))))) + +(defun! x86/instruction-update-operand (esz ins pat op) + "Update INS to account for an operand OP according to PAT. +The effective operand size is ESZ." + (cond + ((bytes? pat) + (let ((loc (bytes/range 0 1 pat))) + (cond + ((equal? loc "I") + (let ((immsz (if (>= esz 4) 4 esz))) + (if (not (x86/integer-fits-in-bits? (* 8 immsz) op)) + (error! "Immediate too large" op)) + (x86/ins/set-imm! ins (cons immsz op)))) + ((equal? loc "J") + (let ((immsz (if (eq? esz 1) 1 4))) + (if (not (x86/integer-fits-in-bits? (* 8 immsz) op)) + (error! "jump displacement too large")) + (x86/ins/set-disp! ins (cons immsz op)))) + ((equal? loc "G") + (x86/ins/set-modrm-reg! ins + (or (x86/register? op) (error "Invalid register: %s" op)))) + ((or (equal? loc "R") (and (equal? loc "E") (x86/register? op))) + (x86/ins/set-modrm-mod! ins 0b11) + (x86/ins/set-modrm-rm! ins + (or (x86/register? op) (error "Invalid register: %s" op)))) + ((or (equal? loc "M") (and (equal? loc "E") (x86/operand-memory-location? op))) + (let ( (base (x86/memory-operand-base op)) + (off (x86/memory-operand-off op))) + (cond + ((eq? base 'eip) + (x86/ins/set-modrm-rm! ins 0b101) + (x86/ins/set-modrm-mod! ins 0b00) + (x86/ins/set-disp! ins (cons 4 off)) + (x86/ins/set-address-size-prefix! ins t)) + ((eq? base 'rip) + (x86/ins/set-modrm-rm! ins 0b101) + (x86/ins/set-modrm-mod! ins 0b00) + (x86/ins/set-disp! ins (cons 4 off))) + (t + (x86/ins/set-modrm-rm! ins + (or + (x86/register-4byte? base) + (x86/register-8byte? base) + (error! "invalid base register"))) + (if (x86/register-4byte? base) + (x86/ins/set-address-size-prefix! ins t)) + (cond + ((x86/integer-fits-in-bits? 8 off) + (x86/ins/set-disp! ins (cons 1 off)) + (x86/ins/set-modrm-mod! ins 0b01)) + ((x86/integer-fits-in-bits? 32 off) + (x86/ins/set-disp! ins (cons 4 off)) + (x86/ins/set-modrm-mod! ins 0b10)) + (t (error! "invalid offset"))))))) + (t (error! "invalid operand location code"))))))) + +(defun! x86/default-instruction-handler (opcode & kwargs) + "Return an instruction handler for OPCODE. +The instruction handler will run POSTHOOK on the instruction at the end. +DEFAULT-SIZE is the default operand size." + (let ( (posthook (plist/get :posthook kwargs)) + (default-size (plist/get :default-size kwargs))) + (lambda (pats ops) + (let ((ret (x86/ins/new :opcode opcode))) + (let ((esz + (or (x86/instruction-update-sizes ret ops default-size) + (error! "malformed size for operands")))) + (list/zip-with + (lambda (it other) + (x86/instruction-update-operand esz ret it other)) + pats + ops)) + (if posthook + (funcall posthook ret ops)) + ret)))) + +(defun! x86/instruction-handler-jcc (opcode immsz) + "Return an instruction handler for a Jcc instruction at OPCODE. +IMMSZ is the size of the displacement from RIP." + (lambda (_pats ops) + (let ((ret (x86/ins/new :opcode opcode))) + (x86/ins/set-disp! ret (cons immsz (car ops))) + ret))) + +(defun! x86/generate-handlers-arith (opbase group1reg) + "Return handlers for an arithmetic mnemonic starting at OPBASE. +The REG value in ModR/M is indicated by GROUP1REG." + (list + (cons '("Eb" "Gb") (x86/default-instruction-handler (+ opbase 0))) + (cons '("Ev" "Gv") (x86/default-instruction-handler (+ opbase 1))) + (cons '("Gb" "Eb") (x86/default-instruction-handler (+ opbase 2))) + (cons '("Gv" "Ev") (x86/default-instruction-handler (+ opbase 3))) + (cons '(al "Ib") (x86/default-instruction-handler (+ opbase 4))) + (cons '((ax eax rax) "Iz") (x86/default-instruction-handler (+ opbase 5))) + (cons '("Eb" "Ib") + (x86/default-instruction-handler 0x80 + :posthook (lambda (ins _) (x86/ins/set-modrm-reg! ins group1reg)))) + (cons '("Ev" "Iz") + (x86/default-instruction-handler 0x81 + :posthook (lambda (ins _) (x86/ins/set-modrm-reg! ins group1reg)))) + (cons '("Ev" "Ib") + (x86/default-instruction-handler 0x83 + :posthook (lambda (ins _) (setf (x86/ins-modrm-reg ins) group1reg)))))) + +(setq! x86/registers-+reg-base + '( (+rb . (al cl dl bl ah ch dh bh)) + (+rw . (ax cx dx bx sp bp si di)) + (+rd . (eax ecx edx ebx esp ebp esi edi)) + (+rq . (rax rcx rdx rbx rsp rbp rsi rdi)))) +(setq! x86/registers-+reg-extended + '( (+rw . (r8b r9b r10b r11b r12b r13b r14b r15b)) + (+rw . (r8w r9w r10w r11w r12w r13w r14w r15w)) + (+rd . (r8d r9d r10d r11d r12d r13d r14d r15d)) + (+rq . (r8 r9 r10 r11 r12 r13 r14 r15)))) +(defun! x86/generate-handlers-opcode-+reg (opbase extraops addends & args) + "Generate handlers for a family of opcodes that uses the +reg encoding. +OPBASE is the base opcode. +EXTRAOPS are additional operands after the register operand. +ADDENDS is a list of symbols like +rw, +rq etc. that denote allowed registers. +ARGS are passed verbatim to `u/x86/default-instruction-handler." + (list/map + (lambda (it) + (let ( (abase (list/map (lambda (a) (list/nth it (alist/get a x86/registers-+reg-base))) addends)) + (aext (list/map (lambda (a) (list/nth it (alist/get a x86/registers-+reg-extended))) addends))) + (cons + (cons (list/append abase aext) extraops) + (apply 'x86/default-instruction-handler (cons (+ opbase it) args))))) + (list/iota 8))) + +(setq! + x86/mnemonic-table + (list + (cons 'add (x86/generate-handlers-arith 0x00 0)) + (cons 'or (x86/generate-handlers-arith 0x08 1)) + (cons 'adc (x86/generate-handlers-arith 0x10 2)) + (cons 'sbb (x86/generate-handlers-arith 0x18 3)) + (cons 'and (x86/generate-handlers-arith 0x20 4)) + (cons 'sub (x86/generate-handlers-arith 0x28 5)) + (cons 'xor (x86/generate-handlers-arith 0x30 6)) + (cons 'cmp (x86/generate-handlers-arith 0x38 7)) + (cons 'push + (x86/generate-handlers-opcode-+reg 0x50 '() '(+rw +rq) + :default-size 8 + :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))) + (cons 'pop + (x86/generate-handlers-opcode-+reg 0x58 '() '(+rw +rq) + :default-size 8 + :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))) + (list 'jo + (cons '("Jb") (x86/instruction-handler-jcc 0x70 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x80) 4))) + (list 'jno + (cons '("Jb") (x86/instruction-handler-jcc 0x71 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x81) 4))) + (list 'jb + (cons '("Jb") (x86/instruction-handler-jcc 0x72 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x82) 4))) + (list 'jnb + (cons '("Jb") (x86/instruction-handler-jcc 0x73 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x83) 4))) + (list 'jz + (cons '("Jb") (x86/instruction-handler-jcc 0x74 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x84) 4))) + (list 'jnz + (cons '("Jb") (x86/instruction-handler-jcc 0x75 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x85) 4))) + (list 'jbe + (cons '("Jb") (x86/instruction-handler-jcc 0x76 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x86) 4))) + (list 'jnbe + (cons '("Jb") (x86/instruction-handler-jcc 0x77 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x87) 4))) + (list 'js + (cons '("Jb") (x86/instruction-handler-jcc 0x78 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x88) 4))) + (list 'jns + (cons '("Jb") (x86/instruction-handler-jcc 0x79 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x89) 4))) + (list 'jp + (cons '("Jb") (x86/instruction-handler-jcc 0x7a 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8a) 4))) + (list 'jnp + (cons '("Jb") (x86/instruction-handler-jcc 0x7b 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8b) 4))) + (list 'jl + (cons '("Jb") (x86/instruction-handler-jcc 0x7c 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8c) 4))) + (list 'jnl + (cons '("Jb") (x86/instruction-handler-jcc 0x7d 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8d) 4))) + (list 'jle + (cons '("Jb") (x86/instruction-handler-jcc 0x7e 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8e) 4))) + (list 'jnle + (cons '("Jb") (x86/instruction-handler-jcc 0x7f 1)) + (cons '("Jz") (x86/instruction-handler-jcc '(0x0f 0x8f) 4))) + (cons 'mov + (x86/generate-handlers-opcode-+reg 0xb0 '("Ib") '(+rb) + :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))) + (cons 'mov + (x86/generate-handlers-opcode-+reg 0xb8 '("Iv") '(+rw +rd +rq) + :posthook (lambda (ins ops) (x86/ins/set-rex-b! ins (x86/register-extended? (car ops)))))) + (list 'jmp + (cons '("Ev") + (x86/default-instruction-handler 0xff + :default-size 8 + :posthook (lambda (ins _) (setf (u/x86/ins-modrm-reg ins) 4))))) + (list 'syscall + (cons '() (lambda (_ _) (x86/ins/new :opcode '(0x0f 0x05))))) + )) + +(defun! x86/asm (op) + "Assemble OP to an instruction." + (let ((mnem (car op)) (operands (cdr op))) + (let ((variants (or (alist/get mnem x86/mnemonic-table) (error! "unknown mnemonic")))) + (let ((v + (list/find + (lambda (it) + (and (eq? (list/len (car it)) (list/len operands)) + (list/all? (lambda (x) x) (list/zip-with 'x86/operand-match? (car it) operands)))) + variants))) + (if (and v (function? (cdr v))) + (funcall (cdr v) (car v) operands) + (error! "could not identify instruction")))))) + +(setq! test-ins (x86/asm '(syscall))) +(print! test-ins) +(print! (x86/ins-bytes test-ins)) -- cgit v1.2.3