summaryrefslogtreecommitdiff
path: root/src/parser.c
blob: 1a9f6632962e2526092afb0a5c9ffd7dfb5afbfd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "types.h"
#include "lexer.h"
#include "parser.h"
#include "runtime.h"

static pit_lex_token peek(pit_parser *st) {
    if (!st) return PIT_LEX_TOKEN_ERROR;
    return st->next.token;
}

static pit_lex_token advance(pit_parser *st) {
    if (!st) return PIT_LEX_TOKEN_ERROR;
    st->cur = st->next;
    st->next.token = pit_lex_next(st->lexer);
    st->next.start = st->lexer->start;
    st->next.end = st->lexer->end;
    st->next.line = st->lexer->start_line;
    st->next.column = st->lexer->start_column;
    return st->cur.token;
}

static bool match(pit_parser *st, pit_lex_token t) {
    if (peek(st) == t) {
        advance(st);
        return true;
    } else return false;
}

static void get_token_string(pit_parser *st, char *buf, i64 len) {
    i64 diff = st->cur.end - st->cur.start;
    i64 tlen = diff >= len ? len - 1 : diff;
    memcpy(buf, st->lexer->input + st->cur.start, (size_t) tlen);
    buf[tlen] = 0;
}

void pit_parser_from_lexer(pit_parser *ret, pit_lexer *lex) {
    ret->lexer = lex;
    ret->cur.token = ret->next.token = PIT_LEX_TOKEN_ERROR;
    ret->cur.start = ret->next.start = 0;
    ret->cur.end = ret->next.end = 0;
    ret->cur.line = ret->next.line = -1;
    ret->cur.column = ret->next.column = -1;
    advance(ret);
}

/* parse a single expression */
pit_value pit_parse(pit_runtime *rt, pit_parser *st, bool *eof) {
    char buf[256] = {0};
    pit_lex_token t;
    if (rt == NULL || st == NULL) return PIT_NIL;
    t = advance(st);
    rt->source_line = st->cur.line;
    rt->source_column = st->cur.column;
    switch (t) {
    case PIT_LEX_TOKEN_ERROR:
        pit_error(rt, "encountered an error while lexing: %s", st->lexer->error);
        return PIT_NIL;
    case PIT_LEX_TOKEN_EOF:
        if (eof != NULL) {
            *eof = true;
        } else {
            pit_error(rt, "end-of-file while parsing");
        }
        return PIT_NIL;
    case PIT_LEX_TOKEN_LPAREN: {
        /* to construct a cons-list, we need the arguments "backwards"
           we could reverse or build up a temporary list
           (or use non-tail recursion, which is basically the temporary list on the stack)
           we choose to build a temporary list on the scratch arena
        */
        i64 scratch_reset = rt->scratch->next;
        pit_value ret = PIT_NIL;
        i64 i;
        while (!match(st, PIT_LEX_TOKEN_RPAREN)) {
            pit_value *cell = pit_arena_alloc_bulk(rt->scratch, sizeof(pit_value));
            *cell = pit_parse(rt, st, eof);
            if (rt->error != PIT_NIL) return PIT_NIL; /* if we hit an error, stop!*/
        }
        for (i = rt->scratch->next - (i64) sizeof(pit_value); i >= scratch_reset; i -= (i64) sizeof(pit_value)) {
            pit_value *v = pit_arena_idx(rt->scratch, (i32) i);
            ret = pit_cons(rt, *v, ret);
        }
        rt->scratch->next = scratch_reset;
        return ret;
    }
    case PIT_LEX_TOKEN_QUOTE:
        return pit_list(rt, 2, pit_intern_cstr(rt, "quote"), pit_parse(rt, st, eof));
    case PIT_LEX_TOKEN_INTEGER_LITERAL:
        get_token_string(st, buf, sizeof(buf));
        return pit_integer_new(rt, atoi(buf));
    case PIT_LEX_TOKEN_STRING_LITERAL: {
        i64 len, cur, i;
        get_token_string(st, buf, sizeof(buf));
        len = (i64) strlen(buf);
        cur = 0;
        for (i = 1; i < len; ++i) {
            if (buf[i] == '\\' && i + 1 < len) buf[cur++] = buf[++i];
            else if (buf[i] != '"') buf[cur++] = buf[i];
            else break;
        }
        return pit_bytes_new(rt, (u8 *) buf, cur);
    }
    case PIT_LEX_TOKEN_SYMBOL:
        get_token_string(st, buf, sizeof(buf));
        return pit_intern_cstr(rt, buf);
    default:
        pit_error(rt, "unexpected token: %s", pit_lex_token_name(t));
        return PIT_NIL;
    }
}