1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
|
#include <lcq/pit/types.h>
#include <lcq/pit/lexer.h>
#include <lcq/pit/parser.h>
#include <lcq/pit/runtime.h>
static pit_lex_token peek(pit_parser *st) {
if (!st) return PIT_LEX_TOKEN_ERROR;
return st->next.token;
}
static pit_lex_token advance(pit_parser *st) {
if (!st) return PIT_LEX_TOKEN_ERROR;
st->cur = st->next;
st->next.token = pit_lex_next(st->lexer);
st->next.start = st->lexer->start;
st->next.end = st->lexer->end;
st->next.line = st->lexer->start_line;
st->next.column = st->lexer->start_column;
return st->cur.token;
}
static bool match(pit_parser *st, pit_lex_token t) {
if (peek(st) == t) {
advance(st);
return true;
} else return false;
}
static void get_token_string(pit_parser *st, char *buf, i64 len) {
i64 diff = st->cur.end - st->cur.start;
i64 tlen = diff >= len ? len - 1 : diff;
pit_string_memcpy((u8 *) buf, (u8 *) st->lexer->input + st->cur.start, (size_t) tlen);
buf[tlen] = 0;
}
static i64 digit_value(char c) {
if (c >= '0' && c <= '9') {
return c - '0';
} else if (c >= 'a' && c <= 'f') {
return c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') {
return c - 'A' + 10;
} else {
return 0;
}
}
void pit_parser_from_lexer(pit_parser *ret, pit_lexer *lex) {
ret->lexer = lex;
ret->cur.token = ret->next.token = PIT_LEX_TOKEN_ERROR;
ret->cur.start = ret->next.start = 0;
ret->cur.end = ret->next.end = 0;
ret->cur.line = ret->next.line = -1;
ret->cur.column = ret->next.column = -1;
advance(ret);
}
/* parse a single expression */
pit_value pit_parse(pit_runtime *rt, pit_parser *st, bool *eof) {
if (rt == NULL || st == NULL) return PIT_NIL;
pit_lex_token t = advance(st);
rt->source_line = st->cur.line;
rt->source_column = st->cur.column;
switch (t) {
case PIT_LEX_TOKEN_ERROR:
pit_error(rt, "encountered an error while lexing: %s", st->lexer->error);
return PIT_NIL;
case PIT_LEX_TOKEN_EOF:
if (eof != NULL) {
*eof = true;
} else {
pit_error(rt, "end-of-file while parsing");
}
return PIT_NIL;
case PIT_LEX_TOKEN_LPAREN: {
/* to construct a cons-list, we need the arguments "backwards"
we could reverse or build up a temporary list
(or use non-tail recursion, which is basically the temporary list on the stack)
we choose to build a temporary list on the scratch arena
*/
i64 scratch_reset = rt->scratch->next;
pit_value ret = PIT_NIL;
while (!match(st, PIT_LEX_TOKEN_RPAREN)) {
if (match(st, PIT_LEX_TOKEN_DOT)) {
ret = pit_parse(rt, st, eof);
if (match(st, PIT_LEX_TOKEN_RPAREN)) {
break;
} else {
pit_error(rt, "unterminated dotted list");
return PIT_NIL;
}
} else {
pit_value *cell = pit_arena_alloc_bulk(rt->scratch, sizeof(pit_value));
*cell = pit_parse(rt, st, eof);
}
if (rt->error != PIT_NIL || (eof != NULL && *eof)) {
pit_error(rt, "unterminated list");
return PIT_NIL; /* if we hit an error, stop!*/
}
}
for (i64 i = rt->scratch->next - (i64) sizeof(pit_value);
i >= scratch_reset;
i -= (i64) sizeof(pit_value)
) {
pit_value *v = pit_arena_get(rt->scratch, (i32) i);
ret = pit_cons(rt, *v, ret);
}
rt->scratch->next = scratch_reset;
return ret;
}
case PIT_LEX_TOKEN_LSQUARE: {
i64 scratch_reset = rt->scratch->next;
i64 len = 0;
while (!match(st, PIT_LEX_TOKEN_RSQUARE)) {
pit_value *cell = pit_arena_alloc_bulk(rt->scratch, sizeof(pit_value));
*cell = pit_parse(rt, st, eof);
len += 1;
if (rt->error != PIT_NIL || (eof != NULL && *eof)) {
pit_error(rt, "unterminated array literal");
return PIT_NIL;
}
}
rt->scratch->next = scratch_reset;
return pit_array_from_buf(rt, pit_arena_get(rt->scratch, (i32) scratch_reset), len);
}
case PIT_LEX_TOKEN_QUOTE:
return pit_list(rt, 2, pit_intern_cstr(rt, "quote"), pit_parse(rt, st, eof));
case PIT_LEX_TOKEN_INTEGER_LITERAL: {
i64 idx = st->cur.start;
i64 base = 10;
i64 total = 0;
char c = st->lexer->input[idx++];
if (c == '0' && idx + 1 < st->cur.end) {
switch (st->lexer->input[idx++]) {
case 'b': base = 2; break;
case 'o': base = 8; break;
case 'x': base = 16; break;
default: pit_error(rt, "unknown integer base"); return PIT_NIL;
}
} else { total = digit_value(c); }
while (idx < st->cur.end) {
total *= base;
total += digit_value(st->lexer->input[idx++]);
if (total > 0x1ffffffffffff) {
pit_error(rt, "integer literal too large"); return PIT_NIL;
}
}
return pit_integer_new(rt, total);
}
case PIT_LEX_TOKEN_STRING_LITERAL: {
char buf[256] = {0};
get_token_string(st, buf, sizeof(buf));
i64 len = (i64) pit_string_strlen(buf);
i64 cur = 0;
for (i64 i = 1; i < len; ++i) {
if (buf[i] == '\\' && i + 1 < len) buf[cur++] = buf[++i];
else if (buf[i] != '"') buf[cur++] = buf[i];
else break;
}
return pit_bytes_new(rt, (u8 *) buf, cur);
}
case PIT_LEX_TOKEN_SYMBOL: {
char buf[256] = {0};
get_token_string(st, buf, sizeof(buf));
return pit_intern_cstr(rt, buf);
}
default:
pit_error(rt, "unexpected token: %s", pit_lex_token_name(t));
return PIT_NIL;
}
}
|