1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include "utils.h"
#include "lexer.h"
#include "types.h"
/*
 * Printable name for each token kind, indexed by the token's enum value
 * (see pit_lex_token_name). Any slot below PIT_LEX_TOKEN__SENTINEL that is
 * not listed here is implicitly NULL.
 */
const char *PIT_LEX_TOKEN_NAMES[PIT_LEX_TOKEN__SENTINEL] = {
    /* end of input */
    [PIT_LEX_TOKEN_EOF]             = "eof",

    /* single-character punctuation */
    [PIT_LEX_TOKEN_LPAREN]          = "lparen",
    [PIT_LEX_TOKEN_RPAREN]          = "rparen",
    [PIT_LEX_TOKEN_DOT]             = "dot",
    [PIT_LEX_TOKEN_QUOTE]           = "quote",

    /* multi-character tokens */
    [PIT_LEX_TOKEN_INTEGER_LITERAL] = "integer_literal",
    [PIT_LEX_TOKEN_STRING_LITERAL]  = "string_literal",
    [PIT_LEX_TOKEN_SYMBOL]          = "symbol",
};
/*
 * Look up the printable name of token kind `t` in PIT_LEX_TOKEN_NAMES.
 *
 * Returns NULL when `t` lies outside the table (negative, or at/after
 * PIT_LEX_TOKEN__SENTINEL) instead of reading past the end of the array.
 * A NULL result is also what an in-range slot without a name yields.
 */
const char *pit_lex_token_name(pit_lex_token t) {
    /* A negative value converts to a huge size_t and fails the same test. */
    if ((size_t)t >= (size_t)PIT_LEX_TOKEN__SENTINEL) {
        return NULL;
    }
    return PIT_LEX_TOKEN_NAMES[t];
}
/*
 * Report whether the lexer still has unread bytes, i.e. its read cursor
 * (`end`) has not yet reached `len`. A NULL lexer has no input.
 */
static bool is_more_input(pit_lexer *st) {
    if (!st) {
        return false;
    }
    return st->len > st->end;
}
/*
 * Classify `c` as a character that may continue a symbol token.
 *
 * A symbol character is any printable, non-whitespace character other than
 * the delimiters `(`, `)`, `.`, `'` and `"`. Returns non-zero if `c`
 * qualifies, zero otherwise.
 *
 * Within this file the argument originates from a plain `char`, which can be
 * negative for bytes >= 0x80 when `char` is signed. The <ctype.h>
 * classifiers are only defined for values representable as `unsigned char`
 * (or EOF), so the value is normalised before it reaches them.
 */
static int is_symchar(int c) {
    const int uc = (unsigned char)c;

    switch (uc) {
    case '(':
    case ')':
    case '.':
    case '\'':
    case '"':
        return 0;
    default:
        return isprint(uc) && !isspace(uc);
    }
}
/*
 * Return the byte under the read cursor without consuming it, or 0 when
 * there is no more input.
 */
static char peek(pit_lexer *st) {
    if (!is_more_input(st)) {
        return 0;
    }
    return st->input[st->end];
}
/*
 * Consume and return the byte under the read cursor. When there is no more
 * input the cursor is left untouched and 0 is returned.
 */
static char advance(pit_lexer *st) {
    if (!is_more_input(st)) {
        return 0;
    }
    char current = st->input[st->end];
    st->end++;
    return current;
}
/*
 * Consume the next byte if the classifier `f` accepts it.
 *
 * `f` follows the <ctype.h> calling convention (takes an int, returns
 * non-zero on a match). Returns true and moves the cursor forward by one when
 * the byte matched; returns false and leaves the cursor alone otherwise.
 */
static bool match(pit_lexer *st, int (*f)(int)) {
    /* Never step past the end, even for a classifier that would accept the
       0 that peek() reports once the input is exhausted. */
    if (!is_more_input(st)) {
        return false;
    }
    /* <ctype.h>-style classifiers are undefined for negative arguments other
       than EOF; plain char may be signed, so go through unsigned char. */
    if (!f((unsigned char)peek(st))) {
        return false;
    }
    st->end += 1;
    return true;
}
/*
 * Create a lexer whose input is the full contents of the file at `path`.
 *
 * The file is opened in text mode, measured with fseek/ftell and read into a
 * zero-filled heap buffer. The lexer's `len` is set to the number of bytes
 * actually read, and `start`/`end` are set to 0.
 *
 * Returns a malloc-allocated lexer (with a calloc-allocated `input` buffer)
 * on success. On any failure (open, size query, allocation, read) everything
 * acquired so far is released, pit_panic is called with a message naming
 * `path`, and NULL is returned should pit_panic return.
 */
pit_lexer *pit_lex_file(char *path) {
    FILE *f = fopen(path, "r");
    if (f == NULL) {
        pit_panic("failed to open file for lexing: %s", path);
        return NULL;
    }

    /* ftell reports -1 on failure; never let that become a length. */
    long size = -1;
    if (fseek(f, 0, SEEK_END) == 0) {
        size = ftell(f);
    }
    if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        pit_panic("failed to determine size of file for lexing: %s", path);
        return NULL;
    }

    pit_lexer *ret = malloc(sizeof(*ret));
    if (ret == NULL) {
        fclose(f);
        pit_panic("out of memory while lexing file: %s", path);
        return NULL;
    }

    /* One spare byte keeps the request non-zero for an empty file (calloc(0)
       may legitimately return NULL) and leaves a trailing 0 after the data. */
    ret->input = calloc((size_t)size + 1, sizeof(char));
    if (ret->input == NULL) {
        free(ret);
        fclose(f);
        pit_panic("out of memory while lexing file: %s", path);
        return NULL;
    }

    size_t nread = fread(ret->input, sizeof(char), (size_t)size, f);
    if (ferror(f)) {
        free(ret->input);
        free(ret);
        fclose(f);
        pit_panic("failed to read file for lexing: %s", path);
        return NULL;
    }
    fclose(f);

    /* In text mode the stream may deliver fewer bytes than ftell reported
       (e.g. newline translation), so trust the count fread returned. */
    ret->len = nread;
    ret->start = 0;
    ret->end = 0;
    return ret;
}
/*
 * Scan and return the kind of the next token in the lexer's input.
 *
 * On return, st->start is the offset where the token begins and st->end is
 * the offset just past it. Whitespace and `;` comments (running to the end
 * of the line) are skipped before a token is recognised.
 *
 * Token rules, as implemented below:
 *   - `(` `)` `.` `'` are single-character tokens;
 *   - `"` starts a string literal that runs to the next unescaped `"`,
 *     where a backslash causes the following byte to be skipped;
 *   - a decimal digit starts an integer literal made of digits;
 *   - any other byte starts a symbol, extended while is_symchar accepts
 *     the following bytes (the first byte itself is not checked);
 *   - a 0 from advance() ends the stream: that is what it returns at end of
 *     input, and a literal NUL byte in the input is indistinguishable.
 *
 * An unterminated string is reported through pit_panic; if pit_panic
 * returns, PIT_LEX_TOKEN_EOF is returned.
 */
pit_lex_token pit_lex_next(pit_lexer *st) {
restart:
    st->start = st->end;
    char c = advance(st);
    switch (c) {
    case 0:
        return PIT_LEX_TOKEN_EOF;
    case ';':
        /* Line comment: discard through the newline (or end of input). */
        while (is_more_input(st) && advance(st) != '\n') {}
        goto restart;
    case '(':
        return PIT_LEX_TOKEN_LPAREN;
    case ')':
        return PIT_LEX_TOKEN_RPAREN;
    case '.':
        return PIT_LEX_TOKEN_DOT;
    case '\'':
        return PIT_LEX_TOKEN_QUOTE;
    case '"':
        while (peek(st) != '"') {
            if (peek(st) == '\\') advance(st); // skip escaped characters
            if (!advance(st)) {
                pit_panic("unterminated string starting at: %d", st->start);
                /* advance() makes no progress at end of input, so without
                   this return the loop would spin forever should pit_panic
                   come back. */
                return PIT_LEX_TOKEN_EOF;
            }
        }
        advance(st); /* consume the closing quote */
        return PIT_LEX_TOKEN_STRING_LITERAL;
    default:
        /* <ctype.h> classifiers are undefined for negative arguments other
           than EOF; plain char may be signed, so go through unsigned char. */
        if (isspace((unsigned char)c)) goto restart;
        if (isdigit((unsigned char)c)) {
            while (match(st, isdigit)) {}
            return PIT_LEX_TOKEN_INTEGER_LITERAL;
        }
        while (match(st, is_symchar)) {}
        return PIT_LEX_TOKEN_SYMBOL;
    }
}
|