1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
|
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <lcq/pit/utils.h>
#include <lcq/pit/lexer.h>
#include <lcq/pit/types.h>
/*
 * Printable names for lexer token kinds. pit_lex_token_name() indexes this
 * table directly with a pit_lex_token value, so the entries are positional:
 * the commented-out designators record which enumerator each string is
 * meant to line up with.
 *
 * NOTE(review): there is no entry for PIT_LEX_TOKEN_ERROR, which
 * pit_lex_next() can return. Depending on where that enumerator sits
 * relative to PIT_LEX_TOKEN__SENTINEL, its slot is either left NULL or the
 * positional order below is shifted -- confirm against the enum declaration.
 */
const char *PIT_LEX_TOKEN_NAMES[PIT_LEX_TOKEN__SENTINEL] = {
/* [PIT_LEX_TOKEN_EOF] = */ "eof",
/* [PIT_LEX_TOKEN_LPAREN] = */ "lparen",
/* [PIT_LEX_TOKEN_RPAREN] = */ "rparen",
/* [PIT_LEX_TOKEN_LSQUARE] = */ "lsquare",
/* [PIT_LEX_TOKEN_RSQUARE] = */ "rsquare",
/* [PIT_LEX_TOKEN_DOT] = */ "dot",
/* [PIT_LEX_TOKEN_QUOTE] = */ "quote",
/* [PIT_LEX_TOKEN_INTEGER_LITERAL] = */ "integer_literal",
/* [PIT_LEX_TOKEN_STRING_LITERAL] = */ "string_literal",
/* [PIT_LEX_TOKEN_SYMBOL] = */ "symbol",
};
/*
 * Look up the printable name of token kind `t`.
 *
 * Returns the matching entry of PIT_LEX_TOKEN_NAMES, or NULL when `t` lies
 * outside the table (previously this read past the end of the array).
 * Table slots that have no initializer are also NULL, so callers should be
 * prepared for a NULL result.
 */
const char *pit_lex_token_name(pit_lex_token t) {
    /* Compare as int so the check is valid whatever the enum's underlying
     * signedness turns out to be. */
    if ((int) t < 0 || (int) t >= (int) PIT_LEX_TOKEN__SENTINEL) {
        return NULL;
    }
    return PIT_LEX_TOKEN_NAMES[t];
}
/*
 * Tell whether the lexer still has unread bytes.
 * A NULL lexer is treated as exhausted rather than dereferenced.
 */
static bool is_more_input(pit_lexer *st) {
    if (st == NULL) {
        return false;
    }
    return st->end < st->len;
}
/*
 * Predicate: may `c` appear inside a symbol token?
 *
 * A symbol character is any printable, non-whitespace byte that is not one
 * of the lexer's own delimiters: ( ) [ ] . ' "
 * The square brackets are excluded because pit_lex_next() emits dedicated
 * tokens for them; without that, "[foo]" would lex "foo]" as one symbol.
 *
 * Returns 1 if `c` is a symbol character, 0 otherwise.
 */
static int is_symchar(int c) {
    /* The <ctype.h> classifiers are only defined for EOF and values
     * representable as unsigned char. Callers hand us a plain char widened
     * to int, which is negative for bytes >= 0x80 where char is signed. */
    const unsigned char uc = (unsigned char) c;

    switch (uc) {
    case '(':
    case ')':
    case '[':
    case ']':
    case '.':
    case '\'':
    case '"':
        return 0;
    default:
        return isprint(uc) && !isspace(uc);
    }
}
/*
 * Predicate: is `c` one of 0-9, a-f or A-F?
 *
 * Implemented with plain range comparisons instead of isdigit(): callers
 * pass a plain char widened to int, which can be negative, and the
 * <ctype.h> classifiers are undefined for such values.
 *
 * Returns 1 for a hexadecimal digit, 0 otherwise.
 */
static int is_hexdigit(int c) {
    return (c >= '0' && c <= '9')
        || (c >= 'a' && c <= 'f')
        || (c >= 'A' && c <= 'F');
}
/*
 * Look at the next unread byte without consuming it.
 * Yields 0 when no input remains.
 */
static char peek(pit_lexer *st) {
    return is_more_input(st) ? st->input[st->end] : 0;
}
/*
 * Consume one byte of input and return it, keeping the line/column
 * counters in step: a newline bumps `line` and resets `column` to 0, any
 * other byte bumps `column`.
 * Yields 0 (and changes nothing) when no input remains.
 */
static char advance(pit_lexer *st) {
    if (!is_more_input(st)) {
        return 0;
    }

    char ch = st->input[st->end];
    st->end += 1;

    if (ch != '\n') {
        st->column += 1;
    } else {
        st->line += 1;
        st->column = 0;
    }
    return ch;
}
/*
 * Consume the next byte only if the predicate `f` accepts it.
 * Returns true when a byte was consumed, false when the lexer was left
 * where it was.
 */
static bool match(pit_lexer *st, int (*f)(int)) {
    bool accepted = f(peek(st)) != 0;
    if (accepted) {
        advance(st);
    }
    return accepted;
}
/*
 * Initialise lexer `ret` to scan the NUL-terminated string `buf`.
 * The input length is taken from strlen(buf); the lexer stores the pointer
 * itself rather than a copy. The cursor starts at offset 0, line 1,
 * column 0, with no error recorded.
 */
void pit_lex_cstr(pit_lexer *ret, char *buf) {
    const i64 length = (i64) strlen(buf);

    ret->input = buf;
    ret->len = length;

    ret->start = ret->end = 0;

    ret->line = 1;
    ret->start_line = 1;
    ret->column = 0;
    ret->start_column = 0;

    ret->error = NULL;
}
/*
 * Initialise lexer `ret` to scan `len` bytes starting at `buf`.
 * The lexer stores the pointer itself rather than a copy. The cursor
 * starts at offset 0, line 1, column 0, with no error recorded.
 */
void pit_lex_bytes(pit_lexer *ret, char *buf, i64 len) {
    /* Source buffer and its extent. */
    ret->input = buf;
    ret->len = len;

    /* Empty token at the very beginning of the input. */
    ret->end = 0;
    ret->start = 0;

    /* Position tracking: lines count from 1, columns from 0. */
    ret->start_line = 1;
    ret->line = 1;
    ret->start_column = 0;
    ret->column = 0;

    ret->error = NULL;
}
/*
 * Read the whole file at `path` into a freshly allocated buffer and
 * initialise lexer `ret` over it.
 *
 * Returns 0 on success and -1 on any failure (open, seek, size query,
 * allocation or short read). On failure the buffer is released and `ret`
 * is left untouched.
 *
 * On success the buffer is handed to the lexer via pit_lex_bytes() and is
 * not freed here; the caller owns it through ret->input for as long as the
 * lexer is in use.
 */
i64 pit_lex_file(pit_lexer *ret, char *path) {
    i64 rc = -1;
    long size = 0;
    char *buf = NULL;

    /* Binary mode: the size reported by ftell() must equal the number of
     * bytes fread() delivers, which text mode does not guarantee on
     * platforms that translate line endings. */
    FILE *f = fopen(path, "rb");
    if (f == NULL) {
        return -1;
    }

    if (fseek(f, 0, SEEK_END) != 0) {
        goto out;
    }
    size = ftell(f);
    if (size < 0) {
        goto out; /* ftell() failed; do not turn -1 into a huge size_t */
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        goto out;
    }

    /* One spare zeroed byte keeps the request non-zero for empty files
     * (calloc(0, ...) may legitimately return NULL) and leaves the buffer
     * NUL-terminated. The lexer is still told the true length. */
    buf = calloc((size_t) size + 1, sizeof *buf);
    if (buf == NULL) {
        goto out;
    }

    if (fread(buf, sizeof *buf, (size_t) size, f) != (size_t) size) {
        goto out;
    }

    pit_lex_bytes(ret, buf, (i64) size);
    buf = NULL; /* ownership moved to the lexer; skip the free below */
    rc = 0;

out:
    free(buf);
    fclose(f);
    return rc;
}
/*
 * Scan and return the next token from lexer `st`.
 *
 * On return, [st->start, st->end) spans the bytes consumed for the token
 * and st->start_line / st->start_column give the position at which it
 * began. Whitespace and ';' comments (running to end of line) are skipped.
 *
 * Returns:
 *   PIT_LEX_TOKEN_EOF              when advance() yields 0, i.e. at end of
 *                                  input (a literal NUL byte in the input
 *                                  is indistinguishable and also ends the
 *                                  scan);
 *   PIT_LEX_TOKEN_ERROR            for a string literal with no closing
 *                                  quote; st->error is set to a message;
 *   one of the punctuation, INTEGER_LITERAL, STRING_LITERAL or SYMBOL
 *   token kinds otherwise.
 */
pit_lex_token pit_lex_next(pit_lexer *st) {
restart:
    /* Every attempt starts a fresh token at the current read position. */
    st->start = st->end;
    st->start_line = st->line;
    st->start_column = st->column;

    char c = advance(st);
    /* <ctype.h> classifiers are undefined for negative values other than
     * EOF; plain char may be signed, so classify through unsigned char. */
    const unsigned char uc = (unsigned char) c;

    switch (c) {
    case 0:
        return PIT_LEX_TOKEN_EOF;
    case ';':
        /* Comment: discard up to and including the newline (or EOF). */
        while (is_more_input(st) && advance(st) != '\n') {}
        goto restart;
    case '(':
        return PIT_LEX_TOKEN_LPAREN;
    case ')':
        return PIT_LEX_TOKEN_RPAREN;
    case '[':
        return PIT_LEX_TOKEN_LSQUARE;
    case ']':
        return PIT_LEX_TOKEN_RSQUARE;
    case '.':
        return PIT_LEX_TOKEN_DOT;
    case '\'':
        return PIT_LEX_TOKEN_QUOTE;
    case '"':
        while (peek(st) != '"') {
            if (peek(st) == '\\') advance(st); /* skip escaped characters */
            /* advance() yielding 0 here means the input ran out before the
             * closing quote was seen. */
            if (!advance(st)) {
                st->error = "unterminated string";
                return PIT_LEX_TOKEN_ERROR;
            }
        }
        advance(st); /* consume the closing quote */
        return PIT_LEX_TOKEN_STRING_LITERAL;
    default:
        if (isspace(uc)) goto restart;
        if (isdigit(uc)) {
            if (c == '0') {
                /* NOTE(review): a leading 0 not followed by x/o/b ends the
                 * literal immediately, so "007" yields three separate
                 * tokens -- confirm this is intended. */
                int next = peek(st);
                if (next != 'x' && next != 'o' && next != 'b') return PIT_LEX_TOKEN_INTEGER_LITERAL;
                advance(st); /* skip base specifier */
            }
            /* Digits are accepted as hex regardless of base; this function
             * does not validate them against the base specifier. */
            while (match(st, is_hexdigit)) {}
            return PIT_LEX_TOKEN_INTEGER_LITERAL;
        } else {
            /* Anything else starts a symbol, extended while symbol
             * characters follow. */
            while (match(st, is_symchar)) {}
            return PIT_LEX_TOKEN_SYMBOL;
        }
    }
}
|