summaryrefslogtreecommitdiff
path: root/src/lexer.c
blob: 159f73bc1f99b9acccc210389d201646892058f9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

#include "utils.h"
#include "lexer.h"
#include "types.h"

/*
 * Human-readable name for each lexer token kind, indexed by the token's
 * pit_lex_token value. The array is sized by PIT_LEX_TOKEN__SENTINEL; any
 * index below the sentinel that has no initializer here is left as a null
 * pointer.
 */
const char *PIT_LEX_TOKEN_NAMES[PIT_LEX_TOKEN__SENTINEL] = {
    [PIT_LEX_TOKEN_EOF] = "eof",
    [PIT_LEX_TOKEN_LPAREN] = "lparen",
    [PIT_LEX_TOKEN_RPAREN] = "rparen",
    [PIT_LEX_TOKEN_DOT] = "dot",
    [PIT_LEX_TOKEN_QUOTE] = "quote",
    [PIT_LEX_TOKEN_INTEGER_LITERAL] = "integer_literal",
    [PIT_LEX_TOKEN_STRING_LITERAL] = "string_literal",
    [PIT_LEX_TOKEN_SYMBOL] = "symbol",
};

/*
 * Return the human-readable name of token kind `t`.
 *
 * Looks `t` up in PIT_LEX_TOKEN_NAMES. A value outside the table (negative,
 * or >= PIT_LEX_TOKEN__SENTINEL) or a slot with no name yields the string
 * "unknown" instead of reading out of bounds or returning a null pointer.
 * The returned string is a literal and must not be freed or modified.
 */
const char *pit_lex_token_name(pit_lex_token t) {
    /* The unsigned comparison also rejects negative values in one test. */
    if ((unsigned)t >= (unsigned)PIT_LEX_TOKEN__SENTINEL) return "unknown";
    const char *name = PIT_LEX_TOKEN_NAMES[t];
    return name ? name : "unknown";
}

/*
 * Report whether the lexer still has unread input: true when `st` is non-null
 * and its read position `end` is below `len`.
 */
static bool is_more_input(pit_lexer *st) {
    if (!st) return false;
    return st->end < st->len;
}

/*
 * Predicate: may `c` appear inside a symbol token?
 *
 * Returns 1 for printable, non-whitespace characters other than the
 * delimiters ( ) . ' and ", and 0 for everything else.
 */
static int is_symchar(int c) {
    switch (c) {
    case '(':
    case ')':
    case '.':
    case '\'':
    case '"':
        return 0; /* reserved delimiter */
    default:
        break;
    }
    if (!isprint(c)) return 0;
    return !isspace(c);
}

/*
 * Look at the next unread character without consuming it.
 * Yields 0 when no input remains (or `st` is null).
 */
static char peek(pit_lexer *st) {
    if (!is_more_input(st)) {
        return 0;
    }
    return st->input[st->end];
}

/*
 * Consume the next unread character and return it.
 * When no input remains (or `st` is null) the position is left alone and 0
 * is returned.
 */
static char advance(pit_lexer *st) {
    if (!is_more_input(st)) {
        return 0;
    }
    char ch = st->input[st->end];
    st->end += 1;
    return ch;
}

/*
 * Consume the next character if the predicate `f` accepts it.
 *
 * `f` follows the <ctype.h> convention (int in, nonzero = accepted).
 * Returns true and moves the read position forward by one when the next
 * character is accepted; otherwise returns false and consumes nothing.
 * At end of input nothing is consumed and `f` is not called, so the read
 * position can never move past `len`.
 */
static bool match(pit_lexer *st, int (*f)(int)) {
    if (!is_more_input(st)) return false;
    /* <ctype.h> predicates require a value representable as unsigned char
     * (or EOF); a plain char can be negative for bytes >= 0x80. */
    if (!f((unsigned char)peek(st))) return false;
    st->end += 1;
    return true;
}

/*
 * Create a lexer over the full contents of the file at `path`.
 *
 * The file is opened in text mode, its size is measured by seeking to the
 * end, and the contents are read into a freshly allocated buffer. The
 * returned lexer has `start` and `end` at 0 and `len` set to the number of
 * bytes actually read (which can be smaller than the measured size in text
 * mode on platforms that translate line endings). The buffer is allocated
 * one byte larger than the measured size and zero-filled, so it is valid
 * even for an empty file and is followed by a NUL byte.
 *
 * The lexer and its `input` buffer are heap-allocated; no release function
 * is visible in this file, so ownership passes to the caller.
 *
 * Every failure (open, seek/tell, allocation, read) is reported through
 * pit_panic. NOTE(review): pit_panic is assumed not to return, as the
 * original code already relied on for the fopen failure -- confirm in utils.h.
 */
pit_lexer *pit_lex_file(char *path) {
    FILE *f = fopen(path, "r");
    if (!f) pit_panic("failed to open file for lexing: %s", path);

    /* Measure the file by seeking to the end, then rewind for reading. */
    long size = -1;
    if (fseek(f, 0, SEEK_END) == 0) size = ftell(f);
    if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        pit_panic("failed to determine size of file for lexing: %s", path);
    }

    /* calloc so that any lexer field not set below starts out zeroed. */
    pit_lexer *ret = calloc(1, sizeof(*ret));
    if (!ret) {
        fclose(f);
        pit_panic("out of memory while lexing file: %s", path);
    }

    /* +1 keeps the request non-zero for empty files and leaves a trailing NUL. */
    ret->input = calloc((size_t)size + 1, sizeof(char));
    if (!ret->input) {
        fclose(f);
        free(ret);
        pit_panic("out of memory while lexing file: %s", path);
    }

    size_t nread = fread(ret->input, sizeof(char), (size_t)size, f);
    int read_failed = ferror(f);
    fclose(f);
    if (read_failed) {
        free(ret->input);
        free(ret);
        pit_panic("failed to read file for lexing: %s", path);
    }

    /* nread <= size, so it fits in the long that was previously assigned here. */
    ret->len = (long)nread;
    ret->start = 0;
    ret->end = 0;
    return ret;
}

/*
 * Scan the next token from `st` and return its kind.
 *
 * On return, `st->start` is the offset where the token begins and `st->end`
 * is one past its last character. Whitespace and comments (from ';' through
 * the next newline) are skipped before the token.
 *
 * Token rules:
 *   - end of input (or a null `st`)  -> PIT_LEX_TOKEN_EOF
 *   - ( ) . '                        -> LPAREN, RPAREN, DOT, QUOTE
 *   - "..."                          -> STRING_LITERAL; a backslash causes
 *                                       the following character to be
 *                                       skipped, so \" does not end the string
 *   - a run of decimal digits        -> INTEGER_LITERAL
 *   - anything else                  -> SYMBOL, extended while is_symchar()
 *                                       accepts the next character
 *
 * A string with no closing quote is reported via pit_panic.
 * NOTE(review): pit_panic is assumed not to return -- confirm in utils.h.
 */
pit_lex_token pit_lex_next(pit_lexer *st) {
    /* The helpers already tolerate a null lexer; do the same here. */
    if (!st) return PIT_LEX_TOKEN_EOF;

    for (;;) {
        st->start = st->end;
        char c = advance(st);
        switch (c) {
        case 0:
            return PIT_LEX_TOKEN_EOF;
        case ';':
            /* Comment: discard everything up to and including the newline. */
            while (is_more_input(st) && advance(st) != '\n') {}
            continue;
        case '(':
            return PIT_LEX_TOKEN_LPAREN;
        case ')':
            return PIT_LEX_TOKEN_RPAREN;
        case '.':
            return PIT_LEX_TOKEN_DOT;
        case '\'':
            return PIT_LEX_TOKEN_QUOTE;
        case '"':
            while (peek(st) != '"') {
                if (peek(st) == '\\') advance(st); /* skip escaped characters */
                /* advance() yields 0 once the input is exhausted. The cast
                 * makes the argument match the conversion specifier whatever
                 * integer type `start` has. */
                if (!advance(st)) pit_panic("unterminated string starting at: %ld", (long)st->start);
            }
            advance(st); /* consume the closing quote */
            return PIT_LEX_TOKEN_STRING_LITERAL;
        default: {
            /* <ctype.h> functions need an unsigned char value; a plain char
             * may be negative for bytes >= 0x80. */
            unsigned char uc = (unsigned char)c;
            if (isspace(uc)) continue;
            if (isdigit(uc)) {
                while (match(st, isdigit)) {}
                return PIT_LEX_TOKEN_INTEGER_LITERAL;
            }
            while (match(st, is_symchar)) {}
            return PIT_LEX_TOKEN_SYMBOL;
        }
        }
    }
}