-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtokenizer.h
89 lines (70 loc) · 1.99 KB
/
tokenizer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#pragma once
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <istream>
#include <optional>
#include <string>
#include <string_view>
#include <variant>

#include "error.h"
/// Token that carries a name as a std::string.
struct SymbolToken {
/// Text of the symbol.
std::string name;
/// Equality against another SymbolToken. Only declared here; the definition is not in this header.
bool operator==(const SymbolToken& other) const;
};
/// Tag token with no data members.
/// NOTE(review): presumably stands for the `'` character accepted by the tokenizer — confirm in the .cpp.
struct QuoteToken {
/// Equality against another QuoteToken. Only declared here; the definition is not in this header.
bool operator==(const QuoteToken&) const;
};
/// Tag token with no data members.
/// NOTE(review): presumably stands for the `.` character accepted by the tokenizer — confirm in the .cpp.
struct DotToken {
/// Equality against another DotToken. Only declared here; the definition is not in this header.
bool operator==(const DotToken&) const;
};
/// Bracket token with two states, OPEN and CLOSE.
/// NOTE(review): presumably `(` and `)` respectively — confirm in the .cpp.
enum class BracketToken { OPEN, CLOSE };
/// Token that carries a signed 64-bit integer value.
struct ConstantToken {
    /// Numeric payload. Zero-initialized so that a default-constructed token never holds an
    /// indeterminate value; the struct stays an aggregate, so `ConstantToken{42}` still works.
    int64_t value{0};

    /// Equality against another ConstantToken. Only declared here; the definition is not in
    /// this header.
    bool operator==(const ConstantToken& other) const;
};
/// Token that carries a boolean value.
struct BooleanToken {
    /// Boolean payload. Initialized to false so that a default-constructed token never holds
    /// an indeterminate value; the struct stays an aggregate, so `BooleanToken{true}` still works.
    bool value{false};

    /// Equality against another BooleanToken. Only declared here; the definition is not in
    /// this header.
    bool operator==(const BooleanToken& other) const;
};
/// Sum type over every token kind declared in this header.
/// ConstantToken is the first alternative, so a default-constructed Token holds a ConstantToken.
using Token =
std::variant<ConstantToken, BracketToken, SymbolToken, QuoteToken, DotToken, BooleanToken>;
/// Reads characters from a caller-supplied std::istream and hands them out as Token values.
///
/// The stream is held through a raw, non-owning pointer, so it must outlive the Tokenizer.
class Tokenizer {
public:
    /// Validates all remaining input, rewinds the stream, then skips leading whitespace.
    ///
    /// Every character still available in `in` is consumed once and checked with
    /// AvailableChars(). The stream is then stepped back by the same number of characters
    /// using unget(), so the underlying buffer must support putting back everything that
    /// was read. The result of unget() is not checked.
    /// NOTE(review): string streams support this; confirm for any other stream type in use.
    ///
    /// @param in  Stream to tokenize. It is dereferenced without a null check.
    /// @throws SyntaxError on the first character rejected by AvailableChars(). The stream
    ///         is not rewound in that case.
    Tokenizer(std::istream* in) : in_(in) {
        size_t consumed = 0;
        while (!IsEnd()) {
            const char curr = static_cast<char>(in_->get());
            if (!AvailableChars(curr)) {
                throw SyntaxError{"unavailable symbol: " + std::string(1, curr) +
                                  std::to_string(static_cast<int>(curr))};
            }
            ++consumed;
        }
        // Put back exactly what the validation pass consumed.
        for (; consumed > 0; --consumed) {
            in_->unget();
        }
        SkipSpaces();
    }

    /// Declared here, defined out of line. The constructor and SkipSpaces() use it as the
    /// "no more input" test before reading from the stream.
    bool IsEnd();

    /// Declared here, defined out of line.
    /// NOTE(review): presumably advances to the next token — confirm in the .cpp.
    void Next();

    /// Declared here, defined out of line.
    /// NOTE(review): presumably returns the current token — confirm in the .cpp.
    Token GetToken();

private:
    /// Consumes characters while the stream is not at its end and the next one is whitespace.
    void SkipSpaces() {
        // peek() yields an int that is either EOF or an unsigned-char value, which is exactly
        // the domain std::isspace accepts, so no cast is needed here.
        while (!IsEnd() && std::isspace(in_->peek())) {
            in_->get();
        }
    }

    /// Returns true when `c` is a letter or one of `<=>*#/`.
    static bool BeginsWith(char c) {
        static constexpr std::string_view kAvailable = "<=>*#/";
        // <cctype> classifiers are undefined for negative arguments other than EOF, and a
        // plain char can be negative for bytes >= 0x80, hence the unsigned char cast.
        return std::isalpha(static_cast<unsigned char>(c)) ||
               kAvailable.find(c) != std::string_view::npos;
    }

    /// Returns true when `c` satisfies BeginsWith(), is a decimal digit, or is one of `?!-`.
    static bool AvailableCharsInSymbol(char c) {
        static constexpr std::string_view kAvailable = "?!-";
        return BeginsWith(c) || std::isdigit(static_cast<unsigned char>(c)) ||
               kAvailable.find(c) != std::string_view::npos;
    }

    /// Returns true when `c` belongs to the full accepted input alphabet: anything accepted
    /// by AvailableCharsInSymbol(), whitespace, or one of `().'+-`.
    static bool AvailableChars(char c) {
        static constexpr std::string_view kAvailable = "().'+-";
        return AvailableCharsInSymbol(c) || std::isspace(static_cast<unsigned char>(c)) ||
               kAvailable.find(c) != std::string_view::npos;
    }

    /// Borrowed input stream; not owned by this class.
    std::istream* in_;
    /// Zero-initialized and not referenced anywhere in this header.
    /// NOTE(review): presumably maintained by the out-of-line members — confirm in the .cpp.
    size_t token_len_{};
};