155 lines
4.6 KiB
C++
155 lines
4.6 KiB
C++
|
#include "parser.hpp"
|
||
|
#include <cstdlib>
|
||
|
|
||
|
using namespace stela;
|
||
|
|
||
|
/// Returns true when `ch` lies within the inclusive range [`begin`, `end`].
///
/// The comparison is done on the plain `char` values, so an inverted range
/// (`begin > end`) never matches.
static inline auto in_range(char ch, char begin, char end) -> bool
{
    // Inside the range <=> not below the lower bound and not above the upper.
    const bool below = ch < begin;
    const bool above = ch > end;
    return not(below or above);
}
|
||
|
|
||
|
/// Returns true when `ch` is one of the characters the lexer treats as
/// whitespace: space, horizontal tab, or line feed.
///
/// Other blank characters (for example carriage return) are not matched.
static inline auto whitespace_char(char ch) -> bool
{
    switch (ch) {
    case ' ':
    case '\t':
    case '\n':
        return true;
    default:
        return false;
    }
}
|
||
|
|
||
|
/// Returns true when `ch` may begin an identifier: an ASCII letter
/// (`a`-`z`, `A`-`Z`) or an underscore. Digits are not accepted here.
static inline auto id_start_char(char ch) -> bool
{
    if (ch == '_') {
        return true;
    }
    if (ch >= 'a' and ch <= 'z') {
        return true;
    }
    return ch >= 'A' and ch <= 'Z';
}
|
||
|
|
||
|
/// Returns true when `ch` may appear after the first character of an
/// identifier: an ASCII letter (`a`-`z`, `A`-`Z`), a decimal digit
/// (`0`-`9`), or an underscore.
///
/// The digit range covers all ten digits. It previously stopped at '1',
/// which ended identifiers such as `x2` after the `x`.
static inline auto id_char(char ch) -> bool
{
    const bool lower = ch >= 'a' and ch <= 'z';
    const bool upper = ch >= 'A' and ch <= 'Z';
    const bool digit = ch >= '0' and ch <= '9';
    return lower or upper or digit or ch == '_';
}
|
||
|
|
||
|
/// Scans and returns the next token from the input.
///
/// Whitespace and line comments (`# ...` and `// ...`, both running up to
/// the next '\n') are skipped. When the input is exhausted an Eof token is
/// returned. Identifiers are looked up in `keyword_map`; non-keywords are
/// appended to `symbol_values` and the token carries their index. Integer
/// literals are appended to `int_values` and the token carries their index.
/// Anything the lexer does not understand yields an error token.
auto Lexer::next() -> Token
{
    // Skip leading whitespace with a loop rather than one recursive call per
    // character, so long runs of blanks cannot grow the call stack.
    while (not done() and whitespace_char(current())) {
        step();
    }

    // The token position is the first significant character.
    auto pos = this->pos();
    if (done()) {
        return token(TokenType::Eof, pos);
    }
    char ch = current();

    // Identifier or keyword.
    if (id_start_char(ch)) {
        std::string value;
        value.push_back(ch);
        step();
        while (not done() and id_char(current())) {
            value.push_back(current());
            step();
        }
        if (this->keyword_map.contains(value)) {
            return token(this->keyword_map[value], pos);
        }
        size_t id = this->symbol_values.size();
        this->symbol_values.push_back(value);
        return Token { TokenType::Id, pos, id };
    }

    // Non-zero decimal integer literal.
    if (in_range(ch, '1', '9')) {
        std::string value;
        value.push_back(ch);
        step();
        // Test the *current* character, not the first digit `ch`: testing
        // `ch` is always true and would consume the rest of the input.
        while (not done() and in_range(current(), '0', '9')) {
            value.push_back(current());
            step();
        }
        int64_t int_value = std::strtoll(value.c_str(), nullptr, 10);
        // The index must refer to `int_values`, where the value is stored.
        size_t id = this->int_values.size();
        this->int_values.push_back(int_value);
        // NOTE(review): integer literals are tagged TokenType::Id, which makes
        // them indistinguishable from identifiers by type. A dedicated
        // integer token type is probably intended - confirm against TokenType.
        return Token { TokenType::Id, pos, id };
    }

    // A lone '0'. Following digits are not consumed here, so they start a
    // new token.
    if (ch == '0') {
        step();
        int64_t int_value = 0;
        size_t id = this->int_values.size();
        this->int_values.push_back(int_value);
        // NOTE(review): same TokenType::Id concern as for non-zero literals.
        return Token { TokenType::Id, pos, id };
    }

    if (ch == '"') {
        // TODO string
        // Until implemented, control falls through to the end of the function
        // and '"' is reported as an unrecognized character.
    }

    // '#' line comment: discard up to (not including) the newline, then lex
    // whatever follows.
    if (ch == '#') {
        while (not done() and current() != '\n') {
            step();
        }
        return next();
    }

    // '//' line comment; a single '/' is not supported yet.
    if (ch == '/') {
        step();
        // Guard against end of input before peeking at the next character.
        if (not done() and current() == '/') {
            while (not done() and current() != '\n') {
                step();
            }
            return next();
        }
        return error_token(pos, "'/' not implemented");
    }

    // '-' or '->'.
    if (ch == '-') {
        step();
        if (not done() and current() == '>') {
            step();
            // NOTE(review): "->" is reported as TokenType::MinusLt; the name
            // suggests "-<" - confirm the enumerator is the intended one.
            return token(TokenType::MinusLt, pos);
        }
        return token(TokenType::Minus, pos);
    }

    // ':' or '::'.
    if (ch == ':') {
        step();
        // Guard against end of input before peeking at the next character.
        if (not done() and current() == ':') {
            step();
            return token(TokenType::ColonColon, pos);
        }
        return token(TokenType::Colon, pos);
    }

    // Single-character punctuation. `single_token` presumably consumes the
    // character itself - verify against its definition.
    switch (ch) {
    case '(':
        return single_token(TokenType::LParen, pos);
    case ')':
        return single_token(TokenType::RParen, pos);
    case '{':
        return single_token(TokenType::LBrace, pos);
    case '}':
        return single_token(TokenType::RBrace, pos);
    case '[':
        return single_token(TokenType::LBracket, pos);
    case ']':
        return single_token(TokenType::RBracket, pos);
    case '.':
        return single_token(TokenType::Dot, pos);
    case ',':
        return single_token(TokenType::Comma, pos);
    case ';':
        return single_token(TokenType::Semicolon, pos);
    }

    // Consume the offending character so the caller can keep lexing.
    step();
    return error_token(pos, "unrecognized character");
}
|
||
|
|
||
|
auto Lexer::populate_keyword_map()
|
||
|
{
|
||
|
this->keyword_map["error"] = TokenType::Error;
|
||
|
this->keyword_map["eof"] = TokenType::Eof;
|
||
|
this->keyword_map["if"] = TokenType::If;
|
||
|
this->keyword_map["else"] = TokenType::Else;
|
||
|
this->keyword_map["return"] = TokenType::Return;
|
||
|
this->keyword_map["public"] = TokenType::Public;
|
||
|
this->keyword_map["private"] = TokenType::Private;
|
||
|
this->keyword_map["class"] = TokenType::Class;
|
||
|
this->keyword_map["derivable"] = TokenType::Derivable;
|
||
|
this->keyword_map["derives"] = TokenType::Derives;
|
||
|
this->keyword_map["enumeration"] = TokenType::Enumeration;
|
||
|
this->keyword_map["associate"] = TokenType::Associate;
|
||
|
this->keyword_map["attribute"] = TokenType::Attribute;
|
||
|
this->keyword_map["operation"] = TokenType::Operation;
|
||
|
this->keyword_map["state_machine"] = TokenType::StateMachine;
|
||
|
this->keyword_map["transition"] = TokenType::Transition;
|
||
|
this->keyword_map["initial"] = TokenType::Initial;
|
||
|
this->keyword_map["final"] = TokenType::Final;
|
||
|
this->keyword_map["entry"] = TokenType::Entry;
|
||
|
this->keyword_map["exit"] = TokenType::Exit;
|
||
|
}
|