stela/parser.cpp

155 lines
4.6 KiB
C++
Raw Permalink Normal View History

2024-07-19 00:46:30 +01:00
#include "parser.hpp"
#include <cstdlib>
using namespace stela;
// Reports whether `ch` lies in the closed interval [begin, end].
// Both bounds are inclusive; the result is false whenever begin > end.
static inline auto in_range(char ch, char begin, char end) -> bool
{
    if (ch < begin) {
        return false;
    }
    return ch <= end;
}
// Reports whether `ch` is one of the characters the lexer skips between
// tokens: space, horizontal tab or line feed. Note that carriage return
// ('\r') is not in this set.
static inline auto whitespace_char(char ch) -> bool
{
    switch (ch) {
    case ' ':
    case '\t':
    case '\n':
        return true;
    default:
        return false;
    }
}
// Reports whether `ch` may begin an identifier: an ASCII letter of either
// case, or an underscore. Digits are not accepted here.
static inline auto id_start_char(char ch) -> bool
{
    if (ch == '_') {
        return true;
    }
    const bool lowercase = in_range(ch, 'a', 'z');
    const bool uppercase = in_range(ch, 'A', 'Z');
    return lowercase or uppercase;
}
// Reports whether `ch` may appear after the first character of an
// identifier: an ASCII letter of either case, a decimal digit, or an
// underscore.
//
// The digit range covers '0' through '9'. It previously stopped at '1', which
// cut identifiers such as `x2` short at the first digit above 1.
static inline auto id_char(char ch) -> bool
{
    const bool lowercase = ch >= 'a' && ch <= 'z';
    const bool uppercase = ch >= 'A' && ch <= 'Z';
    const bool digit = ch >= '0' && ch <= '9';
    return lowercase or uppercase or digit or ch == '_';
}
// Scans the input from the current position and returns the next token.
//
// Whitespace, `#` line comments and `//` line comments are skipped. The
// position stored in the returned token is the position of the token's first
// character. When the input is exhausted an `Eof` token is returned.
//
// Identifiers are looked up in `keyword_map`; a match yields the mapped token
// type, otherwise the text is appended to `symbol_values` and an `Id` token
// carrying its index is returned. Integer literals are appended to
// `int_values` and the token carries the index of the new entry.
//
// Characters the lexer does not understand are consumed and reported through
// `error_token`.
auto Lexer::next() -> Token
{
    // Loop instead of recursing once per skipped whitespace character or
    // comment, so that long runs of either cannot exhaust the call stack.
    while (true) {
        auto pos = this->pos();
        if (done()) {
            return token(TokenType::Eof, pos);
        }
        char ch = current();

        if (whitespace_char(ch)) {
            step();
            continue;
        }

        // Identifier or keyword.
        if (id_start_char(ch)) {
            std::string value;
            value.push_back(ch);
            step();
            while (not done() and id_char(current())) {
                value.push_back(current());
                step();
            }
            // Single lookup instead of contains() followed by operator[].
            auto keyword = this->keyword_map.find(value);
            if (keyword != this->keyword_map.end()) {
                return token(keyword->second, pos);
            }
            size_t id = this->symbol_values.size();
            this->symbol_values.push_back(value);
            return Token { TokenType::Id, pos, id };
        }

        // Decimal integer literal with a non-zero leading digit.
        if (in_range(ch, '1', '9')) {
            std::string value;
            value.push_back(ch);
            step();
            // Test the character under the cursor, not the literal's first
            // digit: checking `ch` here would be true forever and consume the
            // rest of the input.
            while (not done() and in_range(current(), '0', '9')) {
                value.push_back(current());
                step();
            }
            int64_t int_value = std::strtoll(value.c_str(), nullptr, 10);
            // The value is stored in int_values, so the index must come from
            // int_values as well.
            size_t id = this->int_values.size();
            this->int_values.push_back(int_value);
            // NOTE(review): integer literals are tagged TokenType::Id, the
            // same type as identifiers. A dedicated integer token type is
            // probably intended; confirm against the TokenType declaration.
            return Token { TokenType::Id, pos, id };
        }

        // A lone '0'. Following digits are not consumed, so "012" lexes as
        // the literal 0 followed by the literal 12.
        if (ch == '0') {
            step();
            int64_t int_value = 0;
            size_t id = this->int_values.size();
            this->int_values.push_back(int_value);
            // NOTE(review): same token-type question as for the literals
            // above.
            return Token { TokenType::Id, pos, id };
        }

        if (ch == '"') {
            // TODO string
            // Until string literals are implemented, '"' falls through to the
            // "unrecognized character" error at the bottom of the loop.
        }

        // '#' starts a comment running to the end of the line.
        if (ch == '#') {
            while (not done() and current() != '\n') {
                step();
            }
            continue;
        }

        // "//" starts a comment running to the end of the line; a single '/'
        // is not supported yet.
        if (ch == '/') {
            step();
            if (not done() and current() == '/') {
                while (not done() and current() != '\n') {
                    step();
                }
                continue;
            }
            return error_token(pos, "'/' not implemented");
        }

        if (ch == '-') {
            step();
            if (not done() and current() == '>') {
                step();
                // NOTE(review): "->" is reported as TokenType::MinusLt
                // although the second character is '>'; confirm the
                // enumerator name.
                return token(TokenType::MinusLt, pos);
            }
            return token(TokenType::Minus, pos);
        }

        if (ch == ':') {
            step();
            if (not done() and current() == ':') {
                step();
                return token(TokenType::ColonColon, pos);
            }
            return token(TokenType::Colon, pos);
        }

        // Single-character punctuation.
        switch (ch) {
        case '(':
            return single_token(TokenType::LParen, pos);
        case ')':
            return single_token(TokenType::RParen, pos);
        case '{':
            return single_token(TokenType::LBrace, pos);
        case '}':
            return single_token(TokenType::RBrace, pos);
        case '[':
            return single_token(TokenType::LBracket, pos);
        case ']':
            return single_token(TokenType::RBracket, pos);
        case '.':
            return single_token(TokenType::Dot, pos);
        case ',':
            return single_token(TokenType::Comma, pos);
        case ';':
            return single_token(TokenType::Semicolon, pos);
        }

        // Consume the offending character so the next call makes progress.
        step();
        return error_token(pos, "unrecognized character");
    }
}
auto Lexer::populate_keyword_map()
{
this->keyword_map["error"] = TokenType::Error;
this->keyword_map["eof"] = TokenType::Eof;
this->keyword_map["if"] = TokenType::If;
this->keyword_map["else"] = TokenType::Else;
this->keyword_map["return"] = TokenType::Return;
this->keyword_map["public"] = TokenType::Public;
this->keyword_map["private"] = TokenType::Private;
this->keyword_map["class"] = TokenType::Class;
this->keyword_map["derivable"] = TokenType::Derivable;
this->keyword_map["derives"] = TokenType::Derives;
this->keyword_map["enumeration"] = TokenType::Enumeration;
this->keyword_map["associate"] = TokenType::Associate;
this->keyword_map["attribute"] = TokenType::Attribute;
this->keyword_map["operation"] = TokenType::Operation;
this->keyword_map["state_machine"] = TokenType::StateMachine;
this->keyword_map["transition"] = TokenType::Transition;
this->keyword_map["initial"] = TokenType::Initial;
this->keyword_map["final"] = TokenType::Final;
this->keyword_map["entry"] = TokenType::Entry;
this->keyword_map["exit"] = TokenType::Exit;
}