#include "parser.hpp"

#include <cstdint>
#include <cstdlib>
#include <string>

using namespace stela;

/// Returns true when `ch` lies in the inclusive range [begin, end].
static inline auto in_range(char ch, char begin, char end) -> bool
{
    return ch >= begin && ch <= end;
}

/// Returns true for the characters skipped between tokens:
/// space, horizontal tab and line feed.
static inline auto whitespace_char(char ch) -> bool
{
    return ch == ' ' or ch == '\t' or ch == '\n';
}

/// Returns true when `ch` may start an identifier: an ASCII letter or '_'.
static inline auto id_start_char(char ch) -> bool
{
    return in_range(ch, 'a', 'z') or in_range(ch, 'A', 'Z') or ch == '_';
}

/// Returns true when `ch` may continue an identifier: an ASCII letter,
/// a decimal digit, or '_'.
static inline auto id_char(char ch) -> bool
{
    // Every decimal digit is allowed, not just '0' and '1'.
    return id_start_char(ch) or in_range(ch, '0', '9');
}

/// Scans and returns the next token from the input.
///
/// Whitespace, '#' line comments and '//' line comments are skipped.
/// Returns an Eof token once done() reports the end of input, and an error
/// token for a lone '/' or for any character no rule matches.
auto Lexer::next() -> Token
{
    // A loop (rather than one recursive call per skipped character) keeps the
    // stack depth constant on long runs of whitespace or comments.
    for (;;) {
        // Re-read the position on every pass so a token reports where it
        // starts, after any skipped whitespace/comments.
        auto pos = this->pos();
        if (done()) {
            return token(TokenType::Eof, pos);
        }
        char ch = current();

        if (whitespace_char(ch)) {
            step();
            continue;
        }

        // Identifier or keyword.
        if (id_start_char(ch)) {
            std::string value;
            value.push_back(ch);
            step();
            while (not done() and id_char(current())) {
                value.push_back(current());
                step();
            }
            // Single lookup: keywords get their own token type.
            auto keyword = this->keyword_map.find(value);
            if (keyword != this->keyword_map.end()) {
                return token(keyword->second, pos);
            }
            // Otherwise store the spelling; the token carries its index
            // within symbol_values.
            size_t id = this->symbol_values.size();
            this->symbol_values.push_back(value);
            return Token { TokenType::Id, pos, id };
        }

        // Decimal integer literal with a non-zero leading digit.
        if (in_range(ch, '1', '9')) {
            std::string value;
            value.push_back(ch);
            step();
            // Test the character under the cursor, not the first digit `ch`,
            // so the literal ends at the first non-digit.
            while (not done() and in_range(current(), '0', '9')) {
                value.push_back(current());
                step();
            }
            int64_t int_value = std::strtoll(value.c_str(), nullptr, 10);
            // The token carries the index of the value within int_values.
            size_t id = this->int_values.size();
            this->int_values.push_back(int_value);
            // NOTE(review): integer literals are still tagged TokenType::Id, as
            // before; they presumably need a dedicated integer token type --
            // confirm against the TokenType declaration in parser.hpp.
            return Token { TokenType::Id, pos, id };
        }

        // A lone '0' is its own literal; following digits are not consumed here.
        if (ch == '0') {
            step();
            int64_t int_value = 0;
            size_t id = this->int_values.size();
            this->int_values.push_back(int_value);
            // NOTE(review): same token-type question as the branch above.
            return Token { TokenType::Id, pos, id };
        }

        if (ch == '"') {
            // TODO string
        }

        // '#' starts a comment that runs to the end of the line.
        if (ch == '#') {
            while (not done() and current() != '\n') {
                step();
            }
            continue;
        }

        // '//' starts a line comment; a single '/' is not supported yet.
        if (ch == '/') {
            step();
            // Check done() first so current() is never called at end of input.
            if (not done() and current() == '/') {
                while (not done() and current() != '\n') {
                    step();
                }
                continue;
            }
            return error_token(pos, "'/' not implemented");
        }

        // '->' (the token type is named MinusLt) or '-'.
        if (ch == '-') {
            step();
            if (not done() and current() == '>') {
                step();
                return token(TokenType::MinusLt, pos);
            }
            return token(TokenType::Minus, pos);
        }

        // '::' or ':'.
        if (ch == ':') {
            step();
            // Check done() first so current() is never called at end of input.
            if (not done() and current() == ':') {
                step();
                return token(TokenType::ColonColon, pos);
            }
            return token(TokenType::Colon, pos);
        }

        // Single-character punctuation.
        switch (ch) {
            case '(':
                return single_token(TokenType::LParen, pos);
            case ')':
                return single_token(TokenType::RParen, pos);
            case '{':
                return single_token(TokenType::LBrace, pos);
            case '}':
                return single_token(TokenType::RBrace, pos);
            case '[':
                return single_token(TokenType::LBracket, pos);
            case ']':
                return single_token(TokenType::RBracket, pos);
            case '.':
                return single_token(TokenType::Dot, pos);
            case ',':
                return single_token(TokenType::Comma, pos);
            case ';':
                return single_token(TokenType::Semicolon, pos);
        }

        // Nothing matched (this includes '"' until strings are implemented):
        // consume the character so lexing can make progress, then report it.
        step();
        return error_token(pos, "unrecognized character");
    }
}

/// Fills keyword_map with the reserved words and the token type each maps to.
///
/// NOTE(review): there is no trailing return type here, unlike the other
/// definitions in this file; left as-is because the declaration in parser.hpp
/// is not visible from here.
auto Lexer::populate_keyword_map()
{
    // NOTE(review): "error" and "eof" are registered as keywords, so those
    // identifiers in the input produce Error / Eof tokens -- confirm this is
    // intended.
    this->keyword_map["error"] = TokenType::Error;
    this->keyword_map["eof"] = TokenType::Eof;
    this->keyword_map["if"] = TokenType::If;
    this->keyword_map["else"] = TokenType::Else;
    this->keyword_map["return"] = TokenType::Return;
    this->keyword_map["public"] = TokenType::Public;
    this->keyword_map["private"] = TokenType::Private;
    this->keyword_map["class"] = TokenType::Class;
    this->keyword_map["derivable"] = TokenType::Derivable;
    this->keyword_map["derives"] = TokenType::Derives;
    this->keyword_map["enumeration"] = TokenType::Enumeration;
    this->keyword_map["associate"] = TokenType::Associate;
    this->keyword_map["attribute"] = TokenType::Attribute;
    this->keyword_map["operation"] = TokenType::Operation;
    this->keyword_map["state_machine"] = TokenType::StateMachine;
    this->keyword_map["transition"] = TokenType::Transition;
    this->keyword_map["initial"] = TokenType::Initial;
    this->keyword_map["final"] = TokenType::Final;
    this->keyword_map["entry"] = TokenType::Entry;
    this->keyword_map["exit"] = TokenType::Exit;
}