#include "lexer.hpp" #include #include #include namespace scriptlang { auto Lexer::make_token() noexcept -> Result { if (done()) return token(Tokens::Eof, index, current_location()); if (std::isspace(current()) != 0) return skip_whitespace(); if (std::isdigit(current()) != 0) return make_number(); if (std::isalpha(current()) != 0 or current() == '_') return make_id(); if (current() == '"') return make_string(); return make_static(); } auto Lexer::skip_whitespace() noexcept -> Result { while (!done() and std::isspace(current()) != 0) step(); return make_token(); } auto Lexer::make_number() noexcept -> Result { auto begin = index; auto span_from = current_location(); while (!done() and std::isdigit(current()) != 0) step(); if (current() == '.') { step(); while (!done() and std::isdigit(current()) != 0) step(); return token(Tokens::Float, begin, span_from); } return token(Tokens::Int, begin, span_from); } auto Lexer::make_id() noexcept -> Result { auto begin = index; auto span_from = current_location(); while (!done() and (std::isalpha(current()) != 0 or std::isdigit(current()) != 0 or current() == '_')) step(); return token(id_or_keyword_type(text.substr(begin, index - begin)), begin, span_from); } auto Lexer::id_or_keyword_type(std::string_view substring) noexcept -> Tokens { if (substring.compare("if") == 0) return Tokens::If; if (substring.compare("else") == 0) return Tokens::Else; if (substring.compare("for") == 0) return Tokens::For; if (substring.compare("loop") == 0) return Tokens::Loop; if (substring.compare("while") == 0) return Tokens::While; if (substring.compare("break") == 0) return Tokens::Break; if (substring.compare("continue") == 0) return Tokens::Continue; if (substring.compare("fn") == 0) return Tokens::Fn; if (substring.compare("return") == 0) return Tokens::Return; if (substring.compare("false") == 0) return Tokens::False; if (substring.compare("true") == 0) return Tokens::True; if (substring.compare("and") == 0) return Tokens::And; if (substring.compare("or") == 0) return Tokens::Or; if (substring.compare("xor") == 0) return Tokens::Xor; return Tokens::Id; } auto Lexer::make_string() noexcept -> Result { auto begin = index; auto span_from = current_location(); step(); auto escaped = false; while (!done() and (current() != '"' or escaped)) { escaped = escaped ? false : current() == '\\'; step(); } if (current() != '"') return Error { { span_from, { line, column } }, "unterminated string", }; step(); return token(Tokens::String, begin, span_from); } auto Lexer::make_static() noexcept -> Result { auto begin = index; auto span_from = current_location(); auto type = static_token_type(); if (!type) return type.transform(); return token(*type, begin, span_from); } // NOLINTNEXTLINE(readability-function-cognitive-complexity) auto Lexer::static_token_type() noexcept -> Result { using TT = Tokens; auto stepped = [&](Tokens v) { step(); return v; }; if (current() == '(') return stepped(TT::LParen); if (current() == ')') return stepped(TT::RParen); if (current() == '{') return stepped(TT::LBrace); if (current() == '}') return stepped(TT::RBrace); if (current() == '[') return stepped(TT::LBracket); if (current() == ']') return stepped(TT::RBracket); if (current() == '.') return stepped(TT::Dot); if (current() == ',') return stepped(TT::Comma); if (current() == ':') return stepped(TT::Colon); if (current() == ';') return stepped(TT::Semicolon); if (current() == ']') return stepped(TT::RBracket); if (current() == '+') { step(); if (current() == '+') return stepped(TT::DoublePlus); if (current() == '=') return stepped(TT::PlusEqual); return TT::Plus; } if (current() == '-') { step(); if (current() == '>') return stepped(TT::ThinArrow); if (current() == '-') return stepped(TT::DoubleMinus); if (current() == '=') return stepped(TT::MinusEqual); return TT::Minus; } if (current() == '*') { step(); if (current() == '=') return TT::AsteriskEqual; return TT::Asterisk; } if (current() == '/') { step(); if (current() == '*') return skip_multiline_comment(); if (current() == '/') return skip_singleline_comment(); if (current() == '=') return TT::SlashEqual; return TT::Slash; } if (current() == '%') { step(); if (current() == '=') return TT::PercentEqual; return TT::Percent; } if (current() == '^') { step(); if (current() == '=') return TT::PowerEqual; return TT::Power; } if (current() == '=') { step(); if (current() == '>') return stepped(TT::FatArrow); if (current() == '=') return stepped(TT::DoubleEqual); return TT::Equal; } if (current() == '!') { step(); if (current() == '=') return stepped(TT::ExclamationEqual); return TT::Exclamation; } if (current() == '<') { step(); if (current() == '=') return stepped(TT::LessEqual); return TT::Less; } if (current() == '>') { step(); if (current() == '=') return stepped(TT::GreaterEqual); return TT::Greater; } return Error { { { line, column - 1 }, { line, column } }, "unexpected character", }; } auto Lexer::skip_multiline_comment() noexcept -> Result { step(); auto last = current(); step(); while (!done() and last != '*' and current() != '/') step(); if (last != '*' or current() != '/') return Error { { { line, column - 1 }, { line, column } }, "unterminated multiline comment", }; step(); return Tokens::MultilineComment; } auto Lexer::skip_singleline_comment() noexcept -> Result { step(); while (!done() and current() != '\n') step(); if (current() == '\n') step(); return Tokens::SinglelineComment; } }