From 1417c1cee5e21906d3295d21db49533bdccf86c7 Mon Sep 17 00:00:00 2001 From: SimonFJ20 Date: Thu, 12 Jan 2023 13:33:19 +0100 Subject: [PATCH] scriptlang: strict value parser done --- .vscode/launch.json | 16 ++++ browser/main.cpp | 12 +++ scriptlang/error.hpp | 17 ---- scriptlang/lexer.cpp | 50 ++++++++---- scriptlang/lexer.hpp | 40 ++++++---- scriptlang/parser.cpp | 178 ++++++++++++++++++++++++++++++++---------- scriptlang/parser.hpp | 88 ++++++++++++++++++--- utils/result.hpp | 4 + 8 files changed, 306 insertions(+), 99 deletions(-) create mode 100644 .vscode/launch.json delete mode 100644 scriptlang/error.hpp diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..7e126bd --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Debug Browser", + "program": "${workspaceFolder}/builddir/web-browser", + "args": [], + "cwd": "${workspaceFolder}" + } + ] +} \ No newline at end of file diff --git a/browser/main.cpp b/browser/main.cpp index bfae6a2..562e092 100644 --- a/browser/main.cpp +++ b/browser/main.cpp @@ -3,6 +3,7 @@ #include "SDL_rect.h" #include "SDL_render.h" #include "SDL_video.h" +#include "scriptlang/parser.hpp" #include "utils/all.hpp" #include #include @@ -68,6 +69,17 @@ private: auto main() -> int { + + const auto* text = "{name: \"test\", value: [true, false, 123, \"bruh\"], " + "int: 123, float: 3.14}"; + auto ast = scriptlang::Parser(text).parse_expression(true); + if (!ast) + fmt::print("parser error at {}:{}: {}\n\t\n", + ast.unwrap_error().span.from.line, + ast.unwrap_error().span.from.column, ast.unwrap_error().message); + else + fmt::print("ast = {}\n", ast.unwrap()->to_string()); + // test fmt::print("browser: hello world!\n"); auto gui = GUI::create().unwrap(); diff --git a/scriptlang/error.hpp b/scriptlang/error.hpp deleted file mode 100644 index 028bccf..0000000 --- a/scriptlang/error.hpp +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -namespace scriptlang { - -enum class Errors { - NotImplemented, - LexerNoTokenYet, - LexerStringNotTerminated, - LexerUnexpectedCharacer, - LexerMultilineCommentNotTerminated, - NoLexerOutput, - ParserExhausted, - ParserMalformed, - ParserUnexpected, -}; - -} diff --git a/scriptlang/lexer.cpp b/scriptlang/lexer.cpp index 94e7928..706be72 100644 --- a/scriptlang/lexer.cpp +++ b/scriptlang/lexer.cpp @@ -1,14 +1,16 @@ #include "lexer.hpp" -#include "error.hpp" #include +#include #include namespace scriptlang { -auto Lexer::make_token() noexcept -> Result +auto Lexer::make_token() noexcept -> Result { if (done()) return token(Tokens::Eof, index, current_location()); + if (std::isspace(current()) != 0) + return skip_whitespace(); if (std::isdigit(current()) != 0) return make_number(); if (std::isalpha(current()) != 0 or current() == '_') @@ -18,7 +20,14 @@ auto Lexer::make_token() noexcept -> Result return make_static(); } -auto Lexer::make_number() noexcept -> Result +auto Lexer::skip_whitespace() noexcept -> Result +{ + while (!done() and std::isspace(current()) != 0) + step(); + return make_token(); +} + +auto Lexer::make_number() noexcept -> Result { auto begin = index; auto span_from = current_location(); @@ -33,7 +42,7 @@ auto Lexer::make_number() noexcept -> Result return token(Tokens::Int, begin, span_from); } -auto Lexer::make_id() noexcept -> Result +auto Lexer::make_id() noexcept -> Result { auto begin = index; auto span_from = current_location(); @@ -78,7 +87,7 @@ auto Lexer::id_or_keyword_type(std::string_view substring) noexcept -> Tokens return Tokens::Id; } -auto Lexer::make_string() noexcept -> Result +auto Lexer::make_string() noexcept -> Result { auto begin = index; auto span_from = current_location(); @@ -89,12 +98,15 @@ auto Lexer::make_string() noexcept -> Result step(); } if (current() != '"') - return Errors::LexerStringNotTerminated; + return Error { + { span_from, { line, column } }, + "unterminated string", + }; step(); return token(Tokens::String, begin, span_from); } -auto Lexer::make_static() noexcept -> Result +auto Lexer::make_static() noexcept -> Result { auto begin = index; auto span_from = current_location(); @@ -105,7 +117,7 @@ auto Lexer::make_static() noexcept -> Result } // NOLINTNEXTLINE(readability-function-cognitive-complexity) -auto Lexer::static_token_type() noexcept -> Result +auto Lexer::static_token_type() noexcept -> Result { using TT = Tokens; auto stepped = [&](Tokens v) { @@ -113,13 +125,13 @@ auto Lexer::static_token_type() noexcept -> Result return v; }; - if (current() == ')') - return stepped(TT::LParen); if (current() == '(') + return stepped(TT::LParen); + if (current() == ')') return stepped(TT::RParen); - if (current() == '}') - return stepped(TT::LBrace); if (current() == '{') + return stepped(TT::LBrace); + if (current() == '}') return stepped(TT::RBrace); if (current() == '[') return stepped(TT::LBracket); @@ -207,10 +219,13 @@ auto Lexer::static_token_type() noexcept -> Result return stepped(TT::GreaterEqual); return TT::Greater; } - return Errors::LexerUnexpectedCharacer; + return Error { + { { line, column - 1 }, { line, column } }, + "unexpected character", + }; } -auto Lexer::skip_multiline_comment() noexcept -> Result +auto Lexer::skip_multiline_comment() noexcept -> Result { step(); auto last = current(); @@ -218,12 +233,15 @@ auto Lexer::skip_multiline_comment() noexcept -> Result while (!done() and last != '*' and current() != '/') step(); if (last != '*' or current() != '/') - return Errors::LexerMultilineCommentNotTerminated; + return Error { + { { line, column - 1 }, { line, column } }, + "unterminated multiline comment", + }; step(); return Tokens::MultilineComment; } -auto Lexer::skip_singleline_comment() noexcept -> Result +auto Lexer::skip_singleline_comment() noexcept -> Result { step(); while (!done() and current() != '\n') diff --git a/scriptlang/lexer.hpp b/scriptlang/lexer.hpp index 68d59ad..2f05305 100644 --- a/scriptlang/lexer.hpp +++ b/scriptlang/lexer.hpp @@ -1,6 +1,6 @@ -#include "error.hpp" #include "utils/all.hpp" #include +#include #include namespace scriptlang { @@ -89,6 +89,17 @@ struct Token { Tokens type; size_t index, length; Span span; + + [[nodiscard]] static auto token_span( + const Token& from, const Token& to) noexcept -> Span + { + return { from.span.from, to.span.to }; + } +}; + +struct Error { + Span span; + std::string message; }; class Lexer { @@ -96,24 +107,25 @@ public: Lexer(std::string_view text) : text { text } { } - auto next() noexcept -> Result { return make_token(); } - auto peek() noexcept -> Result + auto next() noexcept -> Result { return make_token(); } + auto peek() noexcept -> Result { - if (last_token) - return Result::create_ok(*last_token); - return Errors::LexerNoTokenYet; + if (!last_token) + return Error { { { 0, 0 }, { 0, 0 } }, "no token yet" }; + return Result::create_ok(*last_token); } private: - auto make_token() noexcept -> Result; - auto make_number() noexcept -> Result; - auto make_id() noexcept -> Result; + auto make_token() noexcept -> Result; + auto skip_whitespace() noexcept -> Result; + auto make_number() noexcept -> Result; + auto make_id() noexcept -> Result; auto id_or_keyword_type(std::string_view substring) noexcept -> Tokens; - auto make_string() noexcept -> Result; - auto make_static() noexcept -> Result; - auto static_token_type() noexcept -> Result; - auto skip_multiline_comment() noexcept -> Result; - auto skip_singleline_comment() noexcept -> Result; + auto make_string() noexcept -> Result; + auto make_static() noexcept -> Result; + auto static_token_type() noexcept -> Result; + auto skip_multiline_comment() noexcept -> Result; + auto skip_singleline_comment() noexcept -> Result; [[nodiscard]] auto constexpr inline current_location() const noexcept -> Location diff --git a/scriptlang/parser.cpp b/scriptlang/parser.cpp index 005f164..ba3d68c 100644 --- a/scriptlang/parser.cpp +++ b/scriptlang/parser.cpp @@ -1,85 +1,177 @@ #include "parser.hpp" -#include "error.hpp" +#include "utils/result.hpp" #include #include +#include +#include namespace scriptlang { auto Parser::parse_expression(bool strictly_values) noexcept - -> Result, Errors> + -> Result, Error> { if (strictly_values) - return parse_struct(true); - return Errors::NotImplemented; + return parse_array(true); + return Error { { { 0, 0 }, { 0, 0 } }, "not implemented" }; } +auto Parser::parse_array(bool strictly_values) noexcept + -> Result, Error> +{ + auto values = std::vector> {}; + auto first_bracket = *lexer.peek(); + if (first_bracket.type == Tokens::LBracket) { + (void)lexer.next(); + auto value = parse_expression(strictly_values); + if (!value) + return value; + values.emplace_back(std::move(*value)); + while (lexer.peek()->type == Tokens::Comma) { + (void)lexer.next(); + if (lexer.peek()->type == Tokens::LBracket) + break; + auto value2 = parse_expression(strictly_values); + values.emplace_back(std::move(*value2)); + } + auto last_bracket = *lexer.peek(); + if (last_bracket.type != Tokens::RBracket) + return Error { + last_bracket.span, + "unterminated array", + }; + (void)lexer.next().unwrap(); + return { + std::make_unique( + Token::token_span(first_bracket, last_bracket), + std::move(values)), + }; + } + return parse_struct(strictly_values); +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) auto Parser::parse_struct(bool strictly_values) noexcept - -> Result, Errors> + -> Result, Error> { auto values = std::map> {}; auto first_brace = *lexer.peek(); if (first_brace.type == Tokens::LBrace) { auto name = *lexer.next(); - if (name.type != Tokens::Eof && name.type != Tokens::LBrace) { + if (name.type != Tokens::LBrace) { if (name.type != Tokens::Id) - return Errors::ParserUnexpected; + return Error { + name.span, + "unexpected token, expected Id or String", + }; if (lexer.next()->type != Tokens::Colon) - return Errors::ParserUnexpected; + return Error { + lexer.peek()->span, + "unexpected token, expected ':'", + }; + if (auto result = lexer.next(); !result) + return { std::move(result.unwrap_error()) }; + auto value = parse_expression(strictly_values); + if (!value) + return value.transform>(); + if (values.find(token_text(name)) != values.end()) + return Error { + name.span, + "multiple definitions of struct field", + }; + values.insert_or_assign(token_text(name), std::move(*value)); + while (lexer.peek()->type == Tokens::Comma) { + auto name2 = *lexer.next(); + if (name2.type == Tokens::RBrace) + break; + if (name2.type != Tokens::Id) + return Error { + name2.span, + "unexpected token, expected Id", + }; + if (lexer.next()->type != Tokens::Colon) + return Error { + lexer.peek()->span, + "unexpected token, expected ':'", + }; + (void)lexer.next(); + auto value2 = parse_expression(strictly_values); + if (!value2) + return value2.transform>(); + if (values.find(token_text(name2)) != values.end()) + return Error { + name2.span, + "multiple definitions of struct field", + }; + values.insert_or_assign(token_text(name2), std::move(*value2)); + } } auto last_brace = *lexer.peek(); if (last_brace.type != Tokens::RBrace) - return Errors::ParserMalformed; + return Error { + last_brace.span, + fmt::format("unterminated struct, expected '}}', got {}", + last_brace.type), + }; + (void)lexer.next().unwrap(); return { std::make_unique( - token_span(first_brace, last_brace), std::move(values)), + Token::token_span(first_brace, last_brace), std::move(values)), }; } return parse_atom(); } -auto Parser::parse_atom() noexcept - -> Result, Errors> +auto Parser::parse_atom() noexcept -> Result, Error> { auto token = *lexer.peek(); switch (token.type) { - case Tokens::Id: - return { - std::make_unique(token_span(token, token), - token_text(token.index, token.length)), - }; - case Tokens::Int: - return { - std::make_unique(token_span(token, token), - std::atol(token_text(token.index, token.length).c_str())), - }; - case Tokens::Float: - return { - std::make_unique(token_span(token, token), - std::atof(token_text(token.index, token.length).c_str())), - }; - case Tokens::False: - return { - std::make_unique(token_span(token, token), false), - }; - case Tokens::True: - return { - std::make_unique(token_span(token, token), true), - }; - case Tokens::String: - return { - std::make_unique(token_span(token, token), - *parse_string_value(token_text(token.index, token.length))), - }; + case Tokens::Id: { + auto node = std::make_unique(Token::token_span(token, token), + token_text(token.index, token.length)); + (void)lexer.next().unwrap(); + return { std::move(node) }; + } + case Tokens::Int: { + auto node = std::make_unique(Token::token_span(token, token), + std::atol(token_text(token.index, token.length).c_str())); + (void)lexer.next().unwrap(); + return { std::move(node) }; + } + case Tokens::Float: { + auto node = std::make_unique(Token::token_span(token, token), + std::atof(token_text(token.index, token.length).c_str())); + (void)lexer.next().unwrap(); + return { std::move(node) }; + } + case Tokens::False: { + auto node = std::make_unique( + Token::token_span(token, token), false); + (void)lexer.next().unwrap(); + return { std::move(node) }; + } + case Tokens::True: { + auto node + = std::make_unique(Token::token_span(token, token), true); + (void)lexer.next().unwrap(); + return { std::move(node) }; + } + case Tokens::String: { + auto node + = std::make_unique(Token::token_span(token, token), + *parse_string_value(token_text(token.index, token.length))); + (void)lexer.next().unwrap(); + return { std::move(node) }; + } default: - return Errors::ParserExhausted; + return Error { token.span, "unexpected token, expected value" }; } } [[nodiscard]] auto Parser::parse_string_value(std::string_view literal) noexcept - -> Result + -> Result> { if (literal.size() < 2) - return Errors::ParserMalformed; + return utils::result::Error { "malformed string" }; auto value = std::string {}; auto escaped = false; for (const auto c : literal.substr(1, literal.size() - 2)) { diff --git a/scriptlang/parser.hpp b/scriptlang/parser.hpp index 7f32d7b..9660f93 100644 --- a/scriptlang/parser.hpp +++ b/scriptlang/parser.hpp @@ -1,9 +1,9 @@ #pragma once -#include "error.hpp" #include "lexer.hpp" #include "utils/all.hpp" #include "utils/result.hpp" +#include #include #include #include @@ -22,6 +22,7 @@ enum class Expressions { Call, Operator, + Array, Struct, Id, Int, @@ -41,10 +42,41 @@ public: [[nodiscard]] virtual auto expression_type() const noexcept -> Expressions = 0; [[nodiscard]] virtual auto span() const noexcept -> Span = 0; + [[nodiscard]] virtual auto to_string() const noexcept -> std::string = 0; private: }; +class Array final : public Expression { +public: + Array(Span span, std::vector> values) + : m_span { span } + , m_values { std::move(values) } + { } + [[nodiscard]] auto expression_type() const noexcept -> Expressions override + { + return Expressions::Array; + } + [[nodiscard]] auto span() const noexcept -> Span override { return m_span; } + [[nodiscard]] auto values() const noexcept -> auto& { return m_values; } + [[nodiscard]] auto to_string() const noexcept -> std::string override + { + auto values_strings = std::string {}; + auto first = true; + for (const auto& value : m_values) { + if (!first) + values_strings.append(", "); + first = false; + values_strings.append(value->to_string()); + } + return fmt::format("Array {{ [ {} ] }}", values_strings); + }; + +private: + Span m_span; + std::vector> m_values; +}; + class Struct final : public Expression { public: Struct(Span span, std::map> values) @@ -53,10 +85,22 @@ public: { } [[nodiscard]] auto expression_type() const noexcept -> Expressions override { - return Expressions::Id; + return Expressions::Struct; } [[nodiscard]] auto span() const noexcept -> Span override { return m_span; } [[nodiscard]] auto values() const noexcept -> auto& { return m_values; } + [[nodiscard]] auto to_string() const noexcept -> std::string override + { + auto values_strings = std::string {}; + auto first = true; + for (const auto& [name, value] : m_values) { + if (!first) + values_strings.append(", "); + first = false; + values_strings.append(value->to_string()); + } + return fmt::format("Struct {{ [ {} ] }}", values_strings); + }; private: Span m_span; @@ -75,6 +119,10 @@ public: } [[nodiscard]] auto span() const noexcept -> Span override { return m_span; } [[nodiscard]] auto value() const noexcept { return m_value; } + [[nodiscard]] auto to_string() const noexcept -> std::string override + { + return fmt::format("Id {{ {} }}", m_value); + } private: Span m_span; @@ -93,6 +141,10 @@ public: } [[nodiscard]] auto span() const noexcept -> Span override { return m_span; } [[nodiscard]] auto value() const noexcept { return m_value; } + [[nodiscard]] auto to_string() const noexcept -> std::string override + { + return fmt::format("Int {{ {} }}", m_value); + } private: Span m_span; @@ -111,6 +163,10 @@ public: } [[nodiscard]] auto value() const noexcept { return m_value; } [[nodiscard]] auto span() const noexcept -> Span override { return m_span; } + [[nodiscard]] auto to_string() const noexcept -> std::string override + { + return fmt::format("Float {{ {} }}", m_value); + } private: Span m_span; @@ -129,6 +185,10 @@ public: } [[nodiscard]] auto value() const noexcept { return m_value; } [[nodiscard]] auto span() const noexcept -> Span override { return m_span; } + [[nodiscard]] auto to_string() const noexcept -> std::string override + { + return fmt::format("Bool {{ {} }}", m_value); + } private: Span m_span; @@ -150,6 +210,10 @@ public: return m_value; } [[nodiscard]] auto span() const noexcept -> Span override { return m_span; } + [[nodiscard]] auto to_string() const noexcept -> std::string override + { + return fmt::format("String {{ \"{}\" }}", m_value); + } private: Span m_span; @@ -161,24 +225,30 @@ public: Parser(std::string_view text) : text { text } , lexer(text) - { } + { + [[maybe_unused]] auto _ = lexer.next(); + } auto parse_expression(bool strictly_values) noexcept - -> Result, Errors>; + -> Result, Error>; + auto parse_array(bool strictly_values) noexcept + -> Result, Error>; auto parse_struct(bool strictly_values) noexcept - -> Result, Errors>; - auto parse_atom() noexcept -> Result, Errors>; + -> Result, Error>; + auto parse_atom() noexcept -> Result, Error>; private: [[nodiscard]] static auto parse_string_value( - std::string_view literal) noexcept -> Result; + std::string_view literal) noexcept + -> Result>; [[nodiscard]] auto token_text(size_t index, size_t length) const noexcept -> std::string { return std::string { text.substr(index, length) }; } - [[nodiscard]] static auto token_span(Token from, Token to) noexcept -> Span + [[nodiscard]] auto token_text(const Token& token) const noexcept + -> std::string { - return { from.span.from, to.span.to }; + return std::string { text.substr(token.index, token.length) }; } std::string_view text; diff --git a/utils/result.hpp b/utils/result.hpp index 95a0f61..ef1585a 100644 --- a/utils/result.hpp +++ b/utils/result.hpp @@ -23,6 +23,10 @@ struct StatesValueTypes { struct Error { }; }; +template struct Error { + ErrorV value; +}; + template struct Extracter { using Value = void; using Error = void;