From a023474bdbb8996fb92c2e0388fe505aaf8b3418 Mon Sep 17 00:00:00 2001
From: SimonFJ20
Date: Mon, 9 Jan 2023 21:00:16 +0100
Subject: [PATCH] start on lexer

---
 markup/lexer.cpp   |  22 +++++++++
 markup/lexer.hpp   | 113 +++++++++++++++++++++++++++++++++++++++++++++
 markup/meson.build |   1 +
 3 files changed, 136 insertions(+)
 create mode 100644 markup/lexer.cpp
 create mode 100644 markup/lexer.hpp

diff --git a/markup/lexer.cpp b/markup/lexer.cpp
new file mode 100644
index 0000000..e889ee1
--- /dev/null
+++ b/markup/lexer.cpp
@@ -0,0 +1,22 @@
+#include "lexer.hpp"
+#include "result.hpp"
+#include
+#include
+
+namespace markup {
+
+// Produces the next token of the input.
+//
+// Work in progress: only end-of-input is recognised so far. Any other input
+// takes the fallback return instead of flowing off the end of the function,
+// which would be undefined behaviour.
+auto Lexer::next() noexcept -> Result
+{
+    if (done())
+        return token(TokenTypes::Eof, index);
+    // NOTE(review): mirrors the `return {};` fallback in peek(); confirm that
+    // this is the Result state callers should see for unlexed input.
+    return {};
+}
+
+}
diff --git a/markup/lexer.hpp b/markup/lexer.hpp
new file mode 100644
index 0000000..ceb171d
--- /dev/null
+++ b/markup/lexer.hpp
@@ -0,0 +1,113 @@
+#pragma once
+
+#include "utils.hpp"
+#include
+#include
+
+namespace markup {
+
+// Kinds of token the lexer can produce.
+enum class TokenTypes {
+    Eof,
+    Whitespace,
+    MultilineComment,
+    SinglelineComment,
+
+    Name,
+    Int,
+    Float,
+    String,
+    Id, // Example = `#my_id`
+    Class, // Example = `.my_class`
+
+    True,
+    False,
+    Null,
+
+    LBrace,
+    RBrace,
+    Comma,
+    Equal,
+};
+
+// A lexed token: its kind, the offset and length of its text in the input,
+// and a line/column pair (both counted from 1).
+struct Token {
+    TokenTypes type;
+    size_t index, length;
+    int line, column;
+};
+
+// Tokenizer over a borrowed piece of text.
+//
+// Only a view of the text is stored, so the underlying characters must stay
+// alive for as long as the Lexer is used.
+class Lexer final {
+public:
+    Lexer(std::string_view text)
+        : text { text }
+    { }
+    // Defined in lexer.cpp, so it must not be constexpr: a constexpr function
+    // is implicitly inline and needs its definition in every translation unit
+    // that calls it.
+    auto next() noexcept -> Result;
+    // Returns the token most recently built by token(), or a Result built
+    // from `{}` when none has been produced yet. Does not advance the lexer.
+    auto peek() noexcept -> Result
+    {
+        if (last_token)
+            return Result::create_ok(*last_token);
+        return {};
+    }
+
+private:
+    // Out-of-line helpers; non-constexpr for the same reason as next().
+    auto make_number() noexcept -> Result;
+    auto make_id() noexcept -> Result;
+    // Builds a token spanning [begin, index) and remembers it for peek().
+    // NOTE(review): line/column are sampled at the current position, i.e.
+    // after the token's text, while index/length describe its start; confirm
+    // whether the start position was intended.
+    [[nodiscard]] auto constexpr inline token(
+        TokenTypes type, size_t begin) noexcept -> Token
+    {
+        auto token = Token { type, begin, index - begin, line, column };
+        last_token = token;
+        return token;
+    }
+    // True once every character of the input has been consumed.
+    [[nodiscard]] auto constexpr inline done() const noexcept -> bool
+    {
+        return index >= text.size();
+    }
+    // Character at the current position. Must not be called when done():
+    // at() throws on an out-of-range index, which terminates a noexcept
+    // function.
+    [[nodiscard]] auto constexpr inline current() const noexcept -> char
+    {
+        return text.at(index);
+    }
+    // Consumes one character and updates line/column; no-op at end of input.
+    auto constexpr inline step() noexcept -> void
+    {
+        if (done())
+            return;
+        // Look at the character being consumed: only after stepping over a
+        // newline does the position move to column 1 of the next line.
+        if (text.at(index) == '\n') {
+            line++;
+            column = 1;
+        } else {
+            column++;
+        }
+        index++;
+    }
+
+    std::string_view text;
+    size_t index = 0;
+    int line = 1;
+    int column = 1;
+    std::optional last_token;
+};
+
+}
diff --git a/markup/meson.build b/markup/meson.build
index 2a3ae4b..c822e87 100644
--- a/markup/meson.build
+++ b/markup/meson.build
@@ -1,6 +1,7 @@

 markup_sources = files(
     'parser.cpp',
+    'lexer.cpp',
 )

 markup_inc = include_directories('.')