From 77483438d8c803254db82161c937a6c4fc59f2ee Mon Sep 17 00:00:00 2001 From: SimonFJ20 Date: Fri, 15 Nov 2024 15:22:57 +0100 Subject: [PATCH] add json --- runtime/json.cpp | 114 +++++++++++++++++++++++++------ runtime/json.hpp | 173 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 253 insertions(+), 34 deletions(-) diff --git a/runtime/json.cpp b/runtime/json.cpp index 901b573..c72cc49 100644 --- a/runtime/json.cpp +++ b/runtime/json.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -20,12 +21,12 @@ auto id_tail_chars = "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "1234567890"; -auto Lexer::next() -> Tok +auto Lexer::next() -> Res { - if (done()) { - return TokTyp::Eof; - } auto pos = this->pos(); + if (done()) { + return Tok(TokTyp::Eof, pos); + } if (test('"')) { step(); auto value = std::string(); @@ -54,6 +55,16 @@ auto Lexer::next() -> Tok } step(); } + if (!test('"')) { + return Err { + .pos = pos, + .msg + = std::format("malformed string, expected '\"', got '{}' token", + this->cur()), + }; + } + step(); + return Tok(TokTyp::String, pos, intern_str(value)); } auto step_n_ret = [&](auto tok) { step(); @@ -61,19 +72,19 @@ auto Lexer::next() -> Tok }; switch (cur()) { case '0': - return step_n_ret(Tok(TokTyp::Float, 0.0)); + return step_n_ret(Tok(TokTyp::Float, pos, 0.0)); case '{': - return step_n_ret(TokTyp::LBrace); + return step_n_ret(Tok(TokTyp::LBrace, pos)); case '}': - return step_n_ret(TokTyp::RBrace); + return step_n_ret(Tok(TokTyp::RBrace, pos)); case '[': - return step_n_ret(TokTyp::LBracket); + return step_n_ret(Tok(TokTyp::LBracket, pos)); case ']': - return step_n_ret(TokTyp::RBracket); + return step_n_ret(Tok(TokTyp::RBracket, pos)); case ',': - return step_n_ret(TokTyp::Comma); + return step_n_ret(Tok(TokTyp::Comma, pos)); case ':': - return step_n_ret(TokTyp::Colon); + return step_n_ret(Tok(TokTyp::Colon, pos)); } if (test_in(id_start_chars)) { auto value = std::string(); @@ -82,12 +93,12 @@ auto Lexer::next() -> Tok step(); } if (ident_tok_typs.find(value) == ident_tok_typs.end()) { - std::cerr << std::format("sliger::json::Lexer error: unknown " - "identifier \"{}\" at {}:{}", - value, pos.col, pos.line); - return TokTyp::Error; + return Err { + .pos = pos, + .msg = std::format("unknown identifier \"{}\"", value), + }; } - return ident_tok_typs.at(value); + return Tok(ident_tok_typs.at(value), pos); } if (test_in("123456789")) { auto value_str = std::string(); @@ -96,11 +107,72 @@ auto Lexer::next() -> Tok step(); } auto value = std::atof(value_str.c_str()); - return Tok(TokTyp::Float, value); + return Tok(TokTyp::Float, pos, value); } - std::cerr << std::format( - "sliger::json::Lexer error: unknown character '{}' at {}:{}", cur(), - this->line, this->col); + auto ch = cur(); step(); - return TokTyp::Error; + return Err { + .pos = pos, + .msg = std::format("unknown character '{}'", ch), + }; +} + +auto Parser::parse_val() -> Res> +{ + if (not this->cur.ok()) + return this->cur.err(); + auto cur = this->cur.val(); + switch (cur.typ) { + case TokTyp::Eof: + return Err { + .pos = cur.pos, + .msg = "expected value, got eof", + }; + case TokTyp::String: { + auto value = this->lexer.val(cur.val_id); + step(); + return Res>(std::make_unique(value)); + } + case TokTyp::Float: { + auto value = cur.float_val; + step(); + return Res>(std::make_unique(value)); + } + case TokTyp::False: { + step(); + return Res>(std::make_unique(false)); + } + case TokTyp::True: { + step(); + return Res>(std::make_unique(true)); + } + case TokTyp::Null: { + step(); + return Res>(std::make_unique()); + } + case TokTyp::LBrace: { + step(); + ObjectFields fields; + while (curtyp() != TokTyp::RBrace) { } + if (curtyp() != TokTyp::RBrace) { + return Err { + .pos = this->cur.val().pos, + .msg + = std::format("malformed object, expected '}', got '{}'", + tok_typ_to_string(this->cur.val().typ)), + }; + } + } + case TokTyp::RBrace: + case TokTyp::LBracket: + case TokTyp::RBracket: + case TokTyp::Comma: + case TokTyp::Colon: + return Err { + .pos = cur.pos, + .msg = std::format("expected value, got '{}' token", + tok_typ_to_string(cur.typ)), + }; + break; + } } diff --git a/runtime/json.hpp b/runtime/json.hpp index af472cf..0d39e52 100644 --- a/runtime/json.hpp +++ b/runtime/json.hpp @@ -2,14 +2,80 @@ #include #include -#include #include #include #include +#include +#include #include namespace sliger::json { +struct Pos { + int line; + int col; +}; + +struct Err { + Pos pos; + std::string msg; +}; + +template class Res { +public: + Res(T value) + : maybe_value(std::move(value)) + , maybe_error() + { + } + Res(Err error) + : maybe_value() + , maybe_error(std::move(error)) + { + } + + inline auto ok() const -> bool { return this->maybe_value.has_value(); } + + inline auto val() const& -> const T& { return this->maybe_value.value(); } + inline auto val() & -> T& { return this->maybe_value.value(); } + inline auto val() && -> T&& { return std::move(this->maybe_value.value()); } + + inline auto err() const& -> const Err& { return this->maybe_error.value(); } + inline auto err() & -> Err& { return this->maybe_error.value(); } + inline auto err() && -> Err&& + { + return std::move(this->maybe_error.value()); + } + +private: + std::optional maybe_value; + std::optional maybe_error; +}; + +template <> class Res { +public: + Res() + : maybe_error() + { + } + Res(Err error) + : maybe_error(std::move(error)) + { + } + + inline auto ok() const -> bool { return not this->maybe_error.has_value(); } + + inline auto err() const& -> const Err& { return this->maybe_error.value(); } + inline auto err() & -> Err& { return this->maybe_error.value(); } + inline auto err() && -> Err&& + { + return std::move(this->maybe_error.value()); + } + +private: + std::optional maybe_error; +}; + enum class Type { Null, String, @@ -36,40 +102,58 @@ struct Null final : public Value { }; struct String final : public Value { + String(std::string value) + : value(std::move(value)) + { + } auto type() const -> Type override { return Type::String; } std::string value; }; struct Number final : public Value { + Number(double value) + : value(value) + { + } auto type() const -> Type override { return Type::Number; } double value; }; struct Bool final : public Value { + Bool(bool value) + : value(value) + { + } auto type() const -> Type override { return Type::Bool; } bool value; }; +using ArrayValues = std::vector>; struct Array final : public Value { + Array(ArrayValues values) + : values(std::move(values)) + { + } auto type() const -> Type override { return Type::Array; } - std::vector> values; + ArrayValues values; }; +using ObjectFields = std::unordered_map>; struct Object final : public Value { + Object(ObjectFields fields) + : fields(std::move(fields)) + { + } auto type() const -> Type override { return Type::Object; } -}; -struct Pos { - int line; - int col; + ObjectFields fields; }; enum class TokTyp { - Error, Eof, String, Float, @@ -84,29 +168,64 @@ enum class TokTyp { Colon, }; +inline auto tok_typ_to_string(TokTyp typ) -> std::string +{ + switch (typ) { + case TokTyp::Eof: + return "Eof"; + case TokTyp::String: + return "String"; + case TokTyp::Float: + return "Float"; + case TokTyp::False: + return "False"; + case TokTyp::True: + return "True"; + case TokTyp::Null: + return "Null"; + case TokTyp::LBrace: + return "LBrace"; + case TokTyp::RBrace: + return "RBrace"; + case TokTyp::LBracket: + return "LBracket"; + case TokTyp::RBracket: + return "RBracket"; + case TokTyp::Comma: + return "Comma"; + case TokTyp::Colon: + return "Colon"; + } +} + struct Tok { - Tok(TokTyp typ) + Tok(TokTyp typ, Pos pos) : typ(typ) + , pos(pos) , val_id(0) { } - Tok(TokTyp typ, size_t val_id) + Tok(TokTyp typ, Pos pos, size_t val_id) : typ(typ) + , pos(pos) , val_id(val_id) { } - Tok(TokTyp typ, double float_val) + Tok(TokTyp typ, Pos pos, double float_val) : typ(typ) + , pos(pos) , float_val(float_val) { } - Tok(TokTyp typ, bool bool_val) + Tok(TokTyp typ, Pos pos, bool bool_val) : typ(typ) + , pos(pos) , bool_val(bool_val) { } TokTyp typ; + Pos pos; union { size_t val_id; double float_val; @@ -121,13 +240,28 @@ public: { } - auto next() -> Tok; + auto next() -> Res; inline auto pos() const -> Pos { return { this->line, this->col }; } + inline auto val(size_t val_id) const -> const std::string& + { + return this->strs_vals.at(val_id); + } + private: inline void step() { this->i += 1; } + inline auto intern_str(std::string val) -> size_t + { + if (this->strs_val_id_map.contains(val)) + return strs_val_id_map.at(val); + auto id = this->strs_vals.size(); + this->strs_vals.push_back(val); + this->strs_val_id_map.insert_or_assign(val, id); + return id; + } + inline auto test_in(std::string_view chs) const -> bool { for (auto ch : chs) @@ -143,6 +277,8 @@ private: size_t i = 0; int line = 1; int col = 1; + std::unordered_map strs_val_id_map; + std::vector strs_vals; }; class Parser { @@ -153,9 +289,20 @@ public: { } + auto parse_val() -> Res>; + private: + inline auto step() -> Res + { + auto tok = this->lexer.next(); + if (not tok.ok()) + return tok.err(); + return {}; + } + inline auto curtyp() const -> TokTyp { return this->cur.val().typ; } + Lexer lexer; - Tok cur; + Res cur; }; struct Jsonable {