This commit is contained in:
SimonFJ20 2024-11-15 15:22:57 +01:00
parent b8b9a08229
commit 77483438d8
2 changed files with 253 additions and 34 deletions

View File

@ -2,6 +2,7 @@
#include <cstdlib> #include <cstdlib>
#include <format> #include <format>
#include <iostream> #include <iostream>
#include <memory>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
@ -20,12 +21,12 @@ auto id_tail_chars = "abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"1234567890"; "1234567890";
auto Lexer::next() -> Tok auto Lexer::next() -> Res<Tok>
{ {
if (done()) {
return TokTyp::Eof;
}
auto pos = this->pos(); auto pos = this->pos();
if (done()) {
return Tok(TokTyp::Eof, pos);
}
if (test('"')) { if (test('"')) {
step(); step();
auto value = std::string(); auto value = std::string();
@ -54,6 +55,16 @@ auto Lexer::next() -> Tok
} }
step(); step();
} }
if (!test('"')) {
return Err {
.pos = pos,
.msg
= std::format("malformed string, expected '\"', got '{}' token",
this->cur()),
};
}
step();
return Tok(TokTyp::String, pos, intern_str(value));
} }
auto step_n_ret = [&](auto tok) { auto step_n_ret = [&](auto tok) {
step(); step();
@ -61,19 +72,19 @@ auto Lexer::next() -> Tok
}; };
switch (cur()) { switch (cur()) {
case '0': case '0':
return step_n_ret(Tok(TokTyp::Float, 0.0)); return step_n_ret(Tok(TokTyp::Float, pos, 0.0));
case '{': case '{':
return step_n_ret(TokTyp::LBrace); return step_n_ret(Tok(TokTyp::LBrace, pos));
case '}': case '}':
return step_n_ret(TokTyp::RBrace); return step_n_ret(Tok(TokTyp::RBrace, pos));
case '[': case '[':
return step_n_ret(TokTyp::LBracket); return step_n_ret(Tok(TokTyp::LBracket, pos));
case ']': case ']':
return step_n_ret(TokTyp::RBracket); return step_n_ret(Tok(TokTyp::RBracket, pos));
case ',': case ',':
return step_n_ret(TokTyp::Comma); return step_n_ret(Tok(TokTyp::Comma, pos));
case ':': case ':':
return step_n_ret(TokTyp::Colon); return step_n_ret(Tok(TokTyp::Colon, pos));
} }
if (test_in(id_start_chars)) { if (test_in(id_start_chars)) {
auto value = std::string(); auto value = std::string();
@ -82,12 +93,12 @@ auto Lexer::next() -> Tok
step(); step();
} }
if (ident_tok_typs.find(value) == ident_tok_typs.end()) { if (ident_tok_typs.find(value) == ident_tok_typs.end()) {
std::cerr << std::format("sliger::json::Lexer error: unknown " return Err {
"identifier \"{}\" at {}:{}", .pos = pos,
value, pos.col, pos.line); .msg = std::format("unknown identifier \"{}\"", value),
return TokTyp::Error; };
} }
return ident_tok_typs.at(value); return Tok(ident_tok_typs.at(value), pos);
} }
if (test_in("123456789")) { if (test_in("123456789")) {
auto value_str = std::string(); auto value_str = std::string();
@ -96,11 +107,72 @@ auto Lexer::next() -> Tok
step(); step();
} }
auto value = std::atof(value_str.c_str()); auto value = std::atof(value_str.c_str());
return Tok(TokTyp::Float, value); return Tok(TokTyp::Float, pos, value);
} }
std::cerr << std::format( auto ch = cur();
"sliger::json::Lexer error: unknown character '{}' at {}:{}", cur(),
this->line, this->col);
step(); step();
return TokTyp::Error; return Err {
.pos = pos,
.msg = std::format("unknown character '{}'", ch),
};
}
/// Parses one JSON value starting at the current token.
///
/// Strings, numbers, `true`, `false`, `null` and objects are turned into the
/// matching `Value` subclass. Any other token (including `[`, which is not
/// handled by this function) yields an `Err` positioned at that token.
///
/// Returns the parsed value on success. Returns an `Err` when the current
/// token is itself a lexer error, when advancing to the next token fails,
/// or when the token sequence does not form a value.
auto Parser::parse_val() -> Res<std::unique_ptr<Value>>
{
    using ValRes = Res<std::unique_ptr<Value>>;

    // A lexer failure stored in `cur` is reported unchanged.
    if (not this->cur.ok())
        return this->cur.err();

    // Work on a copy of the token: step() may replace `this->cur`.
    const auto tok = this->cur.val();

    switch (tok.typ) {
    case TokTyp::Eof:
        return Err {
            .pos = tok.pos,
            .msg = "expected value, got eof",
        };
    case TokTyp::String: {
        auto text = this->lexer.val(tok.val_id);
        if (auto stepped = step(); not stepped.ok())
            return stepped.err();
        return ValRes(std::make_unique<String>(std::move(text)));
    }
    case TokTyp::Float: {
        const auto number = tok.float_val;
        if (auto stepped = step(); not stepped.ok())
            return stepped.err();
        return ValRes(std::make_unique<Number>(number));
    }
    case TokTyp::False: {
        if (auto stepped = step(); not stepped.ok())
            return stepped.err();
        return ValRes(std::make_unique<Bool>(false));
    }
    case TokTyp::True: {
        if (auto stepped = step(); not stepped.ok())
            return stepped.err();
        return ValRes(std::make_unique<Bool>(true));
    }
    case TokTyp::Null: {
        if (auto stepped = step(); not stepped.ok())
            return stepped.err();
        return ValRes(std::make_unique<Null>());
    }
    case TokTyp::LBrace: {
        if (auto stepped = step(); not stepped.ok())
            return stepped.err();

        ObjectFields fields;
        if (curtyp() != TokTyp::RBrace) {
            // One iteration per `"key": value` pair. Every path through the
            // body either returns or consumes at least the checks below, so
            // the loop cannot spin on an unexpected token.
            while (true) {
                const auto key_tok = this->cur.val();
                if (key_tok.typ != TokTyp::String) {
                    return Err {
                        .pos = key_tok.pos,
                        .msg = std::format(
                            "malformed object, expected string key, got '{}'",
                            tok_typ_to_string(key_tok.typ)),
                    };
                }
                auto key = this->lexer.val(key_tok.val_id);
                if (auto stepped = step(); not stepped.ok())
                    return stepped.err();

                if (curtyp() != TokTyp::Colon) {
                    return Err {
                        .pos = this->cur.val().pos,
                        .msg = std::format(
                            "malformed object, expected ':', got '{}'",
                            tok_typ_to_string(this->cur.val().typ)),
                    };
                }
                if (auto stepped = step(); not stepped.ok())
                    return stepped.err();

                auto field = parse_val();
                if (not field.ok())
                    return field.err();
                // A repeated key overwrites the earlier entry.
                fields.insert_or_assign(
                    std::move(key), std::move(field).val());

                // A comma must be followed by another pair; anything else
                // ends the field list and is checked against '}' below.
                if (curtyp() != TokTyp::Comma)
                    break;
                if (auto stepped = step(); not stepped.ok())
                    return stepped.err();
            }
        }

        if (curtyp() != TokTyp::RBrace) {
            return Err {
                .pos = this->cur.val().pos,
                // "}}" is the std::format escape for a literal '}'.
                .msg = std::format("malformed object, expected '}}', got '{}'",
                    tok_typ_to_string(this->cur.val().typ)),
            };
        }
        if (auto stepped = step(); not stepped.ok())
            return stepped.err();
        return ValRes(std::make_unique<Object>(std::move(fields)));
    }
    case TokTyp::RBrace:
    case TokTyp::LBracket:
    case TokTyp::RBracket:
    case TokTyp::Comma:
    case TokTyp::Colon:
        return Err {
            .pos = tok.pos,
            .msg = std::format("expected value, got '{}' token",
                tok_typ_to_string(tok.typ)),
        };
    }

    // Only reachable for a TokTyp value outside the enumerators above;
    // returning here avoids flowing off the end of the function.
    return Err {
        .pos = tok.pos,
        .msg = "expected value, got unknown token",
    };
}

View File

@ -2,14 +2,80 @@
#include <concepts> #include <concepts>
#include <cstddef> #include <cstddef>
#include <cstdint>
#include <memory> #include <memory>
#include <optional> #include <optional>
#include <string> #include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector> #include <vector>
namespace sliger::json { namespace sliger::json {
/// A line/column pair identifying a location in the input.
/// Stored in every `Tok` and `Err`; `Lexer::pos()` builds one from the
/// lexer's `line` and `col` members.
struct Pos {
// Line component, copied from the lexer's `line` counter.
int line;
// Column component, copied from the lexer's `col` counter.
int col;
};
/// Error value carried by `Res` when an operation fails.
/// Built with designated initializers (`Err { .pos = ..., .msg = ... }`),
/// so the member order below is part of the interface.
struct Err {
// Location the error refers to.
Pos pos;
// Human-readable description; the position is kept separately in `pos`.
std::string msg;
};
/// Outcome of a fallible operation: either a value of type `T` or an `Err`.
///
/// Both constructors are implicit, so a function returning `Res<T>` can
/// `return` a `T` or an `Err` directly. Check `ok()` before calling `val()`
/// or `err()`: each accessor goes through `std::optional::value()` and
/// therefore throws `std::bad_optional_access` when the requested side is
/// empty.
template <typename T> class Res {
public:
    /// Success: stores `value`; the error side stays empty.
    Res(T value)
        : maybe_value(std::move(value))
    {
    }

    /// Failure: stores `error`; the value side stays empty.
    Res(Err error)
        : maybe_error(std::move(error))
    {
    }

    /// True when a value is held.
    auto ok() const -> bool { return maybe_value.has_value(); }

    /// Access to the held value, overloaded on the object's value category.
    auto val() & -> T& { return maybe_value.value(); }
    auto val() const& -> const T& { return maybe_value.value(); }
    auto val() && -> T&& { return std::move(maybe_value.value()); }

    /// Access to the held error, overloaded on the object's value category.
    auto err() & -> Err& { return maybe_error.value(); }
    auto err() const& -> const Err& { return maybe_error.value(); }
    auto err() && -> Err&& { return std::move(maybe_error.value()); }

private:
    std::optional<T> maybe_value;
    std::optional<Err> maybe_error;
};
/// `Res` specialization for operations that produce no value: the result is
/// either success (no error stored) or an `Err`.
///
/// `err()` goes through `std::optional::value()` and therefore throws
/// `std::bad_optional_access` when called on a successful result.
template <> class Res<void> {
public:
    /// Success: no error stored.
    Res() { }

    /// Failure: stores `error`.
    Res(Err error)
        : maybe_error(std::move(error))
    {
    }

    /// True when no error is held.
    auto ok() const -> bool { return !maybe_error.has_value(); }

    /// Access to the held error, overloaded on the object's value category.
    auto err() & -> Err& { return maybe_error.value(); }
    auto err() const& -> const Err& { return maybe_error.value(); }
    auto err() && -> Err&& { return std::move(maybe_error.value()); }

private:
    std::optional<Err> maybe_error;
};
enum class Type { enum class Type {
Null, Null,
String, String,
@ -36,40 +102,58 @@ struct Null final : public Value {
}; };
struct String final : public Value { struct String final : public Value {
String(std::string value)
: value(std::move(value))
{
}
auto type() const -> Type override { return Type::String; } auto type() const -> Type override { return Type::String; }
std::string value; std::string value;
}; };
struct Number final : public Value { struct Number final : public Value {
Number(double value)
: value(value)
{
}
auto type() const -> Type override { return Type::Number; } auto type() const -> Type override { return Type::Number; }
double value; double value;
}; };
struct Bool final : public Value { struct Bool final : public Value {
Bool(bool value)
: value(value)
{
}
auto type() const -> Type override { return Type::Bool; } auto type() const -> Type override { return Type::Bool; }
bool value; bool value;
}; };
using ArrayValues = std::vector<std::unique_ptr<Value>>;
struct Array final : public Value { struct Array final : public Value {
Array(ArrayValues values)
: values(std::move(values))
{
}
auto type() const -> Type override { return Type::Array; } auto type() const -> Type override { return Type::Array; }
std::vector<std::unique_ptr<Value>> values; ArrayValues values;
}; };
using ObjectFields = std::unordered_map<std::string, std::unique_ptr<Value>>;
struct Object final : public Value { struct Object final : public Value {
Object(ObjectFields fields)
: fields(std::move(fields))
{
}
auto type() const -> Type override { return Type::Object; } auto type() const -> Type override { return Type::Object; }
};
struct Pos { ObjectFields fields;
int line;
int col;
}; };
enum class TokTyp { enum class TokTyp {
Error,
Eof, Eof,
String, String,
Float, Float,
@ -84,29 +168,64 @@ enum class TokTyp {
Colon, Colon,
}; };
/// Returns the enumerator name of `typ` (e.g. "LBrace") for use in
/// diagnostics. A value that matches none of the enumerators yields
/// "Unknown" instead of flowing off the end of the function.
inline auto tok_typ_to_string(TokTyp typ) -> std::string
{
    switch (typ) {
    case TokTyp::Eof:
        return "Eof";
    case TokTyp::String:
        return "String";
    case TokTyp::Float:
        return "Float";
    case TokTyp::False:
        return "False";
    case TokTyp::True:
        return "True";
    case TokTyp::Null:
        return "Null";
    case TokTyp::LBrace:
        return "LBrace";
    case TokTyp::RBrace:
        return "RBrace";
    case TokTyp::LBracket:
        return "LBracket";
    case TokTyp::RBracket:
        return "RBracket";
    case TokTyp::Comma:
        return "Comma";
    case TokTyp::Colon:
        return "Colon";
    }
    // No `default:` label above, so the compiler can still warn when a new
    // enumerator is added without a case; this return only covers values
    // outside the enumeration.
    return "Unknown";
}
struct Tok { struct Tok {
Tok(TokTyp typ) Tok(TokTyp typ, Pos pos)
: typ(typ) : typ(typ)
, pos(pos)
, val_id(0) , val_id(0)
{ {
} }
Tok(TokTyp typ, size_t val_id) Tok(TokTyp typ, Pos pos, size_t val_id)
: typ(typ) : typ(typ)
, pos(pos)
, val_id(val_id) , val_id(val_id)
{ {
} }
Tok(TokTyp typ, double float_val) Tok(TokTyp typ, Pos pos, double float_val)
: typ(typ) : typ(typ)
, pos(pos)
, float_val(float_val) , float_val(float_val)
{ {
} }
Tok(TokTyp typ, bool bool_val) Tok(TokTyp typ, Pos pos, bool bool_val)
: typ(typ) : typ(typ)
, pos(pos)
, bool_val(bool_val) , bool_val(bool_val)
{ {
} }
TokTyp typ; TokTyp typ;
Pos pos;
union { union {
size_t val_id; size_t val_id;
double float_val; double float_val;
@ -121,13 +240,28 @@ public:
{ {
} }
auto next() -> Tok; auto next() -> Res<Tok>;
inline auto pos() const -> Pos { return { this->line, this->col }; } inline auto pos() const -> Pos { return { this->line, this->col }; }
/// Returns the string stored under `val_id` in `strs_vals`.
/// Uses `at()`, so an id that is out of range throws `std::out_of_range`.
inline auto val(size_t val_id) const -> const std::string&
{
    const auto& text = this->strs_vals.at(val_id);
    return text;
}
private: private:
inline void step() { this->i += 1; } inline void step() { this->i += 1; }
/// Returns the id for `val`, registering it first if it has not been seen.
///
/// Ids are indices into `strs_vals`; the same text always maps to the same
/// id, so `val(id)` gives the text back.
inline auto intern_str(std::string val) -> size_t
{
    // One lookup serves both the "already interned" test and the id fetch.
    if (auto found = this->strs_val_id_map.find(val);
        found != this->strs_val_id_map.end()) {
        return found->second;
    }
    const auto id = this->strs_vals.size();
    // The vector needs its own copy; the by-value parameter is then moved
    // into the map as the key instead of being copied a second time.
    this->strs_vals.push_back(val);
    this->strs_val_id_map.emplace(std::move(val), id);
    return id;
}
inline auto test_in(std::string_view chs) const -> bool inline auto test_in(std::string_view chs) const -> bool
{ {
for (auto ch : chs) for (auto ch : chs)
@ -143,6 +277,8 @@ private:
size_t i = 0; size_t i = 0;
int line = 1; int line = 1;
int col = 1; int col = 1;
std::unordered_map<std::string, size_t> strs_val_id_map;
std::vector<std::string> strs_vals;
}; };
class Parser { class Parser {
@ -153,9 +289,20 @@ public:
{ {
} }
auto parse_val() -> Res<std::unique_ptr<Value>>;
private: private:
/// Advances to the next token.
///
/// The lexer's result (token or error) is stored in `cur`, so later reads
/// of `cur` see the new token. Returns the lexer's error when lexing
/// failed, otherwise an empty success.
inline auto step() -> Res<void>
{
    // Keep the result: discarding it would leave `cur` on the old token.
    this->cur = this->lexer.next();
    if (not this->cur.ok())
        return this->cur.err();
    return {};
}
/// Type of the current token.
/// Reads `cur.val()`, which throws `std::bad_optional_access` when `cur`
/// holds an error instead of a token.
inline auto curtyp() const -> TokTyp
{
    const auto& tok = this->cur.val();
    return tok.typ;
}
Lexer lexer; Lexer lexer;
Tok cur; Res<Tok> cur;
}; };
struct Jsonable { struct Jsonable {