This commit is contained in:
SimonFJ20 2024-11-15 15:22:57 +01:00
parent b8b9a08229
commit 77483438d8
2 changed files with 253 additions and 34 deletions

View File

@ -2,6 +2,7 @@
#include <cstdlib>
#include <format>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
@ -20,12 +21,12 @@ auto id_tail_chars = "abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"1234567890";
auto Lexer::next() -> Tok
auto Lexer::next() -> Res<Tok>
{
if (done()) {
return TokTyp::Eof;
}
auto pos = this->pos();
if (done()) {
return Tok(TokTyp::Eof, pos);
}
if (test('"')) {
step();
auto value = std::string();
@ -54,6 +55,16 @@ auto Lexer::next() -> Tok
}
step();
}
if (!test('"')) {
return Err {
.pos = pos,
.msg
= std::format("malformed string, expected '\"', got '{}' token",
this->cur()),
};
}
step();
return Tok(TokTyp::String, pos, intern_str(value));
}
auto step_n_ret = [&](auto tok) {
step();
@ -61,19 +72,19 @@ auto Lexer::next() -> Tok
};
switch (cur()) {
case '0':
return step_n_ret(Tok(TokTyp::Float, 0.0));
return step_n_ret(Tok(TokTyp::Float, pos, 0.0));
case '{':
return step_n_ret(TokTyp::LBrace);
return step_n_ret(Tok(TokTyp::LBrace, pos));
case '}':
return step_n_ret(TokTyp::RBrace);
return step_n_ret(Tok(TokTyp::RBrace, pos));
case '[':
return step_n_ret(TokTyp::LBracket);
return step_n_ret(Tok(TokTyp::LBracket, pos));
case ']':
return step_n_ret(TokTyp::RBracket);
return step_n_ret(Tok(TokTyp::RBracket, pos));
case ',':
return step_n_ret(TokTyp::Comma);
return step_n_ret(Tok(TokTyp::Comma, pos));
case ':':
return step_n_ret(TokTyp::Colon);
return step_n_ret(Tok(TokTyp::Colon, pos));
}
if (test_in(id_start_chars)) {
auto value = std::string();
@ -82,12 +93,12 @@ auto Lexer::next() -> Tok
step();
}
if (ident_tok_typs.find(value) == ident_tok_typs.end()) {
std::cerr << std::format("sliger::json::Lexer error: unknown "
"identifier \"{}\" at {}:{}",
value, pos.col, pos.line);
return TokTyp::Error;
return Err {
.pos = pos,
.msg = std::format("unknown identifier \"{}\"", value),
};
}
return ident_tok_typs.at(value);
return Tok(ident_tok_typs.at(value), pos);
}
if (test_in("123456789")) {
auto value_str = std::string();
@ -96,11 +107,72 @@ auto Lexer::next() -> Tok
step();
}
auto value = std::atof(value_str.c_str());
return Tok(TokTyp::Float, value);
return Tok(TokTyp::Float, pos, value);
}
std::cerr << std::format(
"sliger::json::Lexer error: unknown character '{}' at {}:{}", cur(),
this->line, this->col);
auto ch = cur();
step();
return TokTyp::Error;
return Err {
.pos = pos,
.msg = std::format("unknown character '{}'", ch),
};
}
/// Parses one JSON value starting at the current lookahead token.
///
/// Handles strings, numbers, `true`/`false`/`null`, objects and arrays.
/// On success the lookahead (`this->cur`) is left on the first token after
/// the parsed value.
///
/// Returns an `Err` when the lookahead already holds a lexer error, when the
/// lexer fails while advancing, or when the token sequence does not form a
/// value (the error position is that of the offending token).
auto Parser::parse_val() -> Res<std::unique_ptr<Value>>
{
    using ValRes = Res<std::unique_ptr<Value>>;

    // Fetches the next token from the lexer into `this->cur` and reports a
    // lexer error, if any. After a successful call `this->cur` holds a token.
    auto advance = [this]() -> Res<void> {
        this->cur = this->lexer.next();
        if (not this->cur.ok())
            return this->cur.err();
        return {};
    };

    // Builds the error for an unexpected lookahead token inside an aggregate.
    // `expected` is passed as an argument (not embedded in the format string)
    // so that literal braces such as '}' need no escaping.
    auto malformed
        = [this](std::string_view what, std::string_view expected) -> Err {
        const auto& got = this->cur.val();
        return Err {
            .pos = got.pos,
            .msg = std::format("malformed {}, expected {}, got '{}'", what,
                expected, tok_typ_to_string(got.typ)),
        };
    };

    if (not this->cur.ok())
        return this->cur.err();
    const auto tok = this->cur.val();

    switch (tok.typ) {
        case TokTyp::Eof:
            return Err {
                .pos = tok.pos,
                .msg = "expected value, got eof",
            };
        case TokTyp::String: {
            // Copy the text out before advancing: the reference returned by
            // the lexer points into storage that may grow on later tokens.
            auto value = this->lexer.val(tok.val_id);
            if (auto adv = advance(); not adv.ok())
                return adv.err();
            return ValRes(std::make_unique<String>(std::move(value)));
        }
        case TokTyp::Float: {
            auto value = tok.float_val;
            if (auto adv = advance(); not adv.ok())
                return adv.err();
            return ValRes(std::make_unique<Number>(value));
        }
        case TokTyp::False: {
            if (auto adv = advance(); not adv.ok())
                return adv.err();
            return ValRes(std::make_unique<Bool>(false));
        }
        case TokTyp::True: {
            if (auto adv = advance(); not adv.ok())
                return adv.err();
            return ValRes(std::make_unique<Bool>(true));
        }
        case TokTyp::Null: {
            if (auto adv = advance(); not adv.ok())
                return adv.err();
            return ValRes(std::make_unique<Null>());
        }
        case TokTyp::LBrace: {
            // Skip '{'.
            if (auto adv = advance(); not adv.ok())
                return adv.err();
            ObjectFields fields;
            if (curtyp() != TokTyp::RBrace) {
                while (true) {
                    // key
                    if (curtyp() != TokTyp::String)
                        return malformed("object", "string key");
                    auto key = this->lexer.val(this->cur.val().val_id);
                    if (auto adv = advance(); not adv.ok())
                        return adv.err();
                    // ':'
                    if (curtyp() != TokTyp::Colon)
                        return malformed("object", "':'");
                    if (auto adv = advance(); not adv.ok())
                        return adv.err();
                    // value
                    auto field = parse_val();
                    if (not field.ok())
                        return field.err();
                    // A repeated key keeps the last value seen.
                    fields.insert_or_assign(
                        std::move(key), std::move(field).val());
                    // ',' continues the object; anything else must be '}'.
                    if (curtyp() != TokTyp::Comma)
                        break;
                    if (auto adv = advance(); not adv.ok())
                        return adv.err();
                }
            }
            if (curtyp() != TokTyp::RBrace)
                return malformed("object", "'}'");
            // Skip '}'.
            if (auto adv = advance(); not adv.ok())
                return adv.err();
            return ValRes(std::make_unique<Object>(std::move(fields)));
        }
        case TokTyp::LBracket: {
            // Skip '['.
            if (auto adv = advance(); not adv.ok())
                return adv.err();
            ArrayValues values;
            if (curtyp() != TokTyp::RBracket) {
                while (true) {
                    auto element = parse_val();
                    if (not element.ok())
                        return element.err();
                    values.push_back(std::move(element).val());
                    // ',' continues the array; anything else must be ']'.
                    if (curtyp() != TokTyp::Comma)
                        break;
                    if (auto adv = advance(); not adv.ok())
                        return adv.err();
                }
            }
            if (curtyp() != TokTyp::RBracket)
                return malformed("array", "']'");
            // Skip ']'.
            if (auto adv = advance(); not adv.ok())
                return adv.err();
            return ValRes(std::make_unique<Array>(std::move(values)));
        }
        case TokTyp::RBrace:
        case TokTyp::RBracket:
        case TokTyp::Comma:
        case TokTyp::Colon:
            return Err {
                .pos = tok.pos,
                .msg = std::format("expected value, got '{}' token",
                    tok_typ_to_string(tok.typ)),
            };
    }
    // Only reachable for a token type not listed above; never fall off the
    // end of a value-returning function.
    return Err {
        .pos = tok.pos,
        .msg = "expected value, got unknown token",
    };
}

View File

@ -2,14 +2,80 @@
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace sliger::json {
// A position in the source text, as a line/column pair.
// The Lexer in this file initialises both of its counters to 1.
struct Pos {
int line;
int col;
};
// An error report: where it was detected and a human-readable message.
struct Err {
// Position the error refers to.
Pos pos;
// Description of what went wrong.
std::string msg;
};
// Outcome of an operation that yields a `T` on success or an `Err` on
// failure. Implicitly constructible from either, so functions can simply
// `return value;` or `return Err { ... };`.
//
// `val()` and `err()` go through `std::optional::value()`, so asking for the
// alternative that is not present throws `std::bad_optional_access`.
template <typename T> class Res {
public:
    // Success: takes ownership of `value`; no error is stored.
    Res(T value)
        : maybe_value(std::move(value))
    {
    }

    // Failure: stores `error`; no value is stored.
    Res(Err error)
        : maybe_error(std::move(error))
    {
    }

    // True when a value is held.
    auto ok() const -> bool { return maybe_value.has_value(); }

    // Access to the held value, preserving the value category of `*this`.
    auto val() const& -> const T& { return maybe_value.value(); }
    auto val() & -> T& { return maybe_value.value(); }
    auto val() && -> T&& { return std::move(maybe_value.value()); }

    // Access to the held error, preserving the value category of `*this`.
    auto err() const& -> const Err& { return maybe_error.value(); }
    auto err() & -> Err& { return maybe_error.value(); }
    auto err() && -> Err&& { return std::move(maybe_error.value()); }

private:
    std::optional<T> maybe_value;
    std::optional<Err> maybe_error;
};
// Specialisation for operations that produce no value: the result is either
// plain success (default-constructed) or an `Err`.
//
// `err()` goes through `std::optional::value()`, so calling it on a
// successful result throws `std::bad_optional_access`.
template <> class Res<void> {
public:
    // Success: no error stored.
    Res() = default;

    // Failure: stores `error`.
    Res(Err error)
        : maybe_error(std::move(error))
    {
    }

    // True when no error is held.
    auto ok() const -> bool { return not maybe_error.has_value(); }

    // Access to the held error, preserving the value category of `*this`.
    auto err() const& -> const Err& { return maybe_error.value(); }
    auto err() & -> Err& { return maybe_error.value(); }
    auto err() && -> Err&& { return std::move(maybe_error.value()); }

private:
    std::optional<Err> maybe_error;
};
enum class Type {
Null,
String,
@ -36,40 +102,58 @@ struct Null final : public Value {
};
// JSON string value.
struct String final : public Value {
    // The string's text.
    std::string value;

    // Takes ownership of `value`.
    String(std::string value)
        : value(std::move(value))
    {
    }

    auto type() const -> Type override { return Type::String; }
};
// JSON number value, stored as a double.
struct Number final : public Value {
    // The numeric value.
    double value;

    Number(double value)
        : value(value)
    {
    }

    auto type() const -> Type override { return Type::Number; }
};
// JSON boolean value.
struct Bool final : public Value {
    // The truth value.
    bool value;

    Bool(bool value)
        : value(value)
    {
    }

    auto type() const -> Type override { return Type::Bool; }
};
using ArrayValues = std::vector<std::unique_ptr<Value>>;
// JSON array value: an ordered sequence of owned child values.
struct Array final : public Value {
    // Takes ownership of `values`.
    Array(ArrayValues values)
        : values(std::move(values))
    {
    }

    auto type() const -> Type override { return Type::Array; }

    // The elements, in source order. Declared exactly once: the older
    // `std::vector<std::unique_ptr<Value>> values;` spelling is the same type
    // as `ArrayValues` and must not be repeated next to it.
    ArrayValues values;
};
using ObjectFields = std::unordered_map<std::string, std::unique_ptr<Value>>;
// JSON object value: owned child values keyed by field name.
struct Object final : public Value {
    // Takes ownership of `fields`.
    Object(ObjectFields fields)
        : fields(std::move(fields))
    {
    }

    auto type() const -> Type override { return Type::Object; }

    // The object's fields. This member belongs to Object itself; `Pos` is
    // defined once near the top of the namespace and is not repeated here.
    ObjectFields fields;
};
enum class TokTyp {
Error,
Eof,
String,
Float,
@ -84,29 +168,64 @@ enum class TokTyp {
Colon,
};
// Returns the enumerator name of `typ` (e.g. "LBrace"), for use in
// diagnostics. Any value not listed below yields "Unknown" instead of
// flowing off the end of the function.
inline auto tok_typ_to_string(TokTyp typ) -> std::string
{
    switch (typ) {
        case TokTyp::Eof:
            return "Eof";
        case TokTyp::String:
            return "String";
        case TokTyp::Float:
            return "Float";
        case TokTyp::False:
            return "False";
        case TokTyp::True:
            return "True";
        case TokTyp::Null:
            return "Null";
        case TokTyp::LBrace:
            return "LBrace";
        case TokTyp::RBrace:
            return "RBrace";
        case TokTyp::LBracket:
            return "LBracket";
        case TokTyp::RBracket:
            return "RBracket";
        case TokTyp::Comma:
            return "Comma";
        case TokTyp::Colon:
            return "Colon";
    }
    // No `default:` above, so the compiler can still warn when a new
    // enumerator is added without a case.
    return "Unknown";
}
struct Tok {
Tok(TokTyp typ)
Tok(TokTyp typ, Pos pos)
: typ(typ)
, pos(pos)
, val_id(0)
{
}
Tok(TokTyp typ, size_t val_id)
Tok(TokTyp typ, Pos pos, size_t val_id)
: typ(typ)
, pos(pos)
, val_id(val_id)
{
}
Tok(TokTyp typ, double float_val)
Tok(TokTyp typ, Pos pos, double float_val)
: typ(typ)
, pos(pos)
, float_val(float_val)
{
}
Tok(TokTyp typ, bool bool_val)
Tok(TokTyp typ, Pos pos, bool bool_val)
: typ(typ)
, pos(pos)
, bool_val(bool_val)
{
}
TokTyp typ;
Pos pos;
union {
size_t val_id;
double float_val;
@ -121,13 +240,28 @@ public:
{
}
auto next() -> Tok;
auto next() -> Res<Tok>;
// Current line/column of the lexer as a Pos.
inline auto pos() const -> Pos
{
    return Pos { .line = this->line, .col = this->col };
}
// Looks up the string stored under `val_id` (an id handed out by
// intern_str). Uses `at`, so an id that is out of range throws
// `std::out_of_range`.
inline auto val(size_t val_id) const -> const std::string&
{
    const auto& text = this->strs_vals.at(val_id);
    return text;
}
private:
inline void step() { this->i += 1; }
// Returns the id under which `val` is stored, adding it to the table first
// if it has not been seen before. Ids are indices into `strs_vals`, so equal
// strings always map to the same id.
inline auto intern_str(std::string val) -> size_t
{
    const auto known = this->strs_val_id_map.find(val);
    if (known != this->strs_val_id_map.end())
        return known->second;

    // New string: its id is the slot it is about to occupy.
    const auto fresh_id = this->strs_vals.size();
    this->strs_vals.push_back(val);
    this->strs_val_id_map.insert_or_assign(std::move(val), fresh_id);
    return fresh_id;
}
inline auto test_in(std::string_view chs) const -> bool
{
for (auto ch : chs)
@ -143,6 +277,8 @@ private:
size_t i = 0;
int line = 1;
int col = 1;
std::unordered_map<std::string, size_t> strs_val_id_map;
std::vector<std::string> strs_vals;
};
class Parser {
@ -153,9 +289,20 @@ public:
{
}
auto parse_val() -> Res<std::unique_ptr<Value>>;
private:
// Advances the parser by one token: pulls the next token from the lexer and
// stores it in `cur`, replacing the previous lookahead.
//
// Returns success when a token was produced, or the lexer's `Err` otherwise.
// In the error case `cur` holds that same error, so later checks of
// `cur.ok()` also see it.
inline auto step() -> Res<void>
{
    this->cur = this->lexer.next();
    if (not this->cur.ok())
        return this->cur.err();
    return {};
}
// Type of the current lookahead token. Goes through `Res::val()`, so `cur`
// must hold a token rather than an error when this is called.
inline auto curtyp() const -> TokTyp
{
    const auto& tok = this->cur.val();
    return tok.typ;
}
Lexer lexer;
Tok cur;
Res<Tok> cur;
};
struct Jsonable {