slige/runtime/json.cpp

239 lines
7.6 KiB
C++
Raw Normal View History

2024-11-12 10:59:42 +00:00
#include "json.hpp"
#include <cstdlib>
#include <format>
2024-11-15 14:22:57 +00:00
#include <memory>
2024-11-12 10:59:42 +00:00
#include <string>
#include <unordered_map>
using namespace sliger::json;
// Keyword spellings recognised by the lexer, mapped to their token types.
// Read-only lookup table, hence const.
const auto ident_tok_typs = std::unordered_map<std::string, TokTyp> {
    { "null", TokTyp::Null },
    { "false", TokTyp::False },
    { "true", TokTyp::True },
};

// Characters that may begin an identifier.
constexpr auto id_start_chars = "abcdefghijklmnopqrstuvwxyz"
                                "ABCDEFGHIJKLMNOPQRSTUVWXYZ_";

// Characters that may continue an identifier. This must be a superset of
// id_start_chars: the lexer collects the whole identifier (including its first
// character) by scanning with this set, so a start character missing here
// would never be consumed.
constexpr auto id_tail_chars = "abcdefghijklmnopqrstuvwxyz"
                               "ABCDEFGHIJKLMNOPQRSTUVWXYZ_"
                               "1234567890";
2024-11-15 14:22:57 +00:00
// Lexes and returns the next token of the input.
//
// Returns a Tok on success (TokTyp::Eof once the input is exhausted), or an
// Err whose position is where the offending token started.
//
// Recognised tokens: double-quoted strings with backslash escapes, the
// punctuation `{ } [ ] , :`, the keywords listed in ident_tok_typs, and
// unsigned integer literals (produced as TokTyp::Float).
auto Lexer::next() -> Res<Tok>
{
    // JSON permits insignificant whitespace between tokens; skip it so that
    // it is not reported as an unknown character and so that `pos` points at
    // the token itself.
    while (!done() and test_in(" \t\r\n")) {
        step();
    }
    auto pos = this->pos();
    if (done()) {
        return Tok(TokTyp::Eof, pos);
    }
    if (test('"')) {
        step();
        auto value = std::string();
        while (!done() and !test('"')) {
            if (cur() == '\\') {
                step();
                if (done())
                    break;
                value.push_back([&] {
                    char ch = cur();
                    switch (ch) {
                        case 'b':
                            return '\b';
                        case 'f':
                            return '\f';
                        case 'n':
                            return '\n';
                        case 'r':
                            return '\r';
                        case 't':
                            return '\t';
                        case '0':
                            return '\0';
                        default:
                            // Covers `\"`, `\\` and `\/`; any other escaped
                            // character is taken literally.
                            return ch;
                    }
                }());
            } else {
                value.push_back(cur());
            }
            step();
        }
        // The loop above only stops on a closing quote or at end of input.
        // Without a closing quote we are at end of input, so cur() must not
        // be read here.
        if (done()) {
            return Err {
                .pos = pos,
                .msg = "malformed string, expected '\"', got eof",
            };
        }
        step();
        return Tok(TokTyp::String, pos, intern_str(value));
    }
    auto step_n_ret = [&](auto tok) {
        step();
        return tok;
    };
    switch (cur()) {
        case '0':
            return step_n_ret(Tok(TokTyp::Float, pos, 0.0));
        case '{':
            return step_n_ret(Tok(TokTyp::LBrace, pos));
        case '}':
            return step_n_ret(Tok(TokTyp::RBrace, pos));
        case '[':
            return step_n_ret(Tok(TokTyp::LBracket, pos));
        case ']':
            return step_n_ret(Tok(TokTyp::RBracket, pos));
        case ',':
            return step_n_ret(Tok(TokTyp::Comma, pos));
        case ':':
            return step_n_ret(Tok(TokTyp::Colon, pos));
    }
    if (test_in(id_start_chars)) {
        auto value = std::string();
        // Consume the start character unconditionally. This guarantees
        // forward progress even if it is not also a tail character, so an
        // error never leaves the lexer stuck on the same input position.
        value.push_back(cur());
        step();
        while (test_in(id_tail_chars)) {
            value.push_back(cur());
            step();
        }
        const auto found = ident_tok_typs.find(value);
        if (found == ident_tok_typs.end()) {
            return Err {
                .pos = pos,
                .msg = std::format("unknown identifier \"{}\"", value),
            };
        }
        return Tok(found->second, pos);
    }
    if (test_in("123456789")) {
        // Only a run of decimal digits is collected; sign, fraction and
        // exponent are not handled here.
        auto value_str = std::string();
        while (test_in("1234567890")) {
            value_str.push_back(cur());
            step();
        }
        auto value = std::atof(value_str.c_str());
        return Tok(TokTyp::Float, pos, value);
    }
    // Unknown character: step past it so a subsequent call makes progress.
    auto ch = cur();
    step();
    return Err {
        .pos = pos,
        .msg = std::format("unknown character '{}'", ch),
    };
}
// Parses one JSON value starting at the current token.
//
// Returns the parsed value (String, Number, Bool, Null, Object or Array), or
// an Err describing the first problem encountered. On success every token
// belonging to the value, including a closing `}` or `]`, has been consumed.
auto Parser::parse_val() -> Res<std::unique_ptr<Value>>
{
    // Propagate a pending lexer error instead of inspecting a missing token.
    if (not this->cur.ok())
        return this->cur.err();
    auto tok = this->cur.val();
    switch (tok.typ) {
        case TokTyp::Eof:
            return Err {
                .pos = tok.pos,
                .msg = "expected value, got eof",
            };
        case TokTyp::String: {
            auto value = this->lexer.val(tok.val_id);
            step();
            return Res<std::unique_ptr<Value>>(
                std::make_unique<String>(value));
        }
        case TokTyp::Float: {
            auto value = tok.float_val;
            step();
            return Res<std::unique_ptr<Value>>(
                std::make_unique<Number>(value));
        }
        case TokTyp::False: {
            step();
            return Res<std::unique_ptr<Value>>(std::make_unique<Bool>(false));
        }
        case TokTyp::True: {
            step();
            return Res<std::unique_ptr<Value>>(std::make_unique<Bool>(true));
        }
        case TokTyp::Null: {
            step();
            return Res<std::unique_ptr<Value>>(std::make_unique<Null>());
        }
        case TokTyp::LBrace: {
            step();
            ObjectFields fields;
            if (curtyp() != TokTyp::RBrace) {
                // One or more `"key": value` entries separated by commas.
                while (true) {
                    if (curtyp() != TokTyp::String) {
                        return unexpected_tok_err(
                            TokTyp::String, "malformed object");
                    }
                    // Copy the key now: parsing the value below advances the
                    // lexer.
                    auto key = this->lexer.val(this->cur.val().val_id);
                    step();
                    // Key and value are separated by ':' (not ',').
                    if (curtyp() != TokTyp::Colon) {
                        return unexpected_tok_err(
                            TokTyp::Colon, "malformed object");
                    }
                    step();
                    auto value = parse_val();
                    // Bail out on failure; only a successful result has a
                    // value to move from.
                    if (not value.ok()) {
                        return value.err();
                    }
                    fields.insert_or_assign(key, std::move(value.val()));
                    if (curtyp() != TokTyp::Comma) {
                        break;
                    }
                    step();
                }
            }
            if (curtyp() != TokTyp::RBrace) {
                return unexpected_tok_err(TokTyp::RBrace, "malformed object");
            }
            // Consume the closing '}' so an enclosing container does not
            // mistake it for its own terminator.
            step();
            return Res<std::unique_ptr<Value>>(
                std::make_unique<Object>(std::move(fields)));
        }
        case TokTyp::LBracket: {
            step();
            ArrayValues values;
            if (curtyp() != TokTyp::RBracket) {
                // One or more values separated by commas.
                while (true) {
                    auto value = parse_val();
                    if (not value.ok()) {
                        return value.err();
                    }
                    values.push_back(std::move(value.val()));
                    if (curtyp() != TokTyp::Comma) {
                        break;
                    }
                    step();
                }
            }
            if (curtyp() != TokTyp::RBracket) {
                return unexpected_tok_err(TokTyp::RBracket, "malformed array");
            }
            // Consume the closing ']'.
            step();
            return Res<std::unique_ptr<Value>>(
                std::make_unique<Array>(std::move(values)));
        }
        case TokTyp::RBrace:
        case TokTyp::RBracket:
        case TokTyp::Comma:
        case TokTyp::Colon:
            return Err {
                .pos = tok.pos,
                .msg = std::format("expected value, got '{}' token",
                    tok_typ_to_string(tok.typ)),
            };
    }
    // Only reachable if tok.typ holds a value not covered by the switch.
    return Err {
        .pos = tok.pos,
        .msg = std::format(
            "internal error, could not parse '{}'", tok_typ_to_string(tok.typ)),
    };
}