olisp/src/parser.c
2023-07-28 03:17:10 +02:00

351 lines
9.6 KiB
C

#include "parser.h"
#include "lexer.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Initializes `vec` as an empty vector with a freshly allocated buffer of
 * eight Expr slots. The buffer pointer is checked with ASSERT.
 */
void expr_vec_construct(ExprVec* vec)
{
    const size_t initial_slots = 8;
    Expr* buffer = malloc(initial_slots * sizeof(Expr));
    *vec = (ExprVec) {
        .exprs = buffer,
        .length = 0,
        .capacity = initial_slots,
    };
    ASSERT(vec->exprs);
}
/*
 * Destroys every stored expression, first to last, and then frees the
 * vector's buffer. `vec` itself is not freed.
 */
void expr_vec_destroy(ExprVec* vec)
{
    size_t index = 0;
    while (index < vec->length) {
        expr_destroy(vec->exprs + index);
        index += 1;
    }
    free(vec->exprs);
}
/*
 * Appends `expr` to the end of `vec`, growing the buffer when it is full.
 * The expression is copied in by value; the buffer pointer returned by
 * realloc is checked with ASSERT.
 */
void expr_vec_push(ExprVec* vec, Expr expr)
{
    if (vec->length >= vec->capacity) {
        /*
         * Doubling a capacity of 0 would stay 0 and the store below would
         * write past the buffer, so an empty vector starts at 8 slots.
         */
        const size_t grown_capacity
            = vec->capacity > 0 ? vec->capacity * 2 : 8;
        Expr* new_buffer = realloc(vec->exprs, grown_capacity * sizeof(Expr));
        ASSERT(new_buffer);
        /* Only commit the new size once the new buffer is known to exist. */
        vec->exprs = new_buffer;
        vec->capacity = grown_capacity;
    }
    vec->exprs[vec->length] = expr;
    vec->length += 1;
}
/*
 * Appends a textual form of `vec` to `acc`: the stringified expressions
 * separated by ", " and wrapped in square brackets.
 */
void expr_vec_stringify(const ExprVec* vec, String* acc)
{
    string_append_cstr(acc, "[");
    for (size_t i = 0; i < vec->length; ++i) {
        /* The separator goes between elements, never before the first. */
        if (i != 0) {
            string_append_cstr(acc, ", ");
        }
        expr_stringify(&vec->exprs[i], acc);
    }
    string_append_cstr(acc, "]");
}
/* Builds an ExprTypeError expression at `pos` with a zeroed payload. */
Expr error_expr_construct(Pos pos)
{
    Expr result = { ExprTypeError, pos, { 0 } };
    return result;
}
/* Builds an ExprTypeId expression at `pos` holding `value` as its name. */
Expr id_expr_construct(Pos pos, String value)
{
    Expr result = { ExprTypeId, pos, .id_value = value };
    return result;
}
/* Builds an ExprTypeInt expression at `pos` holding `value`. */
Expr int_expr_construct(Pos pos, int64_t value)
{
    Expr result = { ExprTypeInt, pos, .int_value = value };
    return result;
}
/* Builds an ExprTypeChar expression at `pos` holding `value`. */
Expr char_expr_construct(Pos pos, char value)
{
    Expr result = { ExprTypeChar, pos, .char_value = value };
    return result;
}
/* Builds an ExprTypeString expression at `pos` holding `value`. */
Expr string_expr_construct(Pos pos, String value)
{
    Expr result = { ExprTypeString, pos, .string_value = value };
    return result;
}
/* Builds an ExprTypeList expression at `pos` holding the elements `exprs`. */
Expr list_expr_construct(Pos pos, ExprVec exprs)
{
    Expr result = { ExprTypeList, pos, .list = exprs };
    return result;
}
/* Builds an ExprTypeS expression at `pos` holding the elements `exprs`. */
Expr s_expr_construct(Pos pos, ExprVec exprs)
{
    Expr result = { ExprTypeS, pos, .s = exprs };
    return result;
}
/*
 * Releases whatever payload `expr` holds, chosen by its type tag: the String
 * of an id or string expression, or the element vector of a list or
 * s-expression. `expr` itself is not freed.
 */
void expr_destroy(Expr* expr)
{
    switch (expr->type) {
        case ExprTypeId:
            string_destroy(&expr->id_value);
            return;
        case ExprTypeString:
            string_destroy(&expr->string_value);
            return;
        case ExprTypeList:
            expr_vec_destroy(&expr->list);
            return;
        case ExprTypeS:
            expr_vec_destroy(&expr->s);
            return;
        case ExprTypeError:
        case ExprTypeInt:
        case ExprTypeChar:
            /* Nothing is released for these variants. */
            return;
    }
}
void expr_stringify(const Expr* expr, String* acc)
{
switch (expr->type) {
case ExprTypeError:
string_append_formatted(acc, "Error");
break;
case ExprTypeId:
string_append_formatted(acc, "Id(%s)", expr->id_value.data);
break;
case ExprTypeInt:
string_append_formatted(acc, "Int(\'%ld\')", expr->int_value);
break;
case ExprTypeChar:
string_append_formatted(acc, "Char(\'%c\')", expr->char_value);
break;
case ExprTypeString:
string_append_formatted(
acc, "String(\"%s\")", expr->string_value.data
);
break;
case ExprTypeList:
string_append_cstr(acc, "[");
if (expr->list.length > 0) {
expr_stringify(&expr->list.exprs[0], acc);
for (size_t i = 1; i < expr->list.length; ++i) {
string_append_cstr(acc, " ");
expr_stringify(&expr->list.exprs[i], acc);
}
}
string_append_cstr(acc, "]");
break;
case ExprTypeS:
string_append_cstr(acc, "(");
if (expr->s.length > 0) {
expr_stringify(&expr->s.exprs[0], acc);
for (size_t i = 1; i < expr->s.length; ++i) {
string_append_cstr(acc, " ");
expr_stringify(&expr->s.exprs[i], acc);
}
}
string_append_cstr(acc, ")");
break;
}
}
/*
 * PARSER_ADD_ERROR(PARSER, POS, format, ...)
 *
 * Builds an error message by forwarding the variadic arguments to
 * string_append_formatted, wraps it in an Error located at POS, and adds it
 * to (PARSER)->errors through error_collector_add.
 *
 * The expansion is a bare `{ ... }` block rather than `do { ... } while (0)`,
 * so inside a braced block a call works with or without a trailing `;`.
 * Placed directly between an unbraced `if` and its `else`, a trailing `;`
 * would detach the `else`.
 *
 * The block declares locals named `error_message` and `error`; arguments
 * that refer to outer variables with those names would be shadowed.
 */
#define PARSER_ADD_ERROR(PARSER, POS, ...) \
{ \
String error_message; \
string_construct(&error_message); \
string_append_formatted(&error_message, __VA_ARGS__); \
Error error; \
error_construct(&error, (POS), error_message); \
error_collector_add((PARSER)->errors, error); \
}
/*
 * Creates a parser over `lexer` that reads token text through `reader` and
 * reports problems to `errors`. The first token is fetched immediately so
 * that `current` is valid as soon as the parser is returned.
 */
Parser
parser_create(const CharReader* reader, Lexer lexer, ErrorCollector* errors)
{
    Parser parser = {
        .reader = reader,
        .lexer = lexer,
        .errors = errors,
    };
    /* Advance the parser's own lexer copy to obtain the first token. */
    parser.current = lexer_next(&parser.lexer);
    return parser;
}
/*
 * Parses expressions until the current token is TokenTypeEof and returns
 * them, in source order, as a newly constructed ExprVec.
 */
ExprVec parser_parse(Parser* parser)
{
    ExprVec program;
    expr_vec_construct(&program);
    for (;;) {
        if (parser->current.type == TokenTypeEof) {
            break;
        }
        Expr next = parser_parse_expr(parser);
        expr_vec_push(&program, next);
    }
    return program;
}
/*
 * Parses one expression, dispatching on the type of the current token.
 *
 * A token that cannot start an expression is reported as an error and
 * skipped, and an error expression at that token's position is returned.
 */
Expr parser_parse_expr(Parser* parser)
{
    Pos start = parser->current.pos;
    switch (parser->current.type) {
        case TokenTypeId:
            return parser_parse_id(parser);
        case TokenTypeInt:
            return parser_parse_int(parser);
        case TokenTypeChar:
            return parser_parse_char(parser);
        case TokenTypeString:
            return parser_parse_string(parser);
        case TokenTypeLBracket:
            return parser_parse_list(parser);
        case TokenTypeLParen:
            return parser_parse_s(parser);
        default:
            break;
    }

    /* No expression starts here: report it, then skip the token. */
    PARSER_ADD_ERROR(
        parser,
        start,
        "expected value, got `%s`",
        token_type_value(parser->current.type)
    );
    parser_step(parser);
    return error_expr_construct(start);
}
/*
 * Parses the current identifier token: copies its source text into a new
 * String, advances past the token and returns an id expression that holds
 * that String.
 */
Expr parser_parse_id(Parser* parser)
{
    const Token id_token = parser->current;
    String name;
    string_from_slice(
        &name,
        parser->reader->value(
            parser->reader, id_token.pos.index, id_token.length
        )
    );
    parser_step(parser);
    return id_expr_construct(id_token.pos, name);
}
/*
 * Parses the current integer token into an int expression and advances past
 * it.
 *
 * The token text is copied into a temporary String for conversion. A literal
 * too large to represent is reported through the parser's error collector;
 * the returned expression then carries the clamped value produced by
 * strtoll.
 */
Expr parser_parse_int(Parser* parser)
{
    Token token = parser->current;
    String text;
    string_from_slice(
        &text,
        parser->reader->value(parser->reader, token.pos.index, token.length)
    );

    /*
     * atol() returns a long, which can be narrower than int64_t, and its
     * behavior is undefined when the value does not fit. strtoll() covers
     * the full 64-bit range and signals overflow through errno.
     */
    errno = 0;
    long long parsed = strtoll(text.data, NULL, 10);
    if (errno == ERANGE) {
        PARSER_ADD_ERROR(
            parser,
            token.pos,
            "integer literal `%s` is out of range",
            text.data
        );
    }

    string_destroy(&text);
    parser_step(parser);
    return int_expr_construct(token.pos, (int64_t)parsed);
}
/*
 * Parses the current character token into a char expression and advances
 * past it.
 *
 * The byte at index 1 of the token text is the value, unless it is a
 * backslash: then the byte at index 2 selects an escape (0, t, r, n) or is
 * taken literally.
 * NOTE(review): indexes 1 and 2 are read without a length check; presumably
 * the lexer only emits well-formed char tokens - confirm.
 */
Expr parser_parse_char(Parser* parser)
{
    const Token char_token = parser->current;
    const StringSlice text = parser->reader->value(
        parser->reader, char_token.pos.index, char_token.length
    );

    char value = text.data[1];
    if (value == '\\') {
        const char escape = text.data[2];
        if (escape == '0') {
            value = '\0';
        } else if (escape == 't') {
            value = '\t';
        } else if (escape == 'r') {
            value = '\r';
        } else if (escape == 'n') {
            value = '\n';
        } else {
            /* Unknown escapes stand for the escaped character itself. */
            value = escape;
        }
    }

    parser_step(parser);
    return char_expr_construct(char_token.pos, value);
}
/*
 * Maps the character following a backslash to the character it denotes.
 * Characters without a special meaning denote themselves.
 */
static char parser_escape_value(char code)
{
    switch (code) {
        case '0':
            return '\0';
        case 't':
            return '\t';
        case 'r':
            return '\r';
        case 'n':
            return '\n';
        default:
            return code;
    }
}

/*
 * Parses the current string token into a string expression and advances
 * past it.
 *
 * The token text is scanned from index 1, resolving backslash escapes, and
 * the decoded characters are collected in a new String that the returned
 * expression holds.
 */
Expr parser_parse_string(Parser* parser)
{
    Token token = parser->current;
    StringSlice text
        = parser->reader->value(parser->reader, token.pos.index, token.length);
    String value;
    string_construct(&value);

    /*
     * `text.length - 2` wraps around to a huge value for tokens shorter
     * than two bytes, which would send the loop far past the slice. Such
     * tokens now yield an empty string instead.
     *
     * NOTE(review): stopping at length - 2 skips the last two bytes of the
     * token, not only a closing quote. Confirm against the length the lexer
     * reports for string tokens; the bound is kept as it was.
     */
    const size_t end = text.length >= 2 ? text.length - 2 : 0;
    for (size_t i = 1; i < end; ++i) {
        char current = text.data[i];
        if (current == '\\') {
            /* Consume the backslash and decode the character after it. */
            i += 1;
            current = parser_escape_value(text.data[i]);
        }
        string_append_char(&value, current);
    }

    parser_step(parser);
    return string_expr_construct(token.pos, value);
}
/*
 * Parses a bracketed list. The current token is the opening bracket; it is
 * skipped, then expressions are collected until a closing bracket or end of
 * input.
 *
 * A missing closing bracket is reported at the position of the opening one.
 * The elements gathered so far are returned as a list expression either way.
 */
Expr parser_parse_list(Parser* parser)
{
    Pos open_pos = parser->current.pos;
    parser_step(parser);

    ExprVec elements;
    expr_vec_construct(&elements);
    while (!(parser->current.type == TokenTypeEof
        || parser->current.type == TokenTypeRBracket)) {
        Expr element = parser_parse_expr(parser);
        expr_vec_push(&elements, element);
    }

    if (parser->current.type == TokenTypeRBracket) {
        /* Consume the closing bracket. */
        parser_step(parser);
    } else {
        PARSER_ADD_ERROR(
            parser,
            open_pos,
            "expected `]`, got `%s`",
            token_type_value(parser->current.type)
        );
    }
    return list_expr_construct(open_pos, elements);
}
/*
 * Parses a parenthesized s-expression. The current token is the opening
 * parenthesis; it is skipped, then expressions are collected until a closing
 * parenthesis or end of input.
 *
 * A missing closing parenthesis is reported at the position of the opening
 * one. The elements gathered so far are returned as an s-expression either
 * way.
 */
Expr parser_parse_s(Parser* parser)
{
    Pos pos = parser->current.pos;
    parser_step(parser);
    ExprVec exprs;
    expr_vec_construct(&exprs);
    while (parser->current.type != TokenTypeEof
        && parser->current.type != TokenTypeRParen) {
        expr_vec_push(&exprs, parser_parse_expr(parser));
    }
    if (parser->current.type != TokenTypeRParen) {
        /* The message previously named `]`, copied from the list parser. */
        PARSER_ADD_ERROR(
            parser,
            pos,
            "expected `)`, got `%s`",
            token_type_value(parser->current.type)
        );
    } else {
        parser_step(parser);
    }
    return s_expr_construct(pos, exprs);
}
/* Advances the parser by replacing `current` with the lexer's next token. */
void parser_step(Parser* parser)
{
    Token next = lexer_next(&parser->lexer);
    parser->current = next;
}