351 lines
9.6 KiB
C
351 lines
9.6 KiB
C
#include "parser.h"
#include "lexer.h"
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
|
|
|
|
// Initialises `vec` as an empty vector with a heap buffer sized for eight
// expressions. The allocation result is checked with ASSERT after the whole
// struct has been assigned.
void expr_vec_construct(ExprVec* vec)
{
    const size_t initial_capacity = 8;
    Expr* const buffer = malloc(initial_capacity * sizeof(Expr));
    *vec = (ExprVec) {
        .exprs = buffer,
        .length = 0,
        .capacity = initial_capacity,
    };
    ASSERT(vec->exprs);
}
|
|
// Destroys every stored expression, front to back, and then frees the
// vector's buffer. The ExprVec struct itself is not freed or reset.
void expr_vec_destroy(ExprVec* vec)
{
    size_t index = 0;
    while (index < vec->length) {
        expr_destroy(&vec->exprs[index]);
        index += 1;
    }
    free(vec->exprs);
}
|
|
|
|
// Appends `expr` to the end of `vec`, growing the buffer when it is full.
//
// The buffer capacity doubles on growth. A vector whose capacity is zero is
// grown to a small fixed capacity instead, because doubling zero would leave
// no room for the element stored below. The new capacity is only committed
// once the reallocation has been checked with ASSERT.
void expr_vec_push(ExprVec* vec, Expr expr)
{
    if (vec->length >= vec->capacity) {
        const size_t fallback_capacity = 8;
        const size_t new_capacity
            = vec->capacity > 0 ? vec->capacity * 2 : fallback_capacity;
        Expr* new_buffer = realloc(vec->exprs, new_capacity * sizeof(Expr));
        ASSERT(new_buffer);
        vec->exprs = new_buffer;
        vec->capacity = new_capacity;
    }
    vec->exprs[vec->length] = expr;
    vec->length += 1;
}
|
|
|
|
// Appends a textual form of `vec` to `acc`: the stringified elements,
// separated by ", " and wrapped in square brackets. An empty vector yields
// "[]".
void expr_vec_stringify(const ExprVec* vec, String* acc)
{
    string_append_cstr(acc, "[");
    for (size_t index = 0; index < vec->length; ++index) {
        // The separator goes between elements only, never before the first.
        if (index != 0) {
            string_append_cstr(acc, ", ");
        }
        expr_stringify(&vec->exprs[index], acc);
    }
    string_append_cstr(acc, "]");
}
|
|
|
|
// Builds an expression tagged ExprTypeError at `pos` with a zeroed payload.
Expr error_expr_construct(Pos pos)
{
    Expr result = { ExprTypeError, pos, { 0 } };
    return result;
}
|
|
|
|
// Builds an expression tagged ExprTypeId at `pos` that stores `value` in
// `id_value`. expr_destroy calls string_destroy on that field.
Expr id_expr_construct(Pos pos, String value)
{
    Expr result = { ExprTypeId, pos, .id_value = value };
    return result;
}
|
|
|
|
// Builds an expression tagged ExprTypeInt at `pos` that stores `value` in
// `int_value`.
Expr int_expr_construct(Pos pos, int64_t value)
{
    Expr result = { ExprTypeInt, pos, .int_value = value };
    return result;
}
|
|
|
|
// Builds an expression tagged ExprTypeChar at `pos` that stores `value` in
// `char_value`.
Expr char_expr_construct(Pos pos, char value)
{
    Expr result = { ExprTypeChar, pos, .char_value = value };
    return result;
}
|
|
|
|
// Builds an expression tagged ExprTypeString at `pos` that stores `value` in
// `string_value`. expr_destroy calls string_destroy on that field.
Expr string_expr_construct(Pos pos, String value)
{
    Expr result = { ExprTypeString, pos, .string_value = value };
    return result;
}
|
|
|
|
// Builds an expression tagged ExprTypeList at `pos` that stores `exprs` in
// `list`. expr_destroy calls expr_vec_destroy on that field.
Expr list_expr_construct(Pos pos, ExprVec exprs)
{
    Expr result = { ExprTypeList, pos, .list = exprs };
    return result;
}
|
|
|
|
// Builds an expression tagged ExprTypeS at `pos` that stores `exprs` in `s`.
// expr_destroy calls expr_vec_destroy on that field.
Expr s_expr_construct(Pos pos, ExprVec exprs)
{
    Expr result = { ExprTypeS, pos, .s = exprs };
    return result;
}
|
|
|
|
// Releases whatever payload `expr` holds, chosen by its type tag:
// identifiers and strings destroy their String, lists and s-expressions
// destroy their ExprVec, and the remaining kinds need no cleanup.
void expr_destroy(Expr* expr)
{
    switch (expr->type) {
    case ExprTypeId:
        string_destroy(&expr->id_value);
        return;
    case ExprTypeString:
        string_destroy(&expr->string_value);
        return;
    case ExprTypeList:
        expr_vec_destroy(&expr->list);
        return;
    case ExprTypeS:
        expr_vec_destroy(&expr->s);
        return;
    case ExprTypeError:
    case ExprTypeInt:
    case ExprTypeChar:
        // Nothing to release for these kinds.
        return;
    }
}
|
|
|
|
// Appends a textual form of `expr` to `acc`, chosen by the expression's type:
//   Error  -> Error
//   Id     -> Id(<text>)
//   Int    -> Int('<decimal>')
//   Char   -> Char('<char>')
//   String -> String("<text>")
//   List   -> [<elements separated by single spaces>]
//   S      -> (<elements separated by single spaces>)
// Elements of lists and s-expressions are stringified recursively.
void expr_stringify(const Expr* expr, String* acc)
{
    switch (expr->type) {
    case ExprTypeError:
        string_append_formatted(acc, "Error");
        break;
    case ExprTypeId:
        string_append_formatted(acc, "Id(%s)", expr->id_value.data);
        break;
    case ExprTypeInt:
        // PRId64 is the conversion that matches int64_t on every platform;
        // a plain %ld is only correct where int64_t happens to be long.
        string_append_formatted(acc, "Int(\'%" PRId64 "\')", expr->int_value);
        break;
    case ExprTypeChar:
        string_append_formatted(acc, "Char(\'%c\')", expr->char_value);
        break;
    case ExprTypeString:
        string_append_formatted(
            acc, "String(\"%s\")", expr->string_value.data
        );
        break;
    case ExprTypeList:
        string_append_cstr(acc, "[");
        if (expr->list.length > 0) {
            expr_stringify(&expr->list.exprs[0], acc);
            for (size_t i = 1; i < expr->list.length; ++i) {
                string_append_cstr(acc, " ");
                expr_stringify(&expr->list.exprs[i], acc);
            }
        }
        string_append_cstr(acc, "]");
        break;
    case ExprTypeS:
        string_append_cstr(acc, "(");
        if (expr->s.length > 0) {
            expr_stringify(&expr->s.exprs[0], acc);
            for (size_t i = 1; i < expr->s.length; ++i) {
                string_append_cstr(acc, " ");
                expr_stringify(&expr->s.exprs[i], acc);
            }
        }
        string_append_cstr(acc, ")");
        break;
    }
}
|
|
|
|
// Formats the variadic arguments into a new String, wraps it in an Error
// located at POS and adds that Error to PARSER's error collector.
//
// Expands to a plain braced block, so it is usable both with and without a
// trailing semicolon inside a braced `if` body. The temporaries carry a
// trailing underscore to keep them apart from names used in the arguments.
#define PARSER_ADD_ERROR(PARSER, POS, ...)                                     \
    {                                                                          \
        String parser_error_text_;                                             \
        string_construct(&parser_error_text_);                                 \
        string_append_formatted(&parser_error_text_, __VA_ARGS__);             \
        Error parser_error_;                                                   \
        error_construct(&parser_error_, (POS), parser_error_text_);            \
        error_collector_add((PARSER)->errors, parser_error_);                  \
    }
|
|
|
|
Parser
|
|
parser_create(const CharReader* reader, Lexer lexer, ErrorCollector* errors)
|
|
{
|
|
Token first = lexer_next(&lexer);
|
|
return (Parser) {
|
|
.reader = reader,
|
|
.lexer = lexer,
|
|
.errors = errors,
|
|
.current = first,
|
|
};
|
|
}
|
|
|
|
// Parses expressions until the current token is TokenTypeEof and returns them
// in source order. The returned vector is created with expr_vec_construct.
ExprVec parser_parse(Parser* parser)
{
    ExprVec program;
    expr_vec_construct(&program);
    for (;;) {
        if (parser->current.type == TokenTypeEof) {
            break;
        }
        Expr parsed = parser_parse_expr(parser);
        expr_vec_push(&program, parsed);
    }
    return program;
}
|
|
|
|
// Parses one expression, dispatching on the type of the current token.
//
// A token that cannot start a value is reported as an error and skipped, and
// an error expression located at that token is returned in its place.
Expr parser_parse_expr(Parser* parser)
{
    switch (parser->current.type) {
    case TokenTypeId:
        return parser_parse_id(parser);
    case TokenTypeInt:
        return parser_parse_int(parser);
    case TokenTypeChar:
        return parser_parse_char(parser);
    case TokenTypeString:
        return parser_parse_string(parser);
    case TokenTypeLBracket:
        return parser_parse_list(parser);
    case TokenTypeLParen:
        return parser_parse_s(parser);
    default:
        break;
    }

    // Remember where the offending token was before stepping past it.
    Pos unexpected_pos = parser->current.pos;
    PARSER_ADD_ERROR(
        parser,
        unexpected_pos,
        "expected value, got `%s`",
        token_type_value(parser->current.type)
    );
    parser_step(parser);
    return error_expr_construct(unexpected_pos);
}
|
|
|
|
// Turns the current token into an identifier expression and advances to the
// next token. The identifier text is copied out of the slice the reader
// returns for the token's index and length.
Expr parser_parse_id(Parser* parser)
{
    Token id_token = parser->current;
    String name;
    string_from_slice(
        &name,
        parser->reader->value(
            parser->reader, id_token.pos.index, id_token.length
        )
    );
    parser_step(parser);
    return id_expr_construct(id_token.pos, name);
}
|
|
|
|
// Turns the current token into an integer expression and advances to the
// next token.
//
// The token text is copied into a temporary String and converted as a base-10
// number with strtoll. Unlike atol, strtoll has defined behaviour on overflow
// and its result type is at least 64 bits wide. A literal that does not fit
// is reported through the error collector; the expression is still returned,
// carrying the clamped value strtoll produced.
Expr parser_parse_int(Parser* parser)
{
    Token token = parser->current;
    String text;
    string_from_slice(
        &text,
        parser->reader->value(parser->reader, token.pos.index, token.length)
    );
    // errno must be cleared first: strtoll only sets it on failure.
    errno = 0;
    const long long parsed = strtoll(text.data, NULL, 10);
    if (errno == ERANGE) {
        PARSER_ADD_ERROR(
            parser,
            token.pos,
            "integer literal `%s` is out of range",
            text.data
        );
    }
    int64_t value = (int64_t)parsed;
    string_destroy(&text);
    parser_step(parser);
    return int_expr_construct(token.pos, value);
}
|
|
|
|
// Turns the current token into a character expression and advances to the
// next token.
//
// The character is read from index 1 of the token text. If it is a backslash,
// the character at index 2 selects the value: `0`, `t`, `r` and `n` map to
// NUL, tab, carriage return and newline; anything else stands for itself.
Expr parser_parse_char(Parser* parser)
{
    Token literal = parser->current;
    StringSlice source = parser->reader->value(
        parser->reader, literal.pos.index, literal.length
    );
    char decoded = source.data[1];
    if (decoded == '\\') {
        const char escaped = source.data[2];
        if (escaped == '0') {
            decoded = '\0';
        } else if (escaped == 't') {
            decoded = '\t';
        } else if (escaped == 'r') {
            decoded = '\r';
        } else if (escaped == 'n') {
            decoded = '\n';
        } else {
            decoded = escaped;
        }
    }
    parser_step(parser);
    return char_expr_construct(literal.pos, decoded);
}
|
|
|
|
// Turns the current token into a string expression and advances to the next
// token.
//
// Characters are copied from the token text starting at index 1. A backslash
// consumes the following character as an escape: `0`, `t`, `r` and `n` become
// NUL, tab, carriage return and newline; any other character stands for
// itself.
Expr parser_parse_string(Parser* parser)
{
    Token token = parser->current;
    StringSlice text
        = parser->reader->value(parser->reader, token.pos.index, token.length);
    String value;
    string_construct(&value);
    // Computing `text.length - 2` directly yields a huge bound when the token
    // is shorter than two characters, which would read far past the token.
    // Clamp it to zero so that such a token produces an empty string.
    //
    // NOTE(review): stopping at `length - 2` leaves the last content character
    // out if the token spans exactly both quotes — confirm against the
    // lexer's token length before changing the bound.
    const size_t end = text.length >= 2 ? text.length - 2 : 0;
    for (size_t i = 1; i < end; ++i) {
        char decoded = text.data[i];
        if (decoded == '\\') {
            // Skip the backslash and translate the character after it.
            i += 1;
            switch (text.data[i]) {
            case '0':
                decoded = '\0';
                break;
            case 't':
                decoded = '\t';
                break;
            case 'r':
                decoded = '\r';
                break;
            case 'n':
                decoded = '\n';
                break;
            default:
                decoded = text.data[i];
                break;
            }
        }
        string_append_char(&value, decoded);
    }
    parser_step(parser);
    return string_expr_construct(token.pos, value);
}
|
|
|
|
// Parses a bracketed list. The current token is taken to be the opening
// bracket and is skipped; expressions are then collected until a closing
// bracket or the end of input.
//
// A closing bracket is consumed. If the input ends first, an error located at
// the opening bracket is recorded, and the elements gathered so far are still
// returned as a list expression.
Expr parser_parse_list(Parser* parser)
{
    Pos open_pos = parser->current.pos;
    parser_step(parser);

    ExprVec items;
    expr_vec_construct(&items);
    while (!(parser->current.type == TokenTypeEof
             || parser->current.type == TokenTypeRBracket)) {
        Expr item = parser_parse_expr(parser);
        expr_vec_push(&items, item);
    }

    if (parser->current.type == TokenTypeRBracket) {
        parser_step(parser);
    } else {
        PARSER_ADD_ERROR(
            parser,
            open_pos,
            "expected `]`, got `%s`",
            token_type_value(parser->current.type)
        );
    }
    return list_expr_construct(open_pos, items);
}
|
|
|
|
// Parses a parenthesised s-expression. The current token is taken to be the
// opening parenthesis and is skipped; expressions are then collected until a
// closing parenthesis or the end of input.
//
// A closing parenthesis is consumed. If the input ends first, an error
// located at the opening parenthesis is recorded, and the elements gathered
// so far are still returned as an s-expression.
Expr parser_parse_s(Parser* parser)
{
    Pos pos = parser->current.pos;
    parser_step(parser);
    ExprVec exprs;
    expr_vec_construct(&exprs);
    while (parser->current.type != TokenTypeEof
           && parser->current.type != TokenTypeRParen) {
        expr_vec_push(&exprs, parser_parse_expr(parser));
    }
    if (parser->current.type != TokenTypeRParen) {
        // The missing delimiter here is `)`, not the `]` of a list.
        PARSER_ADD_ERROR(
            parser,
            pos,
            "expected `)`, got `%s`",
            token_type_value(parser->current.type)
        );
    } else {
        parser_step(parser);
    }
    return s_expr_construct(pos, exprs);
}
|
|
|
|
// Advances the parser by one token: the next token from the parser's lexer
// becomes the current token.
void parser_step(Parser* parser)
{
    Token next = lexer_next(&parser->lexer);
    parser->current = next;
}
|