#include "parser.h"
#include "lexer.h"
#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Number of Expr slots allocated by expr_vec_construct; also the capacity
 * expr_vec_push falls back to when it meets a vector whose capacity is 0. */
enum { EXPR_VEC_STARTING_CAPACITY = 8 };

/**
 * Initializes `vec` as an empty vector with room for
 * EXPR_VEC_STARTING_CAPACITY expressions. The allocation result is checked
 * with ASSERT.
 */
void expr_vec_construct(ExprVec* vec)
{
    *vec = (ExprVec) {
        .exprs = malloc(EXPR_VEC_STARTING_CAPACITY * sizeof(Expr)),
        .length = 0,
        .capacity = EXPR_VEC_STARTING_CAPACITY,
    };
    ASSERT(vec->exprs);
}

/**
 * Destroys every stored expression with expr_destroy and frees the buffer.
 * The vector is left empty (NULL buffer, zero length and capacity) so that a
 * stale handle cannot reach the freed memory.
 */
void expr_vec_destroy(ExprVec* vec)
{
    for (size_t i = 0; i < vec->length; ++i) {
        expr_destroy(&vec->exprs[i]);
    }
    free(vec->exprs);
    vec->exprs = NULL;
    vec->length = 0;
    vec->capacity = 0;
}

/**
 * Appends `expr` to `vec`, doubling the buffer when it is full.
 * `expr` is copied by value into the vector; expr_vec_destroy later passes
 * the stored copy to expr_destroy.
 */
void expr_vec_push(ExprVec* vec, Expr expr)
{
    if (vec->length >= vec->capacity) {
        // Doubling 0 would stay 0 and the store below would write out of
        // bounds, so a zero-capacity vector restarts at the default size.
        const size_t new_capacity = vec->capacity > 0
            ? vec->capacity * 2
            : EXPR_VEC_STARTING_CAPACITY;
        Expr* new_buffer = realloc(vec->exprs, new_capacity * sizeof(Expr));
        ASSERT(new_buffer);
        // Commit the new size only once the reallocation has succeeded.
        vec->exprs = new_buffer;
        vec->capacity = new_capacity;
    }
    vec->exprs[vec->length] = expr;
    vec->length += 1;
}

/**
 * Appends `[e0, e1, ...]` to `acc`, rendering each element with
 * expr_stringify. An empty vector is rendered as `[]`.
 */
void expr_vec_stringify(const ExprVec* vec, String* acc)
{
    string_append_cstr(acc, "[");
    if (vec->length > 0) {
        expr_stringify(&vec->exprs[0], acc);
        for (size_t i = 1; i < vec->length; ++i) {
            string_append_cstr(acc, ", ");
            expr_stringify(&vec->exprs[i], acc);
        }
    }
    string_append_cstr(acc, "]");
}

/** Builds an error expression at `pos`; the payload is zero-initialized. */
Expr error_expr_construct(Pos pos)
{
    return (Expr) { ExprTypeError, pos, { 0 } };
}

/** Builds an identifier expression at `pos` holding `value`. */
Expr id_expr_construct(Pos pos, String value)
{
    return (Expr) { ExprTypeId, pos, .id_value = value };
}

/** Builds an integer expression at `pos` holding `value`. */
Expr int_expr_construct(Pos pos, int64_t value)
{
    return (Expr) { ExprTypeInt, pos, .int_value = value };
}

/** Builds a character expression at `pos` holding `value`. */
Expr char_expr_construct(Pos pos, char value)
{
    return (Expr) { ExprTypeChar, pos, .char_value = value };
}

/** Builds a string expression at `pos` holding `value`. */
Expr string_expr_construct(Pos pos, String value)
{
    return (Expr) { ExprTypeString, pos, .string_value = value };
}

/** Builds a list expression (`[...]`) at `pos` holding `exprs`. */
Expr list_expr_construct(Pos pos, ExprVec exprs)
{
    return (Expr) { ExprTypeList, pos, .list = exprs };
}

/** Builds an S-expression (`(...)`) at `pos` holding `exprs`. */
Expr s_expr_construct(Pos pos, ExprVec exprs)
{
    return (Expr) { ExprTypeS, pos, .s = exprs };
}

/**
 * Releases whatever `expr` holds: the String of an id or string expression
 * (string_destroy), or the child vector of a list or S-expression
 * (expr_vec_destroy). Error, int and char expressions need no cleanup.
 */
void expr_destroy(Expr* expr)
{
    switch (expr->type) {
        case ExprTypeError:
            break;
        case ExprTypeId:
            string_destroy(&expr->id_value);
            break;
        case ExprTypeInt:
        case ExprTypeChar:
            break;
        case ExprTypeString:
            string_destroy(&expr->string_value);
            break;
        case ExprTypeList:
            expr_vec_destroy(&expr->list);
            break;
        case ExprTypeS:
            expr_vec_destroy(&expr->s);
            break;
    }
}

/* Appends the elements of `vec` to `acc`, separated by single spaces and
 * without surrounding delimiters. Shared by the list and S-expression cases
 * of expr_stringify. */
static void expr_vec_stringify_spaced(const ExprVec* vec, String* acc)
{
    for (size_t i = 0; i < vec->length; ++i) {
        if (i > 0) {
            string_append_cstr(acc, " ");
        }
        expr_stringify(&vec->exprs[i], acc);
    }
}

/**
 * Appends a textual rendering of `expr` to `acc`:
 * `Error`, `Id(name)`, `Int('n')`, `Char('c')`, `String("text")`,
 * `[a b c]` for lists and `(a b c)` for S-expressions.
 */
void expr_stringify(const Expr* expr, String* acc)
{
    switch (expr->type) {
        case ExprTypeError:
            string_append_formatted(acc, "Error");
            break;
        case ExprTypeId:
            string_append_formatted(acc, "Id(%s)", expr->id_value.data);
            break;
        case ExprTypeInt:
            // int_value is int64_t; PRId64 is the matching conversion on
            // every platform, whereas %ld is only right where long is 64-bit.
            string_append_formatted(
                acc, "Int(\'%" PRId64 "\')", expr->int_value
            );
            break;
        case ExprTypeChar:
            string_append_formatted(acc, "Char(\'%c\')", expr->char_value);
            break;
        case ExprTypeString:
            string_append_formatted(
                acc, "String(\"%s\")", expr->string_value.data
            );
            break;
        case ExprTypeList:
            string_append_cstr(acc, "[");
            expr_vec_stringify_spaced(&expr->list, acc);
            string_append_cstr(acc, "]");
            break;
        case ExprTypeS:
            string_append_cstr(acc, "(");
            expr_vec_stringify_spaced(&expr->s, acc);
            string_append_cstr(acc, ")");
            break;
    }
}

/*
 * Formats a message printf-style, wraps it in an Error located at POS and
 * adds it to the parser's error collector.
 * Wrapped in do { } while (0) so that an invocation followed by `;` is
 * exactly one statement, including inside an unbraced if/else.
 */
#define PARSER_ADD_ERROR(PARSER, POS, ...)                                     \
    do {                                                                       \
        String error_message;                                                  \
        string_construct(&error_message);                                      \
        string_append_formatted(&error_message, __VA_ARGS__);                  \
        Error error;                                                           \
        error_construct(&error, (POS), error_message);                         \
        error_collector_add((PARSER)->errors, error);                          \
    } while (0)

/**
 * Creates a parser over `lexer`, reading source text through `reader` and
 * reporting problems to `errors`. The first token is read immediately so
 * that `current` is valid as soon as the parser exists.
 */
Parser
parser_create(const CharReader* reader, Lexer lexer, ErrorCollector* errors)
{
    // `lexer` is a by-value copy; advance it first so the state stored in the
    // parser is already positioned after the first token.
    Token first = lexer_next(&lexer);
    return (Parser) {
        .reader = reader,
        .lexer = lexer,
        .errors = errors,
        .current = first,
    };
}

/**
 * Parses expressions until the EOF token and returns them in a newly
 * constructed vector.
 */
ExprVec parser_parse(Parser* parser)
{
    ExprVec exprs;
    expr_vec_construct(&exprs);
    while (parser->current.type != TokenTypeEof) {
        expr_vec_push(&exprs, parser_parse_expr(parser));
    }
    return exprs;
}

/**
 * Parses one expression, dispatching on the current token type.
 * For any token that cannot start an expression, an error is recorded, the
 * token is skipped and an error expression is returned.
 */
Expr parser_parse_expr(Parser* parser)
{
    Pos pos = parser->current.pos;
    switch (parser->current.type) {
        case TokenTypeId:
            return parser_parse_id(parser);
        case TokenTypeInt:
            return parser_parse_int(parser);
        case TokenTypeChar:
            return parser_parse_char(parser);
        case TokenTypeString:
            return parser_parse_string(parser);
        case TokenTypeLBracket:
            return parser_parse_list(parser);
        case TokenTypeLParen:
            return parser_parse_s(parser);
        default:
            PARSER_ADD_ERROR(
                parser,
                pos,
                "expected value, got `%s`",
                token_type_value(parser->current.type)
            );
            // Step past the offending token so callers looping on
            // parser_parse_expr always make progress.
            parser_step(parser);
            return error_expr_construct(pos);
    }
}

/** Parses the current token as an identifier, copying its source text. */
Expr parser_parse_id(Parser* parser)
{
    Token token = parser->current;
    StringSlice slice
        = parser->reader->value(parser->reader, token.pos.index, token.length);
    String value;
    string_from_slice(&value, slice);
    parser_step(parser);
    return id_expr_construct(token.pos, value);
}

/**
 * Parses the current token as a base-10 integer literal.
 * If the literal does not fit, strtoll clamps the value and an error is
 * recorded; an integer expression is returned either way.
 */
Expr parser_parse_int(Parser* parser)
{
    Token token = parser->current;
    String text;
    string_from_slice(
        &text,
        parser->reader->value(parser->reader, token.pos.index, token.length)
    );
    // strtoll (unlike atol) has defined behaviour on overflow and returns a
    // type at least as wide as int64_t on every platform.
    errno = 0;
    int64_t value = strtoll(text.data, NULL, 10);
    if (errno == ERANGE) {
        PARSER_ADD_ERROR(
            parser,
            token.pos,
            "integer literal `%s` is out of range",
            text.data
        );
    }
    string_destroy(&text);
    parser_step(parser);
    return int_expr_construct(token.pos, value);
}

/**
 * Parses the current token as a character literal, translating the escapes
 * `\0`, `\t`, `\r` and `\n`; any other escaped character stands for itself.
 */
Expr parser_parse_char(Parser* parser)
{
    Token token = parser->current;
    StringSlice text
        = parser->reader->value(parser->reader, token.pos.index, token.length);
    // NOTE(review): indexes 1 and 2 are read without checking text.length;
    // this presumably relies on the lexer only emitting well-formed char
    // tokens (delimiter at index 0) -- confirm against the lexer.
    char value = text.data[1];
    if (value == '\\') {
        switch (text.data[2]) {
            case '0':
                value = '\0';
                break;
            case 't':
                value = '\t';
                break;
            case 'r':
                value = '\r';
                break;
            case 'n':
                value = '\n';
                break;
            default:
                value = text.data[2];
                break;
        }
    }
    parser_step(parser);
    return char_expr_construct(token.pos, value);
}

/**
 * Parses the current token as a string literal, translating the escapes
 * `\0`, `\t`, `\r` and `\n`; any other escaped character stands for itself.
 */
Expr parser_parse_string(Parser* parser)
{
    Token token = parser->current;
    StringSlice text
        = parser->reader->value(parser->reader, token.pos.index, token.length);
    String value;
    string_construct(&value);
    // NOTE(review): the loop covers indexes 1 .. text.length - 3. If
    // token.length spans exactly both quotes, the last content character is
    // at text.length - 2 and is skipped here; the bound also wraps around if
    // text.length < 2. Left unchanged because the lexer's token length
    // convention is not visible from this file -- verify against the lexer.
    for (size_t i = 1; i < text.length - 2; ++i) {
        if (text.data[i] == '\\') {
            i += 1;
            switch (text.data[i]) {
                case '0':
                    string_append_char(&value, '\0');
                    break;
                case 't':
                    string_append_char(&value, '\t');
                    break;
                case 'r':
                    string_append_char(&value, '\r');
                    break;
                case 'n':
                    string_append_char(&value, '\n');
                    break;
                default:
                    string_append_char(&value, text.data[i]);
                    break;
            }
        } else {
            string_append_char(&value, text.data[i]);
        }
    }
    parser_step(parser);
    return string_expr_construct(token.pos, value);
}

/**
 * Parses a bracketed list: the current token (the opening bracket, as
 * dispatched by parser_parse_expr) is skipped, then expressions are collected
 * until `]` or EOF. A missing `]` is recorded as an error at the position of
 * the opening token; the expressions parsed so far are still returned.
 */
Expr parser_parse_list(Parser* parser)
{
    Pos pos = parser->current.pos;
    parser_step(parser);
    ExprVec exprs;
    expr_vec_construct(&exprs);
    while (parser->current.type != TokenTypeEof
           && parser->current.type != TokenTypeRBracket) {
        expr_vec_push(&exprs, parser_parse_expr(parser));
    }
    if (parser->current.type != TokenTypeRBracket) {
        PARSER_ADD_ERROR(
            parser,
            pos,
            "expected `]`, got `%s`",
            token_type_value(parser->current.type)
        );
    } else {
        parser_step(parser);
    }
    return list_expr_construct(pos, exprs);
}

/**
 * Parses a parenthesized S-expression: the current token (the opening
 * parenthesis, as dispatched by parser_parse_expr) is skipped, then
 * expressions are collected until `)` or EOF. A missing `)` is recorded as an
 * error at the position of the opening token; the expressions parsed so far
 * are still returned.
 */
Expr parser_parse_s(Parser* parser)
{
    Pos pos = parser->current.pos;
    parser_step(parser);
    ExprVec exprs;
    expr_vec_construct(&exprs);
    while (parser->current.type != TokenTypeEof
           && parser->current.type != TokenTypeRParen) {
        expr_vec_push(&exprs, parser_parse_expr(parser));
    }
    if (parser->current.type != TokenTypeRParen) {
        // The closing delimiter of an S-expression is `)`, not `]`.
        PARSER_ADD_ERROR(
            parser,
            pos,
            "expected `)`, got `%s`",
            token_type_value(parser->current.type)
        );
    } else {
        parser_step(parser);
    }
    return s_expr_construct(pos, exprs);
}

/** Advances the parser to the next token produced by its lexer. */
void parser_step(Parser* parser)
{
    parser->current = lexer_next(&parser->lexer);
}