From 2de85bf57dcca9eeeff4288f69ad54b772ae1d6c Mon Sep 17 00:00:00 2001
From: SimonFJ20
Date: Thu, 27 Jul 2023 04:10:30 +0200
Subject: [PATCH] add parser

---
Review notes (text between `---` and the first `diff --git` is ignored when
the patch is applied):
- Line breaks and the stripped `#include <...>` targets are restored; the
  header names are inferred from the identifiers each file uses.
- Fixes: String NUL-termination and length tracking, va_list reuse, inverted
  string_equal, ErrorCollector growth, Lexer pos.index tracking, char/string
  literal lexing, list parsing, leftover debug output, unreported errors.
- The `index` blob hashes are those of the original commit and no longer match
  the revised contents.

 Makefile     |   6 +-
 src/lexer.c  | 536 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lexer.h  | 154 +++++++++++++++
 src/main.c   |  42 +++-
 src/parser.c | 358 ++++++++++++++++++++++++++++++++++
 src/parser.h |  81 ++++++++
 test.ol      |   1 +
 7 files changed, 1169 insertions(+), 9 deletions(-)
 create mode 100644 src/parser.c
 create mode 100644 src/parser.h
 create mode 100644 test.ol

diff --git a/Makefile b/Makefile
index db25fa4..a9baff9 100644
--- a/Makefile
+++ b/Makefile
@@ -8,10 +8,12 @@ C_FLAGS = \
 	-Wextra \
 	-Wpedantic \
 	-Wconversion \
-	-Wno-gnu-case-range
+	-Wno-gnu-case-range \
+	-g
 
 LINKER_FLAGS = \
-	-fsanitize=address,undefined
+	-fsanitize=address,undefined \
+	-g
 
 SOURCE_FOLDER = src
 BUILD_FOLDER = build
diff --git a/src/lexer.c b/src/lexer.c
index 723ab1d..50f33fb 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1 +1,537 @@
 #include "lexer.h"
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+bool string_slice_equal(StringSlice slice, const char* data)
+{
+    if (strlen(data) != slice.length) {
+        return false;
+    }
+    return strncmp(data, slice.data, slice.length) == 0;
+}
+
+void string_construct(String* string)
+{
+    *string = (String) {
+        .data = NULL,
+        .length = 0,
+        .capacity = 0,
+    };
+}
+
+void string_destroy(String* string)
+{
+    free(string->data);
+    string_construct(string);
+}
+
+static const size_t string_starting_alloc_size = 8;
+
+// Ensures room for `additional` more chars plus a terminating NUL.
+static void string_reserve(String* string, size_t additional)
+{
+    size_t required = string->length + additional + 1;
+    if (required <= string->capacity) {
+        return;
+    }
+    size_t capacity = string->capacity;
+    if (capacity == 0) {
+        capacity = string_starting_alloc_size;
+    }
+    while (capacity < required) {
+        capacity *= 2;
+    }
+    // realloc(NULL, n) acts like malloc(n), so a fresh String needs no branch.
+    char* new_buffer = realloc(string->data, capacity);
+    ASSERT(new_buffer);
+    string->data = new_buffer;
+    string->capacity = capacity;
+}
+
+void string_append_char(String* string, char value)
+{
+    string_reserve(string, 1);
+    string->data[string->length] = value;
+    string->length += 1;
+    string->data[string->length] = '\0';
+}
+
+void string_from_cstr(String* string, const char* value)
+{
+    string_construct(string);
+    string_append_cstr(string, value);
+}
+
+void string_from_slice(String* string, StringSlice slice)
+{
+    string_construct(string);
+    for (size_t i = 0; i < slice.length; ++i) {
+        string_append_char(string, slice.data[i]);
+    }
+}
+
+void file_char_reader_construct(FileCharReader* reader, FILE* file)
+{
+    *reader = (FileCharReader) {
+        .next = file_char_reader_next,
+        .value = file_char_reader_value,
+        .file = file,
+        .buffer = { 0 },
+    };
+    string_construct(&reader->buffer);
+}
+
+void string_append_cstr(String* string, const char* value)
+{
+    size_t value_length = strlen(value);
+    string_reserve(string, value_length);
+    // Copy the terminator as well, so `data` stays a valid C string.
+    memcpy(&string->data[string->length], value, value_length + 1);
+    string->length += value_length;
+}
+
+void string_append_formatted(String* string, const char* format, ...)
+{
+    va_list varargs;
+    va_start(varargs, format);
+
+    // A va_list may only be traversed once, so measure with a copy.
+    va_list measure_args;
+    va_copy(measure_args, varargs);
+    int format_length = vsnprintf(NULL, 0, format, measure_args);
+    va_end(measure_args);
+    ASSERT(format_length >= 0);
+
+    string_reserve(string, (size_t)format_length);
+
+    int written = vsnprintf(
+        &string->data[string->length],
+        string->capacity - string->length,
+        format,
+        varargs
+    );
+    ASSERT(written == format_length);
+    string->length += (size_t)written;
+
+    va_end(varargs);
+}
+
+bool string_equal(const String* string, const char* value)
+{
+    size_t value_length = strlen(value);
+    if (value_length != string->length) {
+        return false;
+    }
+    // An empty String may have `data == NULL`; only compare real contents.
+    return value_length == 0 || memcmp(string->data, value, value_length) == 0;
+}
+
+void file_char_reader_destroy(FileCharReader* reader)
+{
+    string_destroy(&reader->buffer);
+}
+
+char file_char_reader_next(FileCharReader* reader)
+{
+    int read_maybe_char = fgetc(reader->file);
+    if (read_maybe_char == EOF) {
+        return '\0';
+    }
+    char read_char = (char)read_maybe_char;
+    string_append_char(&reader->buffer, read_char);
+    return (char)read_char;
+}
+
+StringSlice file_char_reader_value(
+    const FileCharReader* reader, size_t index, size_t length
+)
+{
+    ASSERT(index + length <= reader->buffer.length);
+    return (StringSlice) {
+        .data = &reader->buffer.data[index],
+        .length = length,
+    };
+}
+
+void error_construct(Error* error, Pos pos, String message)
+{
+    *error = (Error) {
+        .pos = pos,
+        .message = message,
+    };
+}
+
+void error_destroy(Error* error) { string_destroy(&error->message); }
+
+void error_collector_construct(ErrorCollector* collector)
+{
+    const size_t errors_start_capacity = 64;
+    *collector = (ErrorCollector) {
+        .errors = malloc(errors_start_capacity * sizeof(Error)),
+        .errors_length = 0,
+        .errors_capacity = errors_start_capacity,
+    };
+    ASSERT(collector->errors);
+}
+
+void error_collector_destroy(ErrorCollector* collector)
+{
+    for (size_t i = 0; i < collector->errors_length; ++i) {
+        error_destroy(&collector->errors[i]);
+    }
+    free(collector->errors);
+}
+
+void error_collector_add(ErrorCollector* collector, Error error)
+{
+    if (collector->errors_length >= collector->errors_capacity) {
+        // Double the capacity; reusing the old size would not grow the buffer.
+        size_t new_capacity = collector->errors_capacity * 2;
+        Error* new_buffer
+            = realloc(collector->errors, new_capacity * sizeof(Error));
+        ASSERT(new_buffer);
+        collector->errors = new_buffer;
+        collector->errors_capacity = new_capacity;
+    }
+    collector->errors[collector->errors_length] = error;
+    collector->errors_length += 1;
+}
+
+const char* token_type_value(TokenType type)
+{
+    switch (type) {
+        case TokenTypeEof:
+            return "Eof";
+        case TokenTypeError:
+            return "error";
+        case TokenTypeId:
+            return "Id";
+        case TokenTypeInt:
+            return "Int";
+        case TokenTypeChar:
+            return "Char";
+        case TokenTypeString:
+            return "String";
+        case TokenTypeLParen:
+            return "(";
+        case TokenTypeRParen:
+            return ")";
+        case TokenTypeLBrace:
+            return "{";
+        case TokenTypeRBrace:
+            return "}";
+        case TokenTypeLBracket:
+            return "[";
+        case TokenTypeRBracket:
+            return "]";
+    }
+    return "unknown"; // not reached for valid TokenType values
+}
+
+#define LEXER_ADD_ERROR(LEXER, POS, ...) \
+    do { \
+        String error_message; \
+        string_construct(&error_message); \
+        string_append_formatted(&error_message, __VA_ARGS__); \
+        Error error; \
+        error_construct(&error, (POS), error_message); \
+        error_collector_add((LEXER)->errors, error); \
+    } while (0)
+
+Lexer lexer_create(CharReader* reader, ErrorCollector* errors)
+{
+    return (Lexer) {
+        .reader = reader,
+        .errors = errors,
+        .current = reader->next(reader),
+        .pos = { 0, 1, 1 },
+    };
+}
+
+Token lexer_next(Lexer* lexer)
+{
+    Pos pos = lexer->pos;
+    if (lexer_done(lexer)) {
+        return lexer_token(lexer, TokenTypeEof, pos);
+    }
+    switch (lexer->current) {
+        case ' ':
+        case '\t':
+        case '\r':
+        case '\n':
+            return lexer_skip_whitespace(lexer);
+        case '"':
+            return lexer_lex_string(lexer);
+        case '\'':
+            return lexer_lex_char(lexer);
+        case '(':
+            return (
+                lexer_step(lexer), lexer_token(lexer, TokenTypeLParen, pos)
+            );
+        case ')':
+            return (
+                lexer_step(lexer), lexer_token(lexer, TokenTypeRParen, pos)
+            );
+        case '{':
+            return (
+                lexer_step(lexer), lexer_token(lexer, TokenTypeLBrace, pos)
+            );
+        case '}':
+            return (
+                lexer_step(lexer), lexer_token(lexer, TokenTypeRBrace, pos)
+            );
+        case '[':
+            return (
+                lexer_step(lexer), lexer_token(lexer, TokenTypeLBracket, pos)
+            );
+        case ']':
+            return (
+                lexer_step(lexer), lexer_token(lexer, TokenTypeRBracket, pos)
+            );
+        case '/':
+            return lexer_lex_slash(lexer);
+        case '#':
+            return lexer_lex_hashtag(lexer);
+        case '0' ... '9':
+            return lexer_lex_int(lexer);
+        case '+':
+        case '-':
+        case '*':
+        case '<':
+        case '>':
+        case '=':
+        case '!':
+        case 'a' ... 'z':
+        case 'A' ... 'Z':
+            return lexer_lex_id(lexer);
+        default:
+            LEXER_ADD_ERROR(
+                lexer, pos, "unsupported character `%c`", lexer->current
+            );
+            return (lexer_step(lexer), lexer_token(lexer, TokenTypeError, pos));
+    }
+}
+
+Token lexer_lex_id(Lexer* lexer)
+{
+    Pos pos = lexer->pos;
+    lexer_step(lexer);
+    while (true) {
+        switch (lexer->current) {
+            case '+':
+            case '-':
+            case '*':
+            case '<':
+            case '>':
+            case '=':
+            case '!':
+            case '0' ... '9':
+            case 'a' ... 'z':
+            case 'A' ... 'Z':
+                lexer_step(lexer);
+                break;
+            default:
+                goto break_loop;
+        }
+    }
+break_loop:
+    return lexer_token(lexer, TokenTypeId, pos);
+}
+
+Token lexer_lex_int(Lexer* lexer)
+{
+    Pos pos = lexer->pos;
+    lexer_step(lexer);
+    while (true) {
+        switch (lexer->current) {
+            case '0' ... '9':
+                lexer_step(lexer);
+                break;
+            default:
+                goto break_loop;
+        }
+    }
+break_loop:
+    return lexer_token(lexer, TokenTypeInt, pos);
+}
+
+Token lexer_lex_char(Lexer* lexer)
+{
+    Pos pos = lexer->pos;
+    lexer_step(lexer);
+    if (lexer_done(lexer)) {
+        LEXER_ADD_ERROR(
+            lexer, pos, "malformed character literal, got unexpected Eof"
+        );
+        return lexer_token(lexer, TokenTypeError, pos);
+    }
+    if (lexer->current == '\'') {
+        LEXER_ADD_ERROR(
+            lexer, pos, "malformed character literal, got unexpected `'`"
+        );
+        lexer_step(lexer); // consume the quote so lexing resumes after `''`
+        return lexer_token(lexer, TokenTypeError, pos);
+    }
+    if (lexer->current == '\\') {
+        lexer_step(lexer);
+        if (lexer_done(lexer)) {
+            LEXER_ADD_ERROR(
+                lexer, pos, "malformed character literal, got unexpected Eof"
+            );
+            return lexer_token(lexer, TokenTypeError, pos);
+        }
+    }
+    // Consume the literal's character (the escaped one after a backslash).
+    lexer_step(lexer);
+    if (lexer_done(lexer)) {
+        LEXER_ADD_ERROR(
+            lexer, pos, "malformed character literal, expected `'`, got Eof"
+        );
+        return lexer_token(lexer, TokenTypeError, pos);
+    }
+    if (lexer->current != '\'') {
+        LEXER_ADD_ERROR(
+            lexer,
+            pos,
+            "malformed character literal, expected `'`, got `%c`",
+            lexer->current
+        );
+        return lexer_token(lexer, TokenTypeError, pos);
+    }
+    lexer_step(lexer);
+    return lexer_token(lexer, TokenTypeChar, pos);
+}
+
+Token lexer_lex_string(Lexer* lexer)
+{
+    Pos pos = lexer->pos;
+    lexer_step(lexer);
+    // Scan up to, but not past, the closing quote.
+    while (!lexer_done(lexer) && lexer->current != '"') {
+        char maybe_escape_char = lexer->current;
+        lexer_step(lexer);
+        if (maybe_escape_char == '\\') {
+            if (lexer_done(lexer)) {
+                LEXER_ADD_ERROR(
+                    lexer,
+                    pos,
+                    "malformed string literal escape sequence, got unexpected "
+                    "Eof"
+                );
+                return lexer_token(lexer, TokenTypeError, pos);
+            }
+            lexer_step(lexer);
+        }
+    }
+    if (lexer_done(lexer)) {
+        LEXER_ADD_ERROR(
+            lexer, pos, "malformed string literal, expected `\"`, got Eof"
+        );
+        return lexer_token(lexer, TokenTypeError, pos);
+    }
+    if (lexer->current != '\"') {
+        LEXER_ADD_ERROR(
+            lexer,
+            pos,
+            "malformed string literal, expected `\"`, got `%c`",
+            lexer->current
+        );
+        return lexer_token(lexer, TokenTypeError, pos);
+    }
+    lexer_step(lexer);
+    return lexer_token(lexer, TokenTypeString, pos);
+}
+
+Token lexer_lex_slash(Lexer* lexer)
+{
+    Pos pos = lexer->pos;
+    lexer_step(lexer);
+    if (lexer->current == '*') {
+        lexer_step(lexer);
+        int depth = 1;
+        char last = '\0';
+        while (!lexer_done(lexer) && depth > 0) {
+            if (last == '*' && lexer->current == '/') {
+                depth -= 1;
+            } else if (last == '/' && lexer->current == '*') {
+                depth += 1;
+            }
+            last = lexer->current;
+            lexer_step(lexer);
+        }
+        if (depth > 0) {
+            String error_message;
+            string_from_cstr(
+                &error_message,
+                "malformed multiline comment, expected `*/` before end"
+            );
+            Error error;
+            error_construct(&error, pos, error_message);
+            error_collector_add(lexer->errors, error);
+            return lexer_token(lexer, TokenTypeError, pos);
+        }
+        return lexer_next(lexer);
+    }
+    if (lexer->current == '/') {
+        lexer_step(lexer);
+        while (!lexer_done(lexer) && lexer->current != '\n') {
+            lexer_step(lexer);
+        }
+        return lexer_next(lexer);
+    }
+    return lexer_token(lexer, TokenTypeId, pos);
+}
+
+Token lexer_lex_hashtag(Lexer* lexer)
+{
+    lexer_step(lexer);
+    while (!lexer_done(lexer) && lexer->current != '\n') {
+        lexer_step(lexer);
+    }
+    return lexer_next(lexer);
+}
+
+Token lexer_skip_whitespace(Lexer* lexer)
+{
+    lexer_step(lexer);
+    while (true) {
+        switch (lexer->current) {
+            case ' ':
+            case '\t':
+            case '\r':
+            case '\n':
+                lexer_step(lexer);
+                break;
+            default:
+                goto break_loop;
+        }
+    }
+break_loop:
+    return lexer_next(lexer);
+}
+
+Token lexer_token(const Lexer* lexer, TokenType type, Pos pos)
+{
+    return (Token) { type, pos, .length = lexer->pos.index - pos.index };
+}
+
+void lexer_step(Lexer* lexer)
+{
+    if (lexer_done(lexer)) {
+        return;
+    }
+    // `pos.index` is the reader-buffer offset of `current`; token lengths and
+    // `reader->value()` lookups are derived from it.
+    lexer->pos.index += 1;
+    if (lexer->current == '\n') {
+        lexer->pos.line += 1;
+        lexer->pos.col = 1;
+    } else {
+        lexer->pos.col += 1;
+    }
+    lexer->current = lexer->reader->next(lexer->reader);
+}
+
+bool lexer_done(const Lexer* lexer) { return lexer->current == '\0'; }
diff --git a/src/lexer.h b/src/lexer.h
index aec926c..a2af55c 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -1,4 +1,158 @@
 #ifndef LEXER_H
 #define LEXER_H
 
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Prints a formatted message plus the source location, then exits.
+#define PANIC(...) \
+    (fprintf(stderr, "panic: "), \
+     fprintf(stderr, __VA_ARGS__), \
+     fprintf(stderr, ", at %s:%d in %s()\n", __FILE__, __LINE__, __func__), \
+     exit(1))
+
+// Exits with a diagnostic when CONDITION is false; never compiled out.
+#define ASSERT(CONDITION) \
+    do { \
+        if (!(CONDITION)) { \
+            (fprintf(stderr, "assert failed: "), \
+             fprintf( \
+                 stderr, \
+                 "(%s), at %s:%d in %s()\n", \
+                 #CONDITION, \
+                 __FILE__, \
+                 __LINE__, \
+                 __func__ \
+             ), \
+             exit(1)); \
+        } \
+    } while (0)
+
+// Non-owning view of `length` chars; not necessarily NUL-terminated.
+typedef struct {
+    const char* data;
+    size_t length;
+} StringSlice;
+
+bool string_slice_equal(StringSlice slice, const char* data);
+
+// Owning, growable string; a non-NULL `data` is always NUL-terminated.
+typedef struct {
+    char* data;
+    size_t length;
+    size_t capacity;
+} String;
+
+void string_construct(String* string);
+void string_destroy(String* string);
+void string_from_cstr(String* string, const char* value);
+void string_from_slice(String* string, StringSlice slice);
+void string_append_char(String* string, char value);
+void string_append_cstr(String* string, const char* value);
+void string_append_formatted(String* string, const char* format, ...);
+// True when `string` holds exactly the chars of the C string `value`.
+bool string_equal(const String* string, const char* value);
+
+// Character source: `next` yields the next char ('\0' at end of input) and
+// `value` returns a slice of the chars read so far.
+typedef struct CharReader {
+    char (*next)(struct CharReader* reader);
+    StringSlice (*value)(
+        const struct CharReader* reader, size_t index, size_t length
+    );
+} CharReader;
+
+// CharReader backed by a FILE*. main.c casts it to CharReader*, so its first
+// two members must mirror CharReader's.
+typedef struct FileCharReader {
+    char (*next)(struct FileCharReader* reader);
+    StringSlice (*value)(
+        const struct FileCharReader* reader, size_t index, size_t length
+    );
+    FILE* file;
+    String buffer;
+} FileCharReader;
+
+void file_char_reader_construct(FileCharReader* reader, FILE* file);
+void file_char_reader_destroy(FileCharReader* reader);
+char file_char_reader_next(FileCharReader* reader);
+StringSlice file_char_reader_value(
+    const FileCharReader* reader, size_t index, size_t length
+);
+
+// Source position: 0-based buffer `index`, 1-based `line` and `col`.
+typedef struct {
+    size_t index;
+    size_t line;
+    size_t col;
+} Pos;
+
+// A diagnostic; owns `message`.
+typedef struct {
+    Pos pos;
+    String message;
+} Error;
+
+void error_construct(Error* error, Pos pos, String message);
+void error_destroy(Error* error);
+
+// Growable list of Errors; destroying it destroys every stored Error.
+typedef struct {
+    Error* errors;
+    size_t errors_length;
+    size_t errors_capacity;
+} ErrorCollector;
+
+void error_collector_construct(ErrorCollector* collector);
+void error_collector_destroy(ErrorCollector* collector);
+void error_collector_add(ErrorCollector* collector, Error error);
+
+typedef enum {
+    TokenTypeEof,
+    TokenTypeError,
+    TokenTypeId,
+    TokenTypeInt,
+    TokenTypeChar,
+    TokenTypeString,
+    TokenTypeLParen,
+    TokenTypeRParen,
+    TokenTypeLBrace,
+    TokenTypeRBrace,
+    TokenTypeLBracket,
+    TokenTypeRBracket,
+} TokenType;
+
+const char* token_type_value(TokenType type);
+
+// A lexeme: `length` chars starting at `pos.index` in the reader's buffer.
+typedef struct {
+    TokenType type;
+    Pos pos;
+    size_t length;
+} Token;
+
+typedef struct {
+    CharReader* reader;
+    ErrorCollector* errors;
+    char current;
+    Pos pos;
+} Lexer;
+
+Lexer lexer_create(CharReader* reader, ErrorCollector* errors);
+// Returns the next token, skipping whitespace and comments; Eof at the end.
+Token lexer_next(Lexer* lexer);
+
+Token lexer_lex_id(Lexer* lexer);
+Token lexer_lex_int(Lexer* lexer);
+Token lexer_lex_char(Lexer* lexer);
+Token lexer_lex_string(Lexer* lexer);
+Token lexer_lex_slash(Lexer* lexer);
+Token lexer_lex_hashtag(Lexer* lexer);
+Token lexer_skip_whitespace(Lexer* lexer);
+Token lexer_token(const Lexer* lexer, TokenType type, Pos pos);
+void lexer_step(Lexer* lexer);
+bool lexer_done(const Lexer* lexer);
+
 #endif
diff --git a/src/main.c b/src/main.c
index 6b5a066..2475e06 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,3 +1,5 @@
+#include "lexer.h"
+#include "parser.h"
 #include <stdio.h>
 
 int main(int argc, const char** argv)
@@ -14,11 +16,37 @@ int main(int argc, const char** argv)
         return 1;
     }
 
-    printf("file:\n");
-    int read_char = fgetc(file);
-    while (read_char != EOF) {
-        fputc(read_char, stdout);
-        read_char = fgetc(file);
-    }
-    fputc('\n', stdout);
+    ErrorCollector errors;
+    error_collector_construct(&errors);
+
+    FileCharReader reader;
+    file_char_reader_construct(&reader, file);
+
+    Lexer lexer = lexer_create((CharReader*)&reader, &errors);
+    Parser parser = parser_create((CharReader*)&reader, lexer, &errors);
+
+    ExprVec ast = parser_parse(&parser);
+
+    // Report collected diagnostics instead of silently dropping them.
+    for (size_t i = 0; i < errors.errors_length; ++i) {
+        const Error* error = &errors.errors[i];
+        fprintf(
+            stderr,
+            "error: %s, at %zu:%zu\n",
+            error->message.data,
+            error->pos.line,
+            error->pos.col
+        );
+    }
+
+    String ast_string;
+    string_construct(&ast_string);
+    expr_vec_stringify(&ast, &ast_string);
+    printf("ast = %s\n", ast_string.data);
+    string_destroy(&ast_string);
+
+    expr_vec_destroy(&ast);
+    file_char_reader_destroy(&reader);
+    error_collector_destroy(&errors);
+    fclose(file);
 }
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..b72d144
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,358 @@
+#include "parser.h"
+#include "lexer.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+void expr_vec_construct(ExprVec* vec)
+{
+    const size_t starting_capacity = 8;
+    *vec = (ExprVec) {
+        .exprs = malloc(starting_capacity * sizeof(Expr)),
+        .length = 0,
+        .capacity = starting_capacity,
+    };
+    ASSERT(vec->exprs);
+}
+
+void expr_vec_destroy(ExprVec* vec)
+{
+    for (size_t i = 0; i < vec->length; ++i) {
+        expr_destroy(&vec->exprs[i]);
+    }
+    free(vec->exprs);
+}
+
+void expr_vec_push(ExprVec* vec, Expr expr)
+{
+    if (vec->length >= vec->capacity) {
+        vec->capacity *= 2;
+        Expr* new_buffer = realloc(vec->exprs, vec->capacity * sizeof(Expr));
+        ASSERT(new_buffer);
+        vec->exprs = new_buffer;
+    }
+    vec->exprs[vec->length] = expr;
+    vec->length += 1;
+}
+
+void expr_vec_stringify(const ExprVec* vec, String* acc)
+{
+    string_append_cstr(acc, "[");
+    if (vec->length > 0) {
+        expr_stringify(&vec->exprs[0], acc);
+        for (size_t i = 1; i < vec->length; ++i) {
+            string_append_cstr(acc, ", ");
+            expr_stringify(&vec->exprs[i], acc);
+        }
+    }
+    string_append_cstr(acc, "]");
+}
+
+Expr error_expr_construct(Pos pos)
+{
+    return (Expr) { ExprTypeError, pos, { 0 } };
+}
+
+Expr id_expr_construct(Pos pos, String value)
+{
+    return (Expr) { ExprTypeId, pos, .id_value = value };
+}
+
+Expr int_expr_construct(Pos pos, int64_t value)
+{
+    return (Expr) { ExprTypeInt, pos, .int_value = value };
+}
+
+Expr char_expr_construct(Pos pos, char value)
+{
+    return (Expr) { ExprTypeChar, pos, .char_value = value };
+}
+
+Expr string_expr_construct(Pos pos, String value)
+{
+    return (Expr) { ExprTypeString, pos, .string_value = value };
+}
+
+Expr list_expr_construct(Pos pos, ExprVec exprs)
+{
+    return (Expr) { ExprTypeList, pos, .list = exprs };
+}
+
+Expr quote_expr_construct(Pos pos, ExprVec exprs)
+{
+    return (Expr) { ExprTypeQuote, pos, .quote = exprs };
+}
+
+void expr_destroy(Expr* expr)
+{
+    switch (expr->type) {
+        case ExprTypeError:
+            break;
+        case ExprTypeId:
+            string_destroy(&expr->id_value);
+            break;
+        case ExprTypeInt:
+        case ExprTypeChar:
+            break;
+        case ExprTypeString:
+            string_destroy(&expr->string_value);
+            break;
+        case ExprTypeList:
+            expr_vec_destroy(&expr->list);
+            break;
+        case ExprTypeQuote:
+            expr_vec_destroy(&expr->quote);
+            break;
+    }
+}
+
+void expr_stringify(const Expr* expr, String* acc)
+{
+    switch (expr->type) {
+        case ExprTypeError:
+            string_append_formatted(acc, "Error");
+            break;
+        case ExprTypeId:
+            string_append_formatted(acc, "Id(%s)", expr->id_value.data);
+            break;
+        case ExprTypeInt:
+            string_append_formatted(
+                acc, "Int(\'%lld\')", (long long)expr->int_value
+            );
+            break;
+        case ExprTypeChar:
+            string_append_formatted(acc, "Char(\'%c\')", expr->char_value);
+            break;
+        case ExprTypeString:
+            // An empty string literal leaves `data` NULL; "%s" needs a pointer.
+            string_append_formatted(
+                acc,
+                "String(\"%s\")",
+                expr->string_value.data ? expr->string_value.data : ""
+            );
+            break;
+        case ExprTypeList:
+            string_append_cstr(acc, "List(");
+            if (expr->list.length > 0) {
+                expr_stringify(&expr->list.exprs[0], acc);
+                for (size_t i = 1; i < expr->list.length; ++i) {
+                    string_append_cstr(acc, ", ");
+                    expr_stringify(&expr->list.exprs[i], acc);
+                }
+            }
+            string_append_cstr(acc, ")");
+            break;
+        case ExprTypeQuote:
+            string_append_cstr(acc, "Quote(");
+            if (expr->quote.length > 0) {
+                expr_stringify(&expr->quote.exprs[0], acc);
+                for (size_t i = 1; i < expr->quote.length; ++i) {
+                    string_append_cstr(acc, ", ");
+                    expr_stringify(&expr->quote.exprs[i], acc);
+                }
+            }
+            string_append_cstr(acc, ")");
+            break;
+    }
+}
+
+#define PARSER_ADD_ERROR(PARSER, POS, ...) \
+    do { \
+        String error_message; \
+        string_construct(&error_message); \
+        string_append_formatted(&error_message, __VA_ARGS__); \
+        Error error; \
+        error_construct(&error, (POS), error_message); \
+        error_collector_add((PARSER)->errors, error); \
+    } while (0)
+
+Parser
+parser_create(const CharReader* reader, Lexer lexer, ErrorCollector* errors)
+{
+    Token first = lexer_next(&lexer);
+    return (Parser) {
+        .reader = reader,
+        .lexer = lexer,
+        .errors = errors,
+        .current = first,
+    };
+}
+
+ExprVec parser_parse(Parser* parser)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    while (parser->current.type != TokenTypeEof) {
+        expr_vec_push(&exprs, parser_parse_expr(parser));
+    }
+    return exprs;
+}
+
+Expr parser_parse_expr(Parser* parser)
+{
+    Pos pos = parser->current.pos;
+    switch (parser->current.type) {
+        case TokenTypeId:
+            return parser_parse_id(parser);
+        case TokenTypeInt:
+            return parser_parse_int(parser);
+        case TokenTypeChar:
+            return parser_parse_char(parser);
+        case TokenTypeString:
+            return parser_parse_string(parser);
+        case TokenTypeLParen:
+            return parser_parse_list(parser);
+        case TokenTypeLBracket:
+            return parser_parse_quote(parser);
+        default:
+            PARSER_ADD_ERROR(
+                parser,
+                pos,
+                "expected value, got `%s`",
+                token_type_value(parser->current.type)
+            );
+            parser_step(parser);
+            return error_expr_construct(pos);
+    }
+}
+
+Expr parser_parse_id(Parser* parser)
+{
+    Token token = parser->current;
+    String value;
+    string_from_slice(
+        &value,
+        parser->reader->value(parser->reader, token.pos.index, token.length)
+    );
+    parser_step(parser);
+    return id_expr_construct(token.pos, value);
+}
+
+Expr parser_parse_int(Parser* parser)
+{
+    Token token = parser->current;
+    String text;
+    string_from_slice(
+        &text,
+        parser->reader->value(parser->reader, token.pos.index, token.length)
+    );
+    int64_t value = strtoll(text.data, NULL, 10);
+    string_destroy(&text);
+    parser_step(parser);
+    return int_expr_construct(token.pos, value);
+}
+
+Expr parser_parse_char(Parser* parser)
+{
+    Token token = parser->current;
+    StringSlice text
+        = parser->reader->value(parser->reader, token.pos.index, token.length);
+    char value = text.data[1];
+    if (value == '\\') {
+        switch (text.data[2]) {
+            case '0':
+                value = '\0';
+                break;
+            case 't':
+                value = '\t';
+                break;
+            case 'r':
+                value = '\r';
+                break;
+            case 'n':
+                value = '\n';
+                break;
+            default:
+                value = text.data[2];
+                break;
+        }
+    }
+    parser_step(parser);
+    return char_expr_construct(token.pos, value);
+}
+
+Expr parser_parse_string(Parser* parser)
+{
+    Token token = parser->current;
+    StringSlice text
+        = parser->reader->value(parser->reader, token.pos.index, token.length);
+    String value;
+    string_construct(&value);
+    // Iterate over the contents only: skip the opening and closing quotes.
+    for (size_t i = 1; i < text.length - 1; ++i) {
+        if (text.data[i] == '\\') {
+            i += 1;
+            switch (text.data[i]) {
+                case '0':
+                    string_append_char(&value, '\0');
+                    break;
+                case 't':
+                    string_append_char(&value, '\t');
+                    break;
+                case 'r':
+                    string_append_char(&value, '\r');
+                    break;
+                case 'n':
+                    string_append_char(&value, '\n');
+                    break;
+                default:
+                    string_append_char(&value, text.data[i]);
+                    break;
+            }
+        } else {
+            string_append_char(&value, text.data[i]);
+        }
+    }
+    parser_step(parser);
+    return string_expr_construct(token.pos, value);
+}
+
+Expr parser_parse_list(Parser* parser)
+{
+    Pos pos = parser->current.pos;
+    parser_step(parser);
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    while (parser->current.type != TokenTypeEof
+           && parser->current.type != TokenTypeRParen) {
+        expr_vec_push(&exprs, parser_parse_expr(parser));
+    }
+    if (parser->current.type != TokenTypeRParen) {
+        PARSER_ADD_ERROR(
+            parser,
+            pos,
+            "expected `)`, got `%s`",
+            token_type_value(parser->current.type)
+        );
+    } else {
+        parser_step(parser);
+    }
+    return list_expr_construct(pos, exprs);
+}
+
+Expr parser_parse_quote(Parser* parser)
+{
+    Pos pos = parser->current.pos;
+    parser_step(parser);
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    while (parser->current.type != TokenTypeEof
+           && parser->current.type != TokenTypeRBracket) {
+        expr_vec_push(&exprs, parser_parse_expr(parser));
+    }
+    if (parser->current.type != TokenTypeRBracket) {
+        PARSER_ADD_ERROR(
+            parser,
+            pos,
+            "expected `]`, got `%s`",
+            token_type_value(parser->current.type)
+        );
+    } else {
+        parser_step(parser);
+    }
+    return quote_expr_construct(pos, exprs);
+}
+
+void parser_step(Parser* parser)
+{
+    parser->current = lexer_next(&parser->lexer);
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..df8eb68
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,81 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "lexer.h"
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+typedef enum {
+    ExprTypeError,
+    ExprTypeId,
+    ExprTypeInt,
+    ExprTypeChar,
+    ExprTypeString,
+    ExprTypeList,
+    ExprTypeQuote,
+} ExprType;
+
+typedef struct Expr Expr;
+
+// Owning, growable array of Expr.
+typedef struct {
+    Expr* exprs;
+    size_t length;
+    size_t capacity;
+} ExprVec;
+
+void expr_vec_construct(ExprVec* vec);
+void expr_vec_destroy(ExprVec* vec);
+void expr_vec_push(ExprVec* vec, Expr expr);
+void expr_vec_stringify(const ExprVec* vec, String* acc);
+
+// AST node; `type` selects the active union member (none for Error).
+struct Expr {
+    ExprType type;
+    Pos pos;
+    union {
+        String id_value;
+        int64_t int_value;
+        char char_value;
+        String string_value;
+        ExprVec list;
+        ExprVec quote;
+    };
+};
+
+Expr error_expr_construct(Pos pos);
+Expr id_expr_construct(Pos pos, String value);
+Expr int_expr_construct(Pos pos, int64_t value);
+Expr char_expr_construct(Pos pos, char value);
+Expr string_expr_construct(Pos pos, String value);
+Expr list_expr_construct(Pos pos, ExprVec exprs);
+Expr quote_expr_construct(Pos pos, ExprVec exprs);
+// Releases everything owned by `expr` (strings and child expressions).
+void expr_destroy(Expr* expr);
+// Appends a debug rendering of `expr` to `acc`.
+void expr_stringify(const Expr* expr, String* acc);
+
+// Parser state; `current` holds one token of lookahead.
+typedef struct {
+    const CharReader* reader;
+    Lexer lexer;
+    ErrorCollector* errors;
+    Token current;
+} Parser;
+
+Parser
+parser_create(const CharReader* reader, Lexer lexer, ErrorCollector* errors);
+// Parses until Eof; the caller releases the result with expr_vec_destroy.
+ExprVec parser_parse(Parser* parser);
+
+Expr parser_parse_expr(Parser* parser);
+Expr parser_parse_id(Parser* parser);
+Expr parser_parse_int(Parser* parser);
+Expr parser_parse_char(Parser* parser);
+Expr parser_parse_string(Parser* parser);
+Expr parser_parse_list(Parser* parser);
+Expr parser_parse_quote(Parser* parser);
+void parser_step(Parser* parser);
+
+#endif
diff --git a/test.ol b/test.ol
new file mode 100644
index 0000000..70d56b2
--- /dev/null
+++ b/test.ol
@@ -0,0 +1 @@
+(fn sum (a b) (+ a b))