From 669b0458cca29c60addfd073cbca17f6d35cbcad Mon Sep 17 00:00:00 2001
From: Simon
Date: Wed, 22 Mar 2023 02:02:55 +0100
Subject: [PATCH] add parser

---
NOTE(review): line breaks and indentation were reconstructed in 4-space
style, and the blob ids on the `index` lines predate this revision; use
`git apply --ignore-whitespace` if the context does not match. The first
src/lexer.c hunk anchors below the existing includes because one header
name on a context line could not be recovered.

 src/lexer.c  |  62 +++++++++
 src/lexer.h  |  11 ++
 src/parser.c | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parser.h |  66 +++++++++
 4 files changed, 447 insertions(+)
 create mode 100644 src/parser.c
 create mode 100644 src/parser.h

diff --git a/src/lexer.c b/src/lexer.c
index a0027fd..dfb8521 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -3,3 +3,7 @@
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
 bool lexer_done(const Lexer* lexer) { return lexer->index >= lexer->length; }
 
@@ -121,3 +125,61 @@ Token lexer_next(Lexer* lexer)
     else
         return lexer_level2(lexer);
 }
+
+// Copies `length` bytes of `text` starting at `pos.index` into a new
+// NUL-terminated string. Returns NULL if allocation fails; caller frees.
+char* pos_string(Position pos, size_t length, const char* text)
+{
+    return strndup(&text[pos.index], length);
+}
+
+// Returns the enumerator name without its `TokenType` prefix as a static
+// string, or "Unknown" for a value outside the enum.
+const char* token_type_to_string(TokenType type)
+{
+    switch (type) {
+    case TokenTypeEof:
+        return "Eof";
+    case TokenTypeInvalidChar:
+        return "InvalidChar";
+    case TokenTypeInt:
+        return "Int";
+    case TokenTypePlus:
+        return "Plus";
+    case TokenTypeMinus:
+        return "Minus";
+    case TokenTypeAsterisk:
+        return "Asterisk";
+    case TokenTypeSlash:
+        return "Slash";
+    case TokenTypeLParen:
+        return "LParen";
+    case TokenTypeRParen:
+        return "RParen";
+    }
+    // No `default:` above so -Wswitch still flags a missing enumerator; this
+    // return keeps the function defined for out-of-range values.
+    return "Unknown";
+}
+
+// Formats `token` as `Type(lexeme)`, e.g. `Int(42)`, reading the lexeme from
+// `text`. Returns NULL if formatting or allocation fails; caller frees.
+char* token_to_string(Token* token, const char* text)
+{
+    const char* type = token_type_to_string(token->type);
+    const char* lexeme = &text[token->index];
+    // snprintf takes the precision as an int; clamp instead of wrapping.
+    int precision = INT_MAX;
+    if (token->length < (size_t)INT_MAX)
+        precision = (int)token->length;
+
+    // First call measures the result, second call writes it.
+    int size = snprintf(NULL, 0, "%s(%.*s)", type, precision, lexeme);
+    if (size < 0)
+        return NULL;
+    char* result = malloc((size_t)size + 1);
+    if (result == NULL)
+        return NULL;
+    snprintf(result, (size_t)size + 1, "%s(%.*s)", type, precision, lexeme);
+    return result;
+}
diff --git a/src/lexer.h b/src/lexer.h
index f225940..2718d79 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -8,6 +8,10 @@ typedef struct {
     int line, col;
 } Position;
 
+// Returns a malloc'd copy of `length` bytes of `text` starting at
+// `pos.index`, or NULL if allocation fails. The caller frees the result.
+char* pos_string(Position pos, size_t length, const char* text);
+
 typedef enum {
     TokenTypeEof,
     TokenTypeInvalidChar,
@@ -20,12 +24,19 @@ typedef enum {
     TokenTypeRParen,
 } TokenType;
 
+// Returns a static name for `type`; the result must not be freed.
+const char* token_type_to_string(TokenType type);
+
 typedef struct {
     TokenType type;
     size_t index, length;
     int line, col;
 } Token;
 
+// Returns a malloc'd `Type(lexeme)` description of `token`, or NULL on
+// failure. The caller frees the result.
+char* token_to_string(Token* token, const char* text);
+
 typedef struct {
     const char* text;
     size_t index, length;
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..062daa1
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,308 @@
+#include "parser.h"
+#include "lexer.h"
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define PARSER_ERROR_BUFFER_SIZE 512
+
+// Builds an error node that owns a private copy of `message`.
+// Returns NULL if either allocation fails.
+Expr* error_expr(Position pos, const char* message)
+{
+    char* message_copy = strdup(message);
+    if (message_copy == NULL)
+        return NULL;
+    Expr* node = malloc(sizeof(Expr));
+    if (node == NULL) {
+        free(message_copy);
+        return NULL;
+    }
+    *node = (Expr) {
+        .type = ExprTypeError,
+        .error = (ErrorExpr) {
+            .pos = pos,
+            .message = message_copy,
+        },
+    };
+    return node;
+}
+
+// Builds an integer literal node. Returns NULL if allocation fails.
+Expr* int_expr(int64_t value)
+{
+    Expr* node = malloc(sizeof(Expr));
+    if (node == NULL)
+        return NULL;
+    *node = (Expr) {
+        .type = ExprTypeInt,
+        .int_expr = (IntExpr) {
+            .value = value,
+        },
+    };
+    return node;
+}
+
+// Builds a unary node that takes ownership of `subject`. If allocation fails,
+// `subject` is freed and NULL is returned.
+Expr* unary_expr(UnaryExprType type, Expr* subject)
+{
+    Expr* node = malloc(sizeof(Expr));
+    if (node == NULL) {
+        free_expr(subject);
+        return NULL;
+    }
+    *node = (Expr) {
+        .type = ExprTypeUnary,
+        .unary = (UnaryExpr) {
+            .type = type,
+            .subject = subject,
+        },
+    };
+    return node;
+}
+
+// Builds a binary node that takes ownership of `left` and `right`. If
+// allocation fails, both operands are freed and NULL is returned.
+Expr* binary_expr(BinaryExprType type, Expr* left, Expr* right)
+{
+    Expr* node = malloc(sizeof(Expr));
+    if (node == NULL) {
+        free_expr(left);
+        free_expr(right);
+        return NULL;
+    }
+    *node = (Expr) {
+        .type = ExprTypeBinary,
+        .binary = (BinaryExpr) {
+            .type = type,
+            .left = left,
+            .right = right,
+        },
+    };
+    return node;
+}
+
+typedef struct {
+    const char* text;
+    size_t length;
+    Lexer* lexer;
+    Token current;
+} Parser;
+
+bool parser_done(Parser* parser)
+{
+    return parser->current.type == TokenTypeEof;
+}
+
+bool parser_current_is(Parser* parser, TokenType type)
+{
+    return !parser_done(parser) && parser->current.type == type;
+}
+
+void parser_step(Parser* parser)
+{
+    parser->current = lexer_next(parser->lexer);
+}
+
+Position parser_pos(Parser* parser)
+{
+    return (Position) {
+        .index = parser->current.index,
+        .line = parser->current.line,
+        .col = parser->current.col,
+    };
+}
+
+// Limits `length` to what a PARSER_ERROR_BUFFER_SIZE buffer can hold while
+// leaving room for the NUL terminator.
+static size_t parser_clamp_to_buffer(size_t length)
+{
+    if (length >= PARSER_ERROR_BUFFER_SIZE)
+        return PARSER_ERROR_BUFFER_SIZE - 1;
+    return length;
+}
+
+// Builds an error node positioned at the current token. The stored message is
+// `message` followed by the source line that contains the token, with a row
+// of `^` underneath the token itself.
+Expr* parser_error(Parser* parser, const char* message)
+{
+    const char* text = parser->text;
+
+    // Keep every read inside `text`, whatever index the token reports.
+    size_t token_start = parser->current.index;
+    if (token_start > parser->length)
+        token_start = parser->length;
+
+    // Widen from the token to the nearest line break on either side.
+    size_t line_start = token_start;
+    while (line_start > 0 && text[line_start - 1] != '\r'
+        && text[line_start - 1] != '\n')
+        --line_start;
+    size_t line_end = token_start;
+    while (line_end < parser->length && text[line_end] != '\r'
+        && text[line_end] != '\n')
+        ++line_end;
+
+    // Every length is clamped so the fixed-size buffers cannot overflow.
+    size_t line_length = parser_clamp_to_buffer(line_end - line_start);
+    size_t indent_length = parser_clamp_to_buffer(token_start - line_start);
+    // A zero-length token still gets one caret so its position stays visible.
+    size_t caret_count = parser->current.length;
+    if (caret_count == 0)
+        caret_count = 1;
+    caret_count = parser_clamp_to_buffer(caret_count);
+
+    char line[PARSER_ERROR_BUFFER_SIZE] = { 0 };
+    memcpy(line, &text[line_start], line_length);
+
+    char underline_indent[PARSER_ERROR_BUFFER_SIZE] = { 0 };
+    memset(underline_indent, ' ', indent_length);
+
+    char underline[PARSER_ERROR_BUFFER_SIZE] = { 0 };
+    memset(underline, '^', caret_count);
+
+    // Sized for the three buffers above plus the message; snprintf truncates
+    // instead of overflowing if the message is longer than that.
+    char formatted[PARSER_ERROR_BUFFER_SIZE * 4];
+    snprintf(
+        formatted,
+        sizeof(formatted),
+        "error: %s\n     |\n %-4d|%s\n     |%s%s\n",
+        message,
+        parser->current.line,
+        line,
+        underline_indent,
+        underline
+    );
+    return error_expr(parser_pos(parser), formatted);
+}
+
+// Builds an error node reporting that the current token is not recognised.
+Expr* parser_unknown_token_error(Parser* parser)
+{
+    return parser_error(parser, "unknown token");
+}
+
+// Parses an operand, which is currently always an integer literal. Any other
+// token yields an error node and is skipped.
+Expr* parser_operand(Parser* parser)
+{
+    if (!parser_current_is(parser, TokenTypeInt)) {
+        // Build the error before stepping so it points at the offending token.
+        Expr* error = parser_error(parser, "invalid token");
+        if (!parser_done(parser))
+            parser_step(parser);
+        return error;
+    }
+
+    // Accumulate digit by digit so overflow is caught before it happens.
+    // Assumes the lexer emits Int only for decimal digits (TODO confirm);
+    // anything else is reported instead of being trusted.
+    const char* digits = &parser->text[parser->current.index];
+    const char* problem = NULL;
+    int64_t value = 0;
+    for (size_t i = 0; i < parser->current.length && problem == NULL; ++i) {
+        if (digits[i] < '0' || digits[i] > '9')
+            problem = "invalid integer literal";
+        else if (value > (INT64_MAX - (digits[i] - '0')) / 10)
+            problem = "integer literal too large";
+        else
+            value = value * 10 + (digits[i] - '0');
+    }
+
+    Expr* result
+        = problem != NULL ? parser_error(parser, problem) : int_expr(value);
+    parser_step(parser);
+    return result;
+}
+
+// Parses an optional leading `-` followed by an operand.
+Expr* parser_unary(Parser* parser)
+{
+    if (parser_current_is(parser, TokenTypeMinus)) {
+        parser_step(parser);
+        return unary_expr(UnaryExprTypeNegate, parser_operand(parser));
+    } else {
+        return parser_operand(parser);
+    }
+}
+
+// Parses a left-associative chain of `*` and `/` over unary expressions.
+Expr* parser_multiply_divide(Parser* parser)
+{
+    Expr* left = parser_unary(parser);
+    while (!parser_done(parser)) {
+        if (parser_current_is(parser, TokenTypeAsterisk)) {
+            parser_step(parser);
+            Expr* right = parser_unary(parser);
+            left = binary_expr(BinaryExprTypeMultiply, left, right);
+        } else if (parser_current_is(parser, TokenTypeSlash)) {
+            parser_step(parser);
+            Expr* right = parser_unary(parser);
+            left = binary_expr(BinaryExprTypeDivide, left, right);
+        } else {
+            break;
+        }
+    }
+    return left;
+}
+
+// Parses a left-associative chain of `+` and `-` over products.
+Expr* parser_add_subtract(Parser* parser)
+{
+    Expr* left = parser_multiply_divide(parser);
+    while (!parser_done(parser)) {
+        if (parser_current_is(parser, TokenTypePlus)) {
+            parser_step(parser);
+            Expr* right = parser_multiply_divide(parser);
+            left = binary_expr(BinaryExprTypeAdd, left, right);
+        } else if (parser_current_is(parser, TokenTypeMinus)) {
+            parser_step(parser);
+            Expr* right = parser_multiply_divide(parser);
+            left = binary_expr(BinaryExprTypeSubtract, left, right);
+        } else {
+            break;
+        }
+    }
+    return left;
+}
+
+Expr* parser_expr(Parser* parser) { return parser_add_subtract(parser); }
+
+// Entry point: loads the first token from `lexer`, then parses an expression.
+Expr* parse(Lexer* lexer, const char* text, size_t length)
+{
+    Parser parser = {
+        .text = text,
+        .length = length,
+        .lexer = lexer,
+        .current = lexer_next(lexer),
+    };
+    return parser_expr(&parser);
+}
+
+// Frees `expr` together with its operands and error message. Accepts NULL.
+void free_expr(Expr* expr)
+{
+    if (expr == NULL)
+        return;
+    switch (expr->type) {
+    case ExprTypeError:
+        free(expr->error.message);
+        break;
+    case ExprTypeInt:
+        break;
+    case ExprTypeUnary:
+        free_expr(expr->unary.subject);
+        break;
+    case ExprTypeBinary:
+        free_expr(expr->binary.left);
+        free_expr(expr->binary.right);
+        break;
+    }
+    free(expr);
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..f319990
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,66 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "lexer.h"
+#include <stdint.h>
+
+typedef enum {
+    ExprTypeError,
+    ExprTypeInt,
+    ExprTypeUnary,
+    ExprTypeBinary,
+} ExprType;
+
+typedef struct Expr Expr;
+
+typedef struct {
+    Position pos;
+    char* message; // heap-allocated; released by free_expr()
+} ErrorExpr;
+
+typedef struct {
+    int64_t value;
+} IntExpr;
+
+typedef enum {
+    UnaryExprTypeNegate,
+} UnaryExprType;
+
+typedef struct {
+    UnaryExprType type;
+    Expr* subject;
+} UnaryExpr;
+
+typedef enum {
+    BinaryExprTypeAdd,
+    BinaryExprTypeSubtract,
+    BinaryExprTypeMultiply,
+    BinaryExprTypeDivide,
+} BinaryExprType;
+
+typedef struct {
+    BinaryExprType type;
+    Expr* left;
+    Expr* right;
+} BinaryExpr;
+
+struct Expr {
+    ExprType type;
+    union {
+        ErrorExpr error;
+        IntExpr int_expr;
+        UnaryExpr unary;
+        BinaryExpr binary;
+    };
+};
+
+// Parses one expression from the tokens produced by `lexer`. `text` and
+// `length` describe the buffer those tokens index into. The caller releases
+// the result with free_expr(). Syntax errors come back as ExprTypeError
+// nodes; a node (or the whole result) is NULL where an allocation failed.
+Expr* parse(Lexer* lexer, const char* text, size_t length);
+
+// Frees `expr` and every node and message it owns. NULL is accepted.
+void free_expr(Expr* expr);
+
+#endif