diff --git a/Makefile b/Makefile
index 45cdc1f..8399348 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,8 @@
 
 CC = gcc
 
+TARGET = matemateak
+
 CFLAGS = -std=c17 -Wall -Wextra -Wpedantic -Wconversion
 LFLAGS = -lm
 
@@ -9,15 +11,17 @@ HEADER_FILES = $(shell find src/ -name *.h)
 
 OBJECT_FILES = $(patsubst %.c, %.o, $(C_FILES))
 
-matemateak: $(OBJECT_FILES)
+all: compile_flags.txt $(TARGET)
+
+$(TARGET): $(OBJECT_FILES)
 	$(CC) -o $@ $(LFLAGS) $^
 
 %.o: %.c $(HEADER_FILES)
 	$(CC) -c -o $@ $(CFLAGS) $<
 
 compile_flags.txt:
-	echo -xc $(C_FLAGS) | sed 's/\s\+/\n/g' > compile_flags.txt
+	echo -xc $(CFLAGS) | sed 's/\s\+/\n/g' > compile_flags.txt
 
 clean:
-	$(RM) $(OBJECT_FILES) matemateak
+	$(RM) $(OBJECT_FILES) $(TARGET)
 
diff --git a/src/lexer.c b/src/lexer.c
index dfb8521..b67db70 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,7 +1,25 @@
 #include "lexer.h"
 #include
+#include
+#include
 #include
 
+// Returns a newly allocated, NUL-terminated copy of at most `length` bytes of
+// `value` (stopping early at a NUL), or NULL if allocation fails. The caller
+// owns the result and must free() it.
+char* strndup(const char* value, size_t length)
+{
+    // Bounded scan: never read more than `length` bytes of `value`.
+    const char* terminator = memchr(value, '\0', length);
+    size_t string_length = terminator ? (size_t)(terminator - value) : length;
+    char* allocated = malloc(string_length + 1);
+    if (allocated == NULL)
+        return NULL;
+    memcpy(allocated, value, string_length);
+    allocated[string_length] = '\0';
+    return allocated;
+}
+
 bool lexer_done(const Lexer* lexer) { return lexer->index >= lexer->length; }
 
 char lexer_current(const Lexer* lexer) { return lexer->text[lexer->index]; }
@@ -149,10 +167,69 @@ const char* token_type_to_string(TokenType type)
         return "LParen";
     case TokenTypeRParen:
         return "RParen";
+    default:
+        fprintf(
+            stderr,
+            "panic: unexhausted value ./%s:%d %s()\n",
+            __FILE__,
+            __LINE__,
+            __func__
+        );
+        exit(1);
     }
 }
 
+// Formats `token` as a newly allocated debug string (the caller frees it),
+// showing its type, span, line and a bounded, escaped prefix of its text.
 char* token_to_string(Token* token, const char* text)
 {
-    // frick it late
+    char value[32] = { 0 };
+    size_t value_i = 0;
+    // A byte may expand to two; stop while both plus the final NUL still fit.
+    for (size_t i = 0; i < token->length && value_i + 2 < sizeof(value); i++) {
+        char c = text[token->index + i];
+        switch (c) {
+        case '\\':
+            value[value_i++] = '\\';
+            value[value_i++] = '\\';
+            break;
+        case '\t':
+            value[value_i++] = '\\';
+            value[value_i++] = 't';
+            break;
+        case '\r':
+            value[value_i++] = '\\';
+            value[value_i++] = 'r';
+            break;
+        case '\n':
+            value[value_i++] = '\\';
+            value[value_i++] = 'n';
+            break;
+        case '\0':
+            value[value_i++] = '\\';
+            value[value_i++] = '0';
+            break;
+        case '\"':
+            value[value_i++] = '\\';
+            value[value_i++] = '\"';
+            break;
+        default:
+            value[value_i++] = c;
+            break;
+        }
+    }
+
+    char formattet[128];
+    snprintf(
+        formattet,
+        128,
+        "Token { %11s [%3ld:%-3ld] %3d:%-3d \"%s\" }",
+        token_type_to_string(token->type),
+        token->index,
+        token->length,
+        token->line,
+        token->line, // NOTE(review): line printed twice; column intended? confirm
+        value
+    );
+    return strndup(formattet, 128);
 }
diff --git a/src/lexer.h b/src/lexer.h
index 2718d79..6e86fbb 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -3,6 +3,8 @@
 
 #include
 
+char* strndup(const char* value, size_t length);
+
 typedef struct {
     size_t index;
     int line, col;
diff --git a/src/main.c b/src/main.c
index 70b4681..d43fd5b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,4 +1,30 @@
+#include "lexer.h"
+#include "parser.h"
 #include
 #include
+#include
 
-int main(void) { printf("hello world\n"); }
+int main(void)
+{
+    char* text = "2 * (3 + 4)";
+
+    Lexer lexer1;
+    lexer(&lexer1, text, strlen(text));
+    Token current_token = lexer_next(&lexer1);
+    printf("tokens = [\n");
+    while (current_token.type != TokenTypeEof) {
+        char* token_string = token_to_string(&current_token, text);
+        printf(" %s,\n", token_string);
+        free(token_string);
+        current_token = lexer_next(&lexer1);
+    }
+    printf("]\n");
+
+    Lexer lexer2;
+    lexer(&lexer2, text, strlen(text));
+    Expr* ast = parse(&lexer2, text, strlen(text));
+    char* ast_string = expr_to_string(ast);
+    printf("ast = %s\n", ast_string);
+    free(ast_string);
+    free_expr(ast);
+}
diff --git a/src/parser.c b/src/parser.c
index 95b2e77..aea069d 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -13,7 +13,7 @@ Expr* error_expr(Position pos, const char* message)
         .type = ExprTypeError,
         .error = (ErrorExpr) {
             .pos = pos,
-            .message = strdup(message),
+            .message = strndup(message, strlen(message)),
         },
     };
     return node;
@@ -89,37 +89,7 @@ Position parser_pos(Parser* parser)
     };
 }
 
-Expr* parser_error(Parser* parser, const char* message)
-{
-    size_t line_width = 0;
-    for (size_t i = 0;
-         i < parser->length && parser->text[parser->current.index + i] != '\r'
-         && parser->text[parser->current.index + i] != '\n';
-         ++i)
-        line_width = i;
-
-    char line[512] = { 0 };
-
-    char underline_indent[512] = { 0 };
-    if (line_width > 0)
-        memset(underline_indent, ' ', line_width - 1);
-
-    char underline[512] = { '^', 0 };
-    memset(underline, '^', parser->current.length);
-
-    char formatted[512 * 4];
-    snprintf(
-        formatted,
-        512 * 4,
-        "error: %s\n |\n %-4d|%s\n |%s%s\n",
-        message,
-        parser->current.line,
-        line,
-        underline_indent,
-        underline
-    );
-    return error_expr(parser_pos(parser), formatted);
-}
+Expr* parser_expr(Parser* parser);
 
 Expr* parser_unknown_token_error(Parser* parser)
 {
@@ -127,15 +97,25 @@ Expr* parser_unknown_token_error(Parser* parser)
     snprintf(
         buffer, 128, "unknown char '%c'", parser->text[parser->current.index]
     );
-    return parser_error(parser, buffer);
+    return error_expr(parser_pos(parser), buffer);
 }
 
 Expr* parser_operand(Parser* parser)
 {
     if (parser_current_is(parser, TokenTypeInt)) {
-        return NULL;
+        // Copy at most sizeof(buffer) - 1 bytes so the digits stay
+        // NUL-terminated for atol(), even for an over-long literal.
+        char buffer[24] = { 0 };
+        size_t digits = parser->current.length;
+        if (digits >= sizeof(buffer))
+            digits = sizeof(buffer) - 1;
+        memcpy(buffer, &parser->text[parser->current.index], digits);
+        parser_step(parser);
+        return int_expr(atol(buffer));
     } else if (parser_current_is(parser, TokenTypeEof)) {
-        return parser_error(parser, "unexpected end-of-file");
+        Position pos = parser_pos(parser);
+        parser_step(parser);
+        return error_expr(pos, "unexpected end-of-file");
     } else {
         Expr* error = parser_unknown_token_error(parser);
         parser_step(parser);
@@ -143,13 +123,34 @@ Expr* parser_operand(Parser* parser)
     }
 }
 
+// Parses a parenthesised sub-expression, or falls through to a plain operand.
+// On a missing ')' the partial subtree is released and an error node returned.
+Expr* parser_group(Parser* parser)
+{
+    if (parser_current_is(parser, TokenTypeLParen)) {
+        parser_step(parser);
+        Expr* value = parser_expr(parser);
+        if (!parser_current_is(parser, TokenTypeRParen)) {
+            // Record the position of the offending token before skipping it.
+            Position pos = parser_pos(parser);
+            parser_step(parser);
+            free_expr(value);
+            return error_expr(pos, "expected ')'");
+        }
+        parser_step(parser);
+        return value;
+    } else {
+        return parser_operand(parser);
+    }
+}
+
 Expr* parser_unary(Parser* parser)
 {
     if (parser_current_is(parser, TokenTypeMinus)) {
         parser_step(parser);
-        return unary_expr(UnaryExprTypeNegate, parser_operand(parser));
+        return unary_expr(UnaryExprTypeNegate, parser_group(parser));
     } else {
-        return parser_operand(parser);
+        return parser_group(parser);
     }
 }
 
@@ -206,5 +207,141 @@ Expr* parse(Lexer* lexer, const char* text, size_t length)
 
+// Recursively releases `expr`: its children, any owned message, and the node
+// itself. Passing NULL is a no-op.
+// NOTE(review): assumes every Expr node is individually heap-allocated by the
+// *_expr() constructors — confirm.
 void free_expr(Expr* expr)
 {
-    //
+    if (expr == NULL)
+        return;
+    switch (expr->type) {
+    case ExprTypeError:
+        free(expr->error.message);
+        break;
+    case ExprTypeInt:
+        break;
+    case ExprTypeUnary:
+        free_expr(expr->unary.subject);
+        break;
+    case ExprTypeBinary:
+        free_expr(expr->binary.left);
+        free_expr(expr->binary.right);
+        break;
+    default:
+        fprintf(
+            stderr,
+            "panic: unexhausted value ./%s:%d %s()\n",
+            __FILE__,
+            __LINE__,
+            __func__
+        );
+        exit(1);
+    }
+    free(expr);
+}
+
+// Returns the static display name of a unary operator; exits on other values.
+const char* unary_expr_type_to_string(UnaryExprType type)
+{
+    switch (type) {
+    case UnaryExprTypeNegate:
+        return "Negate";
+    default:
+        fprintf(
+            stderr,
+            "panic: unexhausted value ./%s:%d %s()\n",
+            __FILE__,
+            __LINE__,
+            __func__
+        );
+        exit(1);
+    }
+}
+
+// Returns the static display name of a binary operator; exits on other values.
+const char* binary_expr_type_to_string(BinaryExprType type)
+{
+    switch (type) {
+    case BinaryExprTypeAdd:
+        return "Add";
+    case BinaryExprTypeSubtract:
+        return "Subtract";
+    case BinaryExprTypeMultiply:
+        return "Multiply";
+    case BinaryExprTypeDivide:
+        return "Divide";
+    default:
+        fprintf(
+            stderr,
+            "panic: unexhausted value ./%s:%d %s()\n",
+            __FILE__,
+            __LINE__,
+            __func__
+        );
+        exit(1);
+    }
+}
+
+// Renders `expr` as a newly allocated, human-readable string; the caller owns
+// the result and must free() it. Output longer than the 65536-byte scratch
+// buffer is truncated by snprintf().
+char* expr_to_string(const Expr* expr)
+{
+    switch (expr->type) {
+    case ExprTypeError: {
+        char formattet[65536];
+        snprintf(
+            formattet,
+            65536,
+            "Error { [%ld] %d:%d \"%s\" }",
+            expr->error.pos.index,
+            expr->error.pos.line,
+            expr->error.pos.col,
+            expr->error.message
+        );
+        return strndup(formattet, 65536);
+    }
+    case ExprTypeInt: {
+        char formattet[65536];
+        snprintf(formattet, 65536, "Int(%ld)", expr->int_expr.value);
+        return strndup(formattet, 65536);
+    }
+    case ExprTypeUnary: {
+        char* subject = expr_to_string(expr->unary.subject);
+        char formattet[65536];
+        snprintf(
+            formattet,
+            65536,
+            "Unary { type: %s, subject: %s }",
+            unary_expr_type_to_string(expr->unary.type),
+            subject
+        );
+        free(subject);
+        return strndup(formattet, 65536);
+    }
+    case ExprTypeBinary: {
+        char* left = expr_to_string(expr->binary.left);
+        char* right = expr_to_string(expr->binary.right);
+        char formattet[65536];
+        snprintf(
+            formattet,
+            65536,
+            "Binary { type: %s, left: %s, right: %s }",
+            binary_expr_type_to_string(expr->binary.type),
+            left,
+            right
+        );
+        free(left);
+        free(right);
+        return strndup(formattet, 65536);
+    }
+    default:
+        fprintf(
+            stderr,
+            "panic: unexhausted value ./%s:%d %s()\n",
+            __FILE__,
+            __LINE__,
+            __func__
+        );
+        exit(1);
+    }
 }
diff --git a/src/parser.h b/src/parser.h
index f319990..767a54a 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -56,5 +56,6 @@ struct Expr {
 Expr* parse(Lexer* lexer, const char* text, size_t length);
 
 void free_expr(Expr* expr);
+char* expr_to_string(const Expr* expr);
 
 #endif