diff --git a/.clang-format b/.clang-format index af281a7..8ae9b75 100644 --- a/.clang-format +++ b/.clang-format @@ -6,4 +6,9 @@ BreakBeforeBraces: Custom BraceWrapping: AfterFunction: true SplitEmptyFunction: false +AlignAfterOpenBracket: BlockIndent +AlignOperands: AlignAfterOperator +BreakBeforeBinaryOperators: true +BinPackArguments: false +BinPackParameters: false diff --git a/Makefile b/Makefile index cc81cef..c271318 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ HEADERS = $(wildcard *.h) all: compile_flags.txt wacc -wacc: main.o lexer.o parser.o +wacc: main.o lexer.o parser.o utils.o gcc $^ -o $@ %.o: %.c $(HEADERS) diff --git a/lexer.c b/lexer.c index fcbdfc6..2e71ec3 100644 --- a/lexer.c +++ b/lexer.c @@ -1,39 +1,11 @@ #include "lexer.h" +#include "utils.h" #include #include #include #include #include -#define ASSERT_EXHAUSTIVE_MATCH() \ - (fprintf(stderr, "unexhaustive match at %s:%d in %s()\n", __FILE__, \ - __LINE__, __func__), \ - exit(1)) - -Token lexer_skip_whitespace(Lexer* lexer); -Token lexer_make_int_or_float(Lexer* lexer); -Token lexer_make_id(Lexer* lexer); -bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value); -Token lexer_make_static_token(Lexer* lexer); -Token lexer_make_int_hex_binary_or_float(Lexer* lexer); -Token lexer_make_char(Lexer* lexer); -Token lexer_make_string(Lexer* lexer); -void lexer_skip_literal_char(Lexer* lexer); -Token lexer_make_single_char_token(Lexer* lexer, TokenType type); -Token lexer_make_dot_token(Lexer* lexer); -Token lexer_make_colon_token(Lexer* lexer); -Token lexer_make_slash_token(Lexer* lexer); -Token lexer_skip_singleline_comment(Lexer* lexer); -Token lexer_make_single_or_double_char_token(Lexer* lexer, - TokenType single_type, char second_char, TokenType double_type); -Token lexer_skip_multiline_comment(Lexer* lexer); -Token lexer_make_invalid_char(Lexer* lexer); -Position lexer_position(const Lexer* lexer); -Token lexer_token(const Lexer* lexer, TokenType type, Position 
begin); -bool lexer_done(const Lexer* lexer); -char lexer_current(const Lexer* lexer); -void lexer_step(Lexer* lexer); - void lexer_create(Lexer* lexer, const char* text, size_t text_length) { *lexer = (Lexer) { diff --git a/lexer.h b/lexer.h index 90bc787..1839b6b 100644 --- a/lexer.h +++ b/lexer.h @@ -1,6 +1,7 @@ #ifndef LEXER_H #define LEXER_H +#include #include typedef enum { @@ -104,4 +105,28 @@ void lexer_create(Lexer* lexer, const char* text, size_t text_length); Token lexer_next(Lexer* lexer); char* lexer_token_string(const Lexer* lexer, const Token* token); +Token lexer_skip_whitespace(Lexer* lexer); +Token lexer_make_int_or_float(Lexer* lexer); +Token lexer_make_id(Lexer* lexer); +bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value); +Token lexer_make_static_token(Lexer* lexer); +Token lexer_make_int_hex_binary_or_float(Lexer* lexer); +Token lexer_make_char(Lexer* lexer); +Token lexer_make_string(Lexer* lexer); +void lexer_skip_literal_char(Lexer* lexer); +Token lexer_make_single_char_token(Lexer* lexer, TokenType type); +Token lexer_make_dot_token(Lexer* lexer); +Token lexer_make_colon_token(Lexer* lexer); +Token lexer_make_slash_token(Lexer* lexer); +Token lexer_skip_singleline_comment(Lexer* lexer); +Token lexer_make_single_or_double_char_token(Lexer* lexer, + TokenType single_type, char second_char, TokenType double_type); +Token lexer_skip_multiline_comment(Lexer* lexer); +Token lexer_make_invalid_char(Lexer* lexer); +Position lexer_position(const Lexer* lexer); +Token lexer_token(const Lexer* lexer, TokenType type, Position begin); +bool lexer_done(const Lexer* lexer); +char lexer_current(const Lexer* lexer); +void lexer_step(Lexer* lexer); + #endif diff --git a/main.c b/main.c index 17d32ad..4e9ad5d 100644 --- a/main.c +++ b/main.c @@ -1,9 +1,10 @@ #include "lexer.h" +#include "parser.h" #include #include #include -int main(void) +void test_print_lexer(void) { char text[] = "abc 123 123.. 0xFF 0b101 .5 1. 
3.14 'a' '\\n' \"hello\" " @@ -29,3 +30,19 @@ int main(void) } printf("]\n"); } + +int main(void) +{ + char text[] = "abc"; + Lexer lexer; + lexer_create(&lexer, text, strlen(text)); + Parser parser; + + parser_create(&parser, text, &lexer); + ParsedExpr* expr = parser_parse_expression(&parser); + + printf("%d, %s\n", expr->type, expr->id.value); + // printf("%d, %ld\n", expr->type, expr->int_value); + + parsed_expr_free(expr); +} diff --git a/parser.c b/parser.c index 2e0b8df..aecfba9 100644 --- a/parser.c +++ b/parser.c @@ -1,42 +1,101 @@ #include "parser.h" #include "lexer.h" +#include "utils.h" +#include #include #include -inline char* allocate_string(const char* source) +ParsedExpr* parsed_expr_alloc(const ParsedExpr source) { - char* destination = malloc(strlen(source) + 1); - strcpy(destination, source); + ParsedExpr* destination = malloc(sizeof(ParsedExpr)); + *destination = source; return destination; } -void parser_create(Parser* parser, Lexer* lexer) +void parsed_expr_free(ParsedExpr* expr) +{ + switch (expr->type) { + case ParsedExprTypeError: + free(expr->error.message); + break; + case ParsedExprTypeId: + free(expr->id.value); + break; + case ParsedExprTypeInt: + break; + default: + TODO(); + } + free(expr); +} + +ParsedExpr* parsed_error_expr(Position position, const char* message) +{ + return parsed_expr_alloc((ParsedExpr) { + .type = ParsedExprTypeError, + .error = { + .position = position, + .message = alloc_cstring(message), + }, + }); +} + +void parser_create(Parser* parser, const char* text, Lexer* lexer) { *parser = (Parser) { + .text = text, .lexer = lexer, .current = lexer_next(lexer), }; } -ParsedExpr* parser_parse_expression(Parser* parser) { return NULL; } +ParsedExpr* parser_parse_expression(Parser* parser) +{ + return parser_parse_operand(parser); +} ParsedExpr* parser_parse_operand(Parser* parser) { Token current_token = parser_current(parser); - if (!parser_done(parser) && parser_current(parser).type == TokenTypeId) { - + if 
(parser_current_is(parser, TokenTypeId)) { + AllocatedString string + = alloc_token_string(parser_current(parser), parser->text); + return parsed_expr_alloc((ParsedExpr) { + .type = ParsedExprTypeId, + .id = { + .value = string.value, + .length = string.length, + }, + }); + } else if (parser_current_is(parser, TokenTypeInt)) { + char string_value[24] = { 0 }; + strncpy( + string_value, + &parser->text[current_token.position.index], + current_token.length < sizeof(string_value) ? current_token.length : sizeof(string_value) - 1 /* clamp so the zero-initialised final byte always terminates the copy */ + ); + int64_t value = strtoll(string_value, NULL, 10); + return parsed_expr_alloc((ParsedExpr) { + .type = ParsedExprTypeInt, + .int_value = value, + }); } else { parser_step(parser); - return parsed_expr_allocate(&(ParsedExpr) { + return parsed_expr_alloc((ParsedExpr) { .type = ParsedExprTypeError, .error = { .position = current_token.position, - .message = allocate_string("expected value"), + .message = alloc_cstring("expected value"), }, }); } } +bool parser_current_is(const Parser* parser, TokenType type) +{ + return !parser_done(parser) && parser_current(parser).type == type; +} + bool parser_done(const Parser* parser) { return parser->current.type == TokenTypeEof; @@ -49,9 +108,7 @@ void parser_step(Parser* parser) parser->current = lexer_next(parser->lexer); } -ParsedExpr* parsed_expr_allocate(const ParsedExpr* source) +AllocatedString alloc_token_string(Token token, const char* text) { - ParsedExpr* destination = malloc(sizeof(ParsedExpr)); - *destination = *source; - return destination; + return alloc_string(&text[token.position.index], token.length); } diff --git a/parser.h b/parser.h index 23ab782..ee9666d 100644 --- a/parser.h +++ b/parser.h @@ -2,12 +2,14 @@ #define PARSER_H #include "lexer.h" +#include "utils.h" #include #include #include typedef enum { ParsedExprTypeError, + ParsedExprTypeId, ParsedExprTypeInt, ParsedExprTypeFloat, ParsedExprTypeChar, @@ -68,6 +70,10 @@ struct ParsedExpr { Position position; char* message; } error; + struct { + char* value; + size_t length; + } id; int64_t
int_value; double float_value; char char_value; @@ -117,7 +123,9 @@ struct ParsedExpr { }; }; -ParsedExpr* parsed_expr_allocate(const ParsedExpr* source); +ParsedExpr* parsed_expr_alloc(const ParsedExpr source); +void parsed_expr_free(ParsedExpr* expr); +ParsedExpr* parsed_error_expr(Position position, const char* message); struct KeyValuePair { char* key; @@ -126,16 +134,20 @@ struct KeyValuePair { }; typedef struct { + const char* text; Lexer* lexer; Token current; } Parser; -void parser_create(Parser* parser, Lexer* lexer); +void parser_create(Parser* parser, const char* text, Lexer* lexer); ParsedExpr* parser_parse_expression(Parser* parser); ParsedExpr* parser_parse_operand(Parser* parser); +bool parser_current_is(const Parser* parser, TokenType type); bool parser_done(const Parser* parser); Token parser_current(const Parser* parser); void parser_step(Parser* parser); +AllocatedString alloc_token_string(Token token, const char* text); + #endif diff --git a/utils.c b/utils.c new file mode 100644 index 0000000..22ef29e --- /dev/null +++ b/utils.c @@ -0,0 +1,21 @@ +#include "utils.h" +#include +#include + +char* alloc_cstring(const char* source) +{ + char* destination = malloc(strlen(source) + 1); + strcpy(destination, source); + return destination; +} + +AllocatedString alloc_string(const char* source, size_t length) +{ + char* value = malloc(length + 1); + strncpy(value, source, length); + value[length] = '\0'; /* strncpy alone leaves the copy unterminated; the extra byte allocated above holds the terminator */ + return (AllocatedString) { + .value = value, + .length = length, + }; +} diff --git a/utils.h b/utils.h new file mode 100644 index 0000000..c7f0511 --- /dev/null +++ b/utils.h @@ -0,0 +1,46 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#define ASSERT_EXHAUSTIVE_MATCH() \ + (fprintf( \ + stderr, \ + "unexhaustive match at %s:%d in %s()\n", \ + __FILE__, \ + __LINE__, \ + __func__ \ + ), \ + exit(1)) + +#define TODO() \ + (fprintf( \ + stderr, \ + "unimplemented branch at %s:%d in %s()\n", \ + __FILE__, \ + __LINE__, \ + __func__ \ + ), \ + exit(1)) + +#define
ASSERT(condition, message) \ + ((condition) ? (void)0 : \ + (fprintf( \ + stderr, \ + "failed assertion: \"%s\" at %s:%d in %s()\n", \ + (message), \ + __FILE__, \ + __LINE__, \ + __func__ \ + ), \ + exit(1))) + +char* alloc_cstring(const char* source); + +typedef struct { + char* value; + size_t length; +} AllocatedString; + +AllocatedString alloc_string(const char* source, size_t length); + +#endif