diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..8ae9b75
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,14 @@
+BasedOnStyle: WebKit
+IndentWidth: 4
+ColumnLimit: 80
+IndentCaseLabels: true
+BreakBeforeBraces: Custom
+BraceWrapping:
+  AfterFunction: true
+  SplitEmptyFunction: false
+AlignAfterOpenBracket: BlockIndent
+AlignOperands: AlignAfterOperator
+BreakBeforeBinaryOperators: true
+BinPackArguments: false
+BinPackParameters: false
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..65ff49b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+matemateak
+*.o
+compile_flags.txt
+
diff --git a/Makefile b/Makefile
new file mode 100644
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+# Builds the matemateak executable from every C source under src/.
+
+CC = gcc
+
+CFLAGS = -std=c17 -Wall -Wextra -Wpedantic -Wconversion
+# Libraries for the link step; they must come after the object files.
+LFLAGS = -lm
+
+# The patterns are quoted so that find, not the shell, expands them.
+C_FILES = $(shell find src/ -name '*.c')
+HEADER_FILES = $(shell find src/ -name '*.h')
+
+OBJECT_FILES = $(patsubst %.c, %.o, $(C_FILES))
+
+matemateak: $(OBJECT_FILES)
+	$(CC) -o $@ $^ $(LFLAGS)
+
+# Each object depends on all headers, so a header change rebuilds everything.
+%.o: %.c $(HEADER_FILES)
+	$(CC) -c -o $@ $(CFLAGS) $<
+
+# Writes the compiler flags one per line.
+compile_flags.txt:
+	echo -xc $(CFLAGS) | sed 's/\s\+/\n/g' > compile_flags.txt
+
+.PHONY: clean
+clean:
+	$(RM) $(OBJECT_FILES) matemateak
+
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100644
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,143 @@
+#include "lexer.h"
+#include <stdbool.h>
+
+/* True once every byte of the input has been consumed. */
+static bool lexer_done(const Lexer* lexer)
+{
+    return lexer->index >= lexer->length;
+}
+
+/* The byte at the read position; only meaningful while !lexer_done(). */
+static char lexer_current(const Lexer* lexer)
+{
+    return lexer->text[lexer->index];
+}
+
+/*
+ * Consumes one byte and updates line/col to describe the byte after it.
+ * The update is keyed on the byte being consumed: a newline ends its line,
+ * so whatever follows it sits at column 1 of the next line. Does nothing
+ * once the input is exhausted.
+ */
+static void lexer_step(Lexer* lexer)
+{
+    if (lexer_done(lexer))
+        return;
+    if (lexer_current(lexer) == '\n') {
+        lexer->line += 1;
+        lexer->col = 1;
+    } else {
+        lexer->col += 1;
+    }
+    lexer->index += 1;
+}
+
+/* Snapshot of the current read position. */
+static Position lexer_pos(const Lexer* lexer)
+{
+    return (Position) {
+        .index = lexer->index,
+        .line = lexer->line,
+        .col = lexer->col,
+    };
+}
+
+/* Builds a token of `type` covering `start` up to the read position. */
+static Token lexer_token(const Lexer* lexer, TokenType type, Position start)
+{
+    return (Token) {
+        .type = type,
+        .index = start.index,
+        .length = lexer->index - start.index,
+        .line = start.line,
+        .col = start.col,
+    };
+}
+
+/* Consumes one byte, then builds a token of `type` that began at `start`. */
+static Token lexer_step_and_token(Lexer* lexer, TokenType type, Position start)
+{
+    lexer_step(lexer);
+    return lexer_token(lexer, type, start);
+}
+
+static bool lexer_is_whitespace(char value)
+{
+    return value == ' ' || value == '\t' || value == '\r' || value == '\n';
+}
+
+/* Skips a run of whitespace and returns the token that follows it. */
+static Token lexer_skip_whitespace(Lexer* lexer)
+{
+    while (!lexer_done(lexer) && lexer_is_whitespace(lexer_current(lexer)))
+        lexer_step(lexer);
+    return lexer_next(lexer);
+}
+
+static bool lexer_is_int(char value) { return value >= '0' && value <= '9'; }
+
+static bool lexer_is_int_start(char value) { return lexer_is_int(value); }
+
+/* Lexes a run of decimal digits; the current byte must be a digit. */
+static Token lexer_int_token(Lexer* lexer)
+{
+    Position start = lexer_pos(lexer);
+    lexer_step(lexer);
+    while (!lexer_done(lexer) && lexer_is_int(lexer_current(lexer)))
+        lexer_step(lexer);
+    return lexer_token(lexer, TokenTypeInt, start);
+}
+
+void lexer(Lexer* lexer, const char* text, size_t length)
+{
+    *lexer = (Lexer) {
+        .text = text,
+        .length = length,
+        .index = 0,
+        .line = 1,
+        .col = 1,
+    };
+}
+
+/* Integers, or a one-byte TokenTypeInvalidChar for anything unrecognized. */
+static Token lexer_level3(Lexer* lexer)
+{
+    if (lexer_is_int_start(lexer_current(lexer)))
+        return lexer_int_token(lexer);
+    else
+        return lexer_step_and_token(
+            lexer, TokenTypeInvalidChar, lexer_pos(lexer)
+        );
+}
+
+/* Single-character operators and parentheses; defers the rest to level 3. */
+static Token lexer_level2(Lexer* lexer)
+{
+    Position start = lexer_pos(lexer);
+    switch (lexer_current(lexer)) {
+        case '+':
+            return lexer_step_and_token(lexer, TokenTypePlus, start);
+        case '-':
+            return lexer_step_and_token(lexer, TokenTypeMinus, start);
+        case '*':
+            return lexer_step_and_token(lexer, TokenTypeAsterisk, start);
+        case '/':
+            return lexer_step_and_token(lexer, TokenTypeSlash, start);
+        case '(':
+            return lexer_step_and_token(lexer, TokenTypeLParen, start);
+        case ')':
+            return lexer_step_and_token(lexer, TokenTypeRParen, start);
+        default:
+            return lexer_level3(lexer);
+    }
+}
+
+Token lexer_next(Lexer* lexer)
+{
+    if (lexer_done(lexer))
+        return lexer_token(lexer, TokenTypeEof, lexer_pos(lexer));
+    else if (lexer_is_whitespace(lexer_current(lexer)))
+        return lexer_skip_whitespace(lexer);
+    else
+        return lexer_level2(lexer);
+}
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,44 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include <stddef.h>
+
+/* A location in the input text. */
+typedef struct {
+    size_t index;
+    int line, col;
+} Position;
+
+typedef enum {
+    TokenTypeEof,
+    TokenTypeInvalidChar,
+    TokenTypeInt,
+    TokenTypePlus,
+    TokenTypeMinus,
+    TokenTypeAsterisk,
+    TokenTypeSlash,
+    TokenTypeLParen,
+    TokenTypeRParen,
+} TokenType;
+
+/* A token: its kind, the byte span it covers, and where that span starts. */
+typedef struct {
+    TokenType type;
+    size_t index, length;
+    int line, col;
+} Token;
+
+/* Lexer state: the input text and the current read position within it. */
+typedef struct {
+    const char* text;
+    size_t index, length;
+    int line, col;
+} Lexer;
+
+/* Prepares `lexer` to read `length` bytes of `text` from line 1, column 1. */
+void lexer(Lexer* lexer, const char* text, size_t length);
+
+/* Returns the next token; yields TokenTypeEof once the input is exhausted. */
+Token lexer_next(Lexer* lexer);
+
+#endif
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..70b4681
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,4 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(void) { printf("hello world\n"); }