commit 85adad9831c714c391c51adbaba45a52a89137fa
Author: Simon
Date:   Sat Feb 11 18:59:19 2023 +0000

    init

diff --git a/' b/'
new file mode 100644
index 0000000..2ee068e
--- /dev/null
+++ b/'
@@ -0,0 +1,36 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include <stddef.h>
+
+typedef enum {
+    Id,
+    Int,
+    Float,
+    String,
+
+    If,
+    Else,
+    While,
+    Break,
+
+    LParen,
+    RParen,
+    LBrace,
+    RBrace,
+
+    Plus,
+    Minus,
+} TokenType;
+
+typedef struct {
+    TokenType type;
+    size_t index, length;
+    int line, column;
+} Token;
+
+typedef struct Lexer Lexer;
+
+void lexer_create(Lexer* lexer, const char* text, size_t text_length);
+
+#endif
diff --git a/:w b/:w
new file mode 100644
index 0000000..eb0d0d5
--- /dev/null
+++ b/:w
@@ -0,0 +1,187 @@
+#include "lexer.h"
+#include <ctype.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+struct Lexer {
+    const char* text;
+    size_t index, length;
+    int line, column;
+};
+
+Token lexer_skip_whitespace(Lexer* lexer);
+Token lexer_make_int(Lexer* lexer);
+Token lexer_make_id(Lexer* lexer);
+bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value);
+Token lexer_make_static(Lexer* lexer);
+Token make_single_char_token(Lexer* lexer, TokenType type);
+Token make_slash_token(Lexer* lexer);
+Token lexer_make_invalid_char(Lexer* lexer);
+Position lexer_position(const Lexer* lexer);
+Token lexer_token(const Lexer* lexer, TokenType type, Position begin);
+bool lexer_done(const Lexer* lexer);
+char lexer_current(const Lexer* lexer);
+void lexer_step(Lexer* lexer);
+
+void lexer_create(Lexer* lexer, const char* text, size_t text_length)
+{
+    *lexer = (Lexer) {
+        .text = text,
+        .length = text_length,
+        .line = 1,
+        .column = 1,
+    };
+}
+
+Token lexer_next(Lexer* lexer)
+{
+    char c = lexer_current(lexer);
+    if (lexer_done(lexer))
+        return lexer_token(lexer, TokenTypeEof, lexer_position(lexer));
+    else if (isspace(c))
+        return lexer_skip_whitespace(lexer);
+    else if (isdigit(c))
+        return lexer_make_int(lexer);
+    else if (isalpha(c) || c == '_')
+        return lexer_make_id(lexer);
+    else
+        return lexer_make_static(lexer);
+}
+
+Token lexer_skip_whitespace(Lexer* lexer)
+{
+    lexer_step(lexer);
+    while (!lexer_done(lexer) && isspace(lexer_current(lexer)))
+        lexer_step(lexer);
+    return lexer_next(lexer);
+}
+
+Token lexer_make_int(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    while (!lexer_done(lexer) && isdigit(lexer_current(lexer)))
+        lexer_step(lexer);
+    return lexer_token(lexer, TokenTypeInt, begin);
+}
+
+Token lexer_make_id(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    while (!lexer_done(lexer)
+        && (isalpha(lexer_current(lexer)) || isdigit(lexer_current(lexer))
+            || lexer_current(lexer) == '_'))
+        lexer_step(lexer);
+    if (lexer_span_matches(lexer, begin, "if"))
+        return lexer_token(lexer, TokenTypeIf, begin);
+    else if (lexer_span_matches(lexer, begin, "else"))
+        return lexer_token(lexer, TokenTypeElse, begin);
+    else if (lexer_span_matches(lexer, begin, "while"))
+        return lexer_token(lexer, TokenTypeWhile, begin);
+    else if (lexer_span_matches(lexer, begin, "break"))
+        return lexer_token(lexer, TokenTypeBreak, begin);
+    else
+        return lexer_token(lexer, TokenTypeId, begin);
+}
+
+bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value)
+{
+    size_t length = lexer->index - begin.index;
+    if (length != strlen(value))
+        return false;
+    return strncmp(&lexer->text[begin.index], value, length) == 0;
+}
+
+Token lexer_make_static(Lexer* lexer)
+{
+    switch (lexer_current(lexer)) {
+    case '(':
+        return make_single_char_token(lexer, TokenTypeLParen);
+    case ')':
+        return make_single_char_token(lexer, TokenTypeRParen);
+    case '{':
+        return make_single_char_token(lexer, TokenTypeLBrace);
+    case '}':
+        return make_single_char_token(lexer, TokenTypeRBrace);
+    case ';':
+        return make_single_char_token(lexer, TokenTypeSemicolon);
+    case '+':
+        return make_single_char_token(lexer, TokenTypePlus);
+    case '-':
+        return make_single_char_token(lexer, TokenTypeMinus);
+    case '*':
+        return make_single_char_token(lexer, TokenTypeAsterisk);
+    case '/':
+        return make_slash_token(lexer);
+    case '%':
+        return make_single_char_token(lexer, TokenTypePercent);
+    default:
+        return lexer_make_invalid_char(lexer);
+    }
+}
+
+Token make_single_char_token(Lexer* lexer, TokenType type)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    return lexer_token(lexer, type, begin);
+}
+
+Token skip_singleline_comment(Lexer* lexer);
+Token skip_multiline_comment(Lexer* lexer);
+
+Token make_slash_token(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    switch (lexer_current(lexer)) {
+    case '/':
+        return skip_singleline_comment(lexer);
+    default:
+        return lexer_token(lexer, TokenTypeSlash, begin);
+    }
+}
+
+Token lexer_make_invalid_char(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    return lexer_token(lexer, TokenTypeInvalidChar, begin);
+}
+
+Position lexer_position(const Lexer* lexer)
+{
+    return (Position) {
+        .index = lexer->index,
+        .line = lexer->line,
+        .column = lexer->column,
+    };
+}
+
+Token lexer_token(const Lexer* lexer, TokenType type, Position begin)
+{
+    return (Token) {
+        .type = type,
+        .position = begin,
+        .length = lexer->index - begin.index,
+    };
+}
+
+bool lexer_done(const Lexer* lexer) { return lexer->index >= lexer->length; }
+
+char lexer_current(const Lexer* lexer) { return lexer->text[lexer->index]; }
+
+void lexer_step(Lexer* lexer)
+{
+    if (lexer_done(lexer))
+        return;
+    if (lexer_current(lexer) == '\n') {
+        lexer->line += 1;
+        lexer->column = 1;
+    } else {
+        lexer->column += 1;
+    }
+    lexer->index += 1;
+}
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..2071b23
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,9 @@
+MIT License
+
+Copyright (c)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..ac5288e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,19 @@
+
+CFLAGS = -std=c17 -Wall -Wextra -Wpedantic -Wconversion
+
+HEADERS = $(wildcard *.h)
+
+all: compile_flags.txt wacc
+
+wacc: main.o lexer.o
+	gcc $^ -o $@
+
+%.o: %.c $(HEADERS)
+	gcc $< -c -o $@ $(CFLAGS)
+
+clean:
+	rm -rf *.o wacc
+
+compile_flags.txt:
+	echo -xc $(CFLAGS) | sed 's/\s\+/\n/g' > compile_flags.txt
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..31c53b6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# wacc
+
diff --git a/compile_flags.txt b/compile_flags.txt
new file mode 100644
index 0000000..083ed44
--- /dev/null
+++ b/compile_flags.txt
@@ -0,0 +1,6 @@
+-xc
+-std=c17
+-Wall
+-Wextra
+-Wpedantic
+-Wconversion
diff --git a/lexer.c b/lexer.c
new file mode 100644
index 0000000..297c410
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,393 @@
+#include "lexer.h"
+#include <ctype.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+struct Lexer {
+    const char* text;
+    size_t index, length;
+    int line, column;
+};
+
+Token lexer_skip_whitespace(Lexer* lexer);
+Token lexer_make_int_or_float(Lexer* lexer);
+Token lexer_make_id(Lexer* lexer);
+bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value);
+Token lexer_make_static_token(Lexer* lexer);
+Token lexer_make_int_hex_or_binary(Lexer* lexer);
+Token lexer_make_char(Lexer* lexer);
+Token lexer_make_string(Lexer* lexer);
+void lexer_skip_literal_char(Lexer* lexer);
+Token lexer_make_single_char_token(Lexer* lexer, TokenType type);
+Token lexer_make_slash_token(Lexer* lexer);
+Token lexer_skip_singleline_comment(Lexer* lexer);
+Token lexer_make_single_or_double_char_token(
+    Lexer* lexer, TokenType single_type, char second_char, TokenType double_type);
+Token lexer_skip_multiline_comment(Lexer* lexer);
+Token lexer_make_invalid_char(Lexer* lexer);
+Position lexer_position(const Lexer* lexer);
+Token lexer_token(const Lexer* lexer, TokenType type, Position begin);
+bool lexer_done(const Lexer* lexer);
+char lexer_current(const Lexer* lexer);
+void lexer_step(Lexer* lexer);
+
+/* <ctype.h> classifiers are undefined for negative char values, so always
+ * convert through unsigned char first. */
+static bool is_space(char c) { return isspace((unsigned char)c) != 0; }
+static bool is_digit(char c) { return isdigit((unsigned char)c) != 0; }
+static bool is_alpha(char c) { return isalpha((unsigned char)c) != 0; }
+static bool is_hex_digit(char c) { return isxdigit((unsigned char)c) != 0; }
+
+/* Initialises *lexer to scan text[0 .. text_length) starting at line 1, column 1. */
+void lexer_create(Lexer* lexer, const char* text, size_t text_length)
+{
+    *lexer = (Lexer) {
+        .text = text,
+        .length = text_length,
+        .line = 1,
+        .column = 1,
+    };
+}
+
+/* Returns the next token, skipping whitespace and comments; yields
+ * TokenTypeEof once the input is exhausted. */
+Token lexer_next(Lexer* lexer)
+{
+    /* Test for end of input before reading: text[length] need not exist. */
+    if (lexer_done(lexer))
+        return lexer_token(lexer, TokenTypeEof, lexer_position(lexer));
+    char c = lexer_current(lexer);
+    if (is_space(c))
+        return lexer_skip_whitespace(lexer);
+    else if (c >= '1' && c <= '9')
+        return lexer_make_int_or_float(lexer);
+    else if (is_alpha(c) || c == '_')
+        return lexer_make_id(lexer);
+    else
+        return lexer_make_static_token(lexer);
+}
+
+/* Consumes a run of whitespace, then returns the token that follows it. */
+Token lexer_skip_whitespace(Lexer* lexer)
+{
+    lexer_step(lexer);
+    while (!lexer_done(lexer) && is_space(lexer_current(lexer)))
+        lexer_step(lexer);
+    return lexer_next(lexer);
+}
+
+/* Scans a decimal literal starting with 1-9: digits, optionally followed by
+ * '.' and more digits (TokenTypeFloat); otherwise TokenTypeInt. */
+Token lexer_make_int_or_float(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    while (!lexer_done(lexer) && is_digit(lexer_current(lexer)))
+        lexer_step(lexer);
+    if (!lexer_done(lexer) && lexer_current(lexer) == '.') {
+        lexer_step(lexer); /* consume the '.' so the fraction digits are reachable */
+        while (!lexer_done(lexer) && is_digit(lexer_current(lexer)))
+            lexer_step(lexer);
+        return lexer_token(lexer, TokenTypeFloat, begin);
+    } else {
+        return lexer_token(lexer, TokenTypeInt, begin);
+    }
+}
+
+/* Scans an identifier ([A-Za-z_][A-Za-z0-9_]*) and maps reserved words to
+ * their keyword token types. */
+Token lexer_make_id(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    while (!lexer_done(lexer)
+        && (is_alpha(lexer_current(lexer)) || is_digit(lexer_current(lexer))
+            || lexer_current(lexer) == '_'))
+        lexer_step(lexer);
+    if (lexer_span_matches(lexer, begin, "if"))
+        return lexer_token(lexer, TokenTypeIf, begin);
+    else if (lexer_span_matches(lexer, begin, "else"))
+        return lexer_token(lexer, TokenTypeElse, begin);
+    else if (lexer_span_matches(lexer, begin, "while"))
+        return lexer_token(lexer, TokenTypeWhile, begin);
+    else if (lexer_span_matches(lexer, begin, "break"))
+        return lexer_token(lexer, TokenTypeBreak, begin);
+    else
+        return lexer_token(lexer, TokenTypeId, begin);
+}
+
+/* True when the text from begin up to the current index equals value exactly. */
+bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value)
+{
+    size_t length = lexer->index - begin.index;
+    if (length != strlen(value))
+        return false;
+    return strncmp(&lexer->text[begin.index], value, length) == 0;
+}
+
+/* Dispatches on the current character for literals starting with '0', quotes,
+ * punctuation and operators; anything else becomes TokenTypeInvalidChar. */
+Token lexer_make_static_token(Lexer* lexer)
+{
+    switch (lexer_current(lexer)) {
+    case '0':
+        return lexer_make_int_hex_or_binary(lexer);
+    case '\'':
+        return lexer_make_char(lexer);
+    case '"':
+        return lexer_make_string(lexer);
+    case '(':
+        return lexer_make_single_char_token(lexer, TokenTypeLParen);
+    case ')':
+        return lexer_make_single_char_token(lexer, TokenTypeRParen);
+    case '{':
+        return lexer_make_single_char_token(lexer, TokenTypeLBrace);
+    case '}':
+        return lexer_make_single_char_token(lexer, TokenTypeRBrace);
+    case '[':
+        return lexer_make_single_char_token(lexer, TokenTypeLBracket);
+    case ']':
+        return lexer_make_single_char_token(lexer, TokenTypeRBracket);
+    case '.':
+        return lexer_make_single_char_token(lexer, TokenTypeDot);
+    case ',':
+        return lexer_make_single_char_token(lexer, TokenTypeComma);
+    case ':':
+        return lexer_make_single_char_token(lexer, TokenTypeColon);
+    case ';':
+        return lexer_make_single_char_token(lexer, TokenTypeSemicolon);
+    case '+':
+        return lexer_make_single_or_double_char_token(
+            lexer, TokenTypePlus, '=', TokenTypePlusEqual);
+    case '-':
+        return lexer_make_single_or_double_char_token(
+            lexer, TokenTypeMinus, '=', TokenTypeMinusEqual);
+    case '*':
+        return lexer_make_single_or_double_char_token(
+            lexer, TokenTypeAsterisk, '=', TokenTypeAsteriskEqual);
+    case '/':
+        return lexer_make_slash_token(lexer);
+    case '%':
+        return lexer_make_single_or_double_char_token(
+            lexer, TokenTypePercent, '=', TokenTypePercentEqual);
+    case '=':
+        return lexer_make_single_or_double_char_token(
+            lexer, TokenTypeEqual, '=', TokenTypeDoubleEqual);
+    case '!':
+        return lexer_make_single_or_double_char_token(
+            lexer, TokenTypeExclamation, '=', TokenTypeExclamationEqual);
+    case '<':
+        return lexer_make_single_or_double_char_token(
+            lexer, TokenTypeLt, '=', TokenTypeLtEqual);
+    case '>':
+        return lexer_make_single_or_double_char_token(
+            lexer, TokenTypeGt, '=', TokenTypeGtEqual);
+    default:
+        return lexer_make_invalid_char(lexer);
+    }
+}
+
+/* Scans a literal that starts with '0': 0x/0X hex, 0b/0B binary, or the
+ * single digit 0 as TokenTypeInt. */
+Token lexer_make_int_hex_or_binary(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    if (!lexer_done(lexer) && (lexer_current(lexer) == 'x' || lexer_current(lexer) == 'X')) {
+        lexer_step(lexer); /* consume the radix prefix letter */
+        while (!lexer_done(lexer) && is_hex_digit(lexer_current(lexer)))
+            lexer_step(lexer);
+        return lexer_token(lexer, TokenTypeHex, begin);
+    } else if (!lexer_done(lexer) && (lexer_current(lexer) == 'b' || lexer_current(lexer) == 'B')) {
+        lexer_step(lexer); /* consume the radix prefix letter */
+        while (!lexer_done(lexer) && (lexer_current(lexer) == '0' || lexer_current(lexer) == '1'))
+            lexer_step(lexer);
+        return lexer_token(lexer, TokenTypeBinary, begin);
+    } else {
+        return lexer_token(lexer, TokenTypeInt, begin);
+    }
+}
+
+/* Scans a character literal; yields TokenTypeMalformedChar when the input
+ * ends early or the closing quote is missing. */
+Token lexer_make_char(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    if (lexer_done(lexer))
+        return lexer_token(lexer, TokenTypeMalformedChar, begin);
+    lexer_skip_literal_char(lexer);
+    if (lexer_done(lexer) || lexer_current(lexer) != '\'')
+        return lexer_token(lexer, TokenTypeMalformedChar, begin);
+    lexer_step(lexer);
+    return lexer_token(lexer, TokenTypeChar, begin);
+}
+
+/* Scans a string literal; yields TokenTypeMalformedString when the input ends
+ * before the closing quote. */
+Token lexer_make_string(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    if (lexer_done(lexer))
+        return lexer_token(lexer, TokenTypeMalformedString, begin);
+    while (!lexer_done(lexer) && lexer_current(lexer) != '\"')
+        lexer_skip_literal_char(lexer);
+    if (lexer_done(lexer) || lexer_current(lexer) != '\"')
+        return lexer_token(lexer, TokenTypeMalformedString, begin);
+    lexer_step(lexer);
+    return lexer_token(lexer, TokenTypeString, begin);
+}
+
+/* Consumes one literal character, or one backslash escape (\0, decimal,
+ * \x hex, or any single escaped character). */
+void lexer_skip_literal_char(Lexer* lexer)
+{
+    if (lexer_current(lexer) == '\\') {
+        lexer_step(lexer);
+        if (!lexer_done(lexer) && lexer_current(lexer) == '0') {
+            lexer_step(lexer);
+        } else if (!lexer_done(lexer) && lexer_current(lexer) >= '1'
+            && lexer_current(lexer) <= '9') {
+            lexer_step(lexer);
+            while (!lexer_done(lexer) && is_digit(lexer_current(lexer)))
+                lexer_step(lexer);
+        } else if (!lexer_done(lexer)
+            && (lexer_current(lexer) == 'x' || lexer_current(lexer) == 'X')) {
+            lexer_step(lexer);
+            while (!lexer_done(lexer) && is_hex_digit(lexer_current(lexer)))
+                lexer_step(lexer);
+        } else if (!lexer_done(lexer)) {
+            lexer_step(lexer);
+        }
+    } else {
+        lexer_step(lexer);
+    }
+}
+
+/* Emits a one-character token of the given type. */
+Token lexer_make_single_char_token(Lexer* lexer, TokenType type)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    return lexer_token(lexer, type, begin);
+}
+
+/* Emits double_type when the current character is followed by second_char,
+ * otherwise single_type. */
+Token lexer_make_single_or_double_char_token(
+    Lexer* lexer, TokenType single_type, char second_char, TokenType double_type)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    if (!lexer_done(lexer) && lexer_current(lexer) == second_char) {
+        lexer_step(lexer);
+        return lexer_token(lexer, double_type, begin);
+    } else {
+        return lexer_token(lexer, single_type, begin);
+    }
+}
+
+/* Handles '/': a line comment, a block comment, "/=" or a plain slash. */
+Token lexer_make_slash_token(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    if (lexer_done(lexer)) /* a trailing '/' has no following character to inspect */
+        return lexer_token(lexer, TokenTypeSlash, begin);
+    switch (lexer_current(lexer)) {
+    case '/':
+        return lexer_skip_singleline_comment(lexer);
+    case '*':
+        return lexer_skip_multiline_comment(lexer);
+    case '=':
+        lexer_step(lexer);
+        return lexer_token(lexer, TokenTypeSlashEqual, begin);
+    default:
+        return lexer_token(lexer, TokenTypeSlash, begin);
+    }
+}
+
+/* Skips to just past the end of the line, then returns the next token. */
+Token lexer_skip_singleline_comment(Lexer* lexer)
+{
+    lexer_step(lexer);
+    while (!lexer_done(lexer) && lexer_current(lexer) != '\n')
+        lexer_step(lexer);
+    if (!lexer_done(lexer) && lexer_current(lexer) == '\n')
+        lexer_step(lexer);
+    return lexer_next(lexer);
+}
+
+/* Skips a block comment, honouring nesting, then returns the next token.
+ * An unterminated comment yields TokenTypeMalformedMultilineComment. */
+Token lexer_skip_multiline_comment(Lexer* lexer)
+{
+    lexer_step(lexer); /* consume the '*' of the opening delimiter */
+    int depth = 1;
+    while (!lexer_done(lexer) && depth > 0) {
+        char c = lexer_current(lexer);
+        lexer_step(lexer);
+        if (lexer_done(lexer))
+            break;
+        if (c == '/' && lexer_current(lexer) == '*') {
+            lexer_step(lexer);
+            depth += 1;
+        } else if (c == '*' && lexer_current(lexer) == '/') {
+            lexer_step(lexer);
+            depth -= 1;
+        }
+    }
+    return depth != 0
+        ? lexer_token(lexer, TokenTypeMalformedMultilineComment, lexer_position(lexer))
+        : lexer_next(lexer);
+}
+
+/* Emits a one-character TokenTypeInvalidChar token. */
+Token lexer_make_invalid_char(Lexer* lexer)
+{
+    Position begin = lexer_position(lexer);
+    lexer_step(lexer);
+    return lexer_token(lexer, TokenTypeInvalidChar, begin);
+}
+
+/* Snapshot of the current index, line and column. */
+Position lexer_position(const Lexer* lexer)
+{
+    return (Position) {
+        .index = lexer->index,
+        .line = lexer->line,
+        .column = lexer->column,
+    };
+}
+
+/* Builds a token of the given type spanning begin up to the current index. */
+Token lexer_token(const Lexer* lexer, TokenType type, Position begin)
+{
+    return (Token) {
+        .type = type,
+        .position = begin,
+        .length = lexer->index - begin.index,
+    };
+}
+
+/* True once every input character has been consumed. */
+bool lexer_done(const Lexer* lexer) { return lexer->index >= lexer->length; }
+
+/* Current character; only valid while !lexer_done(lexer). */
+char lexer_current(const Lexer* lexer) { return lexer->text[lexer->index]; }
+
+/* Advances one character, maintaining line/column; no-op at end of input. */
+void lexer_step(Lexer* lexer)
+{
+    if (lexer_done(lexer))
+        return;
+    if (lexer_current(lexer) == '\n') {
+        lexer->line += 1;
+        lexer->column = 1;
+    } else {
+        lexer->column += 1;
+    }
+    lexer->index += 1;
+}
diff --git a/lexer.h b/lexer.h
new file mode 100644
index 0000000..1efce31
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,76 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include <stddef.h>
+
+typedef enum {
+    TokenTypeEof,
+
+    TokenTypeInvalidChar,
+    TokenTypeMalformedMultilineComment,
+    TokenTypeMalformedChar,
+    TokenTypeMalformedString,
+
+    TokenTypeId,
+    TokenTypeInt,
+    TokenTypeHex,
+    TokenTypeBinary,
+    TokenTypeFloat,
+    TokenTypeChar,
+    TokenTypeString,
+
+    TokenTypeIf,
+    TokenTypeElse,
+    TokenTypeWhile,
+    TokenTypeBreak,
+
+    TokenTypeLParen,
+    TokenTypeRParen,
+    TokenTypeLBrace,
+    TokenTypeRBrace,
+    TokenTypeLBracket,
+    TokenTypeRBracket,
+    TokenTypeDot,
+    TokenTypeComma,
+    TokenTypeColon,
+    TokenTypeSemicolon,
+
+    TokenTypePlusEqual,
+    TokenTypeMinusEqual,
+    TokenTypeAsteriskEqual,
+    TokenTypeSlashEqual,
+    TokenTypePercentEqual,
+    TokenTypeDoubleEqual,
+    TokenTypeExclamationEqual,
+    TokenTypeLtEqual,
+    TokenTypeGtEqual,
+
+    TokenTypePlus,
+    TokenTypeMinus,
+    TokenTypeAsterisk,
+    TokenTypeSlash,
+    TokenTypePercent,
+    TokenTypeEqual,
+    TokenTypeExclamation,
+    TokenTypeLt,
+    TokenTypeGt,
+
+} TokenType;
+
+typedef struct {
+    size_t index;
+    int line, column;
+} Position;
+
+typedef struct {
+    TokenType type;
+    Position position;
+    size_t length;
+} Token;
+
+typedef struct Lexer Lexer;
+
+void lexer_create(Lexer* lexer, const char* text, size_t text_length);
+Token lexer_next(Lexer* lexer);
+
+#endif
diff --git a/lexer.o b/lexer.o
new file mode 100644
index 0000000..e794c73
Binary files /dev/null and b/lexer.o differ
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..50c765e
--- /dev/null
+++ b/main.c
@@ -0,0 +1,3 @@
+#include <stdio.h>
+
+int main(void) { printf("hello world\n"); }
diff --git a/main.o b/main.o
new file mode 100644
index 0000000..09c8eea
Binary files /dev/null and b/main.o differ
diff --git a/wacc b/wacc
new file mode 100755
index 0000000..8ad967f
Binary files /dev/null and b/wacc differ