diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..af281a7 --- /dev/null +++ b/.clang-format @@ -0,0 +1,9 @@ +BasedOnStyle: WebKit +IndentWidth: 4 +ColumnLimit: 80 +IndentCaseLabels: true +BreakBeforeBraces: Custom +BraceWrapping: + AfterFunction: true + SplitEmptyFunction: false + diff --git a/lexer.c b/lexer.c index 9ec7c34..fcbdfc6 100644 --- a/lexer.c +++ b/lexer.c @@ -5,8 +5,9 @@ #include #include -#define ASSERT_EXHAUSTIVE_MATCH() \ - (fprintf(stderr, "unexhaustive match at %s:%d in %s()\n", __FILE__, __LINE__, __func__), \ +#define ASSERT_EXHAUSTIVE_MATCH() \ + (fprintf(stderr, "unexhaustive match at %s:%d in %s()\n", __FILE__, \ + __LINE__, __func__), \ exit(1)) Token lexer_skip_whitespace(Lexer* lexer); @@ -19,10 +20,12 @@ Token lexer_make_char(Lexer* lexer); Token lexer_make_string(Lexer* lexer); void lexer_skip_literal_char(Lexer* lexer); Token lexer_make_single_char_token(Lexer* lexer, TokenType type); +Token lexer_make_dot_token(Lexer* lexer); +Token lexer_make_colon_token(Lexer* lexer); Token lexer_make_slash_token(Lexer* lexer); Token lexer_skip_singleline_comment(Lexer* lexer); -Token lexer_make_single_or_double_char_token( - Lexer* lexer, TokenType single_type, char second_char, TokenType double_type); +Token lexer_make_single_or_double_char_token(Lexer* lexer, + TokenType single_type, char second_char, TokenType double_type); Token lexer_skip_multiline_comment(Lexer* lexer); Token lexer_make_invalid_char(Lexer* lexer); Position lexer_position(const Lexer* lexer); @@ -72,9 +75,14 @@ Token lexer_make_int_or_float(Lexer* lexer) lexer_step(lexer); if (!lexer_done(lexer) && lexer_current(lexer) == '.') { lexer_step(lexer); - while (!lexer_done(lexer) && isdigit(lexer_current(lexer))) + if (!lexer_done(lexer) && lexer_current(lexer) == '.') { lexer_step(lexer); - return lexer_token(lexer, TokenTypeFloat, begin); + return lexer_token(lexer, TokenTypeIntDoubleDot, begin); + } else { + while (!lexer_done(lexer) && isdigit(lexer_current(lexer))) + lexer_step(lexer); + return lexer_token(lexer, TokenTypeFloat, begin); + } } else { return lexer_token(lexer, TokenTypeInt, begin); } @@ -125,6 +133,10 @@ Token lexer_make_id(Lexer* lexer) return lexer_token(lexer, TokenTypeFn, begin); else if (lexer_span_matches(lexer, begin, "return")) return lexer_token(lexer, TokenTypeReturn, begin); + else if (lexer_span_matches(lexer, begin, "mut")) + return lexer_token(lexer, TokenTypeMut, begin); + else if (lexer_span_matches(lexer, begin, "defer")) + return lexer_token(lexer, TokenTypeDefer, begin); else return lexer_token(lexer, TokenTypeId, begin); } @@ -159,12 +171,11 @@ Token lexer_make_static_token(Lexer* lexer) case ']': return lexer_make_single_char_token(lexer, TokenTypeRBracket); case '.': - return lexer_make_single_or_double_char_token( - lexer, TokenTypeDot, '.', TokenTypeDoubleDot); + return lexer_make_dot_token(lexer); case ',': return lexer_make_single_char_token(lexer, TokenTypeComma); case ':': - return lexer_make_single_char_token(lexer, TokenTypeColon); + return lexer_make_colon_token(lexer); case ';': return lexer_make_single_char_token(lexer, TokenTypeSemicolon); case '&': @@ -209,16 +220,21 @@ Token lexer_make_int_hex_binary_or_float(Lexer* lexer) while (!lexer_done(lexer) && isdigit(lexer_current(lexer))) lexer_step(lexer); return lexer_token(lexer, TokenTypeFloat, begin); - } else if (!lexer_done(lexer) && (lexer_current(lexer) == 'x' || lexer_current(lexer) == 'X')) { + } else if (!lexer_done(lexer) + && (lexer_current(lexer) == 'x' || lexer_current(lexer) == 'X')) { + lexer_step(lexer); while (!lexer_done(lexer) && (isdigit(lexer_current(lexer)) || (lexer_current(lexer) >= 'a' && lexer_current(lexer) <= 'f') - || (lexer_current(lexer) >= 'A' && lexer_current(lexer) <= 'F'))) + || (lexer_current(lexer) >= 'A' + && lexer_current(lexer) <= 'F'))) lexer_step(lexer); return lexer_token(lexer, TokenTypeHex, begin); - } else if (!lexer_done(lexer) && (lexer_current(lexer) == 'b' || lexer_current(lexer) == 'B')) { + } else if (!lexer_done(lexer) + && (lexer_current(lexer) == 'b' || lexer_current(lexer) == 'B')) { lexer_step(lexer); - while (!lexer_done(lexer) && (lexer_current(lexer) == '0' || lexer_current(lexer) == '1')) + while (!lexer_done(lexer) + && (lexer_current(lexer) == '0' || lexer_current(lexer) == '1')) lexer_step(lexer); return lexer_token(lexer, TokenTypeBinary, begin); } else { @@ -271,7 +287,8 @@ void lexer_skip_literal_char(Lexer* lexer) while (!lexer_done(lexer) && (isdigit(lexer_current(lexer)) || (lexer_current(lexer) >= 'a' && lexer_current(lexer) <= 'f') - || (lexer_current(lexer) >= 'A' && lexer_current(lexer) <= 'F'))) + || (lexer_current(lexer) >= 'A' + && lexer_current(lexer) <= 'F'))) lexer_step(lexer); } } @@ -283,8 +300,8 @@ Token lexer_make_single_char_token(Lexer* lexer, TokenType type) return lexer_token(lexer, type, begin); } -Token lexer_make_single_or_double_char_token( - Lexer* lexer, TokenType single_type, char second_char, TokenType double_type) +Token lexer_make_single_or_double_char_token(Lexer* lexer, + TokenType single_type, char second_char, TokenType double_type) { Position begin = lexer_position(lexer); lexer_step(lexer); @@ -296,6 +313,48 @@ Token lexer_make_single_or_double_char_token( } } +Token lexer_make_dot_token(Lexer* lexer) +{ + Position begin = lexer_position(lexer); + lexer_step(lexer); + if (!lexer_done(lexer) && lexer_current(lexer) == '.') { + lexer_step(lexer); + if (!lexer_done(lexer) && lexer_current(lexer) == '=') { + lexer_step(lexer); + return lexer_token(lexer, TokenTypeDoubleDotEqual, begin); + } else if (!lexer_done(lexer) && lexer_current(lexer) == '<') { + lexer_step(lexer); + return lexer_token(lexer, TokenTypeDoubleDotLt, begin); + } else { + return lexer_token(lexer, TokenTypeDoubleDot, begin); + } + } else if (!lexer_done(lexer) && isdigit(lexer_current(lexer))) { + lexer_step(lexer); + while (!lexer_done(lexer) && isdigit(lexer_current(lexer))) + lexer_step(lexer); + return lexer_token(lexer, TokenTypeFloat, begin); + } else { + return lexer_token(lexer, TokenTypeDot, begin); + } +} + +Token lexer_make_colon_token(Lexer* lexer) +{ + Position begin = lexer_position(lexer); + lexer_step(lexer); + if (!lexer_done(lexer) && lexer_current(lexer) == ':') { + lexer_step(lexer); + if (!lexer_done(lexer) && lexer_current(lexer) == '<') { + lexer_step(lexer); + return lexer_token(lexer, TokenTypeDoubleColonLt, begin); + } else { + return lexer_token(lexer, TokenTypeDoubleColon, begin); + } + } else { + return lexer_token(lexer, TokenTypeColon, begin); + } +} + Token lexer_make_slash_token(Lexer* lexer) { Position begin = lexer_position(lexer); @@ -343,9 +402,9 @@ Token lexer_skip_multiline_comment(Lexer* lexer) } lexer_step(lexer); } - return depth != 0 - ? lexer_token(lexer, TokenTypeMalformedMultilineComment, lexer_position(lexer)) - : lexer_next(lexer); + return depth != 0 ? lexer_token( + lexer, TokenTypeMalformedMultilineComment, lexer_position(lexer)) + : lexer_next(lexer); } Token lexer_make_invalid_char(Lexer* lexer) @@ -417,93 +476,137 @@ const char* token_type_to_string(TokenType type) { switch (type) { case TokenTypeEof: - return "Eof"; + return "TokenTypeEof"; case TokenTypeInvalidChar: - return "InvalidChar"; + return "TokenTypeInvalidChar"; case TokenTypeMalformedMultilineComment: - return "MalformedMultilineComment"; + return "TokenTypeMalformedMultilineComment"; case TokenTypeMalformedChar: - return "MalformedChar"; + return "TokenTypeMalformedChar"; case TokenTypeMalformedString: - return "MalformedString"; + return "TokenTypeMalformedString"; case TokenTypeId: - return "Id"; + return "TokenTypeId"; case TokenTypeInt: - return "Int"; + return "TokenTypeInt"; + case TokenTypeIntDoubleDot: + return "TokenTypeIntDoubleDot"; case TokenTypeHex: - return "Hex"; + return "TokenTypeHex"; case TokenTypeBinary: - return "Binary"; + return "TokenTypeBinary"; case TokenTypeFloat: - return "Float"; + return "TokenTypeFloat"; case TokenTypeChar: - return "Char"; + return "TokenTypeChar"; case TokenTypeString: - return "String"; + return "TokenTypeString"; case TokenTypeIf: - return "If"; + return "TokenTypeIf"; case TokenTypeElse: - return "Else"; + return "TokenTypeElse"; + case TokenTypeLoop: + return "TokenTypeLoop"; case TokenTypeWhile: - return "While"; + return "TokenTypeWhile"; + case TokenTypeFor: + return "TokenTypeFor"; + case TokenTypeIn: + return "TokenTypeIn"; case TokenTypeBreak: - return "Break"; + return "TokenTypeBreak"; + case TokenTypeLet: + return "TokenTypeLet"; + case TokenTypeMatch: + return "TokenTypeMatch"; + case TokenTypeFalse: + return "TokenTypeFalse"; + case TokenTypeTrue: + return "TokenTypeTrue"; + case TokenTypeNot: + return "TokenTypeNot"; + case TokenTypeAnd: + return "TokenTypeAnd"; + case TokenTypeOr: + return "TokenTypeOr"; + case TokenTypeFn: + return "TokenTypeFn"; + case TokenTypeReturn: + return "TokenTypeReturn"; + case TokenTypeMut: + return "TokenTypeMut"; + case TokenTypeDefer: + return "TokenTypeDefer"; case TokenTypeLParen: - return "LParen"; + return "TokenTypeLParen"; case TokenTypeRParen: - return "RParen"; + return "TokenTypeRParen"; case TokenTypeLBrace: - return "LBrace"; + return "TokenTypeLBrace"; case TokenTypeRBrace: - return "RBrace"; + return "TokenTypeRBrace"; case TokenTypeLBracket: - return "LBracket"; + return "TokenTypeLBracket"; case TokenTypeRBracket: - return "RBracket"; - case TokenTypeDot: - return "Dot"; + return "TokenTypeRBracket"; case TokenTypeComma: - return "Comma"; + return "TokenTypeComma"; case TokenTypeColon: - return "Colon"; + return "TokenTypeColon"; + case TokenTypeDoubleColon: + return "TokenTypeDoubleColon"; + case TokenTypeDoubleColonLt: + return "TokenTypeDoubleColonLt"; case TokenTypeSemicolon: - return "Semicolon"; + return "TokenTypeSemicolon"; + case TokenTypeAmpersand: + return "TokenTypeAmpersand"; + case TokenTypeUnderscore: + return "TokenTypeUnderscore"; + case TokenTypeDot: + return "TokenTypeDot"; + case TokenTypeDoubleDot: + return "TokenTypeDoubleDot"; + case TokenTypeDoubleDotEqual: + return "TokenTypeDoubleDotEqual"; + case TokenTypeDoubleDotLt: + return "TokenTypeDoubleDotLt"; case TokenTypePlusEqual: - return "PlusEqual"; + return "TokenTypePlusEqual"; case TokenTypeMinusEqual: - return "MinusEqual"; + return "TokenTypeMinusEqual"; case TokenTypeAsteriskEqual: - return "AsteriskEqual"; + return "TokenTypeAsteriskEqual"; case TokenTypeSlashEqual: - return "SlashEqual"; + return "TokenTypeSlashEqual"; case TokenTypePercentEqual: - return "PercentEqual"; + return "TokenTypePercentEqual"; case TokenTypeDoubleEqual: - return "DoubleEqual"; + return "TokenTypeDoubleEqual"; case TokenTypeExclamationEqual: - return "ExclamationEqual"; + return "TokenTypeExclamationEqual"; case TokenTypeLtEqual: - return "LtEqual"; + return "TokenTypeLtEqual"; case TokenTypeGtEqual: - return "GtEqual"; + return "TokenTypeGtEqual"; case TokenTypePlus: - return "Plus"; + return "TokenTypePlus"; case TokenTypeMinus: - return "Minus"; + return "TokenTypeMinus"; case TokenTypeAsterisk: - return "Asterisk"; + return "TokenTypeAsterisk"; case TokenTypeSlash: - return "Slash"; + return "TokenTypeSlash"; case TokenTypePercent: - return "Percent"; + return "TokenTypePercent"; case TokenTypeEqual: - return "Equal"; + return "TokenTypeEqual"; case TokenTypeExclamation: - return "Exclamation"; + return "TokenTypeExclamation"; case TokenTypeLt: - return "Lt"; + return "TokenTypeLt"; case TokenTypeGt: - return "Gt"; + return "TokenTypeGt"; default: ASSERT_EXHAUSTIVE_MATCH(); } diff --git a/lexer.h b/lexer.h index 41c1c3b..90bc787 100644 --- a/lexer.h +++ b/lexer.h @@ -13,6 +13,7 @@ typedef enum { TokenTypeId, TokenTypeInt, + TokenTypeIntDoubleDot, TokenTypeHex, TokenTypeBinary, TokenTypeFloat, @@ -35,6 +36,8 @@ typedef enum { TokenTypeOr, TokenTypeFn, TokenTypeReturn, + TokenTypeMut, + TokenTypeDefer, TokenTypeLParen, TokenTypeRParen, @@ -42,13 +45,17 @@ typedef enum { TokenTypeRBrace, TokenTypeLBracket, TokenTypeRBracket, - TokenTypeDot, TokenTypeComma, TokenTypeColon, + TokenTypeDoubleColon, + TokenTypeDoubleColonLt, TokenTypeSemicolon, - TokenTypeDoubleMatch, TokenTypeAmpersand, TokenTypeUnderscore, + TokenTypeDot, + TokenTypeDoubleDot, + TokenTypeDoubleDotEqual, + TokenTypeDoubleDotLt, TokenTypePlusEqual, TokenTypeMinusEqual, diff --git a/main.c b/main.c index 39f9629..17d32ad 100644 --- a/main.c +++ b/main.c @@ -6,8 +6,13 @@ int main(void) { char text[] - = "abc 123 0xFF 0b101 3.14 'a' '\\n' \"hello\" \"world\\\"\\n\" if else /* /* while */ */ " - "while break (){}[].,:; += -= *= /= %= == != <= >= + - * / % % = ! < >"; + = "abc 123 123.. 0xFF 0b101 .5 1. 3.14 'a' '\\n' \"hello\" " + "\"world\\\"\\n\" if else /* /* while */ */ " + "while for in // in \n break let match false true not and or fn " + "return mut " + "defer (){}[],: :: ::< ; & _ . .. ..= ..< += -= *= /= %= == != <= >= " + "+ - * / % " + "% = ! < >"; printf("text = \"%s\"\n", text); diff --git a/parser.h b/parser.h index 4775dd4..d4c5f12 100644 --- a/parser.h +++ b/parser.h @@ -2,10 +2,13 @@ #define PARSER_H #include "lexer.h" +#include +#include #include typedef enum { ParsedNodeTypeError, + ParsedNodeTypeInt, ParsedNodeTypeFloat, ParsedNodeTypeChar, @@ -27,9 +30,41 @@ typedef enum { ParsedNodeTypeAssign, } ParsedNodeType; -typedef struct ParsedNode { - ParsedNodeType node_type; -} ParsedNode; +typedef struct KeyValuePair KeyValuePair; +typedef struct ParsedNode ParsedNode; + +struct ParsedNode { + ParsedNodeType type; + union { + int64_t int_value; + double float_value; + char char_value; + struct { + char* value; + size_t length; + } string; + bool bool_value; + struct { + ParsedNode* values; + size_t length; + } array; + struct { + KeyValuePair* pairs; + size_t length; + } dict; + struct { + ParsedNode* condition; + ParsedNode* truthy; + ParsedNode* falsy; + } if_node; + }; +}; + +struct KeyValuePair { + char* key; + size_t key_length; + ParsedNode* value; +}; typedef struct { Lexer* lexer;