more lexer stuff
This commit is contained in:
parent
8c4d734af0
commit
b71c2e5afb
9
.clang-format
Normal file
9
.clang-format
Normal file
@ -0,0 +1,9 @@
|
||||
BasedOnStyle: WebKit
|
||||
IndentWidth: 4
|
||||
ColumnLimit: 80
|
||||
IndentCaseLabels: true
|
||||
BreakBeforeBraces: Custom
|
||||
BraceWrapping:
|
||||
AfterFunction: true
|
||||
SplitEmptyFunction: false
|
||||
|
231
lexer.c
231
lexer.c
@ -5,8 +5,9 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define ASSERT_EXHAUSTIVE_MATCH() \
|
||||
(fprintf(stderr, "unexhaustive match at %s:%d in %s()\n", __FILE__, __LINE__, __func__), \
|
||||
#define ASSERT_EXHAUSTIVE_MATCH() \
|
||||
(fprintf(stderr, "unexhaustive match at %s:%d in %s()\n", __FILE__, \
|
||||
__LINE__, __func__), \
|
||||
exit(1))
|
||||
|
||||
Token lexer_skip_whitespace(Lexer* lexer);
|
||||
@ -19,10 +20,12 @@ Token lexer_make_char(Lexer* lexer);
|
||||
Token lexer_make_string(Lexer* lexer);
|
||||
void lexer_skip_literal_char(Lexer* lexer);
|
||||
Token lexer_make_single_char_token(Lexer* lexer, TokenType type);
|
||||
Token lexer_make_dot_token(Lexer* lexer);
|
||||
Token lexer_make_colon_token(Lexer* lexer);
|
||||
Token lexer_make_slash_token(Lexer* lexer);
|
||||
Token lexer_skip_singleline_comment(Lexer* lexer);
|
||||
Token lexer_make_single_or_double_char_token(
|
||||
Lexer* lexer, TokenType single_type, char second_char, TokenType double_type);
|
||||
Token lexer_make_single_or_double_char_token(Lexer* lexer,
|
||||
TokenType single_type, char second_char, TokenType double_type);
|
||||
Token lexer_skip_multiline_comment(Lexer* lexer);
|
||||
Token lexer_make_invalid_char(Lexer* lexer);
|
||||
Position lexer_position(const Lexer* lexer);
|
||||
@ -72,9 +75,14 @@ Token lexer_make_int_or_float(Lexer* lexer)
|
||||
lexer_step(lexer);
|
||||
if (!lexer_done(lexer) && lexer_current(lexer) == '.') {
|
||||
lexer_step(lexer);
|
||||
while (!lexer_done(lexer) && isdigit(lexer_current(lexer)))
|
||||
if (!lexer_done(lexer) && lexer_current(lexer) == '.') {
|
||||
lexer_step(lexer);
|
||||
return lexer_token(lexer, TokenTypeFloat, begin);
|
||||
return lexer_token(lexer, TokenTypeIntDoubleDot, begin);
|
||||
} else {
|
||||
while (!lexer_done(lexer) && isdigit(lexer_current(lexer)))
|
||||
lexer_step(lexer);
|
||||
return lexer_token(lexer, TokenTypeFloat, begin);
|
||||
}
|
||||
} else {
|
||||
return lexer_token(lexer, TokenTypeInt, begin);
|
||||
}
|
||||
@ -125,6 +133,10 @@ Token lexer_make_id(Lexer* lexer)
|
||||
return lexer_token(lexer, TokenTypeFn, begin);
|
||||
else if (lexer_span_matches(lexer, begin, "return"))
|
||||
return lexer_token(lexer, TokenTypeReturn, begin);
|
||||
else if (lexer_span_matches(lexer, begin, "mut"))
|
||||
return lexer_token(lexer, TokenTypeMut, begin);
|
||||
else if (lexer_span_matches(lexer, begin, "defer"))
|
||||
return lexer_token(lexer, TokenTypeDefer, begin);
|
||||
else
|
||||
return lexer_token(lexer, TokenTypeId, begin);
|
||||
}
|
||||
@ -159,12 +171,11 @@ Token lexer_make_static_token(Lexer* lexer)
|
||||
case ']':
|
||||
return lexer_make_single_char_token(lexer, TokenTypeRBracket);
|
||||
case '.':
|
||||
return lexer_make_single_or_double_char_token(
|
||||
lexer, TokenTypeDot, '.', TokenTypeDoubleDot);
|
||||
return lexer_make_dot_token(lexer);
|
||||
case ',':
|
||||
return lexer_make_single_char_token(lexer, TokenTypeComma);
|
||||
case ':':
|
||||
return lexer_make_single_char_token(lexer, TokenTypeColon);
|
||||
return lexer_make_colon_token(lexer);
|
||||
case ';':
|
||||
return lexer_make_single_char_token(lexer, TokenTypeSemicolon);
|
||||
case '&':
|
||||
@ -209,16 +220,21 @@ Token lexer_make_int_hex_binary_or_float(Lexer* lexer)
|
||||
while (!lexer_done(lexer) && isdigit(lexer_current(lexer)))
|
||||
lexer_step(lexer);
|
||||
return lexer_token(lexer, TokenTypeFloat, begin);
|
||||
} else if (!lexer_done(lexer) && (lexer_current(lexer) == 'x' || lexer_current(lexer) == 'X')) {
|
||||
} else if (!lexer_done(lexer)
|
||||
&& (lexer_current(lexer) == 'x' || lexer_current(lexer) == 'X')) {
|
||||
lexer_step(lexer);
|
||||
while (!lexer_done(lexer)
|
||||
&& (isdigit(lexer_current(lexer))
|
||||
|| (lexer_current(lexer) >= 'a' && lexer_current(lexer) <= 'f')
|
||||
|| (lexer_current(lexer) >= 'A' && lexer_current(lexer) <= 'F')))
|
||||
|| (lexer_current(lexer) >= 'A'
|
||||
&& lexer_current(lexer) <= 'F')))
|
||||
lexer_step(lexer);
|
||||
return lexer_token(lexer, TokenTypeHex, begin);
|
||||
} else if (!lexer_done(lexer) && (lexer_current(lexer) == 'b' || lexer_current(lexer) == 'B')) {
|
||||
} else if (!lexer_done(lexer)
|
||||
&& (lexer_current(lexer) == 'b' || lexer_current(lexer) == 'B')) {
|
||||
lexer_step(lexer);
|
||||
while (!lexer_done(lexer) && (lexer_current(lexer) == '0' || lexer_current(lexer) == '1'))
|
||||
while (!lexer_done(lexer)
|
||||
&& (lexer_current(lexer) == '0' || lexer_current(lexer) == '1'))
|
||||
lexer_step(lexer);
|
||||
return lexer_token(lexer, TokenTypeBinary, begin);
|
||||
} else {
|
||||
@ -271,7 +287,8 @@ void lexer_skip_literal_char(Lexer* lexer)
|
||||
while (!lexer_done(lexer)
|
||||
&& (isdigit(lexer_current(lexer))
|
||||
|| (lexer_current(lexer) >= 'a' && lexer_current(lexer) <= 'f')
|
||||
|| (lexer_current(lexer) >= 'A' && lexer_current(lexer) <= 'F')))
|
||||
|| (lexer_current(lexer) >= 'A'
|
||||
&& lexer_current(lexer) <= 'F')))
|
||||
lexer_step(lexer);
|
||||
}
|
||||
}
|
||||
@ -283,8 +300,8 @@ Token lexer_make_single_char_token(Lexer* lexer, TokenType type)
|
||||
return lexer_token(lexer, type, begin);
|
||||
}
|
||||
|
||||
Token lexer_make_single_or_double_char_token(
|
||||
Lexer* lexer, TokenType single_type, char second_char, TokenType double_type)
|
||||
Token lexer_make_single_or_double_char_token(Lexer* lexer,
|
||||
TokenType single_type, char second_char, TokenType double_type)
|
||||
{
|
||||
Position begin = lexer_position(lexer);
|
||||
lexer_step(lexer);
|
||||
@ -296,6 +313,48 @@ Token lexer_make_single_or_double_char_token(
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Lexes a token that starts with '.'; lexer_make_static_token dispatches
 * here for the '.' case.
 *
 * The first character is stepped over unconditionally, then the following
 * character(s) select the token type:
 *   "..="            -> TokenTypeDoubleDotEqual
 *   "..<"            -> TokenTypeDoubleDotLt
 *   ".."             -> TokenTypeDoubleDot
 *   "." then digits  -> TokenTypeFloat (e.g. ".5")
 *   "." alone        -> TokenTypeDot
 *
 * Every lookahead is guarded by lexer_done(), so a '.' at the very end of
 * the input yields TokenTypeDot.
 *
 * NOTE(review): lexer_token() presumably builds a token spanning from
 * `begin` to the current lexer position -- confirm against its definition.
 * NOTE(review): if lexer_current() returns plain char, its value should be
 * cast to unsigned char before being passed to isdigit() -- confirm.
 */
Token lexer_make_dot_token(Lexer* lexer)
|
||||
{
|
||||
/* Remember where the token starts before stepping over the first '.'. */
Position begin = lexer_position(lexer);
|
||||
lexer_step(lexer);
|
||||
if (!lexer_done(lexer) && lexer_current(lexer) == '.') {
|
||||
/* Second '.': at least "..", possibly "..=" or "..<". */
lexer_step(lexer);
|
||||
if (!lexer_done(lexer) && lexer_current(lexer) == '=') {
|
||||
lexer_step(lexer);
|
||||
return lexer_token(lexer, TokenTypeDoubleDotEqual, begin);
|
||||
} else if (!lexer_done(lexer) && lexer_current(lexer) == '<') {
|
||||
lexer_step(lexer);
|
||||
return lexer_token(lexer, TokenTypeDoubleDotLt, begin);
|
||||
} else {
|
||||
return lexer_token(lexer, TokenTypeDoubleDot, begin);
|
||||
}
|
||||
} else if (!lexer_done(lexer) && isdigit(lexer_current(lexer))) {
|
||||
/* '.' directly followed by a digit: float without an integer part. */
lexer_step(lexer);
|
||||
while (!lexer_done(lexer) && isdigit(lexer_current(lexer)))
|
||||
lexer_step(lexer);
|
||||
return lexer_token(lexer, TokenTypeFloat, begin);
|
||||
} else {
|
||||
return lexer_token(lexer, TokenTypeDot, begin);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Lexes a token that starts with ':'; lexer_make_static_token dispatches
 * here for the ':' case.
 *
 * The first character is stepped over unconditionally, then the following
 * character(s) select the token type:
 *   "::<"  -> TokenTypeDoubleColonLt
 *   "::"   -> TokenTypeDoubleColon
 *   ":"    -> TokenTypeColon
 *
 * Every lookahead is guarded by lexer_done(), so a ':' at the very end of
 * the input yields TokenTypeColon.
 *
 * NOTE(review): lexer_token() presumably builds a token spanning from
 * `begin` to the current lexer position -- confirm against its definition.
 */
Token lexer_make_colon_token(Lexer* lexer)
|
||||
{
|
||||
/* Remember where the token starts before stepping over the first ':'. */
Position begin = lexer_position(lexer);
|
||||
lexer_step(lexer);
|
||||
if (!lexer_done(lexer) && lexer_current(lexer) == ':') {
|
||||
/* Second ':': either "::" or "::<". */
lexer_step(lexer);
|
||||
if (!lexer_done(lexer) && lexer_current(lexer) == '<') {
|
||||
lexer_step(lexer);
|
||||
return lexer_token(lexer, TokenTypeDoubleColonLt, begin);
|
||||
} else {
|
||||
return lexer_token(lexer, TokenTypeDoubleColon, begin);
|
||||
}
|
||||
} else {
|
||||
return lexer_token(lexer, TokenTypeColon, begin);
|
||||
}
|
||||
}
|
||||
|
||||
Token lexer_make_slash_token(Lexer* lexer)
|
||||
{
|
||||
Position begin = lexer_position(lexer);
|
||||
@ -343,9 +402,9 @@ Token lexer_skip_multiline_comment(Lexer* lexer)
|
||||
}
|
||||
lexer_step(lexer);
|
||||
}
|
||||
return depth != 0
|
||||
? lexer_token(lexer, TokenTypeMalformedMultilineComment, lexer_position(lexer))
|
||||
: lexer_next(lexer);
|
||||
return depth != 0 ? lexer_token(
|
||||
lexer, TokenTypeMalformedMultilineComment, lexer_position(lexer))
|
||||
: lexer_next(lexer);
|
||||
}
|
||||
|
||||
Token lexer_make_invalid_char(Lexer* lexer)
|
||||
@ -417,93 +476,137 @@ const char* token_type_to_string(TokenType type)
|
||||
{
|
||||
switch (type) {
|
||||
case TokenTypeEof:
|
||||
return "Eof";
|
||||
return "TokenTypeEof";
|
||||
case TokenTypeInvalidChar:
|
||||
return "InvalidChar";
|
||||
return "TokenTypeInvalidChar";
|
||||
case TokenTypeMalformedMultilineComment:
|
||||
return "MalformedMultilineComment";
|
||||
return "TokenTypeMalformedMultilineComment";
|
||||
case TokenTypeMalformedChar:
|
||||
return "MalformedChar";
|
||||
return "TokenTypeMalformedChar";
|
||||
case TokenTypeMalformedString:
|
||||
return "MalformedString";
|
||||
return "TokenTypeMalformedString";
|
||||
case TokenTypeId:
|
||||
return "Id";
|
||||
return "TokenTypeId";
|
||||
case TokenTypeInt:
|
||||
return "Int";
|
||||
return "TokenTypeInt";
|
||||
case TokenTypeIntDoubleDot:
|
||||
return "TokenTypeIntDoubleDot";
|
||||
case TokenTypeHex:
|
||||
return "Hex";
|
||||
return "TokenTypeHex";
|
||||
case TokenTypeBinary:
|
||||
return "Binary";
|
||||
return "TokenTypeBinary";
|
||||
case TokenTypeFloat:
|
||||
return "Float";
|
||||
return "TokenTypeFloat";
|
||||
case TokenTypeChar:
|
||||
return "Char";
|
||||
return "TokenTypeChar";
|
||||
case TokenTypeString:
|
||||
return "String";
|
||||
return "TokenTypeString";
|
||||
case TokenTypeIf:
|
||||
return "If";
|
||||
return "TokenTypeIf";
|
||||
case TokenTypeElse:
|
||||
return "Else";
|
||||
return "TokenTypeElse";
|
||||
case TokenTypeLoop:
|
||||
return "TokenTypeLoop";
|
||||
case TokenTypeWhile:
|
||||
return "While";
|
||||
return "TokenTypeWhile";
|
||||
case TokenTypeFor:
|
||||
return "TokenTypeFor";
|
||||
case TokenTypeIn:
|
||||
return "TokenTypeIn";
|
||||
case TokenTypeBreak:
|
||||
return "Break";
|
||||
return "TokenTypeBreak";
|
||||
case TokenTypeLet:
|
||||
return "TokenTypeLet";
|
||||
case TokenTypeMatch:
|
||||
return "TokenTypeMatch";
|
||||
case TokenTypeFalse:
|
||||
return "TokenTypeFalse";
|
||||
case TokenTypeTrue:
|
||||
return "TokenTypeTrue";
|
||||
case TokenTypeNot:
|
||||
return "TokenTypeNot";
|
||||
case TokenTypeAnd:
|
||||
return "TokenTypeAnd";
|
||||
case TokenTypeOr:
|
||||
return "TokenTypeOr";
|
||||
case TokenTypeFn:
|
||||
return "TokenTypeFn";
|
||||
case TokenTypeReturn:
|
||||
return "TokenTypeReturn";
|
||||
case TokenTypeMut:
|
||||
return "TokenTypeMut";
|
||||
case TokenTypeDefer:
|
||||
return "TokenTypeDefer";
|
||||
case TokenTypeLParen:
|
||||
return "LParen";
|
||||
return "TokenTypeLParen";
|
||||
case TokenTypeRParen:
|
||||
return "RParen";
|
||||
return "TokenTypeRParen";
|
||||
case TokenTypeLBrace:
|
||||
return "LBrace";
|
||||
return "TokenTypeLBrace";
|
||||
case TokenTypeRBrace:
|
||||
return "RBrace";
|
||||
return "TokenTypeRBrace";
|
||||
case TokenTypeLBracket:
|
||||
return "LBracket";
|
||||
return "TokenTypeLBracket";
|
||||
case TokenTypeRBracket:
|
||||
return "RBracket";
|
||||
case TokenTypeDot:
|
||||
return "Dot";
|
||||
return "TokenTypeRBracket";
|
||||
case TokenTypeComma:
|
||||
return "Comma";
|
||||
return "TokenTypeComma";
|
||||
case TokenTypeColon:
|
||||
return "Colon";
|
||||
return "TokenTypeColon";
|
||||
case TokenTypeDoubleColon:
|
||||
return "TokenTypeDoubleColon";
|
||||
case TokenTypeDoubleColonLt:
|
||||
return "TokenTypeDoubleColonLt";
|
||||
case TokenTypeSemicolon:
|
||||
return "Semicolon";
|
||||
return "TokenTypeSemicolon";
|
||||
case TokenTypeAmpersand:
|
||||
return "TokenTypeAmpersand";
|
||||
case TokenTypeUnderscore:
|
||||
return "TokenTypeUnderscore";
|
||||
case TokenTypeDot:
|
||||
return "TokenTypeDot";
|
||||
case TokenTypeDoubleDot:
|
||||
return "TokenTypeDoubleDot";
|
||||
case TokenTypeDoubleDotEqual:
|
||||
return "TokenTypeDoubleDotEqual";
|
||||
case TokenTypeDoubleDotLt:
|
||||
return "TokenTypeDoubleDotLt";
|
||||
case TokenTypePlusEqual:
|
||||
return "PlusEqual";
|
||||
return "TokenTypePlusEqual";
|
||||
case TokenTypeMinusEqual:
|
||||
return "MinusEqual";
|
||||
return "TokenTypeMinusEqual";
|
||||
case TokenTypeAsteriskEqual:
|
||||
return "AsteriskEqual";
|
||||
return "TokenTypeAsteriskEqual";
|
||||
case TokenTypeSlashEqual:
|
||||
return "SlashEqual";
|
||||
return "TokenTypeSlashEqual";
|
||||
case TokenTypePercentEqual:
|
||||
return "PercentEqual";
|
||||
return "TokenTypePercentEqual";
|
||||
case TokenTypeDoubleEqual:
|
||||
return "DoubleEqual";
|
||||
return "TokenTypeDoubleEqual";
|
||||
case TokenTypeExclamationEqual:
|
||||
return "ExclamationEqual";
|
||||
return "TokenTypeExclamationEqual";
|
||||
case TokenTypeLtEqual:
|
||||
return "LtEqual";
|
||||
return "TokenTypeLtEqual";
|
||||
case TokenTypeGtEqual:
|
||||
return "GtEqual";
|
||||
return "TokenTypeGtEqual";
|
||||
case TokenTypePlus:
|
||||
return "Plus";
|
||||
return "TokenTypePlus";
|
||||
case TokenTypeMinus:
|
||||
return "Minus";
|
||||
return "TokenTypeMinus";
|
||||
case TokenTypeAsterisk:
|
||||
return "Asterisk";
|
||||
return "TokenTypeAsterisk";
|
||||
case TokenTypeSlash:
|
||||
return "Slash";
|
||||
return "TokenTypeSlash";
|
||||
case TokenTypePercent:
|
||||
return "Percent";
|
||||
return "TokenTypePercent";
|
||||
case TokenTypeEqual:
|
||||
return "Equal";
|
||||
return "TokenTypeEqual";
|
||||
case TokenTypeExclamation:
|
||||
return "Exclamation";
|
||||
return "TokenTypeExclamation";
|
||||
case TokenTypeLt:
|
||||
return "Lt";
|
||||
return "TokenTypeLt";
|
||||
case TokenTypeGt:
|
||||
return "Gt";
|
||||
return "TokenTypeGt";
|
||||
default:
|
||||
ASSERT_EXHAUSTIVE_MATCH();
|
||||
}
|
||||
|
11
lexer.h
11
lexer.h
@ -13,6 +13,7 @@ typedef enum {
|
||||
|
||||
TokenTypeId,
|
||||
TokenTypeInt,
|
||||
TokenTypeIntDoubleDot,
|
||||
TokenTypeHex,
|
||||
TokenTypeBinary,
|
||||
TokenTypeFloat,
|
||||
@ -35,6 +36,8 @@ typedef enum {
|
||||
TokenTypeOr,
|
||||
TokenTypeFn,
|
||||
TokenTypeReturn,
|
||||
TokenTypeMut,
|
||||
TokenTypeDefer,
|
||||
|
||||
TokenTypeLParen,
|
||||
TokenTypeRParen,
|
||||
@ -42,13 +45,17 @@ typedef enum {
|
||||
TokenTypeRBrace,
|
||||
TokenTypeLBracket,
|
||||
TokenTypeRBracket,
|
||||
TokenTypeDot,
|
||||
TokenTypeComma,
|
||||
TokenTypeColon,
|
||||
TokenTypeDoubleColon,
|
||||
TokenTypeDoubleColonLt,
|
||||
TokenTypeSemicolon,
|
||||
TokenTypeDoubleMatch,
|
||||
TokenTypeAmpersand,
|
||||
TokenTypeUnderscore,
|
||||
TokenTypeDot,
|
||||
TokenTypeDoubleDot,
|
||||
TokenTypeDoubleDotEqual,
|
||||
TokenTypeDoubleDotLt,
|
||||
|
||||
TokenTypePlusEqual,
|
||||
TokenTypeMinusEqual,
|
||||
|
9
main.c
9
main.c
@ -6,8 +6,13 @@
|
||||
int main(void)
|
||||
{
|
||||
char text[]
|
||||
= "abc 123 0xFF 0b101 3.14 'a' '\\n' \"hello\" \"world\\\"\\n\" if else /* /* while */ */ "
|
||||
"while break (){}[].,:; += -= *= /= %= == != <= >= + - * / % % = ! < >";
|
||||
= "abc 123 123.. 0xFF 0b101 .5 1. 3.14 'a' '\\n' \"hello\" "
|
||||
"\"world\\\"\\n\" if else /* /* while */ */ "
|
||||
"while for in // in \n break let match false true not and or fn "
|
||||
"return mut "
|
||||
"defer (){}[],: :: ::< ; & _ . .. ..= ..< += -= *= /= %= == != <= >= "
|
||||
"+ - * / % "
|
||||
"% = ! < >";
|
||||
|
||||
printf("text = \"%s\"\n", text);
|
||||
|
||||
|
41
parser.h
41
parser.h
@ -2,10 +2,13 @@
|
||||
#define PARSER_H
|
||||
|
||||
#include "lexer.h"
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
typedef enum {
|
||||
ParsedNodeTypeError,
|
||||
|
||||
ParsedNodeTypeInt,
|
||||
ParsedNodeTypeFloat,
|
||||
ParsedNodeTypeChar,
|
||||
@ -27,9 +30,41 @@ typedef enum {
|
||||
ParsedNodeTypeAssign,
|
||||
} ParsedNodeType;
|
||||
|
||||
typedef struct ParsedNode {
|
||||
ParsedNodeType node_type;
|
||||
} ParsedNode;
|
||||
typedef struct KeyValuePair KeyValuePair;
|
||||
typedef struct ParsedNode ParsedNode;
|
||||
|
||||
/*
 * A parsed syntax-tree node: a ParsedNodeType tag followed by an anonymous
 * union holding the node's payload.
 *
 * NOTE(review): presumably `type` selects which union member is valid
 * (e.g. ParsedNodeTypeInt -> int_value); the mapping is not visible in this
 * header -- confirm against the parser implementation.
 * NOTE(review): ownership of the pointer members (who allocates and frees
 * them) is not shown here -- document once confirmed.
 */
struct ParsedNode {
|
||||
ParsedNodeType type;
|
||||
/* Anonymous union (C11): members are accessed directly on the node. */
union {
|
||||
int64_t int_value;
|
||||
double float_value;
|
||||
char char_value;
|
||||
/* String payload: pointer plus explicit length. */
struct {
|
||||
char* value;
|
||||
size_t length;
|
||||
} string;
|
||||
bool bool_value;
|
||||
/* Array payload: `length` presumably counts entries in `values`. */
struct {
|
||||
ParsedNode* values;
|
||||
size_t length;
|
||||
} array;
|
||||
/* Dict payload: `length` presumably counts entries in `pairs`. */
struct {
|
||||
KeyValuePair* pairs;
|
||||
size_t length;
|
||||
} dict;
|
||||
/* If-expression payload: condition plus the two branch nodes. */
struct {
|
||||
ParsedNode* condition;
|
||||
ParsedNode* truthy;
|
||||
ParsedNode* falsy;
|
||||
} if_node;
|
||||
};
|
||||
};
|
||||
|
||||
/*
 * One entry of a ParsedNode `dict` payload: a key with an explicit length
 * and a pointer to the node holding the value.
 *
 * NOTE(review): whether `key` is NUL-terminated and who owns `key` and
 * `value` is not visible in this header -- confirm against the parser.
 */
struct KeyValuePair {
|
||||
char* key;
|
||||
size_t key_length;
|
||||
ParsedNode* value;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
Lexer* lexer;
|
||||
|
Loading…
Reference in New Issue
Block a user