2024-04-05 18:16:58 +01:00
|
|
|
#include "compiler.h"
|
2024-04-02 18:47:16 +01:00
|
|
|
#include <stddef.h>
|
2024-04-05 03:14:35 +01:00
|
|
|
#include <stdio.h>
|
2024-04-02 18:47:16 +01:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
void lexer_construct(Lexer* lexer, const char* text, size_t length)
|
|
|
|
{
|
|
|
|
*lexer = (Lexer) {
|
|
|
|
.text = text,
|
|
|
|
.text_length = length,
|
|
|
|
.index = 0,
|
|
|
|
.line = 1,
|
|
|
|
.col = 1,
|
2024-04-04 01:08:11 +01:00
|
|
|
.failed = false,
|
2024-04-02 18:47:16 +01:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool is_id_start_char(char c)
|
|
|
|
{
|
|
|
|
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool is_id_char(char c)
|
|
|
|
{
|
|
|
|
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
|
|
|
|
|| (c >= '0' && c <= '9') || c == '_';
|
|
|
|
}
|
|
|
|
|
2024-04-04 01:08:11 +01:00
|
|
|
static inline Token skip_comment(Lexer* lexer)
|
|
|
|
{
|
|
|
|
Pos pos = lexer_pos(lexer);
|
|
|
|
lexer_step(lexer);
|
|
|
|
if (lexer_current(lexer) == '/') {
|
|
|
|
while (!lexer_done(lexer) && lexer_current(lexer) != '\n') {
|
|
|
|
lexer_step(lexer);
|
|
|
|
}
|
|
|
|
return lexer_next(lexer);
|
|
|
|
} else if (lexer_current(lexer) == '*') {
|
|
|
|
lexer_step(lexer);
|
|
|
|
char last = '\0';
|
|
|
|
while (!lexer_done(lexer)
|
|
|
|
&& !(last == '*' && lexer_current(lexer) == '/')) {
|
|
|
|
last = lexer_current(lexer);
|
|
|
|
lexer_step(lexer);
|
|
|
|
}
|
|
|
|
if (lexer_done(lexer)) {
|
|
|
|
lexer->failed = true;
|
|
|
|
print_error("lexer: malformed multiline comment", pos);
|
|
|
|
return lexer_token(lexer, TokenType_Error, pos);
|
|
|
|
}
|
|
|
|
return lexer_next(lexer);
|
|
|
|
} else {
|
|
|
|
lexer->failed = true;
|
|
|
|
print_error("lexer: malformed comment", pos);
|
|
|
|
return lexer_token(lexer, TokenType_Error, pos);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-04-02 18:47:16 +01:00
|
|
|
struct MatchIdToTokenTypeCase {
|
|
|
|
const char* keyword;
|
|
|
|
TokenType token_type;
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline TokenType match_id_to_token_type(
|
|
|
|
const char* source, size_t length, struct MatchIdToTokenTypeCase cases[])
|
|
|
|
{
|
|
|
|
for (size_t i = 0; cases[i].keyword != NULL; ++i) {
|
2024-04-05 03:14:35 +01:00
|
|
|
if (strlen(cases[i].keyword) <= length
|
|
|
|
&& strncmp(source, cases[i].keyword, length) == 0) {
|
2024-04-02 18:47:16 +01:00
|
|
|
return cases[i].token_type;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return TokenType_Id;
|
|
|
|
}
|
|
|
|
|
2024-04-04 01:08:11 +01:00
|
|
|
static inline Token lex_id_or_keyword(Lexer* lexer)
|
|
|
|
{
|
|
|
|
Pos pos = lexer_pos(lexer);
|
|
|
|
lexer_step(lexer);
|
|
|
|
while (!lexer_done(lexer) && is_id_char(lexer_current(lexer))) {
|
|
|
|
lexer_step(lexer);
|
|
|
|
}
|
|
|
|
size_t length = lexer->index - pos.index;
|
|
|
|
TokenType token_type
|
|
|
|
= match_id_to_token_type(&lexer->text[pos.index], length,
|
|
|
|
(struct MatchIdToTokenTypeCase[]) {
|
|
|
|
{ "not", TokenType_Not },
|
|
|
|
{ "and", TokenType_And },
|
|
|
|
{ "or", TokenType_Or },
|
|
|
|
{ "if", TokenType_If },
|
2024-04-05 03:14:35 +01:00
|
|
|
{ "else", TokenType_Else },
|
2024-04-04 01:08:11 +01:00
|
|
|
{ "loop", TokenType_Loop },
|
2024-04-05 03:14:35 +01:00
|
|
|
{ "let", TokenType_Let },
|
2024-04-04 01:08:11 +01:00
|
|
|
{ "fn", TokenType_Fn },
|
|
|
|
{ "return", TokenType_Return },
|
|
|
|
{ "break", TokenType_Break },
|
|
|
|
{ NULL, TokenType_Id },
|
|
|
|
});
|
|
|
|
return lexer_token(lexer, token_type, pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
Token lex_single_char(Lexer* lexer, TokenType token_type)
|
|
|
|
{
|
|
|
|
Pos pos = lexer_pos(lexer);
|
|
|
|
lexer_step(lexer);
|
|
|
|
return lexer_token(lexer, token_type, pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
Token lex_single_or_double_char(
|
|
|
|
Lexer* lexer, TokenType first, char c2, TokenType second)
|
|
|
|
{
|
|
|
|
Pos pos = lexer_pos(lexer);
|
|
|
|
lexer_step(lexer);
|
|
|
|
if (lexer_done(lexer) || lexer_current(lexer) != c2) {
|
|
|
|
return lexer_token(lexer, first, pos);
|
|
|
|
}
|
|
|
|
lexer_step(lexer);
|
|
|
|
return lexer_token(lexer, second, pos);
|
|
|
|
}
|
|
|
|
|
2024-04-02 18:47:16 +01:00
|
|
|
Token lexer_next(Lexer* lexer)
|
|
|
|
{
|
|
|
|
Pos pos = lexer_pos(lexer);
|
|
|
|
if (lexer_done(lexer)) {
|
|
|
|
return lexer_token(lexer, TokenType_EOF, pos);
|
|
|
|
}
|
|
|
|
char c = lexer_current(lexer);
|
|
|
|
if (c == ' ' || c == '\t' || c == '\n') {
|
|
|
|
lexer_step(lexer);
|
|
|
|
return lexer_next(lexer);
|
|
|
|
}
|
2024-04-04 01:08:11 +01:00
|
|
|
if (c == '/') {
|
|
|
|
return skip_comment(lexer);
|
|
|
|
}
|
2024-04-02 18:47:16 +01:00
|
|
|
if (is_id_start_char(c)) {
|
2024-04-04 01:08:11 +01:00
|
|
|
return lex_id_or_keyword(lexer);
|
2024-04-02 18:47:16 +01:00
|
|
|
}
|
|
|
|
if (c >= '1' && c <= '9') {
|
|
|
|
lexer_step(lexer);
|
2024-04-05 03:14:35 +01:00
|
|
|
while (!lexer_done(lexer) && lexer_current(lexer) >= '0'
|
|
|
|
&& lexer_current(lexer) <= '9') {
|
2024-04-02 18:47:16 +01:00
|
|
|
lexer_step(lexer);
|
|
|
|
}
|
|
|
|
return lexer_token(lexer, TokenType_Int, pos);
|
|
|
|
}
|
2024-04-05 03:14:35 +01:00
|
|
|
if (c == '-') {
|
|
|
|
lexer_step(lexer);
|
|
|
|
if (lexer_current(lexer) == '=') {
|
|
|
|
lexer_step(lexer);
|
|
|
|
return lexer_token(lexer, TokenType_MinusEqual, pos);
|
|
|
|
} else if (lexer_current(lexer) == '>') {
|
|
|
|
lexer_step(lexer);
|
|
|
|
return lexer_token(lexer, TokenType_MinusGT, pos);
|
|
|
|
}
|
|
|
|
return lexer_token(lexer, TokenType_Minus, pos);
|
|
|
|
}
|
2024-04-04 01:08:11 +01:00
|
|
|
switch (c) {
|
|
|
|
case '0':
|
|
|
|
return lex_single_char(lexer, TokenType_Int);
|
|
|
|
case '(':
|
|
|
|
return lex_single_char(lexer, TokenType_LParen);
|
|
|
|
case ')':
|
|
|
|
return lex_single_char(lexer, TokenType_RParen);
|
|
|
|
case '{':
|
|
|
|
return lex_single_char(lexer, TokenType_LBrace);
|
|
|
|
case '}':
|
|
|
|
return lex_single_char(lexer, TokenType_RBrace);
|
|
|
|
case '[':
|
|
|
|
return lex_single_char(lexer, TokenType_LBracket);
|
|
|
|
case ']':
|
|
|
|
return lex_single_char(lexer, TokenType_RBracket);
|
|
|
|
case ',':
|
|
|
|
return lex_single_char(lexer, TokenType_Comma);
|
|
|
|
case ';':
|
|
|
|
return lex_single_char(lexer, TokenType_Semicolon);
|
|
|
|
case '+':
|
|
|
|
return lex_single_or_double_char(
|
|
|
|
lexer, TokenType_Plus, '=', TokenType_PlusEqual);
|
|
|
|
case '*':
|
|
|
|
return lex_single_or_double_char(
|
|
|
|
lexer, TokenType_Asterisk, '=', TokenType_AsteriskEqual);
|
|
|
|
case '=':
|
|
|
|
return lex_single_or_double_char(
|
|
|
|
lexer, TokenType_Equal, '=', TokenType_EqualEqual);
|
|
|
|
case '!':
|
|
|
|
return lex_single_or_double_char(
|
|
|
|
lexer, TokenType_Exclamation, '=', TokenType_ExclamationEqual);
|
|
|
|
case '<':
|
|
|
|
return lex_single_or_double_char(
|
|
|
|
lexer, TokenType_LT, '=', TokenType_LTEqual);
|
|
|
|
case '>':
|
|
|
|
return lex_single_or_double_char(
|
|
|
|
lexer, TokenType_GT, '=', TokenType_GTEqual);
|
|
|
|
case '|':
|
|
|
|
return lex_single_or_double_char(
|
|
|
|
lexer, TokenType_Pipe, '>', TokenType_PipeGT);
|
|
|
|
}
|
|
|
|
lexer->failed = true;
|
|
|
|
print_error("lexer: unrecognized character", pos);
|
2024-04-02 18:47:16 +01:00
|
|
|
return lexer_token(lexer, TokenType_Error, pos);
|
|
|
|
}
|
|
|
|
|
2024-04-04 01:08:11 +01:00
|
|
|
bool lexer_failed(const Lexer* lexer) { return lexer->failed; }
|
|
|
|
|
2024-04-02 18:47:16 +01:00
|
|
|
Token lexer_token(Lexer* lexer, TokenType token_type, Pos pos)
|
|
|
|
{
|
|
|
|
return (Token) {
|
|
|
|
.token_type = token_type,
|
|
|
|
.pos = pos,
|
|
|
|
.length = lexer->index - pos.index,
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
void lexer_step(Lexer* lexer)
|
|
|
|
{
|
|
|
|
if (lexer_done(lexer)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
lexer->index += 1;
|
|
|
|
if (lexer_current(lexer) == '\n') {
|
|
|
|
lexer->line += 1;
|
|
|
|
lexer->col = 1;
|
|
|
|
} else {
|
|
|
|
lexer->col += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool lexer_done(const Lexer* lexer)
|
|
|
|
{
|
|
|
|
return lexer->index >= lexer->text_length;
|
|
|
|
}
|
|
|
|
|
|
|
|
char lexer_current(const Lexer* lexer) { return lexer->text[lexer->index]; }
|
|
|
|
|
|
|
|
Pos lexer_pos(const Lexer* lexer)
|
|
|
|
{
|
|
|
|
return (Pos) {
|
|
|
|
.index = lexer->index,
|
|
|
|
.line = lexer->line,
|
|
|
|
.col = lexer->col,
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
int ast_node_vec_construct(ASTNodeVec* vec)
|
|
|
|
{
|
|
|
|
const size_t capacity_start = 4;
|
|
|
|
*vec = (ASTNodeVec) {
|
|
|
|
.data = malloc(capacity_start),
|
|
|
|
.length = 0,
|
|
|
|
.capacity = capacity_start,
|
|
|
|
};
|
|
|
|
if (vec->data == NULL) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void ast_node_vec_destroy(ASTNodeVec* vec)
|
|
|
|
{
|
|
|
|
if (vec->data != NULL) {
|
|
|
|
free(vec->data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int ast_node_vec_push(ASTNodeVec* vec, ASTNode* item)
|
|
|
|
{
|
|
|
|
if (vec->length + 1 > vec->capacity) {
|
|
|
|
vec->capacity *= 2;
|
|
|
|
ASTNode** data = realloc(vec->data, vec->capacity);
|
|
|
|
if (data == NULL) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
vec->data = data;
|
|
|
|
}
|
|
|
|
vec->data[vec->length] = item;
|
|
|
|
vec->length += 1;
|
|
|
|
return 0;
|
|
|
|
}
|
2024-04-04 01:08:11 +01:00
|
|
|
|
|
|
|
ASTNode* ast_node_new(ASTNodeType node_type, Pos pos, ASTNode spec_init)
|
|
|
|
{
|
|
|
|
ASTNode* node = malloc(sizeof(ASTNode));
|
|
|
|
if (node == NULL) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
*node = spec_init;
|
|
|
|
node->node_type = node_type;
|
|
|
|
node->pos = pos;
|
|
|
|
return node;
|
|
|
|
}
|
|
|
|
|
|
|
|
void ast_node_free(ASTNode* node)
|
|
|
|
{
|
|
|
|
if (node == NULL) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
switch (node->node_type) {
|
|
|
|
case ASTNodeType_Error:
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Id:
|
|
|
|
if (node->id_value != NULL) {
|
|
|
|
free(node->id_value);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Int:
|
|
|
|
break;
|
2024-04-05 03:14:35 +01:00
|
|
|
case ASTNodeType_Group:
|
|
|
|
ast_node_free(node->group_value);
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Statements:
|
2024-04-04 01:08:11 +01:00
|
|
|
case ASTNodeType_Block:
|
|
|
|
for (size_t i = 0; i < node->statements.length; ++i) {
|
|
|
|
ast_node_free(node->statements.data[i]);
|
|
|
|
}
|
|
|
|
ast_node_vec_destroy(&node->statements);
|
|
|
|
break;
|
|
|
|
case ASTNodeType_If:
|
|
|
|
ast_node_free(node->if_node.condition);
|
|
|
|
ast_node_free(node->if_node.truthy);
|
|
|
|
ast_node_free(node->if_node.falsy);
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Loop:
|
|
|
|
ast_node_free(node->loop_node.body);
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Call:
|
|
|
|
ast_node_free(node->call_node.subject);
|
|
|
|
for (size_t i = 0; i < node->call_node.args.length; ++i) {
|
|
|
|
ast_node_free(node->call_node.args.data[i]);
|
|
|
|
}
|
|
|
|
ast_node_vec_destroy(&node->call_node.args);
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Index:
|
|
|
|
ast_node_free(node->index_node.subject);
|
|
|
|
ast_node_free(node->index_node.value);
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Unary:
|
|
|
|
ast_node_free(node->unary_node.subject);
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Binary:
|
|
|
|
ast_node_free(node->binary_node.left);
|
|
|
|
ast_node_free(node->binary_node.right);
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Assign:
|
|
|
|
ast_node_free(node->assign_node.subject);
|
|
|
|
ast_node_free(node->assign_node.value);
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Let:
|
2024-04-05 03:14:35 +01:00
|
|
|
ast_node_free(node->let_node.id);
|
2024-04-04 01:08:11 +01:00
|
|
|
ast_node_free(node->let_node.value);
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Break:
|
|
|
|
break;
|
|
|
|
case ASTNodeType_Fn:
|
2024-04-05 03:14:35 +01:00
|
|
|
ast_node_free(node->fn_node.id);
|
2024-04-04 01:08:11 +01:00
|
|
|
for (size_t i = 0; i < node->fn_node.params.length; ++i) {
|
|
|
|
ast_node_free(node->fn_node.params.data[i]);
|
|
|
|
}
|
|
|
|
ast_node_vec_destroy(&node->fn_node.params);
|
|
|
|
ast_node_free(node->fn_node.body);
|
|
|
|
break;
|
2024-04-05 03:14:35 +01:00
|
|
|
case ASTNodeType_Return:
|
|
|
|
if (node->return_node.value != NULL) {
|
|
|
|
ast_node_free(node->return_node.value);
|
|
|
|
}
|
|
|
|
break;
|
2024-04-04 01:08:11 +01:00
|
|
|
}
|
|
|
|
free(node);
|
|
|
|
}
|
|
|
|
|
|
|
|
void parser_construct(Parser* parser, const char* text, size_t text_length)
|
|
|
|
{
|
|
|
|
*parser = (Parser) {
|
|
|
|
.text = text,
|
|
|
|
.text_length = text_length,
|
|
|
|
.lexer = { 0 },
|
|
|
|
.current = { 0 },
|
|
|
|
.failed = false,
|
|
|
|
};
|
|
|
|
lexer_construct(&parser->lexer, text, text_length);
|
2024-04-05 03:14:35 +01:00
|
|
|
parser_step(parser);
|
2024-04-04 01:08:11 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
bool parser_failed(const Parser* parser) { return parser->failed; }
|
|
|
|
|
|
|
|
void parser_step(Parser* parser)
|
|
|
|
{
|
|
|
|
parser->current = lexer_next(&parser->lexer);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool parser_done(const Parser* parser)
|
|
|
|
{
|
|
|
|
return parser->current.token_type == TokenType_EOF;
|
|
|
|
}
|
|
|
|
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTNode* parser_parse_statements(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
ASTNodeVec statements;
|
|
|
|
ast_node_vec_construct(&statements);
|
|
|
|
while (!parser_done(parser)) {
|
|
|
|
ASTNode* statement = parser_parse_statement(parser);
|
|
|
|
ast_node_vec_push(&statements, statement);
|
|
|
|
}
|
|
|
|
return ast_node_new(
|
|
|
|
ASTNodeType_Statements, pos, (ASTNode) { .statements = statements });
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_statement(Parser* parser)
|
|
|
|
{
|
|
|
|
if (parser->current.token_type == TokenType_Fn) {
|
|
|
|
return parser_parse_fn(parser);
|
|
|
|
} else if (parser->current.token_type == TokenType_If) {
|
|
|
|
return parser_parse_if(parser);
|
|
|
|
} else if (parser->current.token_type == TokenType_Loop) {
|
|
|
|
return parser_parse_loop(parser);
|
|
|
|
} else {
|
|
|
|
ASTNode* statement = parser_parse_single_line_statement(parser);
|
|
|
|
if (parser->current.token_type != TokenType_Semicolon) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected ';'", parser->current.pos);
|
|
|
|
}
|
|
|
|
parser_step(parser);
|
|
|
|
return statement;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_fn(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
parser_step(parser);
|
|
|
|
if (parser->current.token_type != TokenType_Id) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected 'id'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
ASTNode* id = parser_parse_id(parser);
|
|
|
|
if (parser->current.token_type != TokenType_LParen) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected '('", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
parser_step(parser);
|
|
|
|
ASTNodeVec params;
|
|
|
|
ast_node_vec_construct(¶ms);
|
|
|
|
if (!parser_done(parser)
|
|
|
|
&& parser->current.token_type != TokenType_RParen) {
|
|
|
|
if (parser->current.token_type != TokenType_Id) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected 'id'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
ast_node_vec_push(¶ms, parser_parse_id(parser));
|
|
|
|
while (parser->current.token_type == TokenType_Comma) {
|
|
|
|
parser_step(parser);
|
|
|
|
if (parser->current.token_type == TokenType_RParen) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (parser->current.token_type != TokenType_Id) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected 'id'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
ast_node_vec_push(¶ms, parser_parse_id(parser));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (parser->current.token_type != TokenType_RParen) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected ')'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
parser_step(parser);
|
|
|
|
if (parser->current.token_type != TokenType_LBrace) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected '{'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
ASTNode* body = parser_parse_block(parser);
|
|
|
|
return ast_node_new(ASTNodeType_Fn, pos, (ASTNode) {
|
|
|
|
.fn_node = (ASTFnNode) {
|
|
|
|
.id = id,
|
|
|
|
.params = params,
|
|
|
|
.body = body,
|
|
|
|
},
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_single_line_statement(Parser* parser)
|
|
|
|
{
|
|
|
|
if (parser->current.token_type == TokenType_Let) {
|
|
|
|
return parser_parse_let(parser);
|
|
|
|
} else if (parser->current.token_type == TokenType_Return) {
|
|
|
|
return parser_parse_return(parser);
|
|
|
|
} else if (parser->current.token_type == TokenType_Break) {
|
|
|
|
return parser_parse_break(parser);
|
|
|
|
} else {
|
|
|
|
return parser_parse_assign(parser);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_let(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
parser_step(parser);
|
|
|
|
if (parser->current.token_type != TokenType_Id) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected 'id'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
ASTNode* id = parser_parse_id(parser);
|
|
|
|
if (parser->current.token_type != TokenType_Equal) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected '='", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
parser_step(parser);
|
|
|
|
ASTNode* value = parser_parse_expr(parser);
|
|
|
|
return ast_node_new(ASTNodeType_Let, pos, (ASTNode) {
|
|
|
|
.let_node = (ASTLetNode) {
|
|
|
|
.id = id,
|
|
|
|
.value = value,
|
|
|
|
},
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_return(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
parser_step(parser);
|
|
|
|
ASTNode* value = NULL;
|
|
|
|
if (parser->current.token_type != TokenType_Semicolon) {
|
|
|
|
value = parser_parse_expr(parser);
|
|
|
|
}
|
|
|
|
return ast_node_new(ASTNodeType_Return, pos, (ASTNode) {
|
|
|
|
.return_node = (ASTReturnNode) {
|
|
|
|
.value = value,
|
|
|
|
},
|
|
|
|
});
|
|
|
|
}
|
2024-04-04 01:08:11 +01:00
|
|
|
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTNode* parser_parse_break(Parser* parser)
|
2024-04-04 01:08:11 +01:00
|
|
|
{
|
2024-04-05 03:14:35 +01:00
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
parser_step(parser);
|
|
|
|
return ast_node_new(ASTNodeType_Return, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_assign(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
ASTNode* subject = parser_parse_expr(parser);
|
|
|
|
int assign_type = -1;
|
|
|
|
if (parser->current.token_type == TokenType_Equal) {
|
|
|
|
assign_type = AssignType_Assign;
|
|
|
|
} else if (parser->current.token_type == TokenType_PlusEqual) {
|
|
|
|
assign_type = AssignType_Add;
|
|
|
|
} else if (parser->current.token_type == TokenType_MinusEqual) {
|
|
|
|
assign_type = AssignType_Subtract;
|
|
|
|
} else if (parser->current.token_type == TokenType_AsteriskEqual) {
|
|
|
|
assign_type = AssignType_Multiply;
|
|
|
|
}
|
|
|
|
if (assign_type == -1) {
|
|
|
|
return subject;
|
|
|
|
}
|
|
|
|
parser_step(parser);
|
|
|
|
return ast_node_new(ASTNodeType_Assign, pos, (ASTNode) {
|
|
|
|
.assign_node = (ASTAssignNode) {
|
|
|
|
.assign_type = (AssignType)assign_type,
|
|
|
|
.subject = subject,
|
|
|
|
.value = parser_parse_expr(parser),
|
|
|
|
},
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_expr(Parser* parser) { return parser_parse_or(parser); }
|
|
|
|
|
|
|
|
static inline ASTNode* binary_node(
|
|
|
|
Pos pos, BinaryType binary_type, ASTNode* left, ASTNode* right)
|
|
|
|
{
|
|
|
|
return ast_node_new(ASTNodeType_Binary, pos, (ASTNode) {
|
|
|
|
.binary_node = (ASTBinaryNode) {
|
|
|
|
.binary_type = binary_type,
|
|
|
|
.left = left,
|
|
|
|
.right = right,
|
|
|
|
},
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_or(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
ASTNode* left = parser_parse_and(parser);
|
|
|
|
while (parser->current.token_type == TokenType_Or) {
|
|
|
|
parser_step(parser);
|
|
|
|
left = binary_node(pos, BinaryType_Or, left, parser_parse_and(parser));
|
|
|
|
}
|
|
|
|
return left;
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_and(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
ASTNode* left = parser_parse_equality(parser);
|
|
|
|
while (parser->current.token_type == TokenType_And) {
|
|
|
|
parser_step(parser);
|
|
|
|
left = binary_node(
|
|
|
|
pos, BinaryType_And, left, parser_parse_equality(parser));
|
|
|
|
}
|
|
|
|
return left;
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_equality(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
ASTNode* left = parser_parse_comparison(parser);
|
|
|
|
if (parser->current.token_type == TokenType_EqualEqual) {
|
|
|
|
parser_step(parser);
|
|
|
|
return binary_node(
|
|
|
|
pos, BinaryType_EE, left, parser_parse_comparison(parser));
|
|
|
|
} else if (parser->current.token_type == TokenType_ExclamationEqual) {
|
|
|
|
parser_step(parser);
|
|
|
|
return binary_node(
|
|
|
|
pos, BinaryType_NE, left, parser_parse_comparison(parser));
|
|
|
|
} else {
|
|
|
|
return left;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_comparison(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
ASTNode* left = parser_parse_term(parser);
|
|
|
|
while (!parser_done(parser)) {
|
|
|
|
if (parser->current.token_type == TokenType_LT) {
|
|
|
|
parser_step(parser);
|
|
|
|
return binary_node(
|
|
|
|
pos, BinaryType_LT, left, parser_parse_term(parser));
|
|
|
|
} else if (parser->current.token_type == TokenType_LTEqual) {
|
|
|
|
parser_step(parser);
|
|
|
|
return binary_node(
|
|
|
|
pos, BinaryType_LTE, left, parser_parse_term(parser));
|
|
|
|
} else if (parser->current.token_type == TokenType_GT) {
|
|
|
|
parser_step(parser);
|
|
|
|
return binary_node(
|
|
|
|
pos, BinaryType_GT, left, parser_parse_term(parser));
|
|
|
|
} else if (parser->current.token_type == TokenType_GTEqual) {
|
|
|
|
parser_step(parser);
|
|
|
|
return binary_node(
|
|
|
|
pos, BinaryType_GTE, left, parser_parse_term(parser));
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return left;
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_term(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
ASTNode* left = parser_parse_factor(parser);
|
|
|
|
while (!parser_done(parser)) {
|
|
|
|
if (parser->current.token_type == TokenType_Plus) {
|
|
|
|
parser_step(parser);
|
|
|
|
return binary_node(
|
|
|
|
pos, BinaryType_Add, left, parser_parse_factor(parser));
|
|
|
|
} else if (parser->current.token_type == TokenType_Minus) {
|
|
|
|
parser_step(parser);
|
|
|
|
return binary_node(
|
|
|
|
pos, BinaryType_Subtract, left, parser_parse_factor(parser));
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return left;
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_factor(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
ASTNode* left = parser_parse_unary(parser);
|
|
|
|
while (!parser_done(parser)) {
|
|
|
|
if (parser->current.token_type == TokenType_Asterisk) {
|
|
|
|
parser_step(parser);
|
|
|
|
return binary_node(
|
|
|
|
pos, BinaryType_Multiply, left, parser_parse_unary(parser));
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return left;
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_unary(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
if (parser->current.token_type == TokenType_Not) {
|
|
|
|
parser_step(parser);
|
|
|
|
return ast_node_new(ASTNodeType_Unary, pos, (ASTNode) {
|
|
|
|
.unary_node = (ASTUnaryNode) {
|
|
|
|
.unary_type = UnaryType_Not,
|
|
|
|
.subject = parser_parse_unary(parser),
|
|
|
|
},
|
|
|
|
});
|
|
|
|
} else if (parser->current.token_type == TokenType_Minus) {
|
|
|
|
parser_step(parser);
|
|
|
|
return ast_node_new(ASTNodeType_Unary, pos, (ASTNode) {
|
|
|
|
.unary_node = (ASTUnaryNode) {
|
|
|
|
.unary_type = UnaryType_Negate,
|
|
|
|
.subject = parser_parse_unary(parser),
|
|
|
|
},
|
|
|
|
});
|
|
|
|
} else {
|
|
|
|
return parser_parse_index_call(parser);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_index_call(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
ASTNode* subject = parser_parse_operand(parser);
|
|
|
|
while (!parser_done(parser)) {
|
|
|
|
if (parser->current.token_type == TokenType_LParen) {
|
|
|
|
parser_step(parser);
|
|
|
|
ASTNodeVec args;
|
|
|
|
ast_node_vec_construct(&args);
|
|
|
|
if (!parser_done(parser)
|
|
|
|
&& parser->current.token_type != TokenType_RParen) {
|
|
|
|
ast_node_vec_push(&args, parser_parse_expr(parser));
|
|
|
|
while (parser->current.token_type == TokenType_Comma) {
|
|
|
|
parser_step(parser);
|
|
|
|
if (parser->current.token_type == TokenType_RParen) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
ast_node_vec_push(&args, parser_parse_expr(parser));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (parser->current.token_type != TokenType_RParen) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected ')'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
parser_step(parser);
|
|
|
|
subject = ast_node_new(ASTNodeType_Call, pos, (ASTNode) {
|
|
|
|
.call_node = (ASTCallNode) {
|
|
|
|
.subject = subject,
|
|
|
|
.args = args,
|
|
|
|
},
|
|
|
|
});
|
|
|
|
} else if (parser->current.token_type == TokenType_LBracket) {
|
|
|
|
parser_step(parser);
|
|
|
|
ASTNode* value = parser_parse_expr(parser);
|
|
|
|
if (parser->current.token_type != TokenType_RBracket) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected ']'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
parser_step(parser);
|
|
|
|
subject = ast_node_new(ASTNodeType_Index, pos, (ASTNode) {
|
|
|
|
.index_node = (ASTIndexNode) {
|
|
|
|
.subject = subject,
|
|
|
|
.value = value,
|
|
|
|
},
|
|
|
|
});
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return subject;
|
2024-04-04 01:08:11 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_operand(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
switch (parser->current.token_type) {
|
|
|
|
case TokenType_Error:
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
case TokenType_Id:
|
|
|
|
return parser_parse_id(parser);
|
|
|
|
case TokenType_Int:
|
|
|
|
return parser_parse_int(parser);
|
|
|
|
case TokenType_LParen:
|
2024-04-05 03:14:35 +01:00
|
|
|
return parser_parse_group(parser);
|
2024-04-04 01:08:11 +01:00
|
|
|
case TokenType_LBrace:
|
2024-04-05 03:14:35 +01:00
|
|
|
return parser_parse_block(parser);
|
2024-04-04 01:08:11 +01:00
|
|
|
case TokenType_If:
|
2024-04-05 03:14:35 +01:00
|
|
|
return parser_parse_if(parser);
|
2024-04-04 01:08:11 +01:00
|
|
|
case TokenType_Loop:
|
2024-04-05 03:14:35 +01:00
|
|
|
return parser_parse_loop(parser);
|
2024-04-04 01:08:11 +01:00
|
|
|
default:
|
|
|
|
parser->failed = true;
|
2024-04-05 03:14:35 +01:00
|
|
|
print_error("parser: expected operand", pos);
|
2024-04-04 01:08:11 +01:00
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_id(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
char* value = malloc(parser->current.length + 1);
|
|
|
|
value[parser->current.length] = '\0';
|
|
|
|
strncpy(value, &parser->text[parser->current.pos.index],
|
|
|
|
parser->current.length);
|
|
|
|
parser_step(parser);
|
2024-04-07 03:50:29 +01:00
|
|
|
return ast_node_new(ASTNodeType_Id, pos,
|
|
|
|
(ASTNode) {
|
|
|
|
.id_value = value,
|
|
|
|
.id_symbol = NULL,
|
|
|
|
});
|
2024-04-04 01:08:11 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_int(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
int value = (int)strtol(&parser->text[parser->current.length], NULL, 10);
|
|
|
|
parser_step(parser);
|
|
|
|
return ast_node_new(ASTNodeType_Int, pos, (ASTNode) { .int_value = value });
|
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_group(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
parser_step(parser);
|
|
|
|
ASTNode* expr = parser_parse_expr(parser);
|
|
|
|
if (parser->current.token_type != TokenType_RParen) {
|
|
|
|
parser->failed = true;
|
2024-04-05 03:14:35 +01:00
|
|
|
print_error("parser: expected ')'", parser->current.pos);
|
2024-04-04 01:08:11 +01:00
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
parser_step(parser);
|
2024-04-05 03:14:35 +01:00
|
|
|
return ast_node_new(ASTNodeType_Group, pos,
|
|
|
|
(ASTNode) {
|
|
|
|
.group_value = expr,
|
|
|
|
});
|
2024-04-04 01:08:11 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
ASTNode* parser_parse_block(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
parser_step(parser);
|
|
|
|
ASTNodeVec statements;
|
|
|
|
ast_node_vec_construct(&statements);
|
|
|
|
while (!parser_done(parser)
|
|
|
|
&& parser->current.token_type != TokenType_RBrace) {
|
|
|
|
ASTNode* statement = parser_parse_statement(parser);
|
|
|
|
ast_node_vec_push(&statements, statement);
|
|
|
|
}
|
|
|
|
if (parser->current.token_type != TokenType_RBrace) {
|
|
|
|
parser->failed = true;
|
2024-04-05 03:14:35 +01:00
|
|
|
print_error("parser: expected '}'", parser->current.pos);
|
2024-04-04 01:08:11 +01:00
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
parser_step(parser);
|
|
|
|
return ast_node_new(
|
|
|
|
ASTNodeType_Block, pos, (ASTNode) { .statements = statements });
|
|
|
|
}
|
|
|
|
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTNode* parser_parse_if(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
parser_step(parser);
|
|
|
|
ASTNode* condition = parser_parse_expr(parser);
|
|
|
|
if (parser->current.token_type != TokenType_LBrace) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected '{'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
ASTNode* truthy = parser_parse_block(parser);
|
|
|
|
ASTNode* falsy = NULL;
|
|
|
|
if (parser->current.token_type == TokenType_Else) {
|
|
|
|
parser_step(parser);
|
|
|
|
if (parser->current.token_type != TokenType_LBrace) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected '{'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
falsy = parser_parse_block(parser);
|
|
|
|
}
|
|
|
|
return ast_node_new(ASTNodeType_If, pos, (ASTNode) {
|
|
|
|
.if_node = (ASTIfNode) {
|
|
|
|
.condition = condition,
|
|
|
|
.truthy = truthy,
|
|
|
|
.falsy = falsy,
|
|
|
|
},
|
|
|
|
});
|
|
|
|
}
|
2024-04-04 01:08:11 +01:00
|
|
|
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTNode* parser_parse_loop(Parser* parser)
|
|
|
|
{
|
|
|
|
Pos pos = parser->current.pos;
|
|
|
|
parser_step(parser);
|
|
|
|
if (parser->current.token_type != TokenType_RBrace) {
|
|
|
|
parser->failed = true;
|
|
|
|
print_error("parser: expected '}'", parser->current.pos);
|
|
|
|
return ast_node_new(ASTNodeType_Error, pos, (ASTNode) { 0 });
|
|
|
|
}
|
|
|
|
ASTNode* body = parser_parse_block(parser);
|
|
|
|
return ast_node_new(ASTNodeType_Loop, pos, (ASTNode) {
|
|
|
|
.loop_node = (ASTLoopNode) {
|
|
|
|
.body = body,
|
|
|
|
},
|
|
|
|
});
|
|
|
|
}
|