2024-04-05 18:16:58 +01:00
|
|
|
#ifndef COMPILER_H
|
|
|
|
#define COMPILER_H
|
2024-04-02 16:21:48 +01:00
|
|
|
|
2024-04-02 18:47:16 +01:00
|
|
|
#include <stdbool.h>
|
2024-04-02 16:21:48 +01:00
|
|
|
#include <stddef.h>
|
|
|
|
|
2024-04-02 18:47:16 +01:00
|
|
|
/*
 * Location within the source text.
 * Used as Token.pos and ASTNode.pos, and passed to print_error().
 * NOTE(review): whether line/col start at 0 or 1 is decided by the lexer
 * implementation, which is not visible in this header.
 */
typedef struct {
    size_t index; /* presumably the offset into the source text -- confirm */
    int line;
    int col;
} Pos;
|
|
|
|
|
2024-04-04 01:08:11 +01:00
|
|
|
/*
 * Presumably reports `message` together with the source location `pos`.
 * The definition is not in this header, so the output stream and format
 * cannot be stated here -- confirm against the implementation.
 */
void print_error(const char* message, Pos pos);
|
|
|
|
|
2024-04-02 16:21:48 +01:00
|
|
|
/*
 * Kinds of token, stored in Token.token_type.
 *
 * Enumerators take implicit consecutive values starting at 0, so
 * TokenType_Error is 0 and is what a zero-initialized Token carries.
 * Do not reorder: the numeric values follow declaration order.
 *
 * NOTE(review): the exact source spelling of each token is defined by the
 * lexer implementation; the group comments below are inferred from names.
 */
typedef enum {
    /* Sentinels. */
    TokenType_Error,
    TokenType_EOF,

    /* Identifiers and integer literals. */
    TokenType_Id,
    TokenType_Int,

    /* Presumably word keywords (separate from the '!' token below). */
    TokenType_Not,
    TokenType_And,
    TokenType_Or,
    TokenType_If,
    TokenType_Else,
    TokenType_Loop,
    TokenType_Let,
    TokenType_Fn,
    TokenType_Return,
    TokenType_Break,

    /* Delimiters. */
    TokenType_LParen,
    TokenType_RParen,
    TokenType_LBrace,
    TokenType_RBrace,
    TokenType_LBracket,
    TokenType_RBracket,
    TokenType_Comma,
    TokenType_Semicolon,

    /* Arithmetic operators and their compound-assignment forms. */
    TokenType_Plus,
    TokenType_PlusEqual,
    TokenType_Minus,
    TokenType_MinusEqual,
    TokenType_MinusGT, /* presumably "->" */
    TokenType_Asterisk,
    TokenType_AsteriskEqual,

    /* Assignment, equality and comparison. */
    TokenType_Equal,
    TokenType_EqualEqual,
    TokenType_Exclamation,
    TokenType_ExclamationEqual,
    TokenType_LT,
    TokenType_LTEqual,
    TokenType_GT,
    TokenType_GTEqual,

    /* Pipe forms. */
    TokenType_Pipe,
    TokenType_PipeGT, /* presumably "|>" */
} TokenType;
|
|
|
|
|
|
|
|
typedef struct {
|
2024-04-02 18:47:16 +01:00
|
|
|
TokenType token_type;
|
|
|
|
Pos pos;
|
2024-04-02 16:21:48 +01:00
|
|
|
size_t length;
|
|
|
|
} Token;
|
|
|
|
|
2024-04-02 18:47:16 +01:00
|
|
|
/*
 * Lexer state over a caller-supplied text buffer.
 * index/line/col mirror the fields of Pos; lexer_pos() returns them as a Pos.
 * NOTE(review): `text` is a borrowed const pointer as far as this header
 * shows; lifetime requirements are not stated here -- confirm.
 */
typedef struct {
    const char* text;
    size_t text_length;
    size_t index;
    int line;
    int col;
    bool failed; /* queried through lexer_failed() */
} Lexer;
|
|
|
|
|
|
|
|
void lexer_construct(Lexer* lexer, const char* text, size_t text_length);
|
|
|
|
Token lexer_next(Lexer* lexer);
|
2024-04-04 01:08:11 +01:00
|
|
|
bool lexer_failed(const Lexer* lexer);
|
2024-04-02 18:47:16 +01:00
|
|
|
Token lexer_token(Lexer* lexer, TokenType token_type, Pos pos);
|
|
|
|
void lexer_step(Lexer* lexer);
|
|
|
|
bool lexer_done(const Lexer* lexer);
|
|
|
|
char lexer_current(const Lexer* lexer);
|
|
|
|
Pos lexer_pos(const Lexer* lexer);
|
|
|
|
|
2024-04-02 16:21:48 +01:00
|
|
|
/*
 * Discriminator for ASTNode, stored in ASTNode.node_type.
 *
 * Enumerators take implicit consecutive values starting at 0
 * (ASTNodeType_Error is 0). Do not reorder: the numeric values follow
 * declaration order.
 */
typedef enum {
    ASTNodeType_Error,

    /* Leaves and grouping. */
    ASTNodeType_Id,
    ASTNodeType_Int,
    ASTNodeType_Group,

    /* Block and control flow. */
    ASTNodeType_Block,
    ASTNodeType_If,
    ASTNodeType_Loop,

    /* Postfix and operator expressions. */
    ASTNodeType_Call,
    ASTNodeType_Index,
    ASTNodeType_Unary,
    ASTNodeType_Binary,

    /* Statements. */
    ASTNodeType_Assign,
    ASTNodeType_Let,
    ASTNodeType_Break,
    ASTNodeType_Fn,
    ASTNodeType_Return,
    ASTNodeType_Statements,
} ASTNodeType;
|
|
|
|
|
|
|
|
/*
 * Forward declaration: the payload structs that follow hold ASTNode pointers,
 * and struct ASTNode itself is defined after them.
 */
typedef struct ASTNode ASTNode;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
ASTNode** data;
|
|
|
|
size_t length;
|
|
|
|
size_t capacity;
|
|
|
|
} ASTNodeVec;
|
|
|
|
|
2024-04-02 18:47:16 +01:00
|
|
|
int ast_node_vec_construct(ASTNodeVec* vec);
|
2024-04-02 16:21:48 +01:00
|
|
|
void ast_node_vec_destroy(ASTNodeVec* vec);
|
2024-04-02 18:47:16 +01:00
|
|
|
int ast_node_vec_push(ASTNodeVec* vec, ASTNode* item);
|
2024-04-02 16:21:48 +01:00
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
ASTNode* condition;
|
|
|
|
ASTNode* truthy;
|
|
|
|
ASTNode* falsy;
|
|
|
|
} ASTIfNode;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
ASTNode* body;
|
|
|
|
} ASTLoopNode;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
ASTNode* subject;
|
|
|
|
ASTNodeVec args;
|
|
|
|
} ASTCallNode;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
ASTNode* subject;
|
|
|
|
ASTNode* value;
|
|
|
|
} ASTIndexNode;
|
|
|
|
|
|
|
|
/*
 * Unary operator kinds, stored in ASTUnaryNode.unary_type.
 * Values are implicit and start at 0; do not reorder.
 */
typedef enum {
    UnaryType_Not,
    UnaryType_Negate,
} UnaryType;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
UnaryType unary_type;
|
|
|
|
ASTNode* subject;
|
|
|
|
} ASTUnaryNode;
|
|
|
|
|
|
|
|
/*
 * Binary operator kinds, stored in ASTBinaryNode.binary_type.
 * Values are implicit and start at 0; do not reorder.
 */
typedef enum {
    /* Logical. */
    BinaryType_And,
    BinaryType_Or,

    /* Equality and comparison. */
    BinaryType_EE,
    BinaryType_NE,
    BinaryType_LT,
    BinaryType_GT,
    BinaryType_LTE,
    BinaryType_GTE,

    /* Arithmetic. */
    BinaryType_Add,
    BinaryType_Subtract,
    BinaryType_Multiply,
} BinaryType;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
BinaryType binary_type;
|
|
|
|
ASTNode* left;
|
|
|
|
ASTNode* right;
|
|
|
|
} ASTBinaryNode;
|
|
|
|
|
|
|
|
/*
 * Assignment kinds, stored in ASTAssignNode.assign_type.
 * Values are implicit and start at 0; do not reorder.
 * The members parallel TokenType_Equal / PlusEqual / MinusEqual /
 * AsteriskEqual; the actual token mapping lives in the parser (TODO confirm).
 */
typedef enum {
    AssignType_Assign,
    AssignType_Add,
    AssignType_Subtract,
    AssignType_Multiply,
} AssignType;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
AssignType assign_type;
|
|
|
|
ASTNode* subject;
|
|
|
|
ASTNode* value;
|
|
|
|
} ASTAssignNode;
|
|
|
|
|
|
|
|
typedef struct {
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTNode* id;
|
2024-04-02 16:21:48 +01:00
|
|
|
ASTNode* value;
|
|
|
|
} ASTLetNode;
|
|
|
|
|
|
|
|
typedef struct {
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTNode* id;
|
2024-04-02 16:21:48 +01:00
|
|
|
ASTNodeVec params;
|
|
|
|
ASTNode* body;
|
|
|
|
} ASTFnNode;
|
|
|
|
|
2024-04-05 03:14:35 +01:00
|
|
|
typedef struct {
|
|
|
|
ASTNode* value;
|
|
|
|
} ASTReturnNode;
|
|
|
|
|
2024-04-02 16:21:48 +01:00
|
|
|
struct ASTNode {
|
|
|
|
ASTNodeType node_type;
|
2024-04-04 01:08:11 +01:00
|
|
|
Pos pos;
|
2024-04-02 16:21:48 +01:00
|
|
|
union {
|
2024-04-05 23:43:45 +01:00
|
|
|
char* id_value;
|
2024-04-02 16:21:48 +01:00
|
|
|
int int_value;
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTNode* group_value;
|
2024-04-05 23:43:45 +01:00
|
|
|
ASTNodeVec statements;
|
2024-04-02 16:21:48 +01:00
|
|
|
ASTIfNode if_node;
|
|
|
|
ASTLoopNode loop_node;
|
|
|
|
ASTCallNode call_node;
|
|
|
|
ASTIndexNode index_node;
|
|
|
|
ASTUnaryNode unary_node;
|
|
|
|
ASTBinaryNode binary_node;
|
|
|
|
ASTAssignNode assign_node;
|
|
|
|
ASTLetNode let_node;
|
|
|
|
ASTFnNode fn_node;
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTReturnNode return_node;
|
2024-04-02 16:21:48 +01:00
|
|
|
};
|
|
|
|
};
|
|
|
|
|
2024-04-04 01:08:11 +01:00
|
|
|
/*
 * Presumably allocates a node tagged `node_type` at `pos`, taking its union
 * payload from `spec_init` (an ASTNode passed by value).
 * NOTE(review): the definition is not in this header. The failure return
 * (NULL?) and whether spec_init's own node_type/pos are ignored are
 * TODO confirm.
 */
ASTNode* ast_node_new(ASTNodeType node_type, Pos pos, ASTNode spec_init);

/*
 * Presumably releases `node`. Whether child nodes and id_value are freed
 * too is not visible here -- confirm.
 */
void ast_node_free(ASTNode* node);
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
const char* text;
|
|
|
|
size_t text_length;
|
|
|
|
Lexer lexer;
|
|
|
|
Token current;
|
|
|
|
bool failed;
|
|
|
|
} Parser;
|
|
|
|
|
|
|
|
void parser_construct(Parser* parser, const char* text, size_t text_length);
|
|
|
|
bool parser_failed(const Parser* parser);
|
|
|
|
void parser_step(Parser* parser);
|
|
|
|
bool parser_done(const Parser* parser);
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTNode* parser_parse_statements(Parser* parser);
|
2024-04-04 01:08:11 +01:00
|
|
|
ASTNode* parser_parse_statement(Parser* parser);
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTNode* parser_parse_fn(Parser* parser);
|
|
|
|
ASTNode* parser_parse_single_line_statement(Parser* parser);
|
|
|
|
ASTNode* parser_parse_let(Parser* parser);
|
|
|
|
ASTNode* parser_parse_return(Parser* parser);
|
|
|
|
ASTNode* parser_parse_break(Parser* parser);
|
|
|
|
ASTNode* parser_parse_assign(Parser* parser);
|
2024-04-04 01:08:11 +01:00
|
|
|
ASTNode* parser_parse_expr(Parser* parser);
|
2024-04-05 03:14:35 +01:00
|
|
|
ASTNode* parser_parse_or(Parser* parser);
|
|
|
|
ASTNode* parser_parse_and(Parser* parser);
|
|
|
|
ASTNode* parser_parse_equality(Parser* parser);
|
|
|
|
ASTNode* parser_parse_comparison(Parser* parser);
|
|
|
|
ASTNode* parser_parse_term(Parser* parser);
|
|
|
|
ASTNode* parser_parse_factor(Parser* parser);
|
|
|
|
ASTNode* parser_parse_unary(Parser* parser);
|
|
|
|
ASTNode* parser_parse_index_call(Parser* parser);
|
2024-04-04 01:08:11 +01:00
|
|
|
ASTNode* parser_parse_operand(Parser* parser);
|
|
|
|
ASTNode* parser_parse_id(Parser* parser);
|
|
|
|
ASTNode* parser_parse_int(Parser* parser);
|
|
|
|
ASTNode* parser_parse_group(Parser* parser);
|
|
|
|
ASTNode* parser_parse_block(Parser* parser);
|
|
|
|
ASTNode* parser_parse_if(Parser* parser);
|
|
|
|
ASTNode* parser_parse_loop(Parser* parser);
|
2024-04-02 16:21:48 +01:00
|
|
|
|
2024-04-05 18:16:58 +01:00
|
|
|
/*
 * Kinds of symbol, stored in Symbol.type.
 * Values are implicit and start at 0; do not reorder.
 */
typedef enum {
    SymbolType_Local,
    SymbolType_Global,
    SymbolType_Builtin,
} SymbolType;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
SymbolType type;
|
|
|
|
} Symbol;
|
|
|
|
|
|
|
|
/*
 * Hashes the `length` bytes at `value` into a size_t.
 * NOTE(review): the name suggests the djb2 string hash; the definition is
 * not in this header, so the exact variant is TODO confirm. The result type
 * matches the key_hash used by StringSymbolMap.
 */
size_t common_string_hash_djb2(const unsigned char* value, size_t length);
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
size_t key_hash;
|
|
|
|
Symbol value;
|
|
|
|
} StringSymbolMapEntry;
|
|
|
|
|
2024-04-05 03:14:35 +01:00
|
|
|
typedef struct {
|
2024-04-05 18:16:58 +01:00
|
|
|
StringSymbolMapEntry* data;
|
|
|
|
size_t length;
|
|
|
|
size_t capacity;
|
|
|
|
} StringSymbolMap;
|
|
|
|
|
|
|
|
int string_symbol_map_construct(StringSymbolMap* map);
|
|
|
|
void string_symbol_map_destroy(StringSymbolMap* map);
|
|
|
|
Symbol* string_symbol_map_get(const StringSymbolMap* map, size_t key_hash);
|
|
|
|
int string_symbol_map_set(StringSymbolMap* map, size_t key_hash, Symbol value);
|
|
|
|
|
2024-04-05 23:43:45 +01:00
|
|
|
typedef struct SymbolTable SymbolTable;
|
|
|
|
|
|
|
|
struct SymbolTable {
|
|
|
|
SymbolTable* parent;
|
|
|
|
StringSymbolMap map;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Presumably initializes `table` with `parent` as its enclosing scope.
 * NOTE(review): this returns void while string_symbol_map_construct()
 * returns int, so a failure constructing the embedded map cannot be
 * reported to the caller through this signature.
 */
void symbol_table_construct(SymbolTable* table, SymbolTable* parent);

/* Presumably releases the resources held by `table` (definition not visible). */
void symbol_table_destroy(SymbolTable* table);
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
ASTNode* statements;
|
|
|
|
SymbolTable table;
|
|
|
|
} StatementsSymbols;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
StatementsSymbols* data;
|
|
|
|
size_t length;
|
|
|
|
size_t capacity;
|
|
|
|
} StatementsSymbolsVec;
|
|
|
|
|
|
|
|
int statements_symbols_vec_construct(StatementsSymbolsVec* vec);
|
|
|
|
void statements_symbols_vec_destroy(StatementsSymbolsVec* vec);
|
|
|
|
int statements_symbols_vec_push(StatementsSymbolsVec* vec, StatementsSymbols pair);
|
|
|
|
|
2024-04-05 18:16:58 +01:00
|
|
|
typedef struct {
|
|
|
|
bool failed;
|
2024-04-05 23:43:45 +01:00
|
|
|
SymbolTable head_table;
|
|
|
|
StatementsSymbolsVec statements_symbols;
|
2024-04-05 03:14:35 +01:00
|
|
|
} Checker;
|
|
|
|
|
2024-04-05 23:43:45 +01:00
|
|
|
typedef struct {
|
|
|
|
SymbolTable head_table;
|
|
|
|
StatementsSymbolsVec statements_symbols;
|
|
|
|
} CheckerResult;
|
|
|
|
|
2024-04-05 03:14:35 +01:00
|
|
|
void checker_construct(Checker* checker);
|
2024-04-05 23:43:45 +01:00
|
|
|
void checker_destroy(Checker* checker);
|
2024-04-05 18:16:58 +01:00
|
|
|
bool checker_failed(const Checker* checker);
|
2024-04-05 23:43:45 +01:00
|
|
|
CheckerResult checker_result(Checker* checker);
|
2024-04-05 18:16:58 +01:00
|
|
|
void checker_check_statements(Checker* checker, ASTNode* node);
|
2024-04-05 23:43:45 +01:00
|
|
|
void checker_check_statement(Checker* checker, SymbolTable* table, ASTNode* node);
|
2024-04-05 03:14:35 +01:00
|
|
|
|
2024-04-02 16:21:48 +01:00
|
|
|
#endif
|