/* semos/compiler.h */
#ifndef COMPILER_H
#define COMPILER_H
2024-04-02 16:21:48 +01:00
2024-04-02 18:47:16 +01:00
#include <stdbool.h>
2024-04-02 16:21:48 +01:00
#include <stddef.h>
2024-04-02 18:47:16 +01:00
/*
 * A location in the source text.
 *
 * Field roles are inferred from their names; this header does not state
 * whether line/col are 0- or 1-based.
 */
typedef struct {
    size_t index; /* presumably the offset into the source text */
    int line;
    int col;
} Pos;
2024-04-04 01:08:11 +01:00
/*
 * Presumably prints `message` together with the source location `pos`.
 * NOTE(review): output stream and format are decided by the implementation,
 * which is not visible in this header.
 */
void print_error(const char* message, Pos pos);
2024-04-02 16:21:48 +01:00
/*
 * Kinds of token, stored in Token.token_type.
 *
 * Enumerators carry implicit values starting at 0 (TokenType_Error == 0).
 * The groupings below are inferred from the enumerator names; the exact
 * spelling of each token is defined by the lexer implementation.
 */
typedef enum {
    /* Sentinels. */
    TokenType_Error,
    TokenType_EOF,

    /* Identifiers and literals. */
    TokenType_Id,
    TokenType_Int,

    /* Word-like tokens (presumably keywords). */
    TokenType_Not,
    TokenType_And,
    TokenType_Or,
    TokenType_If,
    TokenType_Else,
    TokenType_Loop,
    TokenType_Let,
    TokenType_Fn,
    TokenType_Return,
    TokenType_Break,

    /* Delimiters. */
    TokenType_LParen,
    TokenType_RParen,
    TokenType_LBrace,
    TokenType_RBrace,
    TokenType_LBracket,
    TokenType_RBracket,
    TokenType_Comma,
    TokenType_Semicolon,

    /* Operators; names spell out the characters (e.g. MinusGT is presumably "->"). */
    TokenType_Plus,
    TokenType_PlusEqual,
    TokenType_Minus,
    TokenType_MinusEqual,
    TokenType_MinusGT,
    TokenType_Asterisk,
    TokenType_AsteriskEqual,
    TokenType_Equal,
    TokenType_EqualEqual,
    TokenType_Exclamation,
    TokenType_ExclamationEqual,
    TokenType_LT,
    TokenType_LTEqual,
    TokenType_GT,
    TokenType_GTEqual,
    TokenType_Pipe,
    TokenType_PipeGT,
} TokenType;
typedef struct {
2024-04-02 18:47:16 +01:00
TokenType token_type;
Pos pos;
2024-04-02 16:21:48 +01:00
size_t length;
} Token;
2024-04-02 18:47:16 +01:00
/*
 * Lexer state over a text buffer.
 *
 * Field roles are inferred from their names; the implementation is not part
 * of this header. `text` is a const pointer, so the lexer does not modify the
 * buffer through it; whether it must stay alive while the lexer is in use is
 * up to the implementation.
 */
typedef struct {
    const char* text;
    size_t text_length;
    size_t index; /* presumably the current offset into `text` */
    int line;
    int col;
    bool failed;  /* reported via lexer_failed() -- presumably set on a lexing error */
} Lexer;
void lexer_construct(Lexer* lexer, const char* text, size_t text_length);
Token lexer_next(Lexer* lexer);
2024-04-04 01:08:11 +01:00
bool lexer_failed(const Lexer* lexer);
2024-04-02 18:47:16 +01:00
Token lexer_token(Lexer* lexer, TokenType token_type, Pos pos);
void lexer_step(Lexer* lexer);
bool lexer_done(const Lexer* lexer);
char lexer_current(const Lexer* lexer);
Pos lexer_pos(const Lexer* lexer);
2024-04-02 16:21:48 +01:00
/*
 * Kinds of AST node, stored in ASTNode.node_type.
 *
 * Enumerators carry implicit values starting at 0 (ASTNodeType_Error == 0).
 */
typedef enum {
    ASTNodeType_Error,
    ASTNodeType_Id,
    ASTNodeType_Int,
    ASTNodeType_Group,
    ASTNodeType_Block,
    ASTNodeType_If,
    ASTNodeType_Loop,
    ASTNodeType_Call,
    ASTNodeType_Index,
    ASTNodeType_Unary,
    ASTNodeType_Binary,
    ASTNodeType_Assign,
    ASTNodeType_Let,
    ASTNodeType_Break,
    ASTNodeType_Fn,
    ASTNodeType_Return,
    ASTNodeType_Statements,
} ASTNodeType;
/* Forward declaration: ASTNode is recursive and is referenced by pointer below. */
typedef struct ASTNode ASTNode;

/*
 * Array of ASTNode pointers with separate length and capacity fields
 * (presumably a growable vector managed by the ast_node_vec_* functions).
 */
typedef struct {
    ASTNode** data;
    size_t length;
    size_t capacity;
} ASTNodeVec;
2024-04-02 18:47:16 +01:00
int ast_node_vec_construct(ASTNodeVec* vec);
2024-04-02 16:21:48 +01:00
void ast_node_vec_destroy(ASTNodeVec* vec);
2024-04-02 18:47:16 +01:00
int ast_node_vec_push(ASTNodeVec* vec, ASTNode* item);
2024-04-02 16:21:48 +01:00
/*
 * Payload of an `if` node: a condition and two branches.
 * Presumably `falsy` may be NULL when there is no else branch -- confirm.
 */
typedef struct {
    ASTNode* condition;
    ASTNode* truthy;
    ASTNode* falsy;
} ASTIfNode;
/* Payload of a loop node: only a body, no condition field. */
typedef struct {
    ASTNode* body;
} ASTLoopNode;
/* Payload of a call node: the called expression and its argument list. */
typedef struct {
    ASTNode* subject;
    ASTNodeVec args;
} ASTCallNode;
/*
 * Payload of an index node: the indexed expression (`subject`) and a second
 * expression (`value`, presumably the index).
 */
typedef struct {
    ASTNode* subject;
    ASTNode* value;
} ASTIndexNode;
/* Unary operator kinds; implicit values start at 0. */
typedef enum {
    UnaryType_Not,
    UnaryType_Negate,
} UnaryType;
/* Payload of a unary node: the operator and its single operand. */
typedef struct {
    UnaryType unary_type;
    ASTNode* subject;
} ASTUnaryNode;
/*
 * Binary operator kinds; implicit values start at 0.
 * Abbreviations, judging by the names: EE "equal", NE "not equal",
 * LTE/GTE "less/greater than or equal".
 */
typedef enum {
    /* Logical. */
    BinaryType_And,
    BinaryType_Or,

    /* Equality and comparison. */
    BinaryType_EE,
    BinaryType_NE,
    BinaryType_LT,
    BinaryType_GT,
    BinaryType_LTE,
    BinaryType_GTE,

    /* Arithmetic. */
    BinaryType_Add,
    BinaryType_Subtract,
    BinaryType_Multiply,
} BinaryType;
/* Payload of a binary node: the operator and its two operands. */
typedef struct {
    BinaryType binary_type;
    ASTNode* left;
    ASTNode* right;
} ASTBinaryNode;
/*
 * Assignment kinds; implicit values start at 0.
 * Presumably these correspond to `=`, `+=`, `-=` and `*=`.
 */
typedef enum {
    AssignType_Assign,
    AssignType_Add,
    AssignType_Subtract,
    AssignType_Multiply,
} AssignType;
/* Payload of an assignment node: the kind, the target, and the assigned value. */
typedef struct {
    AssignType assign_type;
    ASTNode* subject;
    ASTNode* value;
} ASTAssignNode;
typedef struct {
2024-04-05 03:14:35 +01:00
ASTNode* id;
2024-04-02 16:21:48 +01:00
ASTNode* value;
} ASTLetNode;
typedef struct {
2024-04-05 03:14:35 +01:00
ASTNode* id;
2024-04-02 16:21:48 +01:00
ASTNodeVec params;
ASTNode* body;
} ASTFnNode;
2024-04-05 03:14:35 +01:00
/*
 * Payload of a `return` node.
 * Presumably `value` may be NULL for a bare return -- confirm against the parser.
 */
typedef struct {
    ASTNode* value;
} ASTReturnNode;
2024-04-02 16:21:48 +01:00
struct ASTNode {
ASTNodeType node_type;
2024-04-04 01:08:11 +01:00
Pos pos;
2024-04-02 16:21:48 +01:00
union {
2024-04-05 23:43:45 +01:00
char* id_value;
2024-04-02 16:21:48 +01:00
int int_value;
2024-04-05 03:14:35 +01:00
ASTNode* group_value;
2024-04-05 23:43:45 +01:00
ASTNodeVec statements;
2024-04-02 16:21:48 +01:00
ASTIfNode if_node;
ASTLoopNode loop_node;
ASTCallNode call_node;
ASTIndexNode index_node;
ASTUnaryNode unary_node;
ASTBinaryNode binary_node;
ASTAssignNode assign_node;
ASTLetNode let_node;
ASTFnNode fn_node;
2024-04-05 03:14:35 +01:00
ASTReturnNode return_node;
2024-04-02 16:21:48 +01:00
};
};
2024-04-04 01:08:11 +01:00
/*
 * Creates a node of `node_type` at `pos`. `spec_init` is an ASTNode passed by
 * value; presumably only its union payload is copied into the new node.
 * NOTE(review): allocation strategy and the failure result (NULL?) are not
 * stated in this header -- confirm.
 */
ASTNode* ast_node_new(ASTNodeType node_type, Pos pos, ASTNode spec_init);
/* Releases `node`; presumably also frees its children -- confirm. */
void ast_node_free(ASTNode* node);
/*
 * Parser state: the source text, an embedded Lexer, the current token, and a
 * failure flag. Field roles are inferred from names.
 */
typedef struct {
    const char* text;
    size_t text_length;
    Lexer lexer;
    Token current; /* presumably the lookahead token */
    bool failed;
} Parser;
/*
 * Parser API. Behaviour notes are inferred from names and signatures only;
 * confirm against the implementation.
 */

/* Initialises `parser` over `text` of `text_length` chars. */
void parser_construct(Parser* parser, const char* text, size_t text_length);

/* Reports the parser's failure state; takes the parser as const. */
bool parser_failed(const Parser* parser);

/* Presumably advances to the next token. */
void parser_step(Parser* parser);

/* Presumably true once the input has been fully consumed. */
bool parser_done(const Parser* parser);

/*
 * Parse functions. Each returns an ASTNode*; ownership of the result and the
 * value returned on error are not stated in this header.
 */

/* Statements. */
ASTNode* parser_parse_statements(Parser* parser);
ASTNode* parser_parse_statement(Parser* parser);
ASTNode* parser_parse_fn(Parser* parser);
ASTNode* parser_parse_single_line_statement(Parser* parser);
ASTNode* parser_parse_let(Parser* parser);
ASTNode* parser_parse_return(Parser* parser);
ASTNode* parser_parse_break(Parser* parser);
ASTNode* parser_parse_assign(Parser* parser);

/* Expressions; the declaration order presumably follows operator precedence, loosest first. */
ASTNode* parser_parse_expr(Parser* parser);
ASTNode* parser_parse_or(Parser* parser);
ASTNode* parser_parse_and(Parser* parser);
ASTNode* parser_parse_equality(Parser* parser);
ASTNode* parser_parse_comparison(Parser* parser);
ASTNode* parser_parse_term(Parser* parser);
ASTNode* parser_parse_factor(Parser* parser);
ASTNode* parser_parse_unary(Parser* parser);
ASTNode* parser_parse_index_call(Parser* parser);

/* Operands. */
ASTNode* parser_parse_operand(Parser* parser);
ASTNode* parser_parse_id(Parser* parser);
ASTNode* parser_parse_int(Parser* parser);
ASTNode* parser_parse_group(Parser* parser);
ASTNode* parser_parse_block(Parser* parser);
ASTNode* parser_parse_if(Parser* parser);
ASTNode* parser_parse_loop(Parser* parser);
2024-04-02 16:21:48 +01:00
2024-04-05 18:16:58 +01:00
/* Kinds of symbol; implicit values start at 0. */
typedef enum {
    SymbolType_Local,
    SymbolType_Global,
    SymbolType_Builtin,
} SymbolType;
/* A symbol record; it currently holds only the symbol's kind. */
typedef struct {
    SymbolType type;
} Symbol;
/*
 * Hashes the `length` bytes at `value` and returns the result as size_t.
 * Presumably implements the djb2 string hash, as the name suggests; the
 * definition is not visible in this header.
 */
size_t common_string_hash_djb2(const unsigned char* value, size_t length);
/*
 * One map entry: a key hash and the Symbol stored under it.
 * Only the hash is kept, not the key string itself.
 */
typedef struct {
    size_t key_hash;
    Symbol value;
} StringSymbolMapEntry;
2024-04-05 03:14:35 +01:00
typedef struct {
2024-04-05 18:16:58 +01:00
StringSymbolMapEntry* data;
size_t length;
size_t capacity;
} StringSymbolMap;
/*
 * StringSymbolMap API.
 * NOTE(review): the int results are status codes whose convention is not
 * stated in this header (presumably 0 on success) -- confirm.
 */
/* Initialises `map`. */
int string_symbol_map_construct(StringSymbolMap* map);
/* Releases `map`. */
void string_symbol_map_destroy(StringSymbolMap* map);
/*
 * Looks up `key_hash`; presumably returns NULL when absent. The result is a
 * non-const pointer even though `map` is const.
 */
Symbol* string_symbol_map_get(const StringSymbolMap* map, size_t key_hash);
/* Stores `value` under `key_hash`; presumably overwrites an existing entry. */
int string_symbol_map_set(StringSymbolMap* map, size_t key_hash, Symbol value);
2024-04-05 23:43:45 +01:00
/*
 * A scope of symbols linked to a parent scope.
 * Presumably `parent` is NULL for the outermost table and lookups walk up
 * the chain -- confirm against the implementation.
 */
typedef struct SymbolTable SymbolTable;

struct SymbolTable {
    SymbolTable* parent;
    StringSymbolMap map;
};
/* Initialises `table` with the given `parent` (presumably NULL is allowed for a root table). */
void symbol_table_construct(SymbolTable* table, SymbolTable* parent);
/* Releases `table`; whether `parent` is touched is not stated here. */
void symbol_table_destroy(SymbolTable* table);
/*
 * Pairs a statements node with a symbol table (presumably the scope for
 * those statements). The table is stored by value; the node by pointer.
 */
typedef struct {
    ASTNode* statements;
    SymbolTable table;
} StatementsSymbols;
/*
 * Array of StatementsSymbols with separate length and capacity fields
 * (presumably a growable vector managed by the statements_symbols_vec_*
 * functions). Elements are stored by value.
 */
typedef struct {
    StatementsSymbols* data;
    size_t length;
    size_t capacity;
} StatementsSymbolsVec;
/*
 * StatementsSymbolsVec API.
 * NOTE(review): the int results are status codes whose convention is not
 * stated in this header (presumably 0 on success) -- confirm.
 */
/* Initialises `vec`. */
int statements_symbols_vec_construct(StatementsSymbolsVec* vec);
/* Releases `vec`; whether contained tables are destroyed is not stated here. */
void statements_symbols_vec_destroy(StatementsSymbolsVec* vec);
/* Appends `pair` (passed by value) to `vec`. */
int statements_symbols_vec_push(StatementsSymbolsVec* vec, StatementsSymbols pair);
2024-04-05 18:16:58 +01:00
typedef struct {
bool failed;
2024-04-05 23:43:45 +01:00
SymbolTable head_table;
StatementsSymbolsVec statements_symbols;
2024-04-05 03:14:35 +01:00
} Checker;
2024-04-05 23:43:45 +01:00
/*
 * Result returned by checker_result(): the same two data fields as Checker,
 * without the failure flag.
 */
typedef struct {
    SymbolTable head_table;
    StatementsSymbolsVec statements_symbols;
} CheckerResult;
2024-04-05 03:14:35 +01:00
void checker_construct(Checker* checker);
2024-04-05 23:43:45 +01:00
void checker_destroy(Checker* checker);
2024-04-05 18:16:58 +01:00
bool checker_failed(const Checker* checker);
2024-04-05 23:43:45 +01:00
CheckerResult checker_result(Checker* checker);
2024-04-05 18:16:58 +01:00
void checker_check_statements(Checker* checker, ASTNode* node);
2024-04-05 23:43:45 +01:00
void checker_check_statement(Checker* checker, SymbolTable* table, ASTNode* node);
2024-04-05 03:14:35 +01:00
2024-04-02 16:21:48 +01:00
#endif