/*
 * semos/compiler.h
 * 2024-04-07 04:50:29 +02:00
 *
 * 344 lines
 * 8.1 KiB
 * C
 */

#ifndef COMPILER_H
#define COMPILER_H
#include <stdbool.h>
#include <stddef.h>
/**
 * A location in the source text: an offset (`index`) together with a
 * `line`/`col` pair. Carried by tokens and AST nodes and passed to
 * print_error().
 * NOTE(review): whether `line`/`col` start at 0 or 1 is not visible in this
 * header - confirm against the lexer implementation.
 */
typedef struct {
size_t index;
int line;
int col;
} Pos;
/**
 * Reports `message` in connection with the source position `pos`.
 * NOTE(review): the output stream and the exact message format are decided
 * by the implementation, which is not part of this header.
 */
void print_error(const char* message, Pos pos);
/**
 * Kind of a lexed token; stored in Token.token_type.
 * Enumerators carry no explicit values, so they are numbered from 0 in
 * declaration order - do not reorder them if anything depends on the values.
 * NOTE(review): the spelling each enumerator corresponds to in the source
 * language is inferred from its name only; confirm against the lexer.
 */
typedef enum {
/* Sentinels: a lexing error and end of input (per their names). */
TokenType_Error,
TokenType_EOF,
/* Value-carrying tokens: identifiers and integer literals. */
TokenType_Id,
TokenType_Int,
/* Presumably keyword tokens. */
TokenType_Not,
TokenType_And,
TokenType_Or,
TokenType_If,
TokenType_Else,
TokenType_Loop,
TokenType_Let,
TokenType_Fn,
TokenType_Return,
TokenType_Break,
/* Brackets and separators. */
TokenType_LParen,
TokenType_RParen,
TokenType_LBrace,
TokenType_RBrace,
TokenType_LBracket,
TokenType_RBracket,
TokenType_Comma,
TokenType_Semicolon,
/* Operators; an `Equal` suffix presumably means the operator followed by */
/* '=', and a `GT` suffix the operator followed by '>'. */
TokenType_Plus,
TokenType_PlusEqual,
TokenType_Minus,
TokenType_MinusEqual,
TokenType_MinusGT,
TokenType_Asterisk,
TokenType_AsteriskEqual,
TokenType_Equal,
TokenType_EqualEqual,
TokenType_Exclamation,
TokenType_ExclamationEqual,
TokenType_LT,
TokenType_LTEqual,
TokenType_GT,
TokenType_GTEqual,
TokenType_Pipe,
TokenType_PipeGT,
} TokenType;
/**
 * A single token: its kind, a source position and a length.
 * The token does not hold its text; only `pos` and `length` are stored.
 * NOTE(review): presumably `pos` is where the token begins and `length` is
 * its extent in the source text - confirm against lexer_token().
 */
typedef struct {
TokenType token_type;
Pos pos;
size_t length;
} Token;
/**
 * Lexer state.
 * `text`/`text_length` match the parameters of lexer_construct();
 * `index`/`line`/`col` mirror the fields of Pos (see lexer_pos()).
 * NOTE(review): `text` is a non-owning `const char*` as far as this header
 * shows; whether the lexer copies it is up to the implementation.
 */
typedef struct {
const char* text;
size_t text_length;
size_t index;
int line;
int col;
/* Error flag; presumably what lexer_failed() reports. */
bool failed;
} Lexer;
/*
 * Lexer API. Behavior notes below are inferred from names and signatures
 * only; the definitions live outside this header.
 */
/* Initializes `lexer` for the input `text` of `text_length` characters. */
void lexer_construct(Lexer* lexer, const char* text, size_t text_length);
/* Returns a Token by value; presumably the next token of the input. */
Token lexer_next(Lexer* lexer);
/* Read-only query; presumably reports Lexer.failed. */
bool lexer_failed(const Lexer* lexer);
/* Builds a Token of kind `token_type` associated with `pos`. */
/* NOTE(review): how Token.length is derived is not visible here. */
Token lexer_token(Lexer* lexer, TokenType token_type, Pos pos);
/* Mutates the lexer; presumably advances by one character. */
void lexer_step(Lexer* lexer);
/* Read-only query; presumably true once the input is exhausted. */
bool lexer_done(const Lexer* lexer);
/* Read-only query returning a char; presumably the character at `index`. */
char lexer_current(const Lexer* lexer);
/* Read-only query returning a Pos; presumably the current index/line/col. */
Pos lexer_pos(const Lexer* lexer);
/**
 * Discriminator stored in ASTNode.node_type.
 * Enumerators are implicitly numbered from 0 in declaration order.
 * NOTE(review): which member of the ASTNode union is active for each value
 * is suggested by the matching names (e.g. If -> if_node) but is enforced
 * only by the implementation.
 */
typedef enum {
ASTNodeType_Error,
ASTNodeType_Id,
ASTNodeType_Int,
ASTNodeType_Group,
ASTNodeType_Block,
ASTNodeType_If,
ASTNodeType_Loop,
ASTNodeType_Call,
ASTNodeType_Index,
ASTNodeType_Unary,
ASTNodeType_Binary,
ASTNodeType_Assign,
ASTNodeType_Let,
ASTNodeType_Break,
ASTNodeType_Fn,
ASTNodeType_Return,
ASTNodeType_Statements,
} ASTNodeType;
/* Forward declaration: lets the types below hold ASTNode pointers before */
/* struct ASTNode itself is defined further down. */
typedef struct ASTNode ASTNode;
/**
 * Array of ASTNode pointers with separate `length` and `capacity` fields.
 * NOTE(review): presumably a growable array where `length` is the number of
 * used slots and `capacity` the number allocated - confirm in the
 * ast_node_vec_* implementation.
 */
typedef struct {
ASTNode** data;
size_t length;
size_t capacity;
} ASTNodeVec;
/*
 * ASTNodeVec API.
 * NOTE(review): the meaning of the `int` results (presumably a status code)
 * and whether destroy also frees the pointed-to nodes are not visible in
 * this header - confirm in the implementation.
 */
/* Initializes `vec`. */
int ast_node_vec_construct(ASTNodeVec* vec);
/* Releases whatever `vec` holds. */
void ast_node_vec_destroy(ASTNodeVec* vec);
/* Adds `item` to `vec`; presumably appended at the end. */
int ast_node_vec_push(ASTNodeVec* vec, ASTNode* item);
/**
 * Payload for an `if` node (ASTNode.if_node): a condition and two branches.
 * NOTE(review): presumably `falsy` is NULL when there is no else branch -
 * confirm in parser_parse_if().
 */
typedef struct {
ASTNode* condition;
ASTNode* truthy;
ASTNode* falsy;
} ASTIfNode;
/** Payload for a loop node (ASTNode.loop_node): only the loop body. */
typedef struct {
ASTNode* body;
} ASTLoopNode;
/**
 * Payload for a call node (ASTNode.call_node): the expression being called
 * (`subject`) and its argument list. `args` is embedded by value.
 */
typedef struct {
ASTNode* subject;
ASTNodeVec args;
} ASTCallNode;
/**
 * Payload for an index node (ASTNode.index_node): the indexed expression
 * (`subject`) and a second expression `value`, presumably the index.
 */
typedef struct {
ASTNode* subject;
ASTNode* value;
} ASTIndexNode;
/** Operator selector for ASTUnaryNode.unary_type. */
typedef enum {
UnaryType_Not,
UnaryType_Negate,
} UnaryType;
/** Payload for a unary node (ASTNode.unary_node): operator and operand. */
typedef struct {
UnaryType unary_type;
ASTNode* subject;
} ASTUnaryNode;
/**
 * Operator selector for ASTBinaryNode.binary_type.
 * NOTE(review): EE/NE presumably stand for "equal-equal" and "not-equal",
 * matching TokenType_EqualEqual / TokenType_ExclamationEqual - confirm in
 * the parser.
 */
typedef enum {
/* Logical */
BinaryType_And,
BinaryType_Or,
/* Equality and comparison */
BinaryType_EE,
BinaryType_NE,
BinaryType_LT,
BinaryType_GT,
BinaryType_LTE,
BinaryType_GTE,
/* Arithmetic */
BinaryType_Add,
BinaryType_Subtract,
BinaryType_Multiply,
} BinaryType;
/** Payload for a binary node (ASTNode.binary_node): operator and operands. */
typedef struct {
BinaryType binary_type;
ASTNode* left;
ASTNode* right;
} ASTBinaryNode;
/**
 * Operator selector for ASTAssignNode.assign_type.
 * NOTE(review): presumably Assign is plain `=` and Add/Subtract/Multiply
 * are the compound forms matching TokenType_PlusEqual, TokenType_MinusEqual
 * and TokenType_AsteriskEqual - confirm in parser_parse_assign().
 */
typedef enum {
AssignType_Assign,
AssignType_Add,
AssignType_Subtract,
AssignType_Multiply,
} AssignType;
/**
 * Payload for an assignment node (ASTNode.assign_node): the assignment
 * kind, the target expression (`subject`) and the assigned `value`.
 */
typedef struct {
AssignType assign_type;
ASTNode* subject;
ASTNode* value;
} ASTAssignNode;
/**
 * Payload for a `let` node (ASTNode.let_node).
 * `id` is itself an ASTNode pointer (presumably an ASTNodeType_Id node
 * naming the binding) and `value` is the initializer expression.
 */
typedef struct {
ASTNode* id;
ASTNode* value;
} ASTLetNode;
/**
 * Payload for a function node (ASTNode.fn_node): the name node (`id`), the
 * parameter list (embedded by value) and the function `body`.
 * NOTE(review): the node types expected in `params` are not visible here.
 */
typedef struct {
ASTNode* id;
ASTNodeVec params;
ASTNode* body;
} ASTFnNode;
/**
 * Payload for a return node (ASTNode.return_node).
 * NOTE(review): presumably `value` may be NULL for a bare return - confirm
 * in parser_parse_return().
 */
typedef struct {
ASTNode* value;
} ASTReturnNode;
/* Forward declaration: ASTNode stores a Symbol pointer, while struct */
/* Symbol is defined later in this header. */
typedef struct Symbol Symbol;
/**
 * A node of the syntax tree: a tag (`node_type`), a source position and an
 * anonymous union holding the tag-specific payload. Because the union and
 * the inner struct are anonymous, payload members are accessed directly,
 * e.g. `node->int_value` or `node->if_node.condition`.
 *
 * Only one union member is valid at a time. The union has no member named
 * after ASTNodeType_Error, ASTNodeType_Block or ASTNodeType_Break; which
 * member (if any) those node types use is decided by the implementation.
 */
struct ASTNode {
ASTNodeType node_type;
Pos pos;
union {
/* Identifier payload: a name plus a Symbol pointer. */
/* NOTE(review): presumably `id_symbol` is filled in by the checker */
/* (see checker_check_id) and `id_value` is owned by the node - confirm. */
struct {
char* id_value;
Symbol* id_symbol;
};
int int_value;
ASTNode* group_value;
ASTNodeVec statements;
ASTIfNode if_node;
ASTLoopNode loop_node;
ASTCallNode call_node;
ASTIndexNode index_node;
ASTUnaryNode unary_node;
ASTBinaryNode binary_node;
ASTAssignNode assign_node;
ASTLetNode let_node;
ASTFnNode fn_node;
ASTReturnNode return_node;
};
};
/**
 * Creates a node of kind `node_type` at `pos`. `spec_init` is a whole
 * ASTNode passed by value; presumably only its union payload is used to
 * initialize the new node.
 * NOTE(review): allocation strategy and the result on failure (presumably
 * NULL) are not visible in this header.
 */
ASTNode* ast_node_new(ASTNodeType node_type, Pos pos, ASTNode spec_init);
/**
 * Releases `node`.
 * NOTE(review): whether child nodes are released recursively and whether
 * NULL is accepted must be confirmed in the implementation.
 */
void ast_node_free(ASTNode* node);
/**
 * Parser state: the input text, an embedded Lexer, one stored token
 * (`current`) and an error flag.
 * `text`/`text_length` match the parameters of parser_construct().
 */
typedef struct {
const char* text;
size_t text_length;
Lexer lexer;
/* Presumably the token currently being examined (one-token lookahead). */
Token current;
/* Presumably what parser_failed() reports. */
bool failed;
} Parser;
/*
 * Parser API. Behavior notes are inferred from names and signatures only;
 * the definitions live outside this header.
 */
/* Initializes `parser` for the input `text` of `text_length` characters. */
void parser_construct(Parser* parser, const char* text, size_t text_length);
/* Read-only query; presumably reports Parser.failed. */
bool parser_failed(const Parser* parser);
/* Mutates the parser; presumably advances `current` to the next token. */
void parser_step(Parser* parser);
/* Read-only query; presumably true once the end of input is reached. */
bool parser_done(const Parser* parser);
/*
 * One function per grammar construct; each returns an ASTNode pointer.
 * NOTE(review): the declaration order from parser_parse_expr down to
 * parser_parse_operand looks like an operator-precedence chain (or, and,
 * equality, comparison, term, factor, unary, index/call, operand), but the
 * actual call structure, the result on a syntax error and ownership of the
 * returned node must be confirmed in the implementation.
 */
/* Statements */
ASTNode* parser_parse_statements(Parser* parser);
ASTNode* parser_parse_statement(Parser* parser);
ASTNode* parser_parse_fn(Parser* parser);
ASTNode* parser_parse_single_line_statement(Parser* parser);
ASTNode* parser_parse_let(Parser* parser);
ASTNode* parser_parse_return(Parser* parser);
ASTNode* parser_parse_break(Parser* parser);
ASTNode* parser_parse_assign(Parser* parser);
/* Expressions */
ASTNode* parser_parse_expr(Parser* parser);
ASTNode* parser_parse_or(Parser* parser);
ASTNode* parser_parse_and(Parser* parser);
ASTNode* parser_parse_equality(Parser* parser);
ASTNode* parser_parse_comparison(Parser* parser);
ASTNode* parser_parse_term(Parser* parser);
ASTNode* parser_parse_factor(Parser* parser);
ASTNode* parser_parse_unary(Parser* parser);
ASTNode* parser_parse_index_call(Parser* parser);
/* Operands */
ASTNode* parser_parse_operand(Parser* parser);
ASTNode* parser_parse_id(Parser* parser);
ASTNode* parser_parse_int(Parser* parser);
ASTNode* parser_parse_group(Parser* parser);
ASTNode* parser_parse_block(Parser* parser);
ASTNode* parser_parse_if(Parser* parser);
ASTNode* parser_parse_loop(Parser* parser);
/** Category of a resolved name; stored in Symbol.type. */
typedef enum {
SymbolType_Local,
SymbolType_Global,
SymbolType_Builtin,
} SymbolType;
/**
 * What an identifier refers to: a category plus category-specific data in
 * an anonymous union (accessed directly as `symbol.local_location`).
 * The union currently has a single member.
 * NOTE(review): presumably `local_location` is meaningful only when `type`
 * is SymbolType_Local; its unit (slot index, offset, ...) is not visible
 * in this header.
 */
struct Symbol {
SymbolType type;
union {
int local_location;
};
};
/**
 * Computes a hash of the string `value` and returns it as a size_t.
 * NOTE(review): per its name this is presumably the djb2 string hash over a
 * NUL-terminated string - confirm in the implementation.
 */
size_t string_hash_djb2(const char* value);
/**
 * One entry of StringSymbolMap: a key hash and the Symbol stored by value.
 * The entry keeps only the hash of the key, not the key string itself.
 */
typedef struct {
size_t key_hash;
Symbol value;
} StringSymbolMapEntry;
/**
 * Collection of StringSymbolMapEntry with separate `length` and `capacity`
 * fields.
 * NOTE(review): how entries are organized inside `data` (unsorted, sorted,
 * open addressing, ...) is not visible in this header.
 */
typedef struct {
StringSymbolMapEntry* data;
size_t length;
size_t capacity;
} StringSymbolMap;
/*
 * StringSymbolMap API.
 * Lookup and insertion take only a `key_hash`, never the key string, so
 * two different identifiers with the same hash cannot be told apart through
 * this interface.
 * NOTE(review): the meaning of the `int` results (presumably a status
 * code) is not visible in this header.
 */
/* Initializes `map`. */
int string_symbol_map_construct(StringSymbolMap* map);
/* Releases whatever `map` holds. */
void string_symbol_map_destroy(StringSymbolMap* map);
/* Returns a pointer to a Symbol for `key_hash`; presumably NULL when the */
/* hash is absent, and presumably pointing into the map's own storage. */
Symbol* string_symbol_map_get(const StringSymbolMap* map, size_t key_hash);
/* Stores `value` (passed by value) under `key_hash`. */
int string_symbol_map_set(StringSymbolMap* map, size_t key_hash, Symbol value);
/* Forward declaration so the struct can point at its own type. */
typedef struct SymbolTable SymbolTable;
/**
 * A scope: its own name-to-symbol map plus a link to a parent table.
 * NOTE(review): presumably `parent` is NULL for the outermost scope and is
 * not owned by this table - confirm in symbol_table_destroy().
 */
struct SymbolTable {
SymbolTable* parent;
StringSymbolMap map;
};
/*
 * SymbolTable API. Behavior notes are inferred from names and signatures
 * only.
 */
/* Initializes `table` with the given `parent`. Returns void, unlike */
/* string_symbol_map_construct(), so a failure cannot be reported through */
/* the return value. */
void symbol_table_construct(SymbolTable* table, SymbolTable* parent);
/* Releases whatever `table` holds. */
void symbol_table_destroy(SymbolTable* table);
/* Looks up `id`; presumably also searches parent tables and returns NULL */
/* when the name is unknown. */
Symbol* symbol_table_resolve(SymbolTable* table, const char* id);
/* Same lookup, but starting from an already computed hash. */
Symbol* symbol_table_resolve_hash(SymbolTable* table, size_t hash);
/* Looks up `id`; presumably in `table` only, without consulting parents. */
Symbol* symbol_table_resolve_local(SymbolTable* table, const char* id);
/* Associates `id` with `symbol` (passed by value) in `table`. Returns */
/* void, so a failure cannot be reported through the return value. */
void symbol_table_define(SymbolTable* table, const char* id, Symbol symbol);
/**
 * Pairs an ASTNode pointer (`statements`) with a SymbolTable stored by
 * value.
 * NOTE(review): presumably `statements` is an ASTNodeType_Statements node
 * and `table` is the scope created for it by the checker - confirm.
 */
typedef struct {
ASTNode* statements;
SymbolTable table;
} StatementsSymbols;
/**
 * Array of StatementsSymbols values with separate `length` and `capacity`
 * fields; elements are stored by value, not by pointer.
 * NOTE(review): presumably a growable array - confirm in the
 * statements_symbols_vec_* implementation.
 */
typedef struct {
StatementsSymbols* data;
size_t length;
size_t capacity;
} StatementsSymbolsVec;
/*
 * StatementsSymbolsVec API.
 * NOTE(review): the meaning of the `int` results (presumably a status
 * code) and whether destroy also tears down each element's SymbolTable are
 * not visible in this header.
 */
/* Initializes `vec`. */
int statements_symbols_vec_construct(StatementsSymbolsVec* vec);
/* Releases whatever `vec` holds. */
void statements_symbols_vec_destroy(StatementsSymbolsVec* vec);
/* Adds `pair` (passed by value) to `vec`; presumably appended at the end. */
int statements_symbols_vec_push(
StatementsSymbolsVec* vec, StatementsSymbols pair);
/**
 * Checker state: an error flag, a SymbolTable named `head_table` and the
 * collected statements/table pairs.
 * NOTE(review): presumably `head_table` is the outermost scope - confirm
 * in checker_construct().
 */
typedef struct {
/* Presumably what checker_failed() reports. */
bool failed;
SymbolTable head_table;
StatementsSymbolsVec statements_symbols;
} Checker;
/**
 * Result of a check run, returned by value from checker_result().
 * Has the same `head_table` and `statements_symbols` fields as Checker,
 * without the `failed` flag.
 * NOTE(review): both fields contain pointers, so if checker_result()
 * copies them shallowly the result shares storage with the Checker;
 * confirm who is responsible for destroying them.
 */
typedef struct {
SymbolTable head_table;
StatementsSymbolsVec statements_symbols;
} CheckerResult;
/*
 * Checker API. Behavior notes are inferred from names and signatures only.
 */
/* Initializes `checker`. Returns void, so a failure cannot be reported */
/* through the return value. */
void checker_construct(Checker* checker);
/* Releases whatever `checker` holds. */
void checker_destroy(Checker* checker);
/* Read-only query; presumably reports Checker.failed. */
bool checker_failed(const Checker* checker);
/* Returns a CheckerResult by value; see the ownership note on that type. */
CheckerResult checker_result(Checker* checker);
/*
 * Per-construct check functions. Each takes the checker, the SymbolTable
 * to work against and the AST node to check, and returns void.
 * NOTE(review): presumably errors are recorded in Checker.failed and
 * reported via print_error(), and checker_check_id() fills in
 * ASTNode.id_symbol - confirm in the implementation.
 */
void checker_check_statements(
Checker* checker, SymbolTable* table, ASTNode* node);
void checker_check_statement(
Checker* checker, SymbolTable* table, ASTNode* node);
void checker_check_assign(Checker* checker, SymbolTable* table, ASTNode* node);
void checker_check_let(Checker* checker, SymbolTable* table, ASTNode* node);
void checker_check_fn(Checker* checker, SymbolTable* table, ASTNode* node);
void checker_check_expr(Checker* checker, SymbolTable* table, ASTNode* node);
void checker_check_id(Checker* checker, SymbolTable* table, ASTNode* node);
#endif