#include "parser.h"
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
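// Initialize `lexer` over `text`, which need not be NUL-terminated:
// scanning is bounded by `length`. Positions start at line 1, column 1.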
void lexer_construct(Lexer* lexer, const char* text, size_t length)
{
    *lexer = (Lexer) {
        .text = text,
        .text_length = length,
        .index = 0,
        .line = 1,
        .col = 1,
    };
}

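// Identifier character classes: [A-Za-z_] may start an identifier,
// [A-Za-z0-9_] may continue one.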
static inline bool is_id_start_char(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
}

static inline bool is_id_char(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
        || (c >= '0' && c <= '9') || c == '_';
}

struct MatchIdToTokenTypeCase {
    const char* keyword;
    TokenType token_type;
};

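// Map an identifier to a keyword token type via a NULL-terminated case
// table; falls back to TokenType_Id when no keyword matches.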
static inline TokenType match_id_to_token_type(
    const char* source, size_t length, struct MatchIdToTokenTypeCase cases[])
{
    for (size_t i = 0; cases[i].keyword != NULL; ++i) {
        // A keyword matches only on the same bytes *and* the same
        // length, so a prefix like "an" cannot match the keyword "and".
        if (strncmp(source, cases[i].keyword, length) == 0
            && cases[i].keyword[length] == '\0') {
            return cases[i].token_type;
        }
    }
    return TokenType_Id;
}

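// Scan and return the next token, skipping whitespace; yields
// TokenType_EOF at end of input and TokenType_Error on any
// unrecognized character.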
Token lexer_next(Lexer* lexer)
{
    Pos pos = lexer_pos(lexer);
    if (lexer_done(lexer)) {
        return lexer_token(lexer, TokenType_EOF, pos);
    }
    char c = lexer_current(lexer);
    if (c == ' ' || c == '\t' || c == '\n') {
        lexer_step(lexer);
        return lexer_next(lexer);
    }
    if (is_id_start_char(c)) {
        lexer_step(lexer);
        // Re-read the current character on each iteration and stop at
        // end of input, so the loop actually terminates.
        while (!lexer_done(lexer) && is_id_char(lexer_current(lexer))) {
            lexer_step(lexer);
        }
        size_t length = lexer->index - pos.index;
        TokenType token_type
            = match_id_to_token_type(&lexer->text[pos.index], length,
                (struct MatchIdToTokenTypeCase[]) {
                    { "not", TokenType_Not },
                    { "and", TokenType_And },
                    { "or", TokenType_Or },
                    { "loop", TokenType_Loop },
                    { "fn", TokenType_Fn },
                    { "return", TokenType_Return },
                    { "break", TokenType_Break },
                    { NULL, TokenType_Id },
                });
        return lexer_token(lexer, token_type, pos);
    }
    if (c >= '0' && c <= '9') {
        lexer_step(lexer);
        // Consume the remaining digits, including '0', re-reading the
        // current character each step and stopping at end of input.
        while (!lexer_done(lexer) && lexer_current(lexer) >= '0'
            && lexer_current(lexer) <= '9') {
            lexer_step(lexer);
        }
        return lexer_token(lexer, TokenType_Int, pos);
    }
    return lexer_token(lexer, TokenType_Error, pos);
}

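// Build a token of the given type spanning from `pos` to the lexer's
// current index.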
Token lexer_token(Lexer* lexer, TokenType token_type, Pos pos)
{
    return (Token) {
        .token_type = token_type,
        .pos = pos,
        .length = lexer->index - pos.index,
    };
}

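// Advance past the current character, maintaining the 1-based
// line/column counters.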
void lexer_step(Lexer* lexer)
{
    if (lexer_done(lexer)) {
        return;
    }
    // Update line/col from the character being consumed *before*
    // advancing; reading after the increment would mis-count lines and
    // could read one past the end of the text.
    if (lexer_current(lexer) == '\n') {
        lexer->line += 1;
        lexer->col = 1;
    } else {
        lexer->col += 1;
    }
    lexer->index += 1;
}

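// True once every character of the input has been consumed.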
bool lexer_done(const Lexer* lexer)
{
    return lexer->index >= lexer->text_length;
}

char lexer_current(const Lexer* lexer) { return lexer->text[lexer->index]; }

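// Snapshot the current index/line/column as a Pos.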
Pos lexer_pos(const Lexer* lexer)
{
    return (Pos) {
        .index = lexer->index,
        .line = lexer->line,
        .col = lexer->col,
    };
}

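// Initialize an empty vector with a small starting capacity; returns 0
// on success, -1 if the allocation fails.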
int ast_node_vec_construct(ASTNodeVec* vec)
{
    const size_t capacity_start = 4;
    *vec = (ASTNodeVec) {
        // Allocate `capacity_start` ASTNode* slots; malloc takes a byte
        // count, hence the sizeof.
        .data = malloc(capacity_start * sizeof(ASTNode*)),
        .length = 0,
        .capacity = capacity_start,
    };
    if (vec->data == NULL) {
        return -1;
    }
    return 0;
}

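// Release the vector's storage; safe to call after a failed construct,
// where `data` is NULL.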
void ast_node_vec_destroy(ASTNodeVec* vec)
{
    if (vec->data != NULL) {
        free(vec->data);
    }
}

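// Append `item`, doubling the capacity when full (amortized O(1)
// pushes); returns 0 on success, -1 if reallocation fails, leaving the
// vector unchanged.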
int ast_node_vec_push(ASTNodeVec* vec, ASTNode* item)
{
    if (vec->length + 1 > vec->capacity) {
        // Grow in element units (realloc takes a byte count), and commit
        // the doubled capacity only after realloc succeeds, so a failed
        // push leaves the vector usable.
        size_t new_capacity = vec->capacity * 2;
        ASTNode** data = realloc(vec->data, new_capacity * sizeof(ASTNode*));
        if (data == NULL) {
            return -1;
        }
        vec->data = data;
        vec->capacity = new_capacity;
    }
    vec->data[vec->length] = item;
    vec->length += 1;
    return 0;
}

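#ifdef LEXER_DEMO
// Minimal usage sketch, not part of the parser: LEXER_DEMO is a
// hypothetical flag for this example, which assumes only the Lexer,
// Token, Pos, and TokenType declarations from parser.h used above.
#include <stdio.h>

int main(void)
{
    const char* source = "fn f loop break 105";
    Lexer lexer;
    lexer_construct(&lexer, source, strlen(source));
    for (Token token = lexer_next(&lexer);
         token.token_type != TokenType_EOF;
         token = lexer_next(&lexer)) {
        // Print each token's position and the bytes it spans.
        printf("%d:%d\t%.*s\n", (int)token.pos.line, (int)token.pos.col,
            (int)token.length, &source[token.pos.index]);
    }
    return 0;
}
#endif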