rust match expression :copium:

This commit is contained in:
SimonFJ20 2024-04-02 19:47:16 +02:00
parent b5270924ba
commit 277471a743
2 changed files with 196 additions and 9 deletions

158
parser.c Normal file
View File

@ -0,0 +1,158 @@
#include "parser.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/*
 * Prepare `lexer` to scan the `length` bytes starting at `text`.
 *
 * The lexer only stores the pointer; it does not copy the text.
 */
void lexer_construct(Lexer* lexer, const char* text, size_t length)
{
    /* Input buffer. */
    lexer->text = text;
    lexer->text_length = length;

    /* Scanning starts at the first byte, which is line 1, column 1. */
    lexer->index = 0;
    lexer->line = 1;
    lexer->col = 1;
}
/* True when `c` may begin an identifier: an ASCII letter or an underscore. */
static inline bool is_id_start_char(char c)
{
    if (c == '_') {
        return true;
    }
    if (c >= 'a' && c <= 'z') {
        return true;
    }
    return c >= 'A' && c <= 'Z';
}
/*
 * True when `c` may appear inside an identifier: an ASCII letter, an ASCII
 * digit or an underscore.
 */
static inline bool is_id_char(char c)
{
    const bool lower = c >= 'a' && c <= 'z';
    const bool upper = c >= 'A' && c <= 'Z';
    const bool digit = c >= '0' && c <= '9';
    return lower || upper || digit || c == '_';
}
/*
 * One row of a keyword lookup table used by match_id_to_token_type().
 *
 * Tables are arrays of these rows and end with a row whose `keyword` is NULL.
 * Callers initialise rows positionally, so the field order matters.
 */
struct MatchIdToTokenTypeCase {
/* Keyword spelling; NULL marks the end of the table. */
const char* keyword;
/* Token type to report when the lexeme matches `keyword`. */
TokenType token_type;
};
/*
 * Classify an identifier-shaped lexeme as a keyword or a plain identifier.
 *
 * source: start of the lexeme; it does not need to be NUL-terminated.
 * length: number of bytes in the lexeme.
 * cases:  keyword table, terminated by a row whose `keyword` is NULL.
 *
 * Returns the token type of the first row whose keyword equals the lexeme
 * exactly, or TokenType_Id when no row matches.
 */
static inline TokenType match_id_to_token_type(const char* source,
    size_t length, const struct MatchIdToTokenTypeCase cases[])
{
    for (size_t i = 0; cases[i].keyword != NULL; ++i) {
        const char* keyword = cases[i].keyword;
        /*
         * The lengths must agree as well as the bytes. Comparing only the
         * first `length` bytes would classify every prefix of a keyword
         * ("n", "no", "ret", ...) as that keyword, and an empty lexeme as
         * the first keyword in the table.
         */
        if (strlen(keyword) == length && memcmp(source, keyword, length) == 0) {
            return cases[i].token_type;
        }
    }
    return TokenType_Id;
}
/*
 * Scan and return the next token from the lexer's input.
 *
 * Leading spaces, tabs and newlines are skipped. After that:
 *   - at end of input, a zero-length TokenType_EOF token is returned;
 *   - a letter or underscore starts an identifier, which is reported as the
 *     matching keyword token type or as TokenType_Id;
 *   - an ASCII digit starts a TokenType_Int token covering the run of digits;
 *   - any other character is consumed and reported as a one-character
 *     TokenType_Error token, so callers always make progress.
 *
 * The returned token records the position and length of the lexeme.
 */
Token lexer_next(Lexer* lexer)
{
    /* Skip whitespace with a loop; recursion costs one frame per character. */
    while (!lexer_done(lexer)) {
        const char space = lexer_current(lexer);
        if (space != ' ' && space != '\t' && space != '\n') {
            break;
        }
        lexer_step(lexer);
    }

    Pos pos = lexer_pos(lexer);
    if (lexer_done(lexer)) {
        return lexer_token(lexer, TokenType_EOF, pos);
    }

    char c = lexer_current(lexer);

    if (is_id_start_char(c)) {
        lexer_step(lexer);
        /*
         * Re-read the current character on every iteration. Testing the
         * first character again would never terminate.
         */
        while (!lexer_done(lexer) && is_id_char(lexer_current(lexer))) {
            lexer_step(lexer);
        }
        size_t length = lexer->index - pos.index;
        TokenType token_type
            = match_id_to_token_type(&lexer->text[pos.index], length,
                (struct MatchIdToTokenTypeCase[]) {
                    { "not", TokenType_Not },
                    { "and", TokenType_And },
                    { "or", TokenType_Or },
                    { "loop", TokenType_Loop },
                    { "fn", TokenType_Fn },
                    { "return", TokenType_Return },
                    { "break", TokenType_Break },
                    { NULL, TokenType_Id },
                });
        return lexer_token(lexer, token_type, pos);
    }

    /* '0' is a digit too: it can be a literal on its own or part of "10". */
    if (c >= '0' && c <= '9') {
        lexer_step(lexer);
        while (!lexer_done(lexer) && lexer_current(lexer) >= '0'
            && lexer_current(lexer) <= '9') {
            lexer_step(lexer);
        }
        return lexer_token(lexer, TokenType_Int, pos);
    }

    /*
     * Unknown character: consume it so the error token covers it and the
     * next call continues after it instead of reporting it again.
     */
    lexer_step(lexer);
    return lexer_token(lexer, TokenType_Error, pos);
}
/*
 * Build a token of type `token_type` that starts at `pos` and extends up to
 * the lexer's current index.
 */
Token lexer_token(Lexer* lexer, TokenType token_type, Pos pos)
{
    /* Everything between the start position and the cursor is the lexeme. */
    const size_t consumed = lexer->index - pos.index;
    Token token = {
        .token_type = token_type,
        .pos = pos,
        .length = consumed,
    };
    return token;
}
/*
 * Advance the lexer by one character and keep `line`/`col` in sync.
 *
 * Does nothing once the end of the input has been reached.
 */
void lexer_step(Lexer* lexer)
{
    if (lexer_done(lexer)) {
        return;
    }
    /*
     * Decide based on the character being left, not the one being entered.
     * Stepping over a newline puts the next character at column 1 of the
     * following line, and the newline itself stays on the line it ends.
     * Reading before the increment also avoids touching
     * text[text_length] when the last character is consumed.
     */
    const char stepped_over = lexer_current(lexer);
    lexer->index += 1;
    if (stepped_over == '\n') {
        lexer->line += 1;
        lexer->col = 1;
    } else {
        lexer->col += 1;
    }
}
/* True once the lexer's index has reached or passed the end of the text. */
bool lexer_done(const Lexer* lexer)
{
    const size_t end = lexer->text_length;
    return !(lexer->index < end);
}
/*
 * Return the character at the lexer's current index.
 *
 * No bounds check is performed here.
 */
char lexer_current(const Lexer* lexer)
{
    const char* cursor = lexer->text + lexer->index;
    return *cursor;
}
Pos lexer_pos(const Lexer* lexer)
{
return (Pos) {
.index = lexer->index,
.line = lexer->line,
.col = lexer->col,
};
}
/*
 * Initialise `vec` as an empty vector with room for a few node pointers.
 *
 * Returns 0 on success. Returns -1 if the allocation fails; `vec` is then
 * left empty with no storage (data == NULL, capacity == 0).
 */
int ast_node_vec_construct(ASTNodeVec* vec)
{
    const size_t capacity_start = 4;
    /*
     * Capacity counts elements, so the byte size must be scaled by the
     * element size. Allocating just `capacity_start` bytes is too small for
     * even one pointer on most platforms.
     */
    ASTNode** data = malloc(capacity_start * sizeof *data);
    *vec = (ASTNodeVec) {
        .data = data,
        .length = 0,
        .capacity = data != NULL ? capacity_start : 0,
    };
    if (data == NULL) {
        return -1;
    }
    return 0;
}
/*
 * Release the vector's storage.
 *
 * Only the pointer array is freed; the nodes it pointed to are not touched.
 * The vector is left empty, so destroying it again is harmless.
 */
void ast_node_vec_destroy(ASTNodeVec* vec)
{
    /* free(NULL) is a no-op, so no guard is needed. */
    free(vec->data);
    /* Clear the fields so no dangling pointer or stale count remains. */
    vec->data = NULL;
    vec->length = 0;
    vec->capacity = 0;
}
/*
 * Append `item` to the end of the vector, growing the storage if needed.
 *
 * Returns 0 on success. Returns -1 if the storage cannot be grown; the
 * vector is then unchanged and still valid.
 */
int ast_node_vec_push(ASTNodeVec* vec, ASTNode* item)
{
    if (vec->length >= vec->capacity) {
        /* Largest element count whose byte size still fits in size_t. */
        const size_t max_items = (size_t)-1 / sizeof *vec->data;
        if (vec->capacity > max_items / 2) {
            return -1;
        }
        /* Doubling zero stays zero, so start an empty vector from 4. */
        const size_t new_capacity
            = vec->capacity == 0 ? 4 : vec->capacity * 2;
        /* realloc takes a byte count, not an element count. */
        ASTNode** data = realloc(vec->data, new_capacity * sizeof *data);
        if (data == NULL) {
            return -1;
        }
        /* Commit the new capacity only after the allocation succeeded. */
        vec->data = data;
        vec->capacity = new_capacity;
    }
    vec->data[vec->length] = item;
    vec->length += 1;
    return 0;
}

View File

@ -1,10 +1,18 @@
#ifndef PARSER_H #ifndef PARSER_H
#define PARSER_H #define PARSER_H
#include <stdbool.h>
#include <stddef.h> #include <stddef.h>
/* A location in the source text, as captured by lexer_pos(). */
typedef struct {
/* Offset into the text, copied from the lexer's index. */
size_t index;
/* Line number as tracked by the lexer; the lexer starts counting at 1. */
int line;
/* Column number as tracked by the lexer; the lexer starts counting at 1. */
int col;
} Pos;
typedef enum { typedef enum {
TokenType_Error, TokenType_Error,
TokenType_EOF,
TokenType_Id, TokenType_Id,
TokenType_Int, TokenType_Int,
TokenType_Not, TokenType_Not,
@ -12,6 +20,7 @@ typedef enum {
TokenType_Or, TokenType_Or,
TokenType_Loop, TokenType_Loop,
TokenType_Fn, TokenType_Fn,
TokenType_Return,
TokenType_Break, TokenType_Break,
TokenType_LParen, TokenType_LParen,
TokenType_RParen, TokenType_RParen,
@ -21,25 +30,45 @@ typedef enum {
TokenType_RBracket, TokenType_RBracket,
TokenType_Comma, TokenType_Comma,
TokenType_Semicolon, TokenType_Semicolon,
TokenType_Exclamation,
TokenType_Plus, TokenType_Plus,
TokenType_PlusEqual,
TokenType_Minus, TokenType_Minus,
TokenType_MinusEqual,
TokenType_Asterisk, TokenType_Asterisk,
TokenType_EE, TokenType_AsteriskEqual,
TokenType_NE, TokenType_EqualEqual,
TokenType_Exclamation,
TokenType_ExclamationEqual,
TokenType_LT, TokenType_LT,
TokenType_GT,
TokenType_LTE, TokenType_LTE,
TokenType_GT,
TokenType_GTE, TokenType_GTE,
TokenType_Pipe,
TokenType_PipeGT,
} TokenType; } TokenType;
typedef struct { typedef struct {
TokenType token; TokenType token_type;
size_t index; Pos pos;
size_t length; size_t length;
int line;
} Token; } Token;
/* Scanner state over a text buffer; set up with lexer_construct(). */
typedef struct {
/* Text being scanned; the lexer stores the pointer, not a copy. */
const char* text;
/* Number of characters in `text`. */
size_t text_length;
/* Offset of the current character; the input is done at text_length. */
size_t index;
/* Current line number, starting at 1. */
int line;
/* Current column number, starting at 1. */
int col;
} Lexer;
/* Initialise `lexer` to scan `text_length` characters of `text` from the start. */
void lexer_construct(Lexer* lexer, const char* text, size_t text_length);
/* Scan and return the next token; yields TokenType_EOF at end of input. */
Token lexer_next(Lexer* lexer);
/* Build a token of `token_type` spanning from `pos` to the current index. */
Token lexer_token(Lexer* lexer, TokenType token_type, Pos pos);
/* Advance by one character, updating line/col; no-op at end of input. */
void lexer_step(Lexer* lexer);
/* True once the index has reached the end of the text. */
bool lexer_done(const Lexer* lexer);
/* Character at the current index; performs no bounds check. */
char lexer_current(const Lexer* lexer);
/* Snapshot of the current index, line and column. */
Pos lexer_pos(const Lexer* lexer);
typedef enum { typedef enum {
ASTNodeType_Error, ASTNodeType_Error,
ASTNodeType_Id, ASTNodeType_Id,
@ -65,9 +94,9 @@ typedef struct {
size_t capacity; size_t capacity;
} ASTNodeVec; } ASTNodeVec;
void ast_node_vec_construct(ASTNodeVec* vec); int ast_node_vec_construct(ASTNodeVec* vec);
void ast_node_vec_destroy(ASTNodeVec* vec); void ast_node_vec_destroy(ASTNodeVec* vec);
void ast_node_vec_push(ASTNodeVec* vec, ASTNode* item); int ast_node_vec_push(ASTNodeVec* vec, ASTNode* item);
typedef struct { typedef struct {
ASTNode* condition; ASTNode* condition;