codebased/scirpt/lexer.c
2023-04-12 01:59:42 +02:00

172 lines
4.1 KiB
C

#include "lexer.h"
#include "scirpt/lexer.h"
#include "scirpt/position.h"
#include "scirpt/token.h"
#include "utils/stringmap.h"
#include <stdlib.h>
#include <string.h>
// File-local shorthand: forwards to scirpt_lexer_step().
static inline void step(ScirptLexer* lexer)
{
    scirpt_lexer_step(lexer);
}
// File-local shorthand: forwards to scirpt_lexer_token() and hands back
// the token it produced.
static inline ScirptToken
token(const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start)
{
    const ScirptToken made = scirpt_lexer_token(lexer, type, start);
    return made;
}
// File-local shorthand: forwards to scirpt_lexer_pos().
static inline ScirptPosition pos(const ScirptLexer* lexer)
{
    const ScirptPosition here = scirpt_lexer_pos(lexer);
    return here;
}
// File-local shorthand: forwards to scirpt_lexer_current_is().
static inline bool current_is(const ScirptLexer* lexer, char value)
{
    const bool matches = scirpt_lexer_current_is(lexer, value);
    return matches;
}
// File-local shorthand: forwards to scirpt_lexer_done().
static inline bool done(const ScirptLexer* lexer)
{
    const bool at_end = scirpt_lexer_done(lexer);
    return at_end;
}
// File-local shorthand: forwards to scirpt_lexer_current().
static inline char current(const ScirptLexer* lexer)
{
    const char ch = scirpt_lexer_current(lexer);
    return ch;
}
// Allocates a ScirptLexer on the heap and initialises it with
// scirpt_lexer_create() for the given text.
//
// Returns the new lexer, or NULL if the allocation fails (in which case
// nothing is initialised). The result is released with
// scirpt_lexer_delete().
ScirptLexer* scirpt_lexer_new(const char* text, size_t text_length)
{
    ScirptLexer* lexer = malloc(sizeof *lexer);
    if (lexer == NULL) {
        // Do not hand a null pointer to scirpt_lexer_create(), which
        // writes through it unconditionally.
        return NULL;
    }
    scirpt_lexer_create(lexer, text, text_length);
    return lexer;
}
// Releases the storage of a lexer pointer with free().
//
// Only the ScirptLexer object itself is freed here; scirpt_lexer_destroy()
// is not called by this function.
// NOTE(review): scirpt_lexer_new() runs scirpt_lexer_create(), which
// allocates the keyword map, so callers presumably have to call
// scirpt_lexer_destroy() before this function or the map leaks — confirm
// against call sites.
void scirpt_lexer_delete(ScirptLexer* lexer)
{
    free(lexer);
}
// Registers one keyword spelling in the map, associating the
// NUL-terminated string `key` (its length measured with strlen) with the
// token type `value`.
static inline void
add_keyword(StringMap* keywords, const char* key, ScirptTokenType value)
{
    const size_t key_length = strlen(key);
    stringmap_set(keywords, key, key_length, value);
}
// Initialises `*lexer` in place for scanning `text` (`text_length` chars).
//
// A fresh keyword map is created with stringmap_new() and filled with the
// language's reserved words. The lexer stores the `text` pointer as given
// (the characters are not copied) and starts at index 0, line 1, column 1.
// The whole struct is overwritten; members not listed below are zeroed.
void scirpt_lexer_create(
    ScirptLexer* lexer, const char* text, size_t text_length
)
{
    // Reserved words, registered in this order.
    static const struct {
        const char* spelling;
        ScirptTokenType type;
    } reserved[] = {
        { "null", ScirptTokenTypeNull },
        { "false", ScirptTokenTypeFalse },
        { "true", ScirptTokenTypeTrue },
        { "let", ScirptTokenTypeLet },
        { "if", ScirptTokenTypeIf },
        { "else", ScirptTokenTypeElse },
        { "while", ScirptTokenTypeWhile },
        { "for", ScirptTokenTypeFor },
        { "in", ScirptTokenTypeIn },
        { "break", ScirptTokenTypeBreak },
        { "fn", ScirptTokenTypeFn },
        { "return", ScirptTokenTypeReturn },
    };
    const size_t reserved_count = sizeof reserved / sizeof reserved[0];

    StringMap* keywords = stringmap_new();
    for (size_t i = 0; i < reserved_count; ++i) {
        add_keyword(keywords, reserved[i].spelling, reserved[i].type);
    }

    const ScirptLexer initial = {
        .text = text,
        .text_length = text_length,
        .index = 0,
        .line = 1,
        .col = 1,
        .keywords = keywords,
    };
    *lexer = initial;
}
// Releases the resources held inside `*lexer` — the keyword map — without
// freeing the ScirptLexer object itself.
//
// The `keywords` member is reset to NULL afterwards so the lexer does not
// keep a dangling pointer, and calling this function again on the same
// lexer does nothing instead of deleting the map twice.
void scirpt_lexer_destroy(ScirptLexer* lexer)
{
    if (lexer->keywords != NULL) {
        stringmap_delete(lexer->keywords);
        lexer->keywords = NULL;
    }
}
// True for the characters the lexer skips between tokens: space, tab,
// carriage return and line feed.
static inline bool is_whitespace(char value)
{
    switch (value) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
// True for characters that may start an identifier: ASCII letters of
// either case and the underscore. Digits are excluded.
static inline bool is_id_char_excluding_numbers(char value)
{
    if (value == '_') {
        return true;
    }
    if (value >= 'a' && value <= 'z') {
        return true;
    }
    return value >= 'A' && value <= 'Z';
}
// True for the ASCII decimal digits '0' through '9'.
static inline bool is_int_char(char value)
{
    const bool below_zero = value < '0';
    const bool above_nine = value > '9';
    return !(below_zero || above_nine);
}
// True for characters allowed after the first character of an identifier:
// everything is_id_char_excluding_numbers() accepts, plus decimal digits.
static inline bool is_id_char(char value)
{
    if (is_id_char_excluding_numbers(value)) {
        return true;
    }
    return is_int_char(value);
}
// Scans and returns the next token from the lexer's text.
//
// Leading whitespace is skipped. After that:
//   - at end of input, an Eof token positioned at the end is returned;
//   - a letter or underscore starts an identifier, which extends over all
//     following letters, digits and underscores, returned as an Id token;
//   - any other character is consumed on its own and returned as an
//     InvalidChar token.
//
// NOTE(review): the keyword map is not consulted in this function, so
// reserved words are reported as Id tokens as well.
ScirptToken scirpt_lexer_next(ScirptLexer* lexer)
{
    // Discard the whitespace run in front of the token, if any.
    while (!done(lexer) && is_whitespace(current(lexer))) {
        step(lexer);
    }

    if (done(lexer)) {
        return token(lexer, ScirptTokenTypeEof, pos(lexer));
    }

    // Every remaining token kind consumes at least its first character.
    const ScirptPosition start = pos(lexer);
    const char first = current(lexer);
    step(lexer);

    if (!is_id_char_excluding_numbers(first)) {
        return token(lexer, ScirptTokenTypeInvalidChar, start);
    }

    while (!done(lexer) && is_id_char(current(lexer))) {
        step(lexer);
    }
    return token(lexer, ScirptTokenTypeId, start);
}
// Advances the lexer by one character, keeping line and column in sync
// with the index.
//
// The character being stepped over decides the new position: leaving a
// '\n' moves to column 1 of the next line, leaving any other character
// moves one column to the right. This way line/col always describe the
// character at `index` (the first character after a newline is column 1,
// and the newline itself still belongs to the line it terminates).
//
// Stepping when the lexer is already at the end of the text does nothing,
// so the index never runs past text_length.
void scirpt_lexer_step(ScirptLexer* lexer)
{
    if (done(lexer)) {
        return;
    }
    // Inspect the current character before moving off it.
    if (current(lexer) == '\n') {
        lexer->line++;
        lexer->col = 1;
    } else {
        lexer->col++;
    }
    lexer->index++;
}
ScirptPosition scirpt_lexer_pos(const ScirptLexer* lexer)
{
return (ScirptPosition) {
.index = lexer->index,
.line = lexer->line,
.col = lexer->col,
};
}
// Builds a token of the given type that begins at `start` and ends at the
// lexer's current index; its length is the distance between the two
// indices.
ScirptToken scirpt_lexer_token(
    const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start
)
{
    const ScirptToken result = {
        .type = type,
        .pos = start,
        .length = lexer->index - start.index,
    };
    return result;
}
// Tells whether the lexer still has input and its current character
// equals `value`. At end of input the answer is always false and the text
// is not read.
bool scirpt_lexer_current_is(const ScirptLexer* lexer, char value)
{
    if (done(lexer)) {
        return false;
    }
    return current(lexer) == value;
}
// Tells whether the lexer has consumed all of its text, i.e. its index is
// no longer below text_length.
bool scirpt_lexer_done(const ScirptLexer* lexer)
{
    const bool has_more = lexer->index < lexer->text_length;
    return !has_more;
}
// Returns the character at the lexer's current index.
//
// No bounds check is made here: the caller is expected to have verified
// with scirpt_lexer_done() that the index is still inside the text.
char scirpt_lexer_current(const ScirptLexer* lexer)
{
    const char* cursor = lexer->text + lexer->index;
    return *cursor;
}