172 lines
4.1 KiB
C
172 lines
4.1 KiB
C
#include "lexer.h"
|
|
#include "scirpt/lexer.h"
|
|
#include "scirpt/position.h"
|
|
#include "scirpt/token.h"
|
|
#include "utils/stringmap.h"
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// File-local shorthand: advances the lexer by forwarding to
// scirpt_lexer_step.
static inline void step(ScirptLexer* lexer)
{
    scirpt_lexer_step(lexer);
}
|
|
static inline ScirptToken
|
|
token(const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start)
|
|
{
|
|
return scirpt_lexer_token(lexer, type, start);
|
|
}
|
|
// File-local shorthand: returns the lexer's current position by forwarding
// to scirpt_lexer_pos.
static inline ScirptPosition pos(const ScirptLexer* lexer)
{
    const ScirptPosition here = scirpt_lexer_pos(lexer);
    return here;
}
|
|
// File-local shorthand: forwards to scirpt_lexer_current_is to test the
// current character against `value`.
static inline bool current_is(const ScirptLexer* lexer, char value)
{
    const bool matches = scirpt_lexer_current_is(lexer, value);
    return matches;
}
|
|
// File-local shorthand: forwards to scirpt_lexer_done to ask whether the
// lexer has run out of input.
static inline bool done(const ScirptLexer* lexer)
{
    const bool finished = scirpt_lexer_done(lexer);
    return finished;
}
|
|
// File-local shorthand: returns the character under the cursor by
// forwarding to scirpt_lexer_current.
static inline char current(const ScirptLexer* lexer)
{
    const char ch = scirpt_lexer_current(lexer);
    return ch;
}
|
|
|
|
// Heap-allocates a ScirptLexer and initializes it over `text` with
// scirpt_lexer_create.
//
// text:        input buffer; handed to scirpt_lexer_create, which stores the
//              pointer without copying it.
// text_length: number of characters of `text` to lex.
//
// Returns the new lexer, or NULL if the allocation fails. The returned
// pointer is meant to be released with scirpt_lexer_delete.
ScirptLexer* scirpt_lexer_new(const char* text, size_t text_length)
{
    ScirptLexer* lexer = malloc(sizeof *lexer);
    if (lexer == NULL) {
        // Bail out before scirpt_lexer_create writes through the pointer.
        return NULL;
    }
    scirpt_lexer_create(lexer, text, text_length);
    return lexer;
}
|
|
|
|
// Releases the memory block of a heap-allocated lexer.
//
// Only the ScirptLexer allocation itself is freed here; this function does
// not call scirpt_lexer_destroy.
// NOTE(review): that means the keyword map is not released by this function
// alone — confirm callers invoke scirpt_lexer_destroy first.
void scirpt_lexer_delete(ScirptLexer* lexer)
{
    free(lexer);
}
|
|
|
|
static inline void
|
|
add_keyword(StringMap* keywords, const char* key, ScirptTokenType value)
|
|
{
|
|
stringmap_set(keywords, key, strlen(key), value);
|
|
}
|
|
|
|
void scirpt_lexer_create(
|
|
ScirptLexer* lexer, const char* text, size_t text_length
|
|
)
|
|
{
|
|
StringMap* keywords = stringmap_new();
|
|
add_keyword(keywords, "null", ScirptTokenTypeNull);
|
|
add_keyword(keywords, "false", ScirptTokenTypeFalse);
|
|
add_keyword(keywords, "true", ScirptTokenTypeTrue);
|
|
add_keyword(keywords, "let", ScirptTokenTypeLet);
|
|
add_keyword(keywords, "if", ScirptTokenTypeIf);
|
|
add_keyword(keywords, "else", ScirptTokenTypeElse);
|
|
add_keyword(keywords, "while", ScirptTokenTypeWhile);
|
|
add_keyword(keywords, "for", ScirptTokenTypeFor);
|
|
add_keyword(keywords, "in", ScirptTokenTypeIn);
|
|
add_keyword(keywords, "break", ScirptTokenTypeBreak);
|
|
add_keyword(keywords, "fn", ScirptTokenTypeFn);
|
|
add_keyword(keywords, "return", ScirptTokenTypeReturn);
|
|
*lexer = (ScirptLexer) {
|
|
.text = text,
|
|
.text_length = text_length,
|
|
.index = 0,
|
|
.line = 1,
|
|
.col = 1,
|
|
.keywords = keywords,
|
|
};
|
|
}
|
|
|
|
// Tears down the resources held inside `lexer`: the keyword map is handed
// to stringmap_delete. The ScirptLexer object itself is not freed here.
void scirpt_lexer_destroy(ScirptLexer* lexer)
{
    StringMap* keywords = lexer->keywords;
    stringmap_delete(keywords);
}
|
|
|
|
// Returns true for the four characters the lexer skips between tokens:
// space, horizontal tab, carriage return and line feed.
static inline bool is_whitespace(char value)
{
    switch (value) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
            return true;
        default:
            return false;
    }
}
|
|
|
|
// Returns true if `value` may start an identifier: an ASCII letter
// ('a'-'z' or 'A'-'Z') or an underscore. Digits are rejected.
static inline bool is_id_char_excluding_numbers(char value)
{
    if (value == '_') {
        return true;
    }
    const bool is_lower = 'a' <= value && value <= 'z';
    const bool is_upper = 'A' <= value && value <= 'Z';
    return is_lower || is_upper;
}
|
|
|
|
// Returns true if `value` is one of the decimal digits '0' through '9'.
static inline bool is_int_char(char value)
{
    const bool outside_digits = value < '0' || value > '9';
    return !outside_digits;
}
|
|
|
|
// Returns true if `value` may appear inside an identifier after its first
// character: a letter, an underscore, or a decimal digit.
static inline bool is_id_char(char value)
{
    if (is_int_char(value)) {
        return true;
    }
    return is_id_char_excluding_numbers(value);
}
|
|
|
|
// Scans and returns the next token from `lexer`, advancing its cursor.
//
// Leading whitespace is skipped. The result is then one of:
//   - ScirptTokenTypeEof          when the input is exhausted (zero length,
//                                 positioned at the current cursor);
//   - ScirptTokenTypeId           for a run that starts with a letter or '_'
//                                 and continues with letters, '_' or digits;
//   - ScirptTokenTypeInvalidChar  for any other single character.
//
// Identifiers are always reported as ScirptTokenTypeId; the keyword map is
// not consulted in this function.
ScirptToken scirpt_lexer_next(ScirptLexer* lexer)
{
    // Skip any whitespace in front of the token.
    while (!done(lexer) && is_whitespace(current(lexer))) {
        step(lexer);
    }

    if (done(lexer)) {
        return token(lexer, ScirptTokenTypeEof, pos(lexer));
    }

    const ScirptPosition start = pos(lexer);

    if (is_id_char_excluding_numbers(current(lexer))) {
        // Consume the first character, then every following id character.
        do {
            step(lexer);
        } while (!done(lexer) && is_id_char(current(lexer)));
        return token(lexer, ScirptTokenTypeId, start);
    }

    // No other token kind is recognized: emit the character as invalid.
    step(lexer);
    return token(lexer, ScirptTokenTypeInvalidChar, start);
}
|
|
|
|
// Advances the lexer by one character and updates the line/column
// bookkeeping.
//
// The line and column are derived from the character being consumed, not
// the one stepped onto: consuming '\n' moves to column 1 of the next line,
// consuming anything else moves one column to the right. This keeps the
// first character of every line at column 1 and places a newline at the end
// of the line it terminates.
//
// If the lexer is already at the end of input, only `index` is incremented
// and line/col are left untouched.
void scirpt_lexer_step(ScirptLexer* lexer)
{
    if (done(lexer)) {
        // No character to inspect; reading text[index] would be out of range.
        lexer->index++;
        return;
    }

    // Must be read before index moves past the consumed character.
    const bool consumed_newline = current(lexer) == '\n';
    lexer->index++;

    if (consumed_newline) {
        lexer->line++;
        lexer->col = 1;
    } else {
        lexer->col++;
    }
}
|
|
ScirptPosition scirpt_lexer_pos(const ScirptLexer* lexer)
|
|
{
|
|
return (ScirptPosition) {
|
|
.index = lexer->index,
|
|
.line = lexer->line,
|
|
.col = lexer->col,
|
|
};
|
|
}
|
|
|
|
ScirptToken scirpt_lexer_token(
|
|
const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start
|
|
)
|
|
{
|
|
return (ScirptToken) {
|
|
.type = type,
|
|
.pos = start,
|
|
.length = lexer->index - start.index,
|
|
};
|
|
}
|
|
|
|
// Returns true if input remains and the character under the cursor equals
// `value`; returns false at end of input without reading the text.
bool scirpt_lexer_current_is(const ScirptLexer* lexer, char value)
{
    if (done(lexer)) {
        return false;
    }
    return current(lexer) == value;
}
|
|
|
|
bool scirpt_lexer_done(const ScirptLexer* lexer)
|
|
{
|
|
return lexer->index >= lexer->text_length;
|
|
}
|
|
|
|
char scirpt_lexer_current(const ScirptLexer* lexer)
|
|
{
|
|
return lexer->text[lexer->index];
|
|
}
|