#include "lexer.h"
#include "scirpt/lexer.h"
#include "scirpt/position.h"
#include "scirpt/token.h"
#include "utils/stringmap.h"
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

/*
 * Short file-local aliases for the scirpt_lexer_* primitives defined at the
 * bottom of this file; they only exist to keep the tokenizer readable.
 */

static inline void step(ScirptLexer* lexer) { scirpt_lexer_step(lexer); }

static inline ScirptToken
token(const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start)
{
    return scirpt_lexer_token(lexer, type, start);
}

static inline ScirptPosition pos(const ScirptLexer* lexer)
{
    return scirpt_lexer_pos(lexer);
}

static inline bool current_is(const ScirptLexer* lexer, char value)
{
    return scirpt_lexer_current_is(lexer, value);
}

static inline bool done(const ScirptLexer* lexer)
{
    return scirpt_lexer_done(lexer);
}

static inline char current(const ScirptLexer* lexer)
{
    return scirpt_lexer_current(lexer);
}

/*
 * Allocates a lexer on the heap and initializes it over `text`.
 *
 * The lexer stores the `text` pointer as-is (no copy is made), so the buffer
 * must outlive the lexer.
 *
 * Returns NULL if the allocation fails. Release with scirpt_lexer_delete().
 */
ScirptLexer* scirpt_lexer_new(const char* text, size_t text_length)
{
    ScirptLexer* lexer = malloc(sizeof *lexer);
    if (lexer == NULL) {
        return NULL;
    }
    scirpt_lexer_create(lexer, text, text_length);
    return lexer;
}

/*
 * Frees a lexer obtained from scirpt_lexer_new(), including its keyword map.
 *
 * Passing NULL is a no-op. Calling scirpt_lexer_destroy() beforehand is still
 * allowed: destroy clears the map pointer, so the map is not released twice.
 */
void scirpt_lexer_delete(ScirptLexer* lexer)
{
    if (lexer == NULL) {
        return;
    }
    if (lexer->keywords != NULL) {
        scirpt_lexer_destroy(lexer);
    }
    free(lexer);
}

/* Registers the NUL-terminated string `key` as a keyword mapped to `value`. */
static inline void
add_keyword(StringMap* keywords, const char* key, ScirptTokenType value)
{
    stringmap_set(keywords, key, strlen(key), value);
}

/*
 * Initializes `lexer` in place: builds the keyword map and positions the
 * lexer at index 0, line 1, column 1 of `text`.
 *
 * Pair with scirpt_lexer_destroy() to release the keyword map.
 */
void scirpt_lexer_create(
    ScirptLexer* lexer, const char* text, size_t text_length
)
{
    static const struct {
        const char* text;
        ScirptTokenType type;
    } keyword_table[] = {
        { "null", ScirptTokenTypeNull },
        { "false", ScirptTokenTypeFalse },
        { "true", ScirptTokenTypeTrue },
        { "let", ScirptTokenTypeLet },
        { "if", ScirptTokenTypeIf },
        { "else", ScirptTokenTypeElse },
        { "while", ScirptTokenTypeWhile },
        { "for", ScirptTokenTypeFor },
        { "in", ScirptTokenTypeIn },
        { "break", ScirptTokenTypeBreak },
        { "fn", ScirptTokenTypeFn },
        { "return", ScirptTokenTypeReturn },
    };
    const size_t keyword_count
        = sizeof keyword_table / sizeof keyword_table[0];

    StringMap* keywords = stringmap_new();
    for (size_t i = 0; i < keyword_count; i++) {
        add_keyword(keywords, keyword_table[i].text, keyword_table[i].type);
    }

    *lexer = (ScirptLexer) {
        .text = text,
        .text_length = text_length,
        .index = 0,
        .line = 1,
        .col = 1,
        .keywords = keywords,
    };
}

/*
 * Releases the keyword map owned by `lexer`; the lexer struct itself is not
 * freed. The map pointer is cleared afterwards so that a following
 * scirpt_lexer_delete() does not release it a second time.
 */
void scirpt_lexer_destroy(ScirptLexer* lexer)
{
    stringmap_delete(lexer->keywords);
    lexer->keywords = NULL;
}

/* True for space, tab, carriage return and line feed. */
static inline bool is_whitespace(char value)
{
    return value == ' ' || value == '\t' || value == '\r' || value == '\n';
}

/* True for characters that may start an identifier: [A-Za-z_]. */
static inline bool is_id_char_excluding_numbers(char value)
{
    return (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z')
        || value == '_';
}

/* True for the decimal digits '0'..'9'. */
static inline bool is_int_char(char value)
{
    return value >= '0' && value <= '9';
}

/* True for characters that may continue an identifier: [A-Za-z0-9_]. */
static inline bool is_id_char(char value)
{
    return is_id_char_excluding_numbers(value) || is_int_char(value);
}

/*
 * Produces the next token from the input.
 *
 * Leading whitespace is skipped. At end of input a ScirptTokenTypeEof token
 * is returned; a run of identifier characters yields ScirptTokenTypeId; any
 * other single character yields ScirptTokenTypeInvalidChar.
 *
 * NOTE(review): identifiers are not looked up in lexer->keywords here, so
 * keyword spellings are also returned as ScirptTokenTypeId. Confirm whether
 * the lookup is meant to happen in this function.
 */
ScirptToken scirpt_lexer_next(ScirptLexer* lexer)
{
    while (!done(lexer) && is_whitespace(current(lexer))) {
        step(lexer);
    }

    if (done(lexer)) {
        return token(lexer, ScirptTokenTypeEof, pos(lexer));
    }

    const ScirptPosition start = pos(lexer);

    if (is_id_char_excluding_numbers(current(lexer))) {
        step(lexer);
        while (!done(lexer) && is_id_char(current(lexer))) {
            step(lexer);
        }
        return token(lexer, ScirptTokenTypeId, start);
    }

    switch (current(lexer)) {
        default:
            /* No punctuation is recognized yet: consume one character. */
            step(lexer);
            return token(lexer, ScirptTokenTypeInvalidChar, start);
    }
}

/*
 * Advances the lexer by one character and updates line/column tracking.
 *
 * The character being consumed decides the new position: stepping over '\n'
 * moves to column 1 of the next line, anything else moves one column right.
 * Does nothing when the lexer is already at end of input, so `index` never
 * exceeds `text_length`.
 */
void scirpt_lexer_step(ScirptLexer* lexer)
{
    if (done(lexer)) {
        return;
    }

    const char consumed = current(lexer);
    lexer->index++;

    if (consumed == '\n') {
        lexer->line++;
        lexer->col = 1;
    } else {
        lexer->col++;
    }
}

/* Returns a snapshot of the lexer's current index, line and column. */
ScirptPosition scirpt_lexer_pos(const ScirptLexer* lexer)
{
    return (ScirptPosition) {
        .index = lexer->index,
        .line = lexer->line,
        .col = lexer->col,
    };
}

/*
 * Builds a token of `type` that begins at `start` and extends up to (not
 * including) the lexer's current index.
 */
ScirptToken scirpt_lexer_token(
    const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start
)
{
    return (ScirptToken) {
        .type = type,
        .pos = start,
        .length = lexer->index - start.index,
    };
}

/* True when input remains and the current character equals `value`. */
bool scirpt_lexer_current_is(const ScirptLexer* lexer, char value)
{
    return !done(lexer) && current(lexer) == value;
}

/* True once every character of the input has been consumed. */
bool scirpt_lexer_done(const ScirptLexer* lexer)
{
    return lexer->index >= lexer->text_length;
}

/*
 * Returns the character at the current index, or '\0' at end of input so the
 * text buffer is never read out of bounds.
 */
char scirpt_lexer_current(const ScirptLexer* lexer)
{
    if (done(lexer)) {
        return '\0';
    }
    return lexer->text[lexer->index];
}