#include "lexer.h"
// NOTE(review): the standard header names were missing from these four
// directives; they are restored from what this file uses (is*() -> ctype.h,
// bool -> stdbool.h, size_t -> stddef.h, strlen/strncmp -> string.h).
#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

// Lexer state: the input buffer plus a cursor into it.
struct Lexer {
    const char* text; // input buffer; only text[0 .. length-1] is read here
    size_t index, length; // cursor offset into text, and number of valid chars
    int line, column; // position of the cursor; both start at 1
};

Token lexer_skip_whitespace(Lexer* lexer);
Token lexer_make_int(Lexer* lexer);
Token lexer_make_id(Lexer* lexer);
bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value);
Token lexer_make_static(Lexer* lexer);
Token make_single_char_token(Lexer* lexer, TokenType type);
Token make_slash_token(Lexer* lexer);
Token lexer_make_invalid_char(Lexer* lexer);
Position lexer_position(const Lexer* lexer);
Token lexer_token(const Lexer* lexer, TokenType type, Position begin);
bool lexer_done(const Lexer* lexer);
char lexer_current(const Lexer* lexer);
void lexer_step(Lexer* lexer);

// Initializes *lexer to scan text_length characters of text, starting at
// index 0 (implicitly zeroed by the compound literal), line 1, column 1.
// The lexer keeps the pointer; it does not copy the text.
void lexer_create(Lexer* lexer, const char* text, size_t text_length)
{
    *lexer = (Lexer) {
        .text = text,
        .length = text_length,
        .line = 1,
        .column = 1,
    };
}

// Returns the next token, advancing the lexer past it. Whitespace is skipped.
// Once the input is exhausted, every call returns a zero-length TokenTypeEof.
Token lexer_next(Lexer* lexer)
{
    // Test for end of input BEFORE looking at the current character:
    // when index == length there is no valid character to read.
    if (lexer_done(lexer))
        return lexer_token(lexer, TokenTypeEof, lexer_position(lexer));

    // <ctype.h> classifiers require a value representable as unsigned char
    // (or EOF); a negative plain char would be undefined behaviour.
    unsigned char c = (unsigned char)lexer_current(lexer);
    if (isspace(c))
        return lexer_skip_whitespace(lexer);
    if (isdigit(c))
        return lexer_make_int(lexer);
    if (isalpha(c) || c == '_')
        return lexer_make_id(lexer);
    return lexer_make_static(lexer);
}

// Consumes a run of whitespace (the caller has already seen the first
// whitespace character) and returns the token that follows it.
Token lexer_skip_whitespace(Lexer* lexer)
{
    lexer_step(lexer);
    while (!lexer_done(lexer) && isspace((unsigned char)lexer_current(lexer)))
        lexer_step(lexer);
    return lexer_next(lexer);
}

// Lexes a run of decimal digits into a TokenTypeInt token. The caller has
// already established that the current character is a digit.
Token lexer_make_int(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    while (!lexer_done(lexer) && isdigit((unsigned char)lexer_current(lexer)))
        lexer_step(lexer);
    return lexer_token(lexer, TokenTypeInt, begin);
}

// Lexes an identifier ([A-Za-z_][A-Za-z0-9_]* per the current locale's
// isalpha/isdigit) and classifies it as a keyword token when the spelling is
// exactly "if", "else", "while" or "break"; otherwise it is a TokenTypeId.
Token lexer_make_id(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    while (!lexer_done(lexer)) {
        unsigned char c = (unsigned char)lexer_current(lexer);
        if (!(isalpha(c) || isdigit(c) || c == '_'))
            break;
        lexer_step(lexer);
    }

    if (lexer_span_matches(lexer, begin, "if"))
        return lexer_token(lexer, TokenTypeIf, begin);
    else if (lexer_span_matches(lexer, begin, "else"))
        return lexer_token(lexer, TokenTypeElse, begin);
    else if (lexer_span_matches(lexer, begin, "while"))
        return lexer_token(lexer, TokenTypeWhile, begin);
    else if (lexer_span_matches(lexer, begin, "break"))
        return lexer_token(lexer, TokenTypeBreak, begin);
    else
        return lexer_token(lexer, TokenTypeId, begin);
}

// Returns true when the text between begin.index and the lexer's current
// index is exactly the NUL-terminated string value (same length, same chars).
bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value)
{
    size_t length = lexer->index - begin.index;
    // Comparing lengths first prevents a prefix (e.g. "i" vs "if") matching.
    if (length != strlen(value))
        return false;
    return strncmp(&lexer->text[begin.index], value, length) == 0;
}

// Lexes punctuation/operator tokens. Any character not listed here becomes
// a TokenTypeInvalidChar token.
Token lexer_make_static(Lexer* lexer)
{
    switch (lexer_current(lexer)) {
        case '(':
            return make_single_char_token(lexer, TokenTypeLParen);
        case ')':
            return make_single_char_token(lexer, TokenTypeRParen);
        case '{':
            return make_single_char_token(lexer, TokenTypeLBrace);
        case '}':
            return make_single_char_token(lexer, TokenTypeRBrace);
        case ';':
            return make_single_char_token(lexer, TokenTypeSemicolon);
        case '+':
            return make_single_char_token(lexer, TokenTypePlus);
        case '-':
            return make_single_char_token(lexer, TokenTypeMinus);
        case '*':
            return make_single_char_token(lexer, TokenTypeAsterisk);
        case '/':
            return make_slash_token(lexer);
        case '%':
            return make_single_char_token(lexer, TokenTypePercent);
        default:
            return lexer_make_invalid_char(lexer);
    }
}

// Consumes exactly one character and returns it as a token of the given type.
Token make_single_char_token(Lexer* lexer, TokenType type)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    return lexer_token(lexer, type, begin);
}

// NOTE(review): neither comment skipper is defined in the part of the file
// visible here; confirm both have definitions elsewhere.
Token skip_singleline_comment(Lexer* lexer);
Token skip_multiline_comment(Lexer* lexer);

// Handles a leading '/': either the start of a "//" comment, which is
// delegated to skip_singleline_comment with the lexer positioned on the
// second '/', or a plain TokenTypeSlash.
// NOTE(review): skip_multiline_comment is declared but never called, so
// "/*" currently lexes as a slash followed by an asterisk.
Token make_slash_token(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    // A '/' that ends the input has no following character to inspect.
    if (!lexer_done(lexer) && lexer_current(lexer) == '/')
        return skip_singleline_comment(lexer);
    return lexer_token(lexer, TokenTypeSlash, begin);
}

// Consumes one unrecognized character and reports it as TokenTypeInvalidChar
// so the caller can diagnose it and keep lexing.
Token lexer_make_invalid_char(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    return lexer_token(lexer, TokenTypeInvalidChar, begin);
}
Position lexer_position(const Lexer* lexer) { return (Position) { .index = lexer->index, .line = lexer->line, .column = lexer->column, }; } Token lexer_token(const Lexer* lexer, TokenType type, Position begin) { return (Token) { .type = type, .position = begin, .length = lexer->index - begin.index, }; } bool lexer_done(const Lexer* lexer) { return lexer->index >= lexer->length; } char lexer_current(const Lexer* lexer) { return lexer->text[lexer->index]; } void lexer_step(Lexer* lexer) { if (lexer_done(lexer)) return; if (lexer_current(lexer) == '\n') { lexer->line += 1; lexer->column = 1; } else { lexer->column += 1; } lexer->index += 1; }