188 lines
5.3 KiB
Plaintext
188 lines
5.3 KiB
Plaintext
#include "lexer.h"
|
|
#include <ctype.h>
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* Cursor state for scanning a text buffer one character at a time. */
struct Lexer {
    const char* text; /* buffer being scanned */
    size_t index;     /* offset of the current character within text */
    size_t length;    /* number of characters in text available to the lexer */
    int line;         /* line of the current character; lexer_create starts it at 1 */
    int column;       /* column of the current character; lexer_create starts it at 1 */
};
|
|
|
|
Token lexer_skip_whitespace(Lexer* lexer);
|
|
Token lexer_make_int(Lexer* lexer);
|
|
Token lexer_make_id(Lexer* lexer);
|
|
bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value);
|
|
Token lexer_make_static(Lexer* lexer);
|
|
Token make_single_char_token(Lexer* lexer, TokenType type);
|
|
Token make_slash_token(Lexer* lexer);
|
|
Token lexer_make_invalid_char(Lexer* lexer);
|
|
Position lexer_position(const Lexer* lexer);
|
|
Token lexer_token(const Lexer* lexer, TokenType type, Position begin);
|
|
bool lexer_done(const Lexer* lexer);
|
|
char lexer_current(const Lexer* lexer);
|
|
void lexer_step(Lexer* lexer);
|
|
|
|
/*
 * Initialise `lexer` to scan `text`, which holds `text_length` characters.
 *
 * The cursor starts at offset 0, on line 1, column 1. The lexer stores the
 * `text` pointer as-is; it does not copy the buffer.
 */
void lexer_create(Lexer* lexer, const char* text, size_t text_length)
{
    lexer->text = text;
    lexer->index = 0;
    lexer->length = text_length;
    lexer->line = 1;
    lexer->column = 1;
}
|
|
|
|
/*
 * Produce the next token from the input.
 *
 * Returns a TokenTypeEof token once the cursor has reached the end of the
 * text. Otherwise dispatches on the current character: whitespace is
 * skipped, a digit starts an integer, a letter or '_' starts an identifier
 * or keyword, and anything else is handled by lexer_make_static().
 */
Token lexer_next(Lexer* lexer)
{
    // Test for end of input BEFORE reading the current character, so the
    // lexer never inspects text[index] with index >= length.
    if (lexer_done(lexer))
        return lexer_token(lexer, TokenTypeEof, lexer_position(lexer));

    // The <ctype.h> classifiers require an argument representable as
    // unsigned char (or EOF); a negative plain char is undefined behavior.
    unsigned char c = (unsigned char)lexer_current(lexer);

    if (isspace(c))
        return lexer_skip_whitespace(lexer);
    if (isdigit(c))
        return lexer_make_int(lexer);
    if (isalpha(c) || c == '_')
        return lexer_make_id(lexer);
    return lexer_make_static(lexer);
}
|
|
|
|
/*
 * Skip a run of whitespace and return the token that follows it.
 *
 * Always consumes the current character first (the caller has already
 * classified it), then keeps stepping while the input has more whitespace.
 * The result comes from lexer_next(), so this never yields a whitespace
 * token itself.
 */
Token lexer_skip_whitespace(Lexer* lexer)
{
    lexer_step(lexer);
    // Cast to unsigned char: isspace() on a negative plain char is undefined.
    while (!lexer_done(lexer)
        && isspace((unsigned char)lexer_current(lexer))) {
        lexer_step(lexer);
    }
    return lexer_next(lexer);
}
|
|
|
|
/*
 * Scan an integer literal starting at the current character.
 *
 * Consumes the current character unconditionally (the caller has already
 * seen that it is a digit) plus every digit that follows, and returns a
 * TokenTypeInt token spanning them.
 */
Token lexer_make_int(Lexer* lexer)
{
    Position begin = lexer_position(lexer);

    lexer_step(lexer);
    // Cast to unsigned char: isdigit() on a negative plain char is undefined.
    while (!lexer_done(lexer)
        && isdigit((unsigned char)lexer_current(lexer))) {
        lexer_step(lexer);
    }
    return lexer_token(lexer, TokenTypeInt, begin);
}
|
|
|
|
/*
 * Scan an identifier or keyword starting at the current character.
 *
 * Consumes the current character unconditionally (the caller has already
 * classified it) plus every following letter, digit or '_'. If the scanned
 * span is exactly "if", "else", "while" or "break", the matching keyword
 * token is returned; otherwise the result is a TokenTypeId token.
 */
Token lexer_make_id(Lexer* lexer)
{
    Position begin = lexer_position(lexer);

    lexer_step(lexer);
    while (!lexer_done(lexer)) {
        // Cast to unsigned char: the <ctype.h> classifiers are undefined
        // for negative plain char values.
        unsigned char c = (unsigned char)lexer_current(lexer);
        // isalnum() is true exactly when isalpha() or isdigit() is true.
        if (!isalnum(c) && c != '_')
            break;
        lexer_step(lexer);
    }

    if (lexer_span_matches(lexer, begin, "if"))
        return lexer_token(lexer, TokenTypeIf, begin);
    if (lexer_span_matches(lexer, begin, "else"))
        return lexer_token(lexer, TokenTypeElse, begin);
    if (lexer_span_matches(lexer, begin, "while"))
        return lexer_token(lexer, TokenTypeWhile, begin);
    if (lexer_span_matches(lexer, begin, "break"))
        return lexer_token(lexer, TokenTypeBreak, begin);
    return lexer_token(lexer, TokenTypeId, begin);
}
|
|
|
|
/*
 * Report whether the text between `begin` and the current cursor position
 * is exactly the NUL-terminated string `value`.
 *
 * The lengths must be equal; a span that merely starts with `value` does
 * not match.
 */
bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value)
{
    size_t span_length = lexer->index - begin.index;

    // Short-circuit keeps the comparison from running on unequal lengths.
    return span_length == strlen(value)
        && strncmp(&lexer->text[begin.index], value, span_length) == 0;
}
|
|
|
|
/*
 * Scan a punctuation/operator token at the current character.
 *
 * '(' ')' '{' '}' ';' '+' '-' '*' '%' each become a single-character token
 * of the corresponding type. '/' is handed to make_slash_token(), and any
 * other character is reported through lexer_make_invalid_char().
 */
Token lexer_make_static(Lexer* lexer)
{
    TokenType single_char_type;

    switch (lexer_current(lexer)) {
        case '(':
            single_char_type = TokenTypeLParen;
            break;
        case ')':
            single_char_type = TokenTypeRParen;
            break;
        case '{':
            single_char_type = TokenTypeLBrace;
            break;
        case '}':
            single_char_type = TokenTypeRBrace;
            break;
        case ';':
            single_char_type = TokenTypeSemicolon;
            break;
        case '+':
            single_char_type = TokenTypePlus;
            break;
        case '-':
            single_char_type = TokenTypeMinus;
            break;
        case '*':
            single_char_type = TokenTypeAsterisk;
            break;
        case '%':
            single_char_type = TokenTypePercent;
            break;
        case '/':
            // '/' needs lookahead, so it has its own routine.
            return make_slash_token(lexer);
        default:
            return lexer_make_invalid_char(lexer);
    }

    return make_single_char_token(lexer, single_char_type);
}
|
|
|
|
/*
 * Consume exactly one character and return a token of `type` that starts
 * at the position the cursor had on entry.
 */
Token make_single_char_token(Lexer* lexer, TokenType type)
{
    const Position start = lexer_position(lexer);

    lexer_step(lexer);

    return lexer_token(lexer, type, start);
}
|
|
|
|
/*
 * Comment-skipping helpers. Only their declarations appear at this point in
 * the file; make_slash_token() calls skip_singleline_comment() when it sees
 * a second '/'.
 */
Token skip_singleline_comment(Lexer* lexer);
Token skip_multiline_comment(Lexer* lexer);
|
|
|
|
/*
 * Handle a '/' at the current character.
 *
 * Consumes the '/'. If another '/' follows, the rest is delegated to
 * skip_singleline_comment() and its result is returned; otherwise a
 * one-character TokenTypeSlash token is returned.
 */
Token make_slash_token(Lexer* lexer)
{
    Position begin = lexer_position(lexer);

    lexer_step(lexer);

    // Guard the lookahead: when '/' was the last character of the input the
    // cursor is now at the end, and reading the current character would
    // index past the `length` characters given to lexer_create().
    if (!lexer_done(lexer) && lexer_current(lexer) == '/')
        return skip_singleline_comment(lexer);

    return lexer_token(lexer, TokenTypeSlash, begin);
}
|
|
|
|
/*
 * Consume one unrecognised character and return a one-character
 * TokenTypeInvalidChar token positioned at it.
 */
Token lexer_make_invalid_char(Lexer* lexer)
{
    // Same record-position / step / build-token sequence as any other
    // single-character token, so reuse that helper.
    return make_single_char_token(lexer, TokenTypeInvalidChar);
}
|
|
|
|
Position lexer_position(const Lexer* lexer)
|
|
{
|
|
return (Position) {
|
|
.index = lexer->index,
|
|
.line = lexer->line,
|
|
.column = lexer->column,
|
|
};
|
|
}
|
|
|
|
/*
 * Build a token of `type` that starts at `begin` and extends up to (but not
 * including) the current cursor position.
 *
 * The token's length is the distance from `begin.index` to the current
 * index, so an EOF token created at the cursor has length 0.
 */
Token lexer_token(const Lexer* lexer, TokenType type, Position begin)
{
    Token token = {
        .type = type,
        .position = begin,
        .length = lexer->index - begin.index,
    };

    return token;
}
|
|
|
|
/* True once the cursor has reached (or passed) the end of the text. */
bool lexer_done(const Lexer* lexer)
{
    return !(lexer->index < lexer->length);
}
|
|
|
|
char lexer_current(const Lexer* lexer) { return lexer->text[lexer->index]; }
|
|
|
|
/*
 * Advance the cursor by one character, keeping line/column in sync.
 *
 * Does nothing at end of input. Stepping over '\n' moves to column 1 of the
 * next line; stepping over any other character advances the column by one.
 */
void lexer_step(Lexer* lexer)
{
    if (lexer_done(lexer))
        return;

    // Classify the character being left behind before moving off it.
    const bool leaving_newline = lexer_current(lexer) == '\n';

    lexer->index += 1;

    if (!leaving_newline) {
        lexer->column += 1;
        return;
    }

    lexer->line += 1;
    lexer->column = 1;
}
|