add lexer
This commit is contained in:
parent
6ede00be64
commit
edec4a2323
271
src/main.rs
271
src/main.rs
@ -1,23 +1,59 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
use std::str::Chars;
|
||||
|
||||
/// Every kind of token the lexer can emit.
///
/// The enum carries no payload, so it is cheap to copy and compare; the
/// lexeme itself is recovered from the token's position and length.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenType {
    // Error tokens, emitted in place of a real token when the input is bad.
    InvalidChar,
    MalformedString,
    MalformedComment,

    // Names and literals.
    Id,
    Int,
    Decimal,
    String,

    // Keywords.
    False,
    True,
    If,
    Else,
    While,
    For,
    In,
    Break,
    Continue,
    Function,
    Return,
    End,
    Underscore,

    // Arithmetic operators.
    Plus,
    Minus,
    Asterisk,
    Slash,
    Percent,

    // Compound-assignment operators.
    PlusEqual,
    MinusEqual,
    AsteriskEqual,
    SlashEqual,
    PercentEqual,

    // Brackets.
    LParen,
    RParen,
    LBrace,
    RBrace,
    LBracket,
    RBracket,

    // Punctuation.
    Dot,
    Comma,
    Colon,
    Semicolon,
}
|
||||
|
||||
/// A location in the source text.
///
/// All fields are plain integers, so the struct is `Copy` and can be compared
/// directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Position {
    /// Offset into the source text; the lexer uses it to slice lexemes out of
    /// the input and to compute token lengths.
    index: usize,
    /// Line number; the lexer starts counting at 1.
    line: i32,
    /// Column number; the lexer starts at 1 and resets it to 1 after a newline.
    col: i32,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Token {
|
||||
token_type: TokenType,
|
||||
pos: Position,
|
||||
@ -26,12 +62,226 @@ struct Token {
|
||||
|
||||
/// A lexer that walks a borrowed source string one character at a time.
struct Lexer<'a> {
    /// The complete input; kept so lexemes can be sliced out of it.
    text: &'a str,
    /// The characters that follow `current_char`.
    chars: Chars<'a>,
    /// The character under the cursor, or `None` once the input is exhausted.
    current_char: Option<char>,
    /// Offset of the cursor into `text`.
    index: usize,
    /// Line of the cursor, starting at 1.
    line: i32,
    /// Column of the cursor, starting at 1.
    col: i32,
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Creates a lexer positioned on the first character of `text`.
///
/// The cursor starts at index 0, line 1, column 1. For empty input the
/// current character is `None` and the lexer is immediately done.
pub fn new(text: &'a str) -> Self {
    let mut chars = text.chars();
    // Pull the first character eagerly so `current_char` is always the one
    // under the cursor and `chars` holds only what follows it.
    let current_char = chars.next();
    Self {
        text,
        chars,
        current_char,
        index: 0,
        line: 1,
        col: 1,
    }
}
|
||||
|
||||
fn next_token(&mut self) -> Option<Token> {
|
||||
if self.done() {
|
||||
return None;
|
||||
}
|
||||
match self.current() {
|
||||
' ' | '\t' | '\r' | '\n' => self.skip_whitespace(),
|
||||
'1'..='9' => Some(self.int_token()),
|
||||
'a'..='z' | 'A'..='Z' | '_' => Some(self.id_token()),
|
||||
'"' => Some(self.string_token()),
|
||||
'+' => {
|
||||
Some(self.single_or_double_char_token(TokenType::Plus, '=', TokenType::PlusEqual))
|
||||
}
|
||||
'-' => {
|
||||
Some(self.single_or_double_char_token(TokenType::Minus, '=', TokenType::MinusEqual))
|
||||
}
|
||||
'*' => Some(self.single_or_double_char_token(
|
||||
TokenType::Asterisk,
|
||||
'=',
|
||||
TokenType::AsteriskEqual,
|
||||
)),
|
||||
'/' => self.slash_token(),
|
||||
'%' => Some(self.single_or_double_char_token(
|
||||
TokenType::Percent,
|
||||
'=',
|
||||
TokenType::PercentEqual,
|
||||
)),
|
||||
'(' => Some(self.step_and_token(TokenType::LParen, self.pos())),
|
||||
')' => Some(self.step_and_token(TokenType::LParen, self.pos())),
|
||||
'.' => Some(self.dot_token()),
|
||||
_ => Some(self.step_and_token(TokenType::InvalidChar, self.pos())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Consumes a run of spaces, tabs, carriage returns and newlines, then
/// returns whatever token follows (or `None` at end of input).
fn skip_whitespace(&mut self) -> Option<Token> {
    while matches!(
        self.current_char,
        Some(' ') | Some('\t') | Some('\r') | Some('\n')
    ) {
        self.step();
    }
    self.next_token()
}
|
||||
|
||||
fn int_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
while !self.done() && matches!(self.current(), '0'..='9') {
|
||||
self.step();
|
||||
}
|
||||
self.token(TokenType::Int, start)
|
||||
}
|
||||
|
||||
fn string_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
let mut escaped = false;
|
||||
while !self.done() && (self.current() != '"' || escaped) {
|
||||
escaped = self.current() == '\\' && !escaped;
|
||||
self.step();
|
||||
}
|
||||
if self.done() || self.current() != '"' {
|
||||
self.step_and_token(TokenType::MalformedString, start)
|
||||
} else {
|
||||
self.step_and_token(TokenType::String, start)
|
||||
}
|
||||
}
|
||||
|
||||
fn id_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
while !self.done() && matches!(self.current(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
|
||||
self.step();
|
||||
}
|
||||
match &self.text[start.index..self.index] {
|
||||
"false" => self.token(TokenType::False, start),
|
||||
"true" => self.token(TokenType::True, start),
|
||||
"if" => self.token(TokenType::True, start),
|
||||
"else" => self.token(TokenType::True, start),
|
||||
"while" => self.token(TokenType::True, start),
|
||||
"for" => self.token(TokenType::True, start),
|
||||
"in" => self.token(TokenType::True, start),
|
||||
"break" => self.token(TokenType::True, start),
|
||||
"continue" => self.token(TokenType::True, start),
|
||||
"function" => self.token(TokenType::True, start),
|
||||
"return" => self.token(TokenType::True, start),
|
||||
"end" => self.token(TokenType::True, start),
|
||||
"underscore" => self.token(TokenType::True, start),
|
||||
_ => self.token(TokenType::Id, start),
|
||||
}
|
||||
}
|
||||
|
||||
fn single_or_double_char_token(
|
||||
&mut self,
|
||||
single_type: TokenType,
|
||||
double_char: char,
|
||||
double_type: TokenType,
|
||||
) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && self.current() == double_char {
|
||||
self.step_and_token(double_type, start)
|
||||
} else {
|
||||
self.token(single_type, start)
|
||||
}
|
||||
}
|
||||
|
||||
fn slash_token(&mut self) -> Option<Token> {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && self.current() == '/' {
|
||||
self.step();
|
||||
while !self.done() && self.current() != '\n' {
|
||||
self.step();
|
||||
}
|
||||
self.next_token()
|
||||
} else if !self.done() && self.current() == '*' {
|
||||
self.step();
|
||||
let mut depth = 1;
|
||||
let mut last_char: Option<char> = None;
|
||||
while !self.done() {
|
||||
match (last_char, self.current()) {
|
||||
(Some('/'), '*') => {
|
||||
depth += 1;
|
||||
}
|
||||
(Some('*'), '/') => {
|
||||
depth -= 1;
|
||||
if depth == 0 {
|
||||
self.step();
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
last_char = Some(self.current());
|
||||
self.step();
|
||||
}
|
||||
if depth != 0 {
|
||||
Some(self.token(TokenType::MalformedComment, start))
|
||||
} else {
|
||||
self.next_token()
|
||||
}
|
||||
} else if !self.done() && self.current() == '=' {
|
||||
return Some(self.step_and_token(TokenType::SlashEqual, start));
|
||||
} else {
|
||||
return Some(self.token(TokenType::Slash, start));
|
||||
}
|
||||
}
|
||||
|
||||
fn dot_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && matches!(self.current(), '0'..='9') {
|
||||
self.step();
|
||||
while !self.done() && matches!(self.current(), '0'..='9') {
|
||||
self.step();
|
||||
}
|
||||
self.token(TokenType::Decimal, start)
|
||||
} else {
|
||||
self.token(TokenType::Dot, start)
|
||||
}
|
||||
}
|
||||
|
||||
/// Consumes the character under the cursor, then builds a token of
/// `token_type` spanning from `start` to the new cursor position.
fn step_and_token(&mut self, token_type: TokenType, start: Position) -> Token {
    // Advance first so the consumed character is counted in the length.
    self.step();
    self.token(token_type, start)
}
|
||||
|
||||
fn token(&self, token_type: TokenType, start: Position) -> Token {
|
||||
Token {
|
||||
token_type,
|
||||
length: self.index - start.index,
|
||||
pos: start,
|
||||
}
|
||||
}
|
||||
|
||||
fn pos(&self) -> Position {
|
||||
Position {
|
||||
index: self.index,
|
||||
line: self.line,
|
||||
col: self.col,
|
||||
}
|
||||
}
|
||||
|
||||
fn done(&self) -> bool {
|
||||
self.current_char.is_none()
|
||||
}
|
||||
|
||||
fn current(&self) -> char {
|
||||
self.current_char.expect("done() not checked")
|
||||
}
|
||||
|
||||
fn step(&mut self) {
|
||||
self.index += 1;
|
||||
if !self.done() {
|
||||
if self.current() == '\n' {
|
||||
self.line += 1;
|
||||
self.col = 1;
|
||||
} else {
|
||||
self.col += 1;
|
||||
}
|
||||
}
|
||||
self.current_char = self.chars.next();
|
||||
}
|
||||
}
|
||||
|
||||
@ -39,10 +289,21 @@ impl<'a> Iterator for Lexer<'a> {
|
||||
type Item = Token;
|
||||
|
||||
/// Yields the next token by delegating to `next_token`; `None` ends the
/// iteration once the input is exhausted.
fn next(&mut self) -> Option<Self::Item> {
    self.next_token()
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
println!("Hello, world!");
|
||||
enum Expr {
|
||||
Id(String),
|
||||
Int(i64),
|
||||
Float(f64),
|
||||
String(String),
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let text = "3.14 \"foo\" false true ( ) + += /* 1 /* 2 */ 3 */ // 4 \n 5";
|
||||
let lexer = Lexer::new(text);
|
||||
lexer.for_each(|token| {
|
||||
println!("{:?}", token);
|
||||
})
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user