lexer code
This commit is contained in:
parent
91b2f4b43f
commit
67ed64e3b4
@ -1,68 +1,278 @@
|
||||
use std::{iter::Peekable, str::Chars};
|
||||
/// The kinds of failure the lexer can report.
///
/// `Debug` lets a `Result` carrying this error be unwrapped or printed;
/// `PartialEq`/`Eq` let callers and tests compare error kinds directly.
#[derive(Debug, Clone, PartialEq, Eq)]
enum LexerErrorType {
    /// A character was read that the lexer does not accept in its current mode.
    UnexpectedToken(char),
    /// A token was requested for a mode that has no token constructor.
    InvalidConstructor,
}
|
||||
|
||||
pub enum TokenType {
|
||||
SinglelineWhitespace,
|
||||
MultilineWhitespace,
|
||||
SinglelineComment,
|
||||
MultilineComment,
|
||||
/// A lexing failure together with the position counters it was raised at.
struct LexerError {
|
||||
/// What kind of failure occurred.
error: LexerErrorType,
|
||||
/// Line counter at the time of the error; `lex` starts it at 0 and bumps it on every `'\n'`.
line: isize,
|
||||
/// Column counter at the time of the error; `lex` starts it at 0 and resets it after every `'\n'`.
col: isize,
|
||||
}
|
||||
|
||||
/// A lexed token. Every variant owns the text that was buffered for it.
///
/// `Debug`, `Clone` and `PartialEq` make token streams printable, copyable
/// and directly comparable (for example with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// Text buffered while the lexer was in `Mode::Name`.
    Name(String),
    /// Text buffered while the lexer was in `Mode::Class` (entered on a `.`).
    Class(String),
    /// Whitespace buffered without encountering a newline.
    SlWhitespace(String),
    /// Whitespace buffered after at least one newline was encountered.
    MlWhitespace(String),
    /// Text buffered while the lexer was in `Mode::SlComment` (entered on a `/`).
    SlComment(String),
    /// A double-quoted string, including both quote characters.
    String(String),
    /// An opening brace, `{`.
    LBrace(String),
    /// A closing brace, `}`.
    RBrace(String),
}
|
||||
|
||||
/// Lexer state: which kind of token the characters being read are buffered for.
///
/// NOTE(review): `Id`, `Int`, `Float`, `Null`, `False`, `True`, `LBrace`, `RBrace`,
/// `LBracket`, `RBracket`, `Equal`, `Colon`, `SemiColon` and `Comma` are never matched
/// by `Mode::token_constructor` or by `lex`; they look like lines of the removed
/// `TokenType` enum interleaved by the diff view — confirm before treating them as
/// `Mode` variants.
enum Mode {
|
||||
/// Buffering a `Token::Name`.
Name,
|
||||
Id,
|
||||
/// Buffering a `Token::Class`.
Class,
|
||||
|
||||
Int,
|
||||
Float,
|
||||
/// Inside a double-quoted string (buffering a `Token::String`).
String,
|
||||
Null,
|
||||
False,
|
||||
True,
|
||||
|
||||
LBrace,
|
||||
RBrace,
|
||||
LBracket,
|
||||
RBracket,
|
||||
|
||||
Equal,
|
||||
Colon,
|
||||
SemiColon,
|
||||
Comma,
|
||||
/// Inside a string, immediately after a backslash; has no token constructor of its own.
EscapedString,
|
||||
/// Buffering a `Token::SlWhitespace`; also the mode `lex` starts in.
SlWhitespace,
|
||||
/// Buffering a `Token::MlWhitespace` (entered when a newline is read).
MlWhitespace,
|
||||
/// Buffering a `Token::SlComment`.
SlComment,
|
||||
}
|
||||
|
||||
/// A token whose text is a borrowed `&'a str` slice rather than an owned `String`.
pub struct Token<'a> {
|
||||
/// Category of this token.
token_type: TokenType,
|
||||
/// Text of the token, borrowed for the lifetime `'a`.
value: &'a str,
|
||||
// presumably the line the token was found on — confirm against the code that fills it in
line: u32,
|
||||
// presumably the column the token was found on — confirm against the code that fills it in
col: u32,
|
||||
}
|
||||
|
||||
/// A lexing failure described by a position and a free-form message.
pub struct LexerError {
|
||||
/// Line value supplied to `LexerError::new`.
line: u32,
|
||||
/// Column value supplied to `LexerError::new`.
col: u32,
|
||||
/// Description of the failure.
message: String,
|
||||
}
|
||||
|
||||
impl LexerError {
|
||||
pub fn new(line: u32, col: u32, message: String) -> Self {
|
||||
Self { line, col, message }
|
||||
}
|
||||
}
|
||||
|
||||
/// Lexer handle parameterised by the lifetime `'a`; as written it stores no data.
pub struct Lexer<'a> {
|
||||
/// Zero-sized marker that lets the struct mention `'a` without holding a reference.
phantom: std::marker::PhantomData<&'a u32>,
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(text: &'a str) -> Self {
|
||||
Self {
|
||||
phantom: std::marker::PhantomData {},
|
||||
impl Mode {
|
||||
fn token_constructor(&self) -> Result<Box<dyn Fn(String) -> Token>, LexerErrorType> {
|
||||
match self {
|
||||
Mode::Name => Ok(Box::new(Token::Name)),
|
||||
Mode::Class => Ok(Box::new(Token::Class)),
|
||||
Mode::String => Ok(Box::new(Token::String)),
|
||||
Mode::SlWhitespace => Ok(Box::new(Token::SlWhitespace)),
|
||||
Mode::MlWhitespace => Ok(Box::new(Token::MlWhitespace)),
|
||||
Mode::SlComment => Ok(Box::new(Token::SlComment)),
|
||||
Mode::EscapedString => Err(LexerErrorType::InvalidConstructor),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Lexer<'a> {
|
||||
type Item = Result<Token<'a>, String>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
todo!()
|
||||
fn lex(code: String) -> Result<Vec<Token>, LexerError> {
|
||||
let mut tokens = Vec::new();
|
||||
let mut value = Vec::new();
|
||||
let mut iter = code.chars();
|
||||
let mut mode = Mode::SlWhitespace;
|
||||
let mut line = 0;
|
||||
let mut col = 0;
|
||||
let position_map = move |error: LexerErrorType| LexerError { error, line, col };
|
||||
loop {
|
||||
let c = iter.next();
|
||||
if c.is_none() {
|
||||
break;
|
||||
};
|
||||
match c.unwrap() {
|
||||
'.' => {
|
||||
match mode {
|
||||
m @ Mode::Name
|
||||
| m @ Mode::Class
|
||||
| m @ Mode::SlWhitespace
|
||||
| m @ Mode::MlWhitespace => {
|
||||
let string_value = value.iter().collect();
|
||||
let constructor = m.token_constructor().map_err(position_map)?;
|
||||
tokens.push(constructor(string_value));
|
||||
value.clear();
|
||||
mode = Mode::Class;
|
||||
}
|
||||
Mode::String | Mode::SlComment => {}
|
||||
Mode::EscapedString => {
|
||||
return Err(LexerError {
|
||||
line,
|
||||
col,
|
||||
error: LexerErrorType::UnexpectedToken('.'),
|
||||
})
|
||||
}
|
||||
};
|
||||
value.push('.');
|
||||
}
|
||||
'\\' => match mode {
|
||||
Mode::String => {
|
||||
value.push('\\');
|
||||
mode = Mode::EscapedString;
|
||||
}
|
||||
_ => {
|
||||
return Err(LexerError {
|
||||
line,
|
||||
col,
|
||||
error: LexerErrorType::UnexpectedToken('\\'),
|
||||
})
|
||||
}
|
||||
},
|
||||
'"' => {
|
||||
match mode {
|
||||
m @ Mode::String => {
|
||||
mode = Mode::SlWhitespace;
|
||||
value.push('"');
|
||||
let string_value = value.iter().collect();
|
||||
let constructor = m.token_constructor().map_err(position_map)?;
|
||||
tokens.push(constructor(string_value));
|
||||
value.clear();
|
||||
}
|
||||
m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => {
|
||||
mode = Mode::String;
|
||||
let string_value = value.iter().collect();
|
||||
let constructor = m.token_constructor().map_err(position_map)?;
|
||||
tokens.push(constructor(string_value));
|
||||
value.clear();
|
||||
value.push('"');
|
||||
}
|
||||
Mode::EscapedString => {
|
||||
value.push('"');
|
||||
mode = Mode::String;
|
||||
}
|
||||
Mode::SlComment => {
|
||||
value.push('"');
|
||||
}
|
||||
_ => {
|
||||
return Err(LexerError {
|
||||
line,
|
||||
col,
|
||||
error: LexerErrorType::UnexpectedToken('"'),
|
||||
})
|
||||
}
|
||||
};
|
||||
}
|
||||
'{' => match mode {
|
||||
m @ Mode::Name
|
||||
| m @ Mode::Class
|
||||
| m @ Mode::MlWhitespace
|
||||
| m @ Mode::SlWhitespace => {
|
||||
let string_value = value.iter().collect();
|
||||
let constructor = m.token_constructor().map_err(position_map)?;
|
||||
tokens.push(constructor(string_value));
|
||||
value.clear();
|
||||
mode = Mode::SlWhitespace;
|
||||
tokens.push(Token::LBrace(String::from('{')));
|
||||
}
|
||||
Mode::EscapedString => {
|
||||
return Err(LexerError {
|
||||
line,
|
||||
col,
|
||||
error: LexerErrorType::UnexpectedToken('{'),
|
||||
})
|
||||
}
|
||||
Mode::String | Mode::SlComment => {
|
||||
value.push('{');
|
||||
}
|
||||
},
|
||||
'}' => match mode {
|
||||
m @ Mode::Name
|
||||
| m @ Mode::Class
|
||||
| m @ Mode::MlWhitespace
|
||||
| m @ Mode::SlWhitespace => {
|
||||
let string_value = value.iter().collect();
|
||||
let constructor = m.token_constructor().map_err(position_map)?;
|
||||
tokens.push(constructor(string_value));
|
||||
value.clear();
|
||||
mode = Mode::SlWhitespace;
|
||||
tokens.push(Token::RBrace(String::from('}')));
|
||||
}
|
||||
Mode::String | Mode::SlComment => {
|
||||
value.push('}');
|
||||
}
|
||||
Mode::EscapedString => {
|
||||
return Err(LexerError {
|
||||
line,
|
||||
col,
|
||||
error: LexerErrorType::UnexpectedToken('}'),
|
||||
})
|
||||
}
|
||||
},
|
||||
c @ ' ' | c @ '\r' => {
|
||||
match mode {
|
||||
m @ Mode::Name | m @ Mode::Class => {
|
||||
let string_value = value.iter().collect();
|
||||
let constructor = m.token_constructor().map_err(position_map)?;
|
||||
tokens.push(constructor(string_value));
|
||||
value.clear();
|
||||
mode = Mode::SlWhitespace;
|
||||
}
|
||||
Mode::String | Mode::SlComment | Mode::MlWhitespace | Mode::SlWhitespace => {}
|
||||
Mode::EscapedString => {
|
||||
return Err(LexerError {
|
||||
line,
|
||||
col,
|
||||
error: LexerErrorType::UnexpectedToken(c),
|
||||
})
|
||||
}
|
||||
};
|
||||
value.push(c);
|
||||
}
|
||||
c @ '\n' => {
|
||||
match mode {
|
||||
m @ Mode::Name | m @ Mode::Class | m @ Mode::SlComment => {
|
||||
let string_value = value.iter().collect();
|
||||
let constructor = m.token_constructor().map_err(position_map)?;
|
||||
tokens.push(constructor(string_value));
|
||||
value.clear();
|
||||
mode = Mode::MlWhitespace;
|
||||
}
|
||||
Mode::MlWhitespace | Mode::SlWhitespace => {
|
||||
mode = Mode::MlWhitespace;
|
||||
}
|
||||
Mode::String => {}
|
||||
Mode::EscapedString => {
|
||||
return Err(LexerError {
|
||||
line,
|
||||
col,
|
||||
error: LexerErrorType::UnexpectedToken('\n'),
|
||||
})
|
||||
}
|
||||
};
|
||||
value.push(c);
|
||||
line += 1;
|
||||
col = -1;
|
||||
}
|
||||
'/' => {
|
||||
match mode {
|
||||
Mode::String | Mode::SlComment => {}
|
||||
m @ Mode::Name
|
||||
| m @ Mode::Class
|
||||
| m @ Mode::SlWhitespace
|
||||
| m @ Mode::MlWhitespace => {
|
||||
let string_value = value.iter().collect();
|
||||
let constructor = m.token_constructor().map_err(position_map)?;
|
||||
tokens.push(constructor(string_value));
|
||||
value.clear();
|
||||
mode = Mode::SlComment;
|
||||
}
|
||||
Mode::EscapedString => {
|
||||
return Err(LexerError {
|
||||
line,
|
||||
col,
|
||||
error: LexerErrorType::UnexpectedToken('/'),
|
||||
})
|
||||
}
|
||||
};
|
||||
value.push('/');
|
||||
}
|
||||
v @ 'A'..='Z' | v @ 'a'..='z' | v @ '0'..='9' => {
|
||||
match mode {
|
||||
Mode::String | Mode::SlComment | Mode::Name | Mode::Class => {}
|
||||
m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => {
|
||||
let string_value = value.iter().collect();
|
||||
let constructor = m.token_constructor().map_err(position_map)?;
|
||||
tokens.push(constructor(string_value));
|
||||
value.clear();
|
||||
mode = Mode::Name;
|
||||
}
|
||||
|
||||
Mode::EscapedString => {
|
||||
return Err(LexerError {
|
||||
line,
|
||||
col,
|
||||
error: LexerErrorType::UnexpectedToken(v),
|
||||
})
|
||||
}
|
||||
};
|
||||
value.push(v);
|
||||
}
|
||||
unrecognized_char => {
|
||||
return Err(LexerError {
|
||||
line,
|
||||
col,
|
||||
error: LexerErrorType::UnexpectedToken(unrecognized_char),
|
||||
})
|
||||
}
|
||||
}
|
||||
col += 1;
|
||||
}
|
||||
|
||||
Ok(tokens)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user