implement id, lbracket, rbracket, refactoring

This commit is contained in:
Theis Pieter Hollebeek 2023-01-20 13:04:06 +01:00
parent 8b5d7cb0eb
commit 99da99e4c1

View File

@ -14,7 +14,7 @@ pub struct LexerError {
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Token { pub enum Token {
Name(String), Name(String),
Id(String), // not implemented Id(String),
Class(String), Class(String),
SlWhitespace(String), SlWhitespace(String),
MlWhitespace(String), MlWhitespace(String),
@ -28,13 +28,15 @@ pub enum Token {
False(String), // not implemented False(String), // not implemented
LBrace(String), LBrace(String),
RBrace(String), RBrace(String),
LBracket(String), // not implemented LBracket(String),
RBracket(String), // not implemented RBracket(String),
} }
#[derive(PartialEq)]
enum Mode { enum Mode {
Name, Name,
Class, Class,
Id,
String, String,
EscapedString, EscapedString,
SlWhitespace, SlWhitespace,
@ -51,12 +53,14 @@ impl Mode {
Mode::SlWhitespace => Ok(Box::new(Token::SlWhitespace)), Mode::SlWhitespace => Ok(Box::new(Token::SlWhitespace)),
Mode::MlWhitespace => Ok(Box::new(Token::MlWhitespace)), Mode::MlWhitespace => Ok(Box::new(Token::MlWhitespace)),
Mode::SlComment => Ok(Box::new(Token::SlComment)), Mode::SlComment => Ok(Box::new(Token::SlComment)),
Mode::Id => Ok(Box::new(Token::Id)),
Mode::EscapedString => Err(LexerErrorType::InvalidConstructor), Mode::EscapedString => Err(LexerErrorType::InvalidConstructor),
} }
} }
} }
pub fn lex(code: String) -> Result<Vec<Token>, LexerError> { #[allow(dead_code)]
pub fn lexer(code: String) -> Result<Vec<Token>, LexerError> {
let mut tokens = Vec::new(); let mut tokens = Vec::new();
let mut value = Vec::new(); let mut value = Vec::new();
let mut iter = code.chars(); let mut iter = code.chars();
@ -64,23 +68,37 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
let mut line = 0; let mut line = 0;
let mut col = 0; let mut col = 0;
let position_map = move |error: LexerErrorType| LexerError { error, line, col }; let position_map = move |error: LexerErrorType| LexerError { error, line, col };
loop { let collect_into_token_and_push =
let c = iter.next(); |constructor: Box<dyn Fn(String) -> Token>,
if c.is_none() { tokens: &mut Vec<Token>,
break; value: &mut Vec<char>| {
let token = constructor(value.iter().collect());
tokens.push(token);
value.clear();
}; };
match c.unwrap() { loop {
'.' => { let current_char = match iter.next() {
Some(c) => c,
None => break,
};
match current_char {
v @ '.' | v @ '#' => {
match mode { match mode {
m @ Mode::Name m @ Mode::Name
| m @ Mode::Class | m @ Mode::Class
| m @ Mode::Id
| m @ Mode::SlWhitespace | m @ Mode::SlWhitespace
| m @ Mode::MlWhitespace => { | m @ Mode::MlWhitespace => {
let string_value = value.iter().collect(); collect_into_token_and_push(
let constructor = m.token_constructor().map_err(position_map)?; m.token_constructor().map_err(position_map)?,
tokens.push(constructor(string_value)); &mut tokens,
value.clear(); &mut value,
mode = Mode::Class; );
mode = match v {
'.' => Mode::Class,
'#' => Mode::Id,
_ => panic!("race condition"),
};
} }
Mode::String | Mode::SlComment => {} Mode::String | Mode::SlComment => {}
Mode::EscapedString => { Mode::EscapedString => {
@ -91,7 +109,7 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
}) })
} }
}; };
value.push('.'); value.push(v);
} }
'\\' => match mode { '\\' => match mode {
Mode::String => { Mode::String => {
@ -111,17 +129,20 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
m @ Mode::String => { m @ Mode::String => {
mode = Mode::SlWhitespace; mode = Mode::SlWhitespace;
value.push('"'); value.push('"');
let string_value = value.iter().collect();
let constructor = m.token_constructor().map_err(position_map)?; collect_into_token_and_push(
tokens.push(constructor(string_value)); m.token_constructor().map_err(position_map)?,
value.clear(); &mut tokens,
&mut value,
);
} }
m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => { m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => {
mode = Mode::String; mode = Mode::String;
let string_value = value.iter().collect(); collect_into_token_and_push(
let constructor = m.token_constructor().map_err(position_map)?; m.token_constructor().map_err(position_map)?,
tokens.push(constructor(string_value)); &mut tokens,
value.clear(); &mut value,
);
value.push('"'); value.push('"');
} }
Mode::EscapedString => { Mode::EscapedString => {
@ -140,59 +161,47 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
} }
}; };
} }
'{' => match mode {
v @ '{' | v @ '}' | v @ '[' | v @ ']' => match mode {
m @ Mode::Name m @ Mode::Name
| m @ Mode::Class | m @ Mode::Class
| m @ Mode::Id
| m @ Mode::MlWhitespace | m @ Mode::MlWhitespace
| m @ Mode::SlWhitespace => { | m @ Mode::SlWhitespace => {
let string_value = value.iter().collect(); collect_into_token_and_push(
let constructor = m.token_constructor().map_err(position_map)?; m.token_constructor().map_err(position_map)?,
tokens.push(constructor(string_value)); &mut tokens,
value.clear(); &mut value,
);
mode = Mode::SlWhitespace; mode = Mode::SlWhitespace;
tokens.push(Token::LBrace(String::from('{'))); let constructor = match v {
'{' => Token::LBrace,
'}' => Token::RBrace,
'[' => Token::LBracket,
']' => Token::RBracket,
_ => panic!("race condition"),
};
tokens.push(constructor(String::from(v)));
} }
Mode::EscapedString => { Mode::EscapedString => {
return Err(LexerError { return Err(LexerError {
line, line,
col, col,
error: LexerErrorType::UnexpectedToken('{'), error: LexerErrorType::UnexpectedToken(v),
}) })
} }
Mode::String | Mode::SlComment => { Mode::String | Mode::SlComment => {
value.push('{'); value.push(v);
}
},
'}' => match mode {
m @ Mode::Name
| m @ Mode::Class
| m @ Mode::MlWhitespace
| m @ Mode::SlWhitespace => {
let string_value = value.iter().collect();
let constructor = m.token_constructor().map_err(position_map)?;
tokens.push(constructor(string_value));
value.clear();
mode = Mode::SlWhitespace;
tokens.push(Token::RBrace(String::from('}')));
}
Mode::String | Mode::SlComment => {
value.push('}');
}
Mode::EscapedString => {
return Err(LexerError {
line,
col,
error: LexerErrorType::UnexpectedToken('}'),
})
} }
}, },
c @ ' ' | c @ '\r' => { c @ ' ' | c @ '\r' => {
match mode { match mode {
m @ Mode::Name | m @ Mode::Class => { m @ Mode::Name | m @ Mode::Class | m @ Mode::Id => {
let string_value = value.iter().collect(); collect_into_token_and_push(
let constructor = m.token_constructor().map_err(position_map)?; m.token_constructor().map_err(position_map)?,
tokens.push(constructor(string_value)); &mut tokens,
value.clear(); &mut value,
);
mode = Mode::SlWhitespace; mode = Mode::SlWhitespace;
} }
Mode::String | Mode::SlComment | Mode::MlWhitespace | Mode::SlWhitespace => {} Mode::String | Mode::SlComment | Mode::MlWhitespace | Mode::SlWhitespace => {}
@ -208,11 +217,12 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
} }
c @ '\n' => { c @ '\n' => {
match mode { match mode {
m @ Mode::Name | m @ Mode::Class | m @ Mode::SlComment => { m @ Mode::Name | m @ Mode::Class | m @ Mode::Id | m @ Mode::SlComment => {
let string_value = value.iter().collect(); collect_into_token_and_push(
let constructor = m.token_constructor().map_err(position_map)?; m.token_constructor().map_err(position_map)?,
tokens.push(constructor(string_value)); &mut tokens,
value.clear(); &mut value,
);
mode = Mode::MlWhitespace; mode = Mode::MlWhitespace;
} }
Mode::MlWhitespace | Mode::SlWhitespace => { Mode::MlWhitespace | Mode::SlWhitespace => {
@ -236,12 +246,14 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
Mode::String | Mode::SlComment => {} Mode::String | Mode::SlComment => {}
m @ Mode::Name m @ Mode::Name
| m @ Mode::Class | m @ Mode::Class
| m @ Mode::Id
| m @ Mode::SlWhitespace | m @ Mode::SlWhitespace
| m @ Mode::MlWhitespace => { | m @ Mode::MlWhitespace => {
let string_value = value.iter().collect(); collect_into_token_and_push(
let constructor = m.token_constructor().map_err(position_map)?; m.token_constructor().map_err(position_map)?,
tokens.push(constructor(string_value)); &mut tokens,
value.clear(); &mut value,
);
mode = Mode::SlComment; mode = Mode::SlComment;
} }
Mode::EscapedString => { Mode::EscapedString => {
@ -256,15 +268,26 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
} }
v @ 'A'..='Z' | v @ 'a'..='z' | v @ '0'..='9' => { v @ 'A'..='Z' | v @ 'a'..='z' | v @ '0'..='9' => {
match mode { match mode {
Mode::String | Mode::SlComment | Mode::Name | Mode::Class => {} Mode::Name | Mode::Class | Mode::Id => {
if v.is_numeric() {
if value.len() == 0 || mode == Mode::Id && value.len() == 1 {
return Err(LexerError {
line,
col,
error: LexerErrorType::UnexpectedToken(v),
});
}
}
}
Mode::String | Mode::SlComment => {}
m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => { m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => {
let string_value = value.iter().collect(); collect_into_token_and_push(
let constructor = m.token_constructor().map_err(position_map)?; m.token_constructor().map_err(position_map)?,
tokens.push(constructor(string_value)); &mut tokens,
value.clear(); &mut value,
);
mode = Mode::Name; mode = Mode::Name;
} }
Mode::EscapedString => { Mode::EscapedString => {
return Err(LexerError { return Err(LexerError {
line, line,
@ -275,13 +298,18 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
}; };
value.push(v); value.push(v);
} }
unrecognized_char => { unrecognized_char => match mode {
return Err(LexerError { Mode::String => {
line, value.push(unrecognized_char);
col, }
error: LexerErrorType::UnexpectedToken(unrecognized_char), _ => {
}) return Err(LexerError {
} line,
col,
error: LexerErrorType::UnexpectedToken(unrecognized_char),
});
}
},
} }
col += 1; col += 1;
} }
@ -295,7 +323,7 @@ fn test_example_1() {
// text { \"hello world\" } // text { \"hello world\" }
\"hello world\" \"hello world\"
}"; }";
let tokens = lex(text.to_string()); let tokens = lexer(text.to_string());
assert_eq!( assert_eq!(
tokens, tokens,
Ok(vec![ Ok(vec![