implement id, lbracket, rbracket, refactoring
This commit is contained in:
parent
8b5d7cb0eb
commit
99da99e4c1
@ -14,7 +14,7 @@ pub struct LexerError {
|
|||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub enum Token {
|
pub enum Token {
|
||||||
Name(String),
|
Name(String),
|
||||||
Id(String), // not implemented
|
Id(String),
|
||||||
Class(String),
|
Class(String),
|
||||||
SlWhitespace(String),
|
SlWhitespace(String),
|
||||||
MlWhitespace(String),
|
MlWhitespace(String),
|
||||||
@ -28,13 +28,15 @@ pub enum Token {
|
|||||||
False(String), // not implemented
|
False(String), // not implemented
|
||||||
LBrace(String),
|
LBrace(String),
|
||||||
RBrace(String),
|
RBrace(String),
|
||||||
LBracket(String), // not implemented
|
LBracket(String),
|
||||||
RBracket(String), // not implemented
|
RBracket(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq)]
|
||||||
enum Mode {
|
enum Mode {
|
||||||
Name,
|
Name,
|
||||||
Class,
|
Class,
|
||||||
|
Id,
|
||||||
String,
|
String,
|
||||||
EscapedString,
|
EscapedString,
|
||||||
SlWhitespace,
|
SlWhitespace,
|
||||||
@ -51,12 +53,14 @@ impl Mode {
|
|||||||
Mode::SlWhitespace => Ok(Box::new(Token::SlWhitespace)),
|
Mode::SlWhitespace => Ok(Box::new(Token::SlWhitespace)),
|
||||||
Mode::MlWhitespace => Ok(Box::new(Token::MlWhitespace)),
|
Mode::MlWhitespace => Ok(Box::new(Token::MlWhitespace)),
|
||||||
Mode::SlComment => Ok(Box::new(Token::SlComment)),
|
Mode::SlComment => Ok(Box::new(Token::SlComment)),
|
||||||
|
Mode::Id => Ok(Box::new(Token::Id)),
|
||||||
Mode::EscapedString => Err(LexerErrorType::InvalidConstructor),
|
Mode::EscapedString => Err(LexerErrorType::InvalidConstructor),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
|
#[allow(dead_code)]
|
||||||
|
pub fn lexer(code: String) -> Result<Vec<Token>, LexerError> {
|
||||||
let mut tokens = Vec::new();
|
let mut tokens = Vec::new();
|
||||||
let mut value = Vec::new();
|
let mut value = Vec::new();
|
||||||
let mut iter = code.chars();
|
let mut iter = code.chars();
|
||||||
@ -64,23 +68,37 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
|
|||||||
let mut line = 0;
|
let mut line = 0;
|
||||||
let mut col = 0;
|
let mut col = 0;
|
||||||
let position_map = move |error: LexerErrorType| LexerError { error, line, col };
|
let position_map = move |error: LexerErrorType| LexerError { error, line, col };
|
||||||
loop {
|
let collect_into_token_and_push =
|
||||||
let c = iter.next();
|
|constructor: Box<dyn Fn(String) -> Token>,
|
||||||
if c.is_none() {
|
tokens: &mut Vec<Token>,
|
||||||
break;
|
value: &mut Vec<char>| {
|
||||||
|
let token = constructor(value.iter().collect());
|
||||||
|
tokens.push(token);
|
||||||
|
value.clear();
|
||||||
};
|
};
|
||||||
match c.unwrap() {
|
loop {
|
||||||
'.' => {
|
let current_char = match iter.next() {
|
||||||
|
Some(c) => c,
|
||||||
|
None => break,
|
||||||
|
};
|
||||||
|
match current_char {
|
||||||
|
v @ '.' | v @ '#' => {
|
||||||
match mode {
|
match mode {
|
||||||
m @ Mode::Name
|
m @ Mode::Name
|
||||||
| m @ Mode::Class
|
| m @ Mode::Class
|
||||||
|
| m @ Mode::Id
|
||||||
| m @ Mode::SlWhitespace
|
| m @ Mode::SlWhitespace
|
||||||
| m @ Mode::MlWhitespace => {
|
| m @ Mode::MlWhitespace => {
|
||||||
let string_value = value.iter().collect();
|
collect_into_token_and_push(
|
||||||
let constructor = m.token_constructor().map_err(position_map)?;
|
m.token_constructor().map_err(position_map)?,
|
||||||
tokens.push(constructor(string_value));
|
&mut tokens,
|
||||||
value.clear();
|
&mut value,
|
||||||
mode = Mode::Class;
|
);
|
||||||
|
mode = match v {
|
||||||
|
'.' => Mode::Class,
|
||||||
|
'#' => Mode::Id,
|
||||||
|
_ => panic!("race condition"),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
Mode::String | Mode::SlComment => {}
|
Mode::String | Mode::SlComment => {}
|
||||||
Mode::EscapedString => {
|
Mode::EscapedString => {
|
||||||
@ -91,7 +109,7 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
value.push('.');
|
value.push(v);
|
||||||
}
|
}
|
||||||
'\\' => match mode {
|
'\\' => match mode {
|
||||||
Mode::String => {
|
Mode::String => {
|
||||||
@ -111,17 +129,20 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
|
|||||||
m @ Mode::String => {
|
m @ Mode::String => {
|
||||||
mode = Mode::SlWhitespace;
|
mode = Mode::SlWhitespace;
|
||||||
value.push('"');
|
value.push('"');
|
||||||
let string_value = value.iter().collect();
|
|
||||||
let constructor = m.token_constructor().map_err(position_map)?;
|
collect_into_token_and_push(
|
||||||
tokens.push(constructor(string_value));
|
m.token_constructor().map_err(position_map)?,
|
||||||
value.clear();
|
&mut tokens,
|
||||||
|
&mut value,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => {
|
m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => {
|
||||||
mode = Mode::String;
|
mode = Mode::String;
|
||||||
let string_value = value.iter().collect();
|
collect_into_token_and_push(
|
||||||
let constructor = m.token_constructor().map_err(position_map)?;
|
m.token_constructor().map_err(position_map)?,
|
||||||
tokens.push(constructor(string_value));
|
&mut tokens,
|
||||||
value.clear();
|
&mut value,
|
||||||
|
);
|
||||||
value.push('"');
|
value.push('"');
|
||||||
}
|
}
|
||||||
Mode::EscapedString => {
|
Mode::EscapedString => {
|
||||||
@ -140,59 +161,47 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
'{' => match mode {
|
|
||||||
|
v @ '{' | v @ '}' | v @ '[' | v @ ']' => match mode {
|
||||||
m @ Mode::Name
|
m @ Mode::Name
|
||||||
| m @ Mode::Class
|
| m @ Mode::Class
|
||||||
|
| m @ Mode::Id
|
||||||
| m @ Mode::MlWhitespace
|
| m @ Mode::MlWhitespace
|
||||||
| m @ Mode::SlWhitespace => {
|
| m @ Mode::SlWhitespace => {
|
||||||
let string_value = value.iter().collect();
|
collect_into_token_and_push(
|
||||||
let constructor = m.token_constructor().map_err(position_map)?;
|
m.token_constructor().map_err(position_map)?,
|
||||||
tokens.push(constructor(string_value));
|
&mut tokens,
|
||||||
value.clear();
|
&mut value,
|
||||||
|
);
|
||||||
mode = Mode::SlWhitespace;
|
mode = Mode::SlWhitespace;
|
||||||
tokens.push(Token::LBrace(String::from('{')));
|
let constructor = match v {
|
||||||
|
'{' => Token::LBrace,
|
||||||
|
'}' => Token::RBrace,
|
||||||
|
'[' => Token::LBracket,
|
||||||
|
']' => Token::RBracket,
|
||||||
|
_ => panic!("race condition"),
|
||||||
|
};
|
||||||
|
tokens.push(constructor(String::from(v)));
|
||||||
}
|
}
|
||||||
Mode::EscapedString => {
|
Mode::EscapedString => {
|
||||||
return Err(LexerError {
|
return Err(LexerError {
|
||||||
line,
|
line,
|
||||||
col,
|
col,
|
||||||
error: LexerErrorType::UnexpectedToken('{'),
|
error: LexerErrorType::UnexpectedToken(v),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
Mode::String | Mode::SlComment => {
|
Mode::String | Mode::SlComment => {
|
||||||
value.push('{');
|
value.push(v);
|
||||||
}
|
|
||||||
},
|
|
||||||
'}' => match mode {
|
|
||||||
m @ Mode::Name
|
|
||||||
| m @ Mode::Class
|
|
||||||
| m @ Mode::MlWhitespace
|
|
||||||
| m @ Mode::SlWhitespace => {
|
|
||||||
let string_value = value.iter().collect();
|
|
||||||
let constructor = m.token_constructor().map_err(position_map)?;
|
|
||||||
tokens.push(constructor(string_value));
|
|
||||||
value.clear();
|
|
||||||
mode = Mode::SlWhitespace;
|
|
||||||
tokens.push(Token::RBrace(String::from('}')));
|
|
||||||
}
|
|
||||||
Mode::String | Mode::SlComment => {
|
|
||||||
value.push('}');
|
|
||||||
}
|
|
||||||
Mode::EscapedString => {
|
|
||||||
return Err(LexerError {
|
|
||||||
line,
|
|
||||||
col,
|
|
||||||
error: LexerErrorType::UnexpectedToken('}'),
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
c @ ' ' | c @ '\r' => {
|
c @ ' ' | c @ '\r' => {
|
||||||
match mode {
|
match mode {
|
||||||
m @ Mode::Name | m @ Mode::Class => {
|
m @ Mode::Name | m @ Mode::Class | m @ Mode::Id => {
|
||||||
let string_value = value.iter().collect();
|
collect_into_token_and_push(
|
||||||
let constructor = m.token_constructor().map_err(position_map)?;
|
m.token_constructor().map_err(position_map)?,
|
||||||
tokens.push(constructor(string_value));
|
&mut tokens,
|
||||||
value.clear();
|
&mut value,
|
||||||
|
);
|
||||||
mode = Mode::SlWhitespace;
|
mode = Mode::SlWhitespace;
|
||||||
}
|
}
|
||||||
Mode::String | Mode::SlComment | Mode::MlWhitespace | Mode::SlWhitespace => {}
|
Mode::String | Mode::SlComment | Mode::MlWhitespace | Mode::SlWhitespace => {}
|
||||||
@ -208,11 +217,12 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
|
|||||||
}
|
}
|
||||||
c @ '\n' => {
|
c @ '\n' => {
|
||||||
match mode {
|
match mode {
|
||||||
m @ Mode::Name | m @ Mode::Class | m @ Mode::SlComment => {
|
m @ Mode::Name | m @ Mode::Class | m @ Mode::Id | m @ Mode::SlComment => {
|
||||||
let string_value = value.iter().collect();
|
collect_into_token_and_push(
|
||||||
let constructor = m.token_constructor().map_err(position_map)?;
|
m.token_constructor().map_err(position_map)?,
|
||||||
tokens.push(constructor(string_value));
|
&mut tokens,
|
||||||
value.clear();
|
&mut value,
|
||||||
|
);
|
||||||
mode = Mode::MlWhitespace;
|
mode = Mode::MlWhitespace;
|
||||||
}
|
}
|
||||||
Mode::MlWhitespace | Mode::SlWhitespace => {
|
Mode::MlWhitespace | Mode::SlWhitespace => {
|
||||||
@ -236,12 +246,14 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
|
|||||||
Mode::String | Mode::SlComment => {}
|
Mode::String | Mode::SlComment => {}
|
||||||
m @ Mode::Name
|
m @ Mode::Name
|
||||||
| m @ Mode::Class
|
| m @ Mode::Class
|
||||||
|
| m @ Mode::Id
|
||||||
| m @ Mode::SlWhitespace
|
| m @ Mode::SlWhitespace
|
||||||
| m @ Mode::MlWhitespace => {
|
| m @ Mode::MlWhitespace => {
|
||||||
let string_value = value.iter().collect();
|
collect_into_token_and_push(
|
||||||
let constructor = m.token_constructor().map_err(position_map)?;
|
m.token_constructor().map_err(position_map)?,
|
||||||
tokens.push(constructor(string_value));
|
&mut tokens,
|
||||||
value.clear();
|
&mut value,
|
||||||
|
);
|
||||||
mode = Mode::SlComment;
|
mode = Mode::SlComment;
|
||||||
}
|
}
|
||||||
Mode::EscapedString => {
|
Mode::EscapedString => {
|
||||||
@ -256,15 +268,26 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
|
|||||||
}
|
}
|
||||||
v @ 'A'..='Z' | v @ 'a'..='z' | v @ '0'..='9' => {
|
v @ 'A'..='Z' | v @ 'a'..='z' | v @ '0'..='9' => {
|
||||||
match mode {
|
match mode {
|
||||||
Mode::String | Mode::SlComment | Mode::Name | Mode::Class => {}
|
Mode::Name | Mode::Class | Mode::Id => {
|
||||||
|
if v.is_numeric() {
|
||||||
|
if value.len() == 0 || mode == Mode::Id && value.len() == 1 {
|
||||||
|
return Err(LexerError {
|
||||||
|
line,
|
||||||
|
col,
|
||||||
|
error: LexerErrorType::UnexpectedToken(v),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Mode::String | Mode::SlComment => {}
|
||||||
m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => {
|
m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => {
|
||||||
let string_value = value.iter().collect();
|
collect_into_token_and_push(
|
||||||
let constructor = m.token_constructor().map_err(position_map)?;
|
m.token_constructor().map_err(position_map)?,
|
||||||
tokens.push(constructor(string_value));
|
&mut tokens,
|
||||||
value.clear();
|
&mut value,
|
||||||
|
);
|
||||||
mode = Mode::Name;
|
mode = Mode::Name;
|
||||||
}
|
}
|
||||||
|
|
||||||
Mode::EscapedString => {
|
Mode::EscapedString => {
|
||||||
return Err(LexerError {
|
return Err(LexerError {
|
||||||
line,
|
line,
|
||||||
@ -275,13 +298,18 @@ pub fn lex(code: String) -> Result<Vec<Token>, LexerError> {
|
|||||||
};
|
};
|
||||||
value.push(v);
|
value.push(v);
|
||||||
}
|
}
|
||||||
unrecognized_char => {
|
unrecognized_char => match mode {
|
||||||
return Err(LexerError {
|
Mode::String => {
|
||||||
line,
|
value.push(unrecognized_char);
|
||||||
col,
|
}
|
||||||
error: LexerErrorType::UnexpectedToken(unrecognized_char),
|
_ => {
|
||||||
})
|
return Err(LexerError {
|
||||||
}
|
line,
|
||||||
|
col,
|
||||||
|
error: LexerErrorType::UnexpectedToken(unrecognized_char),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
},
|
||||||
}
|
}
|
||||||
col += 1;
|
col += 1;
|
||||||
}
|
}
|
||||||
@ -295,7 +323,7 @@ fn test_example_1() {
|
|||||||
// text { \"hello world\" }
|
// text { \"hello world\" }
|
||||||
\"hello world\"
|
\"hello world\"
|
||||||
}";
|
}";
|
||||||
let tokens = lex(text.to_string());
|
let tokens = lexer(text.to_string());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
tokens,
|
tokens,
|
||||||
Ok(vec![
|
Ok(vec![
|
||||||
|
Loading…
Reference in New Issue
Block a user