From 99da99e4c179a03c5d0f8a9c04f95e5481b70cad Mon Sep 17 00:00:00 2001 From: Theis Pieter Hollebeek Date: Fri, 20 Jan 2023 13:04:06 +0100 Subject: [PATCH] implement id, lbracket, rbracket, refactoring --- src/bong/lexer.rs | 194 ++++++++++++++++++++++++++-------------------- 1 file changed, 111 insertions(+), 83 deletions(-) diff --git a/src/bong/lexer.rs b/src/bong/lexer.rs index ea45e13..16e9fde 100644 --- a/src/bong/lexer.rs +++ b/src/bong/lexer.rs @@ -14,7 +14,7 @@ pub struct LexerError { #[derive(Debug, Clone, PartialEq)] pub enum Token { Name(String), - Id(String), // not implemented + Id(String), Class(String), SlWhitespace(String), MlWhitespace(String), @@ -28,13 +28,15 @@ pub enum Token { False(String), // not implemented LBrace(String), RBrace(String), - LBracket(String), // not implemented - RBracket(String), // not implemented + LBracket(String), + RBracket(String), } +#[derive(PartialEq)] enum Mode { Name, Class, + Id, String, EscapedString, SlWhitespace, @@ -51,12 +53,14 @@ impl Mode { Mode::SlWhitespace => Ok(Box::new(Token::SlWhitespace)), Mode::MlWhitespace => Ok(Box::new(Token::MlWhitespace)), Mode::SlComment => Ok(Box::new(Token::SlComment)), + Mode::Id => Ok(Box::new(Token::Id)), Mode::EscapedString => Err(LexerErrorType::InvalidConstructor), } } } -pub fn lex(code: String) -> Result, LexerError> { +#[allow(dead_code)] +pub fn lexer(code: String) -> Result, LexerError> { let mut tokens = Vec::new(); let mut value = Vec::new(); let mut iter = code.chars(); @@ -64,23 +68,37 @@ pub fn lex(code: String) -> Result, LexerError> { let mut line = 0; let mut col = 0; let position_map = move |error: LexerErrorType| LexerError { error, line, col }; - loop { - let c = iter.next(); - if c.is_none() { - break; + let collect_into_token_and_push = + |constructor: Box Token>, + tokens: &mut Vec, + value: &mut Vec| { + let token = constructor(value.iter().collect()); + tokens.push(token); + value.clear(); }; - match c.unwrap() { - '.' => { + loop { + let current_char = match iter.next() { + Some(c) => c, + None => break, + }; + match current_char { + v @ '.' | v @ '#' => { match mode { m @ Mode::Name | m @ Mode::Class + | m @ Mode::Id | m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => { - let string_value = value.iter().collect(); - let constructor = m.token_constructor().map_err(position_map)?; - tokens.push(constructor(string_value)); - value.clear(); - mode = Mode::Class; + collect_into_token_and_push( + m.token_constructor().map_err(position_map)?, + &mut tokens, + &mut value, + ); + mode = match v { + '.' => Mode::Class, + '#' => Mode::Id, + _ => panic!("race condition"), + }; } Mode::String | Mode::SlComment => {} Mode::EscapedString => { @@ -91,7 +109,7 @@ pub fn lex(code: String) -> Result, LexerError> { }) } }; - value.push('.'); + value.push(v); } '\\' => match mode { Mode::String => { @@ -111,17 +129,20 @@ pub fn lex(code: String) -> Result, LexerError> { m @ Mode::String => { mode = Mode::SlWhitespace; value.push('"'); - let string_value = value.iter().collect(); - let constructor = m.token_constructor().map_err(position_map)?; - tokens.push(constructor(string_value)); - value.clear(); + + collect_into_token_and_push( + m.token_constructor().map_err(position_map)?, + &mut tokens, + &mut value, + ); } m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => { mode = Mode::String; - let string_value = value.iter().collect(); - let constructor = m.token_constructor().map_err(position_map)?; - tokens.push(constructor(string_value)); - value.clear(); + collect_into_token_and_push( + m.token_constructor().map_err(position_map)?, + &mut tokens, + &mut value, + ); value.push('"'); } Mode::EscapedString => { @@ -140,59 +161,47 @@ pub fn lex(code: String) -> Result, LexerError> { } }; } - '{' => match mode { + + v @ '{' | v @ '}' | v @ '[' | v @ ']' => match mode { m @ Mode::Name | m @ Mode::Class + | m @ Mode::Id | m @ Mode::MlWhitespace | m @ Mode::SlWhitespace => { - let string_value = value.iter().collect(); - let constructor = m.token_constructor().map_err(position_map)?; - tokens.push(constructor(string_value)); - value.clear(); + collect_into_token_and_push( + m.token_constructor().map_err(position_map)?, + &mut tokens, + &mut value, + ); mode = Mode::SlWhitespace; - tokens.push(Token::LBrace(String::from('{'))); + let constructor = match v { + '{' => Token::LBrace, + '}' => Token::RBrace, + '[' => Token::LBracket, + ']' => Token::RBracket, + _ => panic!("race condition"), + }; + tokens.push(constructor(String::from(v))); } Mode::EscapedString => { return Err(LexerError { line, col, - error: LexerErrorType::UnexpectedToken('{'), + error: LexerErrorType::UnexpectedToken(v), }) } Mode::String | Mode::SlComment => { - value.push('{'); - } - }, - '}' => match mode { - m @ Mode::Name - | m @ Mode::Class - | m @ Mode::MlWhitespace - | m @ Mode::SlWhitespace => { - let string_value = value.iter().collect(); - let constructor = m.token_constructor().map_err(position_map)?; - tokens.push(constructor(string_value)); - value.clear(); - mode = Mode::SlWhitespace; - tokens.push(Token::RBrace(String::from('}'))); - } - Mode::String | Mode::SlComment => { - value.push('}'); - } - Mode::EscapedString => { - return Err(LexerError { - line, - col, - error: LexerErrorType::UnexpectedToken('}'), - }) + value.push(v); } }, c @ ' ' | c @ '\r' => { match mode { - m @ Mode::Name | m @ Mode::Class => { - let string_value = value.iter().collect(); - let constructor = m.token_constructor().map_err(position_map)?; - tokens.push(constructor(string_value)); - value.clear(); + m @ Mode::Name | m @ Mode::Class | m @ Mode::Id => { + collect_into_token_and_push( + m.token_constructor().map_err(position_map)?, + &mut tokens, + &mut value, + ); mode = Mode::SlWhitespace; } Mode::String | Mode::SlComment | Mode::MlWhitespace | Mode::SlWhitespace => {} @@ -208,11 +217,12 @@ pub fn lex(code: String) -> Result, LexerError> { } c @ '\n' => { match mode { - m @ Mode::Name | m @ Mode::Class | m @ Mode::SlComment => { - let string_value = value.iter().collect(); - let constructor = m.token_constructor().map_err(position_map)?; - tokens.push(constructor(string_value)); - value.clear(); + m @ Mode::Name | m @ Mode::Class | m @ Mode::Id | m @ Mode::SlComment => { + collect_into_token_and_push( + m.token_constructor().map_err(position_map)?, + &mut tokens, + &mut value, + ); mode = Mode::MlWhitespace; } Mode::MlWhitespace | Mode::SlWhitespace => { @@ -236,12 +246,14 @@ pub fn lex(code: String) -> Result, LexerError> { Mode::String | Mode::SlComment => {} m @ Mode::Name | m @ Mode::Class + | m @ Mode::Id | m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => { - let string_value = value.iter().collect(); - let constructor = m.token_constructor().map_err(position_map)?; - tokens.push(constructor(string_value)); - value.clear(); + collect_into_token_and_push( + m.token_constructor().map_err(position_map)?, + &mut tokens, + &mut value, + ); mode = Mode::SlComment; } Mode::EscapedString => { @@ -256,15 +268,26 @@ pub fn lex(code: String) -> Result, LexerError> { } v @ 'A'..='Z' | v @ 'a'..='z' | v @ '0'..='9' => { match mode { - Mode::String | Mode::SlComment | Mode::Name | Mode::Class => {} + Mode::Name | Mode::Class | Mode::Id => { + if v.is_numeric() { + if value.len() == 0 || mode == Mode::Id && value.len() == 1 { + return Err(LexerError { + line, + col, + error: LexerErrorType::UnexpectedToken(v), + }); + } + } + } + Mode::String | Mode::SlComment => {} m @ Mode::SlWhitespace | m @ Mode::MlWhitespace => { - let string_value = value.iter().collect(); - let constructor = m.token_constructor().map_err(position_map)?; - tokens.push(constructor(string_value)); - value.clear(); + collect_into_token_and_push( + m.token_constructor().map_err(position_map)?, + &mut tokens, + &mut value, + ); mode = Mode::Name; } - Mode::EscapedString => { return Err(LexerError { line, @@ -275,13 +298,18 @@ pub fn lex(code: String) -> Result, LexerError> { }; value.push(v); } - unrecognized_char => { - return Err(LexerError { - line, - col, - error: LexerErrorType::UnexpectedToken(unrecognized_char), - }) - } + unrecognized_char => match mode { + Mode::String => { + value.push(unrecognized_char); + } + _ => { + return Err(LexerError { + line, + col, + error: LexerErrorType::UnexpectedToken(unrecognized_char), + }); + } + }, } col += 1; } @@ -295,7 +323,7 @@ fn test_example_1() { // text { \"hello world\" } \"hello world\" }"; - let tokens = lex(text.to_string()); + let tokens = lexer(text.to_string()); assert_eq!( tokens, Ok(vec![