diff --git a/src/bong/lexer.rs b/src/bong/lexer.rs index 5119103..7102fe8 100644 --- a/src/bong/lexer.rs +++ b/src/bong/lexer.rs @@ -1,12 +1,14 @@ #![allow(dead_code)] +const NO_MUT_PEEK_NEXT_MESSAGE: &str = "should not mutate between peek & next"; + use std::iter::Peekable; #[derive(PartialEq, Eq, Debug)] pub struct TokenError { error: String, - line: usize, col: usize, + line: usize, } #[derive(PartialEq, Eq, Debug)] @@ -37,25 +39,21 @@ pub enum Token { fn make_keyword_or_name>( iter: &mut Peekable, - line: &mut usize, + col: &mut usize, ) -> Token { let mut result: Vec = Vec::new(); loop { match iter.peek() { Some('A'..='Z' | 'a'..='z') => { - *line += 1; - let c = iter - .next() - .expect("iterator should not be mutated between peek & next"); + *col += 1; + let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE); result.push(c); } Some('0'..='9') => { // we assert instead of returning an error because this means the lexer is written incorrectly assert_ne!(result.len(), 0); - *line += 1; - let c = iter - .next() - .expect("iterator should not be mutated between peek & next"); + *col += 1; + let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE); result.push(c); } _ => { @@ -70,24 +68,52 @@ fn make_keyword_or_name>( } } +fn make_id_or_class>(iter: &mut Peekable, col: &mut usize) -> Token { + let mut result: Vec = vec![iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE)]; + loop { + match iter.peek() { + Some('A'..='Z' | 'a'..='z') => { + *col += 1; + let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE); + result.push(c); + } + Some('0'..='9') => { + // we assert instead of returning an error because this means the lexer is written incorrectly + // atleast one character must be # and atleast one character must be A-Z | a-z + assert!(result.len() > 1); + *col += 1; + let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE); + result.push(c); + } + _ => { + break if result.contains(&'#') { + Token::Id(String::from_iter(result)) + } else if result.contains(&'.') { + Token::Class(String::from_iter(result)) + } else { + panic!("should contain . or #") + } + } + } + } +} + fn make_number>( iter: &mut Peekable, - line: &mut usize, col: &mut usize, + line: &mut usize, ) -> Result { let mut result: Vec = Vec::new(); loop { let next = iter.peek(); match next { Some('0'..='9') => { - *line += 1; - let c = iter - .next() - .expect("iterator should not be mutated between peek & next"); + *col += 1; + let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE); result.push(c); } Some('.') => { - *line += 1; + *col += 1; if result.contains(&'.') { iter.next(); return Err(TokenError { @@ -95,11 +121,10 @@ fn make_number>( col: *col, line: *line, }); - } else { - iter.next() - .expect("iterator should not be mutated between peek & next"); - result.push('.'); } + + iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE); + result.push('.'); } _ => { break Ok(if result.contains(&'.') { @@ -114,40 +139,40 @@ fn make_number>( fn make_string>( iter: &mut T, - line: &mut usize, col: &mut usize, + line: &mut usize, ) -> Result { - let mut result: Vec = Vec::new(); + let mut result: Vec = vec![iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE)]; let mut escaped = false; - iter.next().expect("opening quote should exist"); loop { let next = iter.next().ok_or(TokenError { error: "unexpected end of string".to_string(), - line: *line, col: *col, + line: *line, })?; match next { '\\' => { - *line += 1; + *col += 1; escaped = !escaped; result.push('\\'); } '"' => { - *line += 1; + *col += 1; if escaped { result.push('"'); escaped = false; } else { + result.push('"'); break Ok(Token::String(String::from_iter(result))); } } '\n' => { - *line = 0; - *col += 1; + *col = 0; + *line += 1; result.push('\n'); } c => { - *line += 1; + *col += 1; escaped = false; result.push(c); } @@ -155,6 +180,51 @@ fn make_string>( } } +fn single_token, U: Fn(String) -> Token>( + iter: &mut Peekable, + constructor: U, + col: &mut usize, +) -> Token { + let char = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE); + *col += 1; + constructor(char.to_string()) +} + +fn make_comment>( + iter: &mut Peekable, + col: &mut usize, + line: &mut usize, +) -> Result { + let first_slash = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE); + let second_character = if let Some(c) = iter.next() { + c + } else { + return Err(TokenError { + error: "unexpected EOF".to_string(), + col: *col, + line: *line, + }); + }; + let mut result = vec![first_slash, second_character]; + *col += 2; + match second_character { + '/' => loop { + match iter.peek() { + Some('\n') | None => break Ok(Token::SlComment(String::from_iter(result))), + _ => result.push(iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE)), + } + }, + '*' => { + todo!(); + } + c => Err(TokenError { + error: format!("unexpected token {c}"), + col: *col, + line: *line, + }), + } +} + fn lexer(code: &str) -> Vec { let mut tokens = Vec::new(); let mut iter = code.chars().peekable(); @@ -169,32 +239,39 @@ fn lexer(code: &str) -> Vec { match char { '"' => { - let token = match make_string(&mut iter, &mut line, &mut col) { + let token = match make_string(&mut iter, &mut col, &mut line) { Ok(token) => token, Err(err) => Token::Error(err), }; tokens.push(token); } '0'..='9' => { - let token = match make_number(&mut iter, &mut line, &mut col) { + let token = match make_number(&mut iter, &mut col, &mut line) { Ok(token) => token, Err(err) => Token::Error(err), }; tokens.push(token); } 'a'..='z' | 'A'..='Z' => { - let token = make_keyword_or_name(&mut iter, &mut line); + let token = make_keyword_or_name(&mut iter, &mut col); tokens.push(token); } - ' ' => { + '{' => { + tokens.push(single_token(&mut iter, Token::LBrace, &mut col)); + } + '}' => { + tokens.push(single_token(&mut iter, Token::RBrace, &mut col)); + } + '#' | '.' => { + tokens.push(make_id_or_class(&mut iter, &mut col)); + } + ' ' | '\n' | '\r' => { let mut result: Vec = Vec::new(); let token = loop { let next = iter.peek(); match next { Some(' ' | '\n' | '\r') => { - let c = iter - .next() - .expect("should not mutate between next & unwrap"); + let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE); result.push(c); } _ => { @@ -208,7 +285,22 @@ fn lexer(code: &str) -> Vec { }; tokens.push(token); } - _ => todo!(), + '/' => { + let token = match make_comment(&mut iter, &mut col, &mut line) { + Ok(token) => token, + Err(err) => Token::Error(err), + }; + tokens.push(token); + } + c => { + tokens.push(Token::Error(TokenError { + error: format!("unrecognized character {c}"), + col, + line, + })); + iter.next(); + col += 1; + } } } } @@ -236,7 +328,7 @@ mod tests { space(" "), Token::False("false".to_string()), space(" "), - Token::String("string".to_string()), + Token::String("\"string\"".to_string()), ] ) } @@ -251,7 +343,6 @@ mod tests { assert_eq!( tokens, vec![ - Token::SlWhitespace("".to_string()), Token::Name("text".to_string()), Token::Class(".title".to_string()), Token::SlWhitespace(" ".to_string()),