pass test for example 1

This commit is contained in:
Theis Pieter Hollebeek 2023-02-07 13:40:40 +01:00
parent b88ea4bad1
commit 3fadfac622

View File

@ -1,12 +1,14 @@
#![allow(dead_code)] #![allow(dead_code)]
const NO_MUT_PEEK_NEXT_MESSAGE: &str = "should not mutate between peek & next";
use std::iter::Peekable; use std::iter::Peekable;
#[derive(PartialEq, Eq, Debug)] #[derive(PartialEq, Eq, Debug)]
pub struct TokenError { pub struct TokenError {
error: String, error: String,
line: usize,
col: usize, col: usize,
line: usize,
} }
#[derive(PartialEq, Eq, Debug)] #[derive(PartialEq, Eq, Debug)]
@ -37,25 +39,21 @@ pub enum Token {
fn make_keyword_or_name<T: Iterator<Item = char>>( fn make_keyword_or_name<T: Iterator<Item = char>>(
iter: &mut Peekable<T>, iter: &mut Peekable<T>,
line: &mut usize, col: &mut usize,
) -> Token { ) -> Token {
let mut result: Vec<char> = Vec::new(); let mut result: Vec<char> = Vec::new();
loop { loop {
match iter.peek() { match iter.peek() {
Some('A'..='Z' | 'a'..='z') => { Some('A'..='Z' | 'a'..='z') => {
*line += 1; *col += 1;
let c = iter let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE);
.next()
.expect("iterator should not be mutated between peek & next");
result.push(c); result.push(c);
} }
Some('0'..='9') => { Some('0'..='9') => {
// we assert instead of returning an error because this means the lexer is written incorrectly // we assert instead of returning an error because this means the lexer is written incorrectly
assert_ne!(result.len(), 0); assert_ne!(result.len(), 0);
*line += 1; *col += 1;
let c = iter let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE);
.next()
.expect("iterator should not be mutated between peek & next");
result.push(c); result.push(c);
} }
_ => { _ => {
@ -70,24 +68,52 @@ fn make_keyword_or_name<T: Iterator<Item = char>>(
} }
} }
/// Lexes an id (`#name`) or class (`.name`) token starting at the current position.
///
/// The first character is consumed unconditionally and kept as the first
/// character of the token text; the caller is expected to have peeked a `#`
/// or `.` before calling. ASCII letters and digits that follow are appended
/// until any other character (or the end of input) is peeked; that character
/// is left unconsumed.
///
/// `col` is advanced by one for every character consumed, including the
/// leading `#` / `.`.
///
/// Returns `Token::Id` when the text contains `#`, otherwise `Token::Class`
/// when it contains `.`.
///
/// # Panics
///
/// - if `iter` is already exhausted when called,
/// - if a digit directly follows the leading character (e.g. `#1`),
/// - if the consumed text contains neither `#` nor `.`.
fn make_id_or_class<T: Iterator<Item = char>>(iter: &mut Peekable<T>, col: &mut usize) -> Token {
    let mut result: Vec<char> = vec![iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE)];
    // The leading `#` / `.` occupies a column too; count it so that `col`
    // stays in step with the number of characters consumed.
    *col += 1;
    loop {
        match iter.peek() {
            Some('A'..='Z' | 'a'..='z') => {
                *col += 1;
                let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE);
                result.push(c);
            }
            Some('0'..='9') => {
                // We assert instead of returning an error: a digit is only valid
                // after the leading `#` / `.` plus at least one letter (A-Z | a-z).
                assert!(result.len() > 1);
                *col += 1;
                let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE);
                result.push(c);
            }
            _ => {
                // Only the first character can be `#` or `.`; everything pushed
                // afterwards is alphanumeric.
                break if result.contains(&'#') {
                    Token::Id(String::from_iter(result))
                } else if result.contains(&'.') {
                    Token::Class(String::from_iter(result))
                } else {
                    panic!("should contain . or #")
                };
            }
        }
    }
}
fn make_number<T: Iterator<Item = char>>( fn make_number<T: Iterator<Item = char>>(
iter: &mut Peekable<T>, iter: &mut Peekable<T>,
line: &mut usize,
col: &mut usize, col: &mut usize,
line: &mut usize,
) -> Result<Token, TokenError> { ) -> Result<Token, TokenError> {
let mut result: Vec<char> = Vec::new(); let mut result: Vec<char> = Vec::new();
loop { loop {
let next = iter.peek(); let next = iter.peek();
match next { match next {
Some('0'..='9') => { Some('0'..='9') => {
*line += 1; *col += 1;
let c = iter let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE);
.next()
.expect("iterator should not be mutated between peek & next");
result.push(c); result.push(c);
} }
Some('.') => { Some('.') => {
*line += 1; *col += 1;
if result.contains(&'.') { if result.contains(&'.') {
iter.next(); iter.next();
return Err(TokenError { return Err(TokenError {
@ -95,11 +121,10 @@ fn make_number<T: Iterator<Item = char>>(
col: *col, col: *col,
line: *line, line: *line,
}); });
} else {
iter.next()
.expect("iterator should not be mutated between peek & next");
result.push('.');
} }
iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE);
result.push('.');
} }
_ => { _ => {
break Ok(if result.contains(&'.') { break Ok(if result.contains(&'.') {
@ -114,40 +139,40 @@ fn make_number<T: Iterator<Item = char>>(
fn make_string<T: Iterator<Item = char>>( fn make_string<T: Iterator<Item = char>>(
iter: &mut T, iter: &mut T,
line: &mut usize,
col: &mut usize, col: &mut usize,
line: &mut usize,
) -> Result<Token, TokenError> { ) -> Result<Token, TokenError> {
let mut result: Vec<char> = Vec::new(); let mut result: Vec<char> = vec![iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE)];
let mut escaped = false; let mut escaped = false;
iter.next().expect("opening quote should exist");
loop { loop {
let next = iter.next().ok_or(TokenError { let next = iter.next().ok_or(TokenError {
error: "unexpected end of string".to_string(), error: "unexpected end of string".to_string(),
line: *line,
col: *col, col: *col,
line: *line,
})?; })?;
match next { match next {
'\\' => { '\\' => {
*line += 1; *col += 1;
escaped = !escaped; escaped = !escaped;
result.push('\\'); result.push('\\');
} }
'"' => { '"' => {
*line += 1; *col += 1;
if escaped { if escaped {
result.push('"'); result.push('"');
escaped = false; escaped = false;
} else { } else {
result.push('"');
break Ok(Token::String(String::from_iter(result))); break Ok(Token::String(String::from_iter(result)));
} }
} }
'\n' => { '\n' => {
*line = 0; *col = 0;
*col += 1; *line += 1;
result.push('\n'); result.push('\n');
} }
c => { c => {
*line += 1; *col += 1;
escaped = false; escaped = false;
result.push(c); result.push(c);
} }
@ -155,6 +180,51 @@ fn make_string<T: Iterator<Item = char>>(
} }
} }
/// Consumes exactly one character from `iter` and wraps it in a token.
///
/// The consumed character is converted to a one-character `String` and handed
/// to `constructor` (typically a tuple-variant constructor such as
/// `Token::LBrace`). `col` is advanced by one.
///
/// # Panics
///
/// Panics if `iter` is already exhausted when called.
fn single_token<T: Iterator<Item = char>, U: Fn(String) -> Token>(
    iter: &mut Peekable<T>,
    constructor: U,
    col: &mut usize,
) -> Token {
    let text = String::from(iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE));
    *col += 1;
    constructor(text)
}
/// Lexes a comment starting at the current position.
///
/// The first character is consumed unconditionally (the caller is expected to
/// have peeked a `/`), followed by a second character that selects the
/// comment kind:
///
/// - `/`: a single-line comment. Everything up to, but not including, the
///   next `'\n'` (or the end of input) is consumed, and
///   `Token::SlComment` is returned with the full text including the
///   leading `//`.
/// - `*`: block comments are not implemented yet.
///
/// `col` is advanced by one for every character consumed once the second
/// character has been read. `line` is only read, for error reporting.
///
/// # Errors
///
/// - `"unexpected EOF"` if the input ends right after the first character
///   (reported at the `col` the function was entered with),
/// - `"unexpected token {c}"` if the second character is neither `/` nor `*`.
///
/// # Panics
///
/// - if `iter` is already exhausted when called,
/// - if the second character is `*` (hits `todo!()`).
fn make_comment<T: Iterator<Item = char>>(
    iter: &mut Peekable<T>,
    col: &mut usize,
    line: &mut usize,
) -> Result<Token, TokenError> {
    let first_slash = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE);
    let second_character = iter.next().ok_or_else(|| TokenError {
        error: "unexpected EOF".to_string(),
        col: *col,
        line: *line,
    })?;
    let mut result = vec![first_slash, second_character];
    *col += 2;
    match second_character {
        '/' => {
            // Consume up to the end of the line, leaving the newline itself for
            // the caller. Every consumed character advances the column.
            while let Some(c) = iter.next_if(|&c| c != '\n') {
                *col += 1;
                result.push(c);
            }
            Ok(Token::SlComment(String::from_iter(result)))
        }
        '*' => {
            todo!();
        }
        c => Err(TokenError {
            error: format!("unexpected token {c}"),
            col: *col,
            line: *line,
        }),
    }
}
fn lexer(code: &str) -> Vec<Token> { fn lexer(code: &str) -> Vec<Token> {
let mut tokens = Vec::new(); let mut tokens = Vec::new();
let mut iter = code.chars().peekable(); let mut iter = code.chars().peekable();
@ -169,32 +239,39 @@ fn lexer(code: &str) -> Vec<Token> {
match char { match char {
'"' => { '"' => {
let token = match make_string(&mut iter, &mut line, &mut col) { let token = match make_string(&mut iter, &mut col, &mut line) {
Ok(token) => token, Ok(token) => token,
Err(err) => Token::Error(err), Err(err) => Token::Error(err),
}; };
tokens.push(token); tokens.push(token);
} }
'0'..='9' => { '0'..='9' => {
let token = match make_number(&mut iter, &mut line, &mut col) { let token = match make_number(&mut iter, &mut col, &mut line) {
Ok(token) => token, Ok(token) => token,
Err(err) => Token::Error(err), Err(err) => Token::Error(err),
}; };
tokens.push(token); tokens.push(token);
} }
'a'..='z' | 'A'..='Z' => { 'a'..='z' | 'A'..='Z' => {
let token = make_keyword_or_name(&mut iter, &mut line); let token = make_keyword_or_name(&mut iter, &mut col);
tokens.push(token); tokens.push(token);
} }
' ' => { '{' => {
tokens.push(single_token(&mut iter, Token::LBrace, &mut col));
}
'}' => {
tokens.push(single_token(&mut iter, Token::RBrace, &mut col));
}
'#' | '.' => {
tokens.push(make_id_or_class(&mut iter, &mut col));
}
' ' | '\n' | '\r' => {
let mut result: Vec<char> = Vec::new(); let mut result: Vec<char> = Vec::new();
let token = loop { let token = loop {
let next = iter.peek(); let next = iter.peek();
match next { match next {
Some(' ' | '\n' | '\r') => { Some(' ' | '\n' | '\r') => {
let c = iter let c = iter.next().expect(NO_MUT_PEEK_NEXT_MESSAGE);
.next()
.expect("should not mutate between next & unwrap");
result.push(c); result.push(c);
} }
_ => { _ => {
@ -208,7 +285,22 @@ fn lexer(code: &str) -> Vec<Token> {
}; };
tokens.push(token); tokens.push(token);
} }
_ => todo!(), '/' => {
let token = match make_comment(&mut iter, &mut col, &mut line) {
Ok(token) => token,
Err(err) => Token::Error(err),
};
tokens.push(token);
}
c => {
tokens.push(Token::Error(TokenError {
error: format!("unrecognized character {c}"),
col,
line,
}));
iter.next();
col += 1;
}
} }
} }
} }
@ -236,7 +328,7 @@ mod tests {
space(" "), space(" "),
Token::False("false".to_string()), Token::False("false".to_string()),
space(" "), space(" "),
Token::String("string".to_string()), Token::String("\"string\"".to_string()),
] ]
) )
} }
@ -251,7 +343,6 @@ mod tests {
assert_eq!( assert_eq!(
tokens, tokens,
vec![ vec![
Token::SlWhitespace("".to_string()),
Token::Name("text".to_string()), Token::Name("text".to_string()),
Token::Class(".title".to_string()), Token::Class(".title".to_string()),
Token::SlWhitespace(" ".to_string()), Token::SlWhitespace(" ".to_string()),