// Reconstructed post-patch contents of src/bong/lexer.rs.
// Origin: commit 67ed64e3b41417a61740b65a51644393d85f073f, "lexer code",
// by Theis Pieter Hollebeek, Thu, 19 Jan 2023 14:56:25 +0100.

/// The reason a lexing attempt failed.
#[derive(Debug, Clone, PartialEq, Eq)]
enum LexerErrorType {
    /// A character that is not allowed in the current lexer mode.
    UnexpectedToken(char),
    /// A token was requested for a mode that has no token constructor
    /// (currently only [`Mode::EscapedString`]).
    InvalidConstructor,
}

/// A lexing failure together with the position at which it was detected.
///
/// `line` and `col` are zero-based and count characters, not bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
struct LexerError {
    error: LexerErrorType,
    line: isize,
    col: isize,
}

/// A lexed token; every variant carries the exact source text it covers.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    Name(String),
    Class(String),
    SlWhitespace(String),
    MlWhitespace(String),
    SlComment(String),
    String(String),
    LBrace(String),
    RBrace(String),
}

/// The state of the lexer, i.e. which kind of token is currently being built.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Mode {
    Name,
    Class,
    String,
    /// Inside a string, directly after a backslash.
    EscapedString,
    SlWhitespace,
    MlWhitespace,
    SlComment,
}

impl Mode {
    /// Returns the `Token` constructor that corresponds to this mode.
    ///
    /// # Errors
    ///
    /// Returns [`LexerErrorType::InvalidConstructor`] for
    /// [`Mode::EscapedString`], which is only a transient state inside a
    /// string and never forms a token by itself.
    fn token_constructor(&self) -> Result<Box<dyn Fn(String) -> Token>, LexerErrorType> {
        match self {
            Mode::Name => Ok(Box::new(Token::Name)),
            Mode::Class => Ok(Box::new(Token::Class)),
            Mode::String => Ok(Box::new(Token::String)),
            Mode::SlWhitespace => Ok(Box::new(Token::SlWhitespace)),
            Mode::MlWhitespace => Ok(Box::new(Token::MlWhitespace)),
            Mode::SlComment => Ok(Box::new(Token::SlComment)),
            Mode::EscapedString => Err(LexerErrorType::InvalidConstructor),
        }
    }
}

/// Turns the buffered characters into a token of kind `mode`, appends it to
/// `tokens` and empties the buffer.
///
/// The position is passed in explicitly so that an error carries the position
/// at the time of the flush rather than a snapshot taken earlier.
fn flush_token(
    mode: Mode,
    value: &mut Vec<char>,
    tokens: &mut Vec<Token>,
    line: isize,
    col: isize,
) -> Result<(), LexerError> {
    let constructor = mode
        .token_constructor()
        .map_err(|error| LexerError { error, line, col })?;
    let text: String = value.iter().collect();
    value.clear();
    tokens.push(constructor(text));
    Ok(())
}

/// Splits `code` into tokens.
///
/// The lexer starts in single-line-whitespace mode and emits a token whenever
/// the mode changes, so empty whitespace tokens appear between directly
/// adjacent tokens (including before the very first one). Text that is still
/// buffered when the input ends is emitted as a final token.
///
/// # Errors
///
/// Returns a [`LexerError`] holding the zero-based line and column when
/// * a character is not permitted in the current mode
///   ([`LexerErrorType::UnexpectedToken`]), or
/// * the input ends directly after a backslash inside a string
///   ([`LexerErrorType::InvalidConstructor`]).
///
/// NOTE(review): an unterminated string is emitted as a `Token::String`
/// without its closing quote, because the error enum has no end-of-input
/// variant — confirm this is acceptable to callers.
fn lex(code: String) -> Result<Vec<Token>, LexerError> {
    let mut tokens = Vec::new();
    let mut value: Vec<char> = Vec::new();
    let mut mode = Mode::SlWhitespace;
    let mut line: isize = 0;
    let mut col: isize = 0;

    for c in code.chars() {
        // Built per character so it always carries the current position.
        let unexpected = LexerError {
            error: LexerErrorType::UnexpectedToken(c),
            line,
            col,
        };

        match c {
            '.' => {
                match mode {
                    Mode::Name | Mode::Class | Mode::SlWhitespace | Mode::MlWhitespace => {
                        flush_token(mode, &mut value, &mut tokens, line, col)?;
                        mode = Mode::Class;
                    }
                    Mode::String | Mode::SlComment => {}
                    Mode::EscapedString => return Err(unexpected),
                }
                value.push('.');
            }
            '\\' => match mode {
                Mode::String => {
                    value.push('\\');
                    mode = Mode::EscapedString;
                }
                Mode::Name
                | Mode::Class
                | Mode::EscapedString
                | Mode::SlWhitespace
                | Mode::MlWhitespace
                | Mode::SlComment => return Err(unexpected),
            },
            '"' => match mode {
                Mode::String => {
                    // The closing quote belongs to the string token.
                    value.push('"');
                    flush_token(mode, &mut value, &mut tokens, line, col)?;
                    mode = Mode::SlWhitespace;
                }
                Mode::SlWhitespace | Mode::MlWhitespace => {
                    flush_token(mode, &mut value, &mut tokens, line, col)?;
                    mode = Mode::String;
                    // The opening quote belongs to the string token as well.
                    value.push('"');
                }
                Mode::EscapedString => {
                    value.push('"');
                    mode = Mode::String;
                }
                Mode::SlComment => value.push('"'),
                Mode::Name | Mode::Class => return Err(unexpected),
            },
            '{' => match mode {
                Mode::Name | Mode::Class | Mode::MlWhitespace | Mode::SlWhitespace => {
                    flush_token(mode, &mut value, &mut tokens, line, col)?;
                    mode = Mode::SlWhitespace;
                    tokens.push(Token::LBrace(String::from('{')));
                }
                Mode::String | Mode::SlComment => value.push('{'),
                Mode::EscapedString => return Err(unexpected),
            },
            '}' => match mode {
                Mode::Name | Mode::Class | Mode::MlWhitespace | Mode::SlWhitespace => {
                    flush_token(mode, &mut value, &mut tokens, line, col)?;
                    mode = Mode::SlWhitespace;
                    tokens.push(Token::RBrace(String::from('}')));
                }
                Mode::String | Mode::SlComment => value.push('}'),
                Mode::EscapedString => return Err(unexpected),
            },
            ' ' | '\r' => {
                match mode {
                    Mode::Name | Mode::Class => {
                        flush_token(mode, &mut value, &mut tokens, line, col)?;
                        mode = Mode::SlWhitespace;
                    }
                    Mode::String
                    | Mode::SlComment
                    | Mode::MlWhitespace
                    | Mode::SlWhitespace => {}
                    Mode::EscapedString => return Err(unexpected),
                }
                value.push(c);
            }
            '\n' => {
                match mode {
                    Mode::Name | Mode::Class | Mode::SlComment => {
                        flush_token(mode, &mut value, &mut tokens, line, col)?;
                        mode = Mode::MlWhitespace;
                    }
                    // Whitespace gathered so far is kept and upgraded to
                    // multi-line whitespace.
                    Mode::MlWhitespace | Mode::SlWhitespace => mode = Mode::MlWhitespace,
                    Mode::String => {}
                    Mode::EscapedString => return Err(unexpected),
                }
                value.push('\n');
                line += 1;
                // Becomes 0 after the increment at the end of the loop body.
                col = -1;
            }
            '/' => {
                match mode {
                    Mode::String | Mode::SlComment => {}
                    Mode::Name | Mode::Class | Mode::SlWhitespace | Mode::MlWhitespace => {
                        flush_token(mode, &mut value, &mut tokens, line, col)?;
                        mode = Mode::SlComment;
                    }
                    Mode::EscapedString => return Err(unexpected),
                }
                value.push('/');
            }
            'A'..='Z' | 'a'..='z' | '0'..='9' => {
                match mode {
                    Mode::String | Mode::SlComment | Mode::Name | Mode::Class => {}
                    Mode::SlWhitespace | Mode::MlWhitespace => {
                        flush_token(mode, &mut value, &mut tokens, line, col)?;
                        mode = Mode::Name;
                    }
                    Mode::EscapedString => return Err(unexpected),
                }
                value.push(c);
            }
            _ => return Err(unexpected),
        }
        col += 1;
    }

    // Emit whatever is still buffered; without this the last token of the
    // input would be silently dropped.
    if !value.is_empty() {
        flush_token(mode, &mut value, &mut tokens, line, col)?;
    }

    Ok(tokens)
}