add lexer
This commit is contained in:
		
							parent
							
								
									6ede00be64
								
							
						
					
					
						commit
						edec4a2323
					
				
							
								
								
									
										271
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										271
									
								
								src/main.rs
									
									
									
									
									
								
							| @ -1,23 +1,59 @@ | ||||
| #![allow(dead_code)] | ||||
| 
 | ||||
| use std::str::Chars; | ||||
| 
 | ||||
/// The kind of a lexed token.
///
/// The enum is a plain tag with no payload, so it is cheap to copy and can be
/// compared directly (`token.token_type == TokenType::Plus`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenType {
    // Error kinds: reported as tokens rather than aborting the lexer.
    InvalidChar,
    MalformedString,
    MalformedComment,

    // Identifiers and literals.
    Id,
    Int,
    Decimal,
    String,

    // Keywords and reserved words.
    False,
    True,
    If,
    Else,
    While,
    For,
    In,
    Break,
    Continue,
    Function,
    Return,
    End,
    Underscore,

    // Arithmetic operators.
    Plus,
    Minus,
    Asterisk,
    Slash,
    Percent,

    // Compound-assignment operators (operator immediately followed by `=`).
    PlusEqual,
    MinusEqual,
    AsteriskEqual,
    SlashEqual,
    PercentEqual,

    // Delimiters and punctuation.
    LParen,
    RParen,
    LBrace,
    RBrace,
    LBracket,
    RBracket,
    Dot,
    Comma,
    Colon,
    Semicolon,
}
| 
 | ||||
/// A location in the source text.
///
/// `index` is the offset into the source string; `line` and `col` are the
/// human-readable coordinates the lexer tracks alongside it (both start at 1).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Position {
    index: usize,
    line: i32,
    col: i32,
}
| 
 | ||||
| #[derive(Debug)] | ||||
| struct Token { | ||||
|     token_type: TokenType, | ||||
|     pos: Position, | ||||
| @ -26,12 +62,226 @@ struct Token { | ||||
| 
 | ||||
| struct Lexer<'a> { | ||||
|     text: &'a str, | ||||
|     i: i32, | ||||
|     chars: Chars<'a>, | ||||
|     current_char: Option<char>, | ||||
|     index: usize, | ||||
|     line: i32, | ||||
|     col: i32, | ||||
| } | ||||
| 
 | ||||
| impl<'a> Lexer<'a> { | ||||
|     pub fn new(text: &'a str) -> Self { | ||||
|         Self { text, i: 0 } | ||||
|         let mut chars = text.chars(); | ||||
|         let first_char = chars.next(); | ||||
|         Self { | ||||
|             text, | ||||
|             chars, | ||||
|             current_char: first_char, | ||||
|             index: 0, | ||||
|             line: 1, | ||||
|             col: 1, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn next_token(&mut self) -> Option<Token> { | ||||
|         if self.done() { | ||||
|             return None; | ||||
|         } | ||||
|         match self.current() { | ||||
|             ' ' | '\t' | '\r' | '\n' => self.skip_whitespace(), | ||||
|             '1'..='9' => Some(self.int_token()), | ||||
|             'a'..='z' | 'A'..='Z' | '_' => Some(self.id_token()), | ||||
|             '"' => Some(self.string_token()), | ||||
|             '+' => { | ||||
|                 Some(self.single_or_double_char_token(TokenType::Plus, '=', TokenType::PlusEqual)) | ||||
|             } | ||||
|             '-' => { | ||||
|                 Some(self.single_or_double_char_token(TokenType::Minus, '=', TokenType::MinusEqual)) | ||||
|             } | ||||
|             '*' => Some(self.single_or_double_char_token( | ||||
|                 TokenType::Asterisk, | ||||
|                 '=', | ||||
|                 TokenType::AsteriskEqual, | ||||
|             )), | ||||
|             '/' => self.slash_token(), | ||||
|             '%' => Some(self.single_or_double_char_token( | ||||
|                 TokenType::Percent, | ||||
|                 '=', | ||||
|                 TokenType::PercentEqual, | ||||
|             )), | ||||
|             '(' => Some(self.step_and_token(TokenType::LParen, self.pos())), | ||||
|             ')' => Some(self.step_and_token(TokenType::LParen, self.pos())), | ||||
|             '.' => Some(self.dot_token()), | ||||
|             _ => Some(self.step_and_token(TokenType::InvalidChar, self.pos())), | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn skip_whitespace(&mut self) -> Option<Token> { | ||||
|         while !self.done() && matches!(self.current(), ' ' | '\t' | '\r' | '\n') { | ||||
|             self.step() | ||||
|         } | ||||
|         self.next_token() | ||||
|     } | ||||
| 
 | ||||
|     fn int_token(&mut self) -> Token { | ||||
|         let start = self.pos(); | ||||
|         self.step(); | ||||
|         while !self.done() && matches!(self.current(), '0'..='9') { | ||||
|             self.step(); | ||||
|         } | ||||
|         self.token(TokenType::Int, start) | ||||
|     } | ||||
| 
 | ||||
|     fn string_token(&mut self) -> Token { | ||||
|         let start = self.pos(); | ||||
|         self.step(); | ||||
|         let mut escaped = false; | ||||
|         while !self.done() && (self.current() != '"' || escaped) { | ||||
|             escaped = self.current() == '\\' && !escaped; | ||||
|             self.step(); | ||||
|         } | ||||
|         if self.done() || self.current() != '"' { | ||||
|             self.step_and_token(TokenType::MalformedString, start) | ||||
|         } else { | ||||
|             self.step_and_token(TokenType::String, start) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn id_token(&mut self) -> Token { | ||||
|         let start = self.pos(); | ||||
|         self.step(); | ||||
|         while !self.done() && matches!(self.current(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') { | ||||
|             self.step(); | ||||
|         } | ||||
|         match &self.text[start.index..self.index] { | ||||
|             "false" => self.token(TokenType::False, start), | ||||
|             "true" => self.token(TokenType::True, start), | ||||
|             "if" => self.token(TokenType::True, start), | ||||
|             "else" => self.token(TokenType::True, start), | ||||
|             "while" => self.token(TokenType::True, start), | ||||
|             "for" => self.token(TokenType::True, start), | ||||
|             "in" => self.token(TokenType::True, start), | ||||
|             "break" => self.token(TokenType::True, start), | ||||
|             "continue" => self.token(TokenType::True, start), | ||||
|             "function" => self.token(TokenType::True, start), | ||||
|             "return" => self.token(TokenType::True, start), | ||||
|             "end" => self.token(TokenType::True, start), | ||||
|             "underscore" => self.token(TokenType::True, start), | ||||
|             _ => self.token(TokenType::Id, start), | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn single_or_double_char_token( | ||||
|         &mut self, | ||||
|         single_type: TokenType, | ||||
|         double_char: char, | ||||
|         double_type: TokenType, | ||||
|     ) -> Token { | ||||
|         let start = self.pos(); | ||||
|         self.step(); | ||||
|         if !self.done() && self.current() == double_char { | ||||
|             self.step_and_token(double_type, start) | ||||
|         } else { | ||||
|             self.token(single_type, start) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn slash_token(&mut self) -> Option<Token> { | ||||
|         let start = self.pos(); | ||||
|         self.step(); | ||||
|         if !self.done() && self.current() == '/' { | ||||
|             self.step(); | ||||
|             while !self.done() && self.current() != '\n' { | ||||
|                 self.step(); | ||||
|             } | ||||
|             self.next_token() | ||||
|         } else if !self.done() && self.current() == '*' { | ||||
|             self.step(); | ||||
|             let mut depth = 1; | ||||
|             let mut last_char: Option<char> = None; | ||||
|             while !self.done() { | ||||
|                 match (last_char, self.current()) { | ||||
|                     (Some('/'), '*') => { | ||||
|                         depth += 1; | ||||
|                     } | ||||
|                     (Some('*'), '/') => { | ||||
|                         depth -= 1; | ||||
|                         if depth == 0 { | ||||
|                             self.step(); | ||||
|                             break; | ||||
|                         } | ||||
|                     } | ||||
|                     _ => {} | ||||
|                 } | ||||
|                 last_char = Some(self.current()); | ||||
|                 self.step(); | ||||
|             } | ||||
|             if depth != 0 { | ||||
|                 Some(self.token(TokenType::MalformedComment, start)) | ||||
|             } else { | ||||
|                 self.next_token() | ||||
|             } | ||||
|         } else if !self.done() && self.current() == '=' { | ||||
|             return Some(self.step_and_token(TokenType::SlashEqual, start)); | ||||
|         } else { | ||||
|             return Some(self.token(TokenType::Slash, start)); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn dot_token(&mut self) -> Token { | ||||
|         let start = self.pos(); | ||||
|         self.step(); | ||||
|         if !self.done() && matches!(self.current(), '0'..='9') { | ||||
|             self.step(); | ||||
|             while !self.done() && matches!(self.current(), '0'..='9') { | ||||
|                 self.step(); | ||||
|             } | ||||
|             self.token(TokenType::Decimal, start) | ||||
|         } else { | ||||
|             self.token(TokenType::Dot, start) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn step_and_token(&mut self, token_type: TokenType, start: Position) -> Token { | ||||
|         self.step(); | ||||
|         self.token(token_type, start) | ||||
|     } | ||||
| 
 | ||||
|     fn token(&self, token_type: TokenType, start: Position) -> Token { | ||||
|         Token { | ||||
|             token_type, | ||||
|             length: self.index - start.index, | ||||
|             pos: start, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn pos(&self) -> Position { | ||||
|         Position { | ||||
|             index: self.index, | ||||
|             line: self.line, | ||||
|             col: self.col, | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn done(&self) -> bool { | ||||
|         self.current_char.is_none() | ||||
|     } | ||||
| 
 | ||||
|     fn current(&self) -> char { | ||||
|         self.current_char.expect("done() not checked") | ||||
|     } | ||||
| 
 | ||||
|     fn step(&mut self) { | ||||
|         self.index += 1; | ||||
|         if !self.done() { | ||||
|             if self.current() == '\n' { | ||||
|                 self.line += 1; | ||||
|                 self.col = 1; | ||||
|             } else { | ||||
|                 self.col += 1; | ||||
|             } | ||||
|         } | ||||
|         self.current_char = self.chars.next(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| @ -39,10 +289,21 @@ impl<'a> Iterator for Lexer<'a> { | ||||
|     type Item = Token; | ||||
| 
 | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         todo!() | ||||
|         self.next_token() | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| fn main() { | ||||
|     println!("Hello, world!"); | ||||
/// A literal or identifier expression node.
///
/// Derives `Debug` so nodes can be printed while developing the parser, and
/// `Clone`/`PartialEq` so they can be duplicated and compared in tests.
/// (`Eq` is not derivable because of the `f64` payload.)
#[derive(Debug, Clone, PartialEq)]
enum Expr {
    /// An identifier, by name.
    Id(String),
    /// An integer literal.
    Int(i64),
    /// A floating-point literal.
    Float(f64),
    /// A string literal.
    String(String),
}
| 
 | ||||
| fn main() { | ||||
|     let text = "3.14 \"foo\" false true ( ) + += /* 1 /* 2 */ 3 */  // 4 \n 5"; | ||||
|     let lexer = Lexer::new(text); | ||||
|     lexer.for_each(|token| { | ||||
|         println!("{:?}", token); | ||||
|     }) | ||||
| } | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user