extract into files and indirect lexer

This commit is contained in:
SimonFJ20 2023-03-15 16:23:49 +01:00
parent c9ed3333f1
commit 8705272a77
5 changed files with 875 additions and 853 deletions

75
src/ast.rs Normal file
View File

@ -0,0 +1,75 @@
use crate::tokens::Position;
/// A syntax-tree value paired with a source position.
#[derive(Debug)]
pub struct Node<T> {
/// The wrapped value, e.g. an `Expr`.
pub value: T,
/// Position recorded when the node was built.
pub pos: Position,
}
/// Expression forms of the syntax tree.
#[derive(Debug)]
pub enum Expr {
/// Unit value.
Unit,
/// Identifier, stored as its source text.
Id(String),
/// Integer literal.
Int(i64),
/// Floating-point literal.
Float(f64),
/// String literal. NOTE(review): no producer is visible in this file — confirm
/// whether the payload is the raw or the unescaped text.
String(String),
/// Boolean literal.
Bool(bool),
/// Array literal holding its element expressions.
Array(Vec<Node<Expr>>),
/// Object literal holding its entries.
Object(Vec<ObjectEntry>),
/// Tuple holding its element expressions.
Tuple(Vec<Node<Expr>>),
/// Member access: `subject` followed by a member name in `value`.
Member {
subject: Box<Node<Expr>>,
value: String,
},
/// Index access: `subject` indexed by the expression in `value`.
Index {
subject: Box<Node<Expr>>,
value: Box<Node<Expr>>,
},
/// Call of `subject` with `arguments` in source order.
Call {
subject: Box<Node<Expr>>,
arguments: Vec<Node<Expr>>,
},
/// Prefix operator `unary_type` applied to `subject`.
Unary {
unary_type: UnaryType,
subject: Box<Node<Expr>>,
},
/// Infix operator `binary_type` applied to `left` and `right`.
Binary {
binary_type: BinaryType,
left: Box<Node<Expr>>,
right: Box<Node<Expr>>,
},
}
/// One entry of an `Expr::Object`.
#[derive(Debug)]
pub enum ObjectEntry {
/// A pair of two expressions — presumably key then value; TODO confirm.
/// NOTE(review): the first element carries a `Node` (with position) while the
/// second is a bare `Expr` — confirm the asymmetry is intended.
Pair(Box<Node<Expr>>, Box<Expr>),
}
/// Prefix operators carried by `Expr::Unary`.
///
/// The enum is fieldless, so it is `Copy` and comparable; this lets callers
/// match, compare and reuse operator kinds without moving them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryType {
    /// Logical negation.
    Not,
    /// Arithmetic negation (prefix `-`).
    Negate,
    /// Shared reference (prefix `&`).
    Reference,
    /// Mutable reference (prefix `&` followed by `mut`).
    ReferenceMut,
    /// Dereference (prefix `*`).
    Dereference,
}
/// Infix operators carried by `Expr::Binary`.
///
/// The enum is fieldless, so it is `Copy` and comparable; this lets callers
/// match, compare and reuse operator kinds without moving them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryType {
    /// `**`
    Exponentiate,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulo,
    /// `+`
    Add,
    /// `-`
    Subtract,
    /// `<`
    LT,
    /// `<=`
    LTE,
    /// `>`
    GT,
    /// `>=`
    GTE,
    /// `in`
    In,
    /// `==`
    Equal,
    /// `!=`
    Inequal,
    /// Logical conjunction.
    And,
    /// Logical disjunction.
    Or,
}

281
src/lexer.rs Normal file
View File

@ -0,0 +1,281 @@
use crate::tokens::{Position, PositionKnowing, Token, TokenType};
use std::str::Chars;
/// Lexer state over a borrowed source text.
pub struct Lexer<'a> {
/// The full source text; `id_token` slices it to recognise keywords.
text: &'a str,
/// Iterator supplying the characters that follow `current_char`.
chars: Chars<'a>,
/// Character under the cursor; `None` once the input is exhausted.
current_char: Option<char>,
/// Offset of the cursor, advanced by `step`.
index: usize,
/// Line of the cursor; starts at 1 and is incremented by `step` after each '\n'.
line: i32,
/// Column of the cursor; starts at 1 and is reset to 1 by `step` after each '\n'.
col: i32,
}
impl<'a> Lexer<'a> {
pub fn new(text: &'a str) -> Self {
let mut chars = text.chars();
let first_char = chars.next();
Self {
text,
chars,
current_char: first_char,
index: 0,
line: 1,
col: 1,
}
}
fn next_token(&mut self) -> Option<Token> {
if self.done() {
return None;
}
match self.current() {
' ' | '\t' | '\r' | '\n' => self.skip_whitespace(),
'1'..='9' => Some(self.int_token()),
'a'..='z' | 'A'..='Z' | '_' => Some(self.id_token()),
'"' => Some(self.string_token()),
'+' => {
Some(self.single_or_double_char_token(TokenType::Plus, '=', TokenType::PlusEqual))
}
'-' => {
Some(self.single_or_double_char_token(TokenType::Minus, '=', TokenType::MinusEqual))
}
'*' => Some(self.asterisk_token()),
'/' => self.slash_token(),
'%' => Some(self.single_or_double_char_token(
TokenType::Percent,
'=',
TokenType::PercentEqual,
)),
'=' => {
Some(self.single_or_double_char_token(TokenType::Equal, '=', TokenType::EqualEqual))
}
'!' => Some(self.single_or_double_char_token(
TokenType::Exclamation,
'=',
TokenType::ExclamationEqual,
)),
'<' => Some(self.single_or_double_char_token(
TokenType::LessThan,
'=',
TokenType::LessThanEqual,
)),
'>' => Some(self.single_or_double_char_token(
TokenType::GreaterThan,
'=',
TokenType::GreaterThanEqual,
)),
'(' => Some(self.step_and_token(TokenType::LParen, self.pos())),
')' => Some(self.step_and_token(TokenType::RParen, self.pos())),
'{' => Some(self.step_and_token(TokenType::LBrace, self.pos())),
'}' => Some(self.step_and_token(TokenType::RBrace, self.pos())),
'[' => Some(self.step_and_token(TokenType::LBracket, self.pos())),
']' => Some(self.step_and_token(TokenType::RBracket, self.pos())),
'.' => Some(self.dot_token()),
',' => Some(self.step_and_token(TokenType::Comma, self.pos())),
':' => Some(self.step_and_token(TokenType::Colon, self.pos())),
';' => Some(self.step_and_token(TokenType::Semicolon, self.pos())),
'&' => Some(self.step_and_token(TokenType::Ampersand, self.pos())),
_ => Some(self.step_and_token(TokenType::InvalidChar, self.pos())),
}
}
fn skip_whitespace(&mut self) -> Option<Token> {
while !self.done() && matches!(self.current(), ' ' | '\t' | '\r' | '\n') {
self.step()
}
self.next_token()
}
fn int_token(&mut self) -> Token {
let start = self.pos();
self.step();
while !self.done() && matches!(self.current(), '0'..='9') {
self.step();
}
self.token(TokenType::Int, start)
}
fn string_token(&mut self) -> Token {
let start = self.pos();
self.step();
let mut escaped = false;
while !self.done() && (self.current() != '"' || escaped) {
escaped = self.current() == '\\' && !escaped;
self.step();
}
if self.done() || self.current() != '"' {
self.step_and_token(TokenType::MalformedString, start)
} else {
self.step_and_token(TokenType::String, start)
}
}
fn id_token(&mut self) -> Token {
let start = self.pos();
self.step();
while !self.done() && matches!(self.current(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
self.step();
}
self.token(
match &self.text[start.index..self.index] {
"false" => TokenType::False,
"true" => TokenType::True,
"let" => TokenType::Let,
"mut" => TokenType::Mut,
"if" => TokenType::If,
"else" => TokenType::Else,
"while" => TokenType::While,
"for" => TokenType::For,
"in" => TokenType::In,
"break" => TokenType::Break,
"continue" => TokenType::Continue,
"fn" => TokenType::Fn,
"return" => TokenType::Return,
"end" => TokenType::End,
"_" => TokenType::Underscore,
_ => TokenType::Id,
},
start,
)
}
fn single_or_double_char_token(
&mut self,
single_type: TokenType,
double_char: char,
double_type: TokenType,
) -> Token {
let start = self.pos();
self.step();
if !self.done() && self.current() == double_char {
self.step_and_token(double_type, start)
} else {
self.token(single_type, start)
}
}
fn asterisk_token(&mut self) -> Token {
let start = self.pos();
self.step();
if !self.done() && self.current() == '*' {
self.step();
if !self.done() && self.current() == '=' {
self.step_and_token(TokenType::DoubleAsteriskEqual, start)
} else {
self.token(TokenType::DoubleAsterisk, start)
}
} else if !self.done() && self.current() == '=' {
self.step_and_token(TokenType::AsteriskEqual, start)
} else {
self.token(TokenType::Asterisk, start)
}
}
fn slash_token(&mut self) -> Option<Token> {
let start = self.pos();
self.step();
if !self.done() && self.current() == '/' {
self.step();
while !self.done() && self.current() != '\n' {
self.step();
}
self.next_token()
} else if !self.done() && self.current() == '*' {
self.step();
let mut depth = 1;
let mut last_char: Option<char> = None;
while !self.done() {
match (last_char, self.current()) {
(Some('/'), '*') => {
depth += 1;
}
(Some('*'), '/') => {
depth -= 1;
if depth == 0 {
self.step();
break;
}
}
_ => {}
}
last_char = Some(self.current());
self.step();
}
if depth != 0 {
Some(self.token(TokenType::MalformedComment, start))
} else {
self.next_token()
}
} else if !self.done() && self.current() == '=' {
return Some(self.step_and_token(TokenType::SlashEqual, start));
} else {
return Some(self.token(TokenType::Slash, start));
}
}
fn dot_token(&mut self) -> Token {
let start = self.pos();
self.step();
if !self.done() && matches!(self.current(), '0'..='9') {
self.step();
while !self.done() && matches!(self.current(), '0'..='9') {
self.step();
}
self.token(TokenType::Decimal, start)
} else {
self.token(TokenType::Dot, start)
}
}
fn step_and_token(&mut self, token_type: TokenType, start: Position) -> Token {
self.step();
self.token(token_type, start)
}
fn token(&self, token_type: TokenType, start: Position) -> Token {
Token {
token_type,
length: self.index - start.index,
pos: start,
}
}
fn done(&self) -> bool {
self.current_char.is_none()
}
fn current(&self) -> char {
self.current_char.expect("done() checked")
}
fn step(&mut self) {
self.index += 1;
if !self.done() {
if self.current() == '\n' {
self.line += 1;
self.col = 1;
} else {
self.col += 1;
}
}
self.current_char = self.chars.next();
}
}
/// Lets a `Lexer` be consumed as an iterator of tokens.
impl<'a> Iterator for Lexer<'a> {
type Item = Token;
/// Delegates to `next_token`; yields `None` once the input is exhausted.
fn next(&mut self) -> Option<Self::Item> {
self.next_token()
}
}
impl<'a> PositionKnowing for Lexer<'a> {
    /// Snapshots the cursor (index, line, column) as a `Position`.
    fn pos(&self) -> Position {
        let (index, line, col) = (self.index, self.line, self.col);
        Position { index, line, col }
    }
}

View File

@ -1,859 +1,12 @@
#![allow(dead_code)]
use std::str::Chars;
mod ast;
mod lexer;
mod parser;
mod tokens;
/// A location in the source text.
///
/// All fields are plain integers, so the type is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Position {
    /// Offset into the source text.
    pub index: usize,
    /// Line number; the lexer starts counting at 1.
    pub line: i32,
    /// Column number; the lexer starts counting at 1.
    pub col: i32,
}

impl Position {
    /// Creates a position from its three components.
    pub fn new(index: usize, line: i32, col: i32) -> Self {
        Self { index, line, col }
    }
}
/// Kinds of token the lexer can report.
///
/// The enum is fieldless, so it is `Copy` and `Eq` in addition to the
/// original `Debug` and `PartialEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenType {
    // Error tokens: reported in-band instead of aborting the lexer.
    /// A character that starts no known token.
    InvalidChar,
    /// A string literal without a closing quote.
    MalformedString,
    /// A block comment that is still open at end of input.
    MalformedComment,

    // Literals and names.
    /// Identifier that is not a keyword.
    Id,
    /// Run of decimal digits.
    Int,
    /// `.` immediately followed by digits.
    Decimal,
    /// Double-quoted string literal.
    String,

    // Keywords.
    False,
    True,
    Let,
    Mut,
    If,
    Else,
    While,
    For,
    In,
    Break,
    Continue,
    Fn,
    Return,
    End,
    /// Logical negation, consumed by the parser as a prefix operator.
    Not,
    /// Logical conjunction, consumed by the parser as an infix operator.
    And,
    /// Logical disjunction, consumed by the parser as an infix operator.
    Or,
    /// A lone `_`.
    Underscore,

    // Operators.
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Asterisk,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `**`
    DoubleAsterisk,
    /// `=`
    Equal,
    /// `!`
    Exclamation,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `+=`
    PlusEqual,
    /// `-=`
    MinusEqual,
    /// `*=`
    AsteriskEqual,
    /// `/=`
    SlashEqual,
    /// `%=`
    PercentEqual,
    /// `**=`
    DoubleAsteriskEqual,
    /// `==`
    EqualEqual,
    /// `!=`
    ExclamationEqual,
    /// `<=`
    LessThanEqual,
    /// `>=`
    GreaterThanEqual,

    // Delimiters and punctuation.
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `.`
    Dot,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `;`
    Semicolon,
    /// `&`
    Ampersand,
}
/// A lexed token: its kind plus the span of source text it covers.
#[derive(Debug)]
struct Token {
/// What kind of token this is.
pub token_type: TokenType,
/// Position of the token's first character.
pub pos: Position,
/// Extent of the token, computed by `Lexer::token` as the lexer index minus `pos.index`.
pub length: usize,
}
/// Lexer state over a borrowed source text.
struct Lexer<'a> {
/// The full source text; `id_token` slices it to recognise keywords.
text: &'a str,
/// Iterator supplying the characters that follow `current_char`.
chars: Chars<'a>,
/// Character under the cursor; `None` once the input is exhausted.
current_char: Option<char>,
/// Offset of the cursor, advanced by `step`.
index: usize,
/// Line of the cursor; starts at 1 and is incremented by `step` after each '\n'.
line: i32,
/// Column of the cursor; starts at 1 and is reset to 1 by `step` after each '\n'.
col: i32,
}
impl<'a> Lexer<'a> {
    /// Creates a lexer over `text`, positioned on its first character
    /// (index 0, line 1, column 1).
    pub fn new(text: &'a str) -> Self {
        let mut chars = text.chars();
        let first_char = chars.next();
        Self {
            text,
            chars,
            current_char: first_char,
            index: 0,
            line: 1,
            col: 1,
        }
    }

    /// Produces the next token, or `None` once the input is exhausted.
    ///
    /// Whitespace and comments are skipped. A character that starts no known
    /// token is reported as `TokenType::InvalidChar` rather than aborting.
    fn next_token(&mut self) -> Option<Token> {
        if self.done() {
            return None;
        }
        match self.current() {
            ' ' | '\t' | '\r' | '\n' => self.skip_whitespace(),
            // '0' has to start an integer as well; otherwise a literal zero
            // would be reported as `InvalidChar`.
            '0'..='9' => Some(self.int_token()),
            'a'..='z' | 'A'..='Z' | '_' => Some(self.id_token()),
            '"' => Some(self.string_token()),
            '+' => {
                Some(self.single_or_double_char_token(TokenType::Plus, '=', TokenType::PlusEqual))
            }
            '-' => {
                Some(self.single_or_double_char_token(TokenType::Minus, '=', TokenType::MinusEqual))
            }
            '*' => Some(self.asterisk_token()),
            '/' => self.slash_token(),
            '%' => Some(self.single_or_double_char_token(
                TokenType::Percent,
                '=',
                TokenType::PercentEqual,
            )),
            '=' => {
                Some(self.single_or_double_char_token(TokenType::Equal, '=', TokenType::EqualEqual))
            }
            '!' => Some(self.single_or_double_char_token(
                TokenType::Exclamation,
                '=',
                TokenType::ExclamationEqual,
            )),
            '<' => Some(self.single_or_double_char_token(
                TokenType::LessThan,
                '=',
                TokenType::LessThanEqual,
            )),
            '>' => Some(self.single_or_double_char_token(
                TokenType::GreaterThan,
                '=',
                TokenType::GreaterThanEqual,
            )),
            '(' => Some(self.step_and_token(TokenType::LParen, self.pos())),
            ')' => Some(self.step_and_token(TokenType::RParen, self.pos())),
            '{' => Some(self.step_and_token(TokenType::LBrace, self.pos())),
            '}' => Some(self.step_and_token(TokenType::RBrace, self.pos())),
            '[' => Some(self.step_and_token(TokenType::LBracket, self.pos())),
            ']' => Some(self.step_and_token(TokenType::RBracket, self.pos())),
            '.' => Some(self.dot_token()),
            ',' => Some(self.step_and_token(TokenType::Comma, self.pos())),
            ':' => Some(self.step_and_token(TokenType::Colon, self.pos())),
            ';' => Some(self.step_and_token(TokenType::Semicolon, self.pos())),
            '&' => Some(self.step_and_token(TokenType::Ampersand, self.pos())),
            _ => Some(self.step_and_token(TokenType::InvalidChar, self.pos())),
        }
    }

    /// Skips a run of whitespace, then continues with the next token.
    fn skip_whitespace(&mut self) -> Option<Token> {
        while !self.done() && matches!(self.current(), ' ' | '\t' | '\r' | '\n') {
            self.step()
        }
        self.next_token()
    }

    /// Lexes a run of decimal digits as `TokenType::Int`.
    fn int_token(&mut self) -> Token {
        let start = self.pos();
        self.step();
        while !self.done() && matches!(self.current(), '0'..='9') {
            self.step();
        }
        self.token(TokenType::Int, start)
    }

    /// Lexes a double-quoted string; a backslash escapes the following character.
    ///
    /// Returns `TokenType::MalformedString` when the input ends before the
    /// closing quote; the token then spans exactly the remaining input.
    fn string_token(&mut self) -> Token {
        let start = self.pos();
        self.step();
        let mut escaped = false;
        while !self.done() && (self.current() != '"' || escaped) {
            escaped = self.current() == '\\' && !escaped;
            self.step();
        }
        // The loop only stops at end of input or on an unescaped closing quote.
        if self.done() {
            self.token(TokenType::MalformedString, start)
        } else {
            self.step_and_token(TokenType::String, start)
        }
    }

    /// Lexes an identifier and classifies it as a keyword where applicable.
    fn id_token(&mut self) -> Token {
        let start = self.pos();
        self.step();
        while !self.done() && matches!(self.current(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
            self.step();
        }
        self.token(
            match &self.text[start.index..self.index] {
                "false" => TokenType::False,
                "true" => TokenType::True,
                "let" => TokenType::Let,
                "mut" => TokenType::Mut,
                "if" => TokenType::If,
                "else" => TokenType::Else,
                "while" => TokenType::While,
                "for" => TokenType::For,
                "in" => TokenType::In,
                "break" => TokenType::Break,
                "continue" => TokenType::Continue,
                "fn" => TokenType::Fn,
                "return" => TokenType::Return,
                "end" => TokenType::End,
                // The parser matches on these three token types; without the
                // keywords its `not`/`and`/`or` branches are unreachable.
                "not" => TokenType::Not,
                "and" => TokenType::And,
                "or" => TokenType::Or,
                "_" => TokenType::Underscore,
                _ => TokenType::Id,
            },
            start,
        )
    }

    /// Lexes an operator that is either one character (`single_type`) or that
    /// character followed by `double_char` (`double_type`).
    fn single_or_double_char_token(
        &mut self,
        single_type: TokenType,
        double_char: char,
        double_type: TokenType,
    ) -> Token {
        let start = self.pos();
        self.step();
        if !self.done() && self.current() == double_char {
            self.step_and_token(double_type, start)
        } else {
            self.token(single_type, start)
        }
    }

    /// Lexes `*`, `*=`, `**` or `**=`.
    fn asterisk_token(&mut self) -> Token {
        let start = self.pos();
        self.step();
        if !self.done() && self.current() == '*' {
            self.step();
            if !self.done() && self.current() == '=' {
                self.step_and_token(TokenType::DoubleAsteriskEqual, start)
            } else {
                self.token(TokenType::DoubleAsterisk, start)
            }
        } else if !self.done() && self.current() == '=' {
            self.step_and_token(TokenType::AsteriskEqual, start)
        } else {
            self.token(TokenType::Asterisk, start)
        }
    }

    /// Lexes `/` and `/=`, and skips `//` line comments and nestable
    /// `/* ... */` block comments.
    ///
    /// Returns `TokenType::MalformedComment` when a block comment is still
    /// open at end of input.
    fn slash_token(&mut self) -> Option<Token> {
        let start = self.pos();
        self.step();
        if !self.done() && self.current() == '/' {
            self.step();
            while !self.done() && self.current() != '\n' {
                self.step();
            }
            self.next_token()
        } else if !self.done() && self.current() == '*' {
            self.step();
            let mut depth = 1;
            let mut last_char: Option<char> = None;
            while !self.done() {
                match (last_char, self.current()) {
                    (Some('/'), '*') => {
                        depth += 1;
                        // A character belongs to at most one delimiter, so the
                        // `*` of `/*` must not also open a following `*/`.
                        last_char = None;
                    }
                    (Some('*'), '/') => {
                        depth -= 1;
                        if depth == 0 {
                            self.step();
                            break;
                        }
                        // Likewise the `/` of `*/` must not also open `/*`.
                        last_char = None;
                    }
                    (_, current) => last_char = Some(current),
                }
                self.step();
            }
            if depth != 0 {
                Some(self.token(TokenType::MalformedComment, start))
            } else {
                self.next_token()
            }
        } else if !self.done() && self.current() == '=' {
            Some(self.step_and_token(TokenType::SlashEqual, start))
        } else {
            Some(self.token(TokenType::Slash, start))
        }
    }

    /// Lexes `.` alone as `TokenType::Dot`, or `.` followed by digits as
    /// `TokenType::Decimal`.
    fn dot_token(&mut self) -> Token {
        let start = self.pos();
        self.step();
        if !self.done() && matches!(self.current(), '0'..='9') {
            self.step();
            while !self.done() && matches!(self.current(), '0'..='9') {
                self.step();
            }
            self.token(TokenType::Decimal, start)
        } else {
            self.token(TokenType::Dot, start)
        }
    }

    /// Consumes the current character, then builds a token spanning from
    /// `start` to the cursor.
    fn step_and_token(&mut self, token_type: TokenType, start: Position) -> Token {
        self.step();
        self.token(token_type, start)
    }

    /// Builds a token spanning from `start` to the cursor.
    fn token(&self, token_type: TokenType, start: Position) -> Token {
        Token {
            token_type,
            length: self.index - start.index,
            pos: start,
        }
    }

    /// Snapshots the cursor (index, line, column) as a `Position`.
    pub fn pos(&self) -> Position {
        Position {
            index: self.index,
            line: self.line,
            col: self.col,
        }
    }

    /// Returns `true` once every character has been consumed.
    fn done(&self) -> bool {
        self.current_char.is_none()
    }

    /// Returns the character under the cursor.
    ///
    /// # Panics
    /// Panics when called at end of input; callers check `done()` first.
    fn current(&self) -> char {
        self.current_char.expect("done() checked")
    }

    /// Advances the cursor by one character, updating index, line and column.
    ///
    /// `index` is kept as a byte offset because it is used to slice `text`;
    /// counting characters would mis-slice (or panic) after any multi-byte
    /// character. At end of input this is a no-op, so token lengths never
    /// extend past the text.
    fn step(&mut self) {
        let current = match self.current_char {
            Some(current) => current,
            None => return,
        };
        self.index += current.len_utf8();
        if current == '\n' {
            self.line += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
        self.current_char = self.chars.next();
    }
}
/// Lets a `Lexer` be consumed as an iterator of tokens.
impl<'a> Iterator for Lexer<'a> {
type Item = Token;
/// Delegates to `next_token`; yields `None` once the input is exhausted.
fn next(&mut self) -> Option<Self::Item> {
self.next_token()
}
}
/// A syntax-tree value paired with a source position.
#[derive(Debug)]
struct Node<T> {
/// The wrapped value, e.g. an `Expr`.
pub value: T,
/// Position recorded when the node was built; the parser fills it from `lexer.pos()`.
pub pos: Position,
}
/// Expression forms of the syntax tree.
#[derive(Debug)]
enum Expr {
/// Unit value.
Unit,
/// Identifier, stored as its source text.
Id(String),
/// Integer literal.
Int(i64),
/// Floating-point literal.
Float(f64),
/// String literal. NOTE(review): the parser below never builds this variant —
/// confirm whether the payload is the raw or the unescaped text.
String(String),
/// Boolean literal.
Bool(bool),
/// Array literal holding its element expressions.
Array(Vec<Node<Expr>>),
/// Object literal holding its entries.
Object(Vec<ObjectEntry>),
/// Tuple holding its element expressions.
Tuple(Vec<Node<Expr>>),
/// Member access: `subject` followed by a member name in `value`.
Member {
subject: Box<Node<Expr>>,
value: String,
},
/// Index access: `subject` indexed by the expression in `value`.
Index {
subject: Box<Node<Expr>>,
value: Box<Node<Expr>>,
},
/// Call of `subject` with `arguments` in source order.
Call {
subject: Box<Node<Expr>>,
arguments: Vec<Node<Expr>>,
},
/// Prefix operator `unary_type` applied to `subject`.
Unary {
unary_type: UnaryType,
subject: Box<Node<Expr>>,
},
/// Infix operator `binary_type` applied to `left` and `right`.
Binary {
binary_type: BinaryType,
left: Box<Node<Expr>>,
right: Box<Node<Expr>>,
},
}
/// One entry of an `Expr::Object`.
#[derive(Debug)]
enum ObjectEntry {
/// A pair of two expressions — presumably key then value; TODO confirm.
/// NOTE(review): the first element carries a `Node` (with position) while the
/// second is a bare `Expr` — confirm the asymmetry is intended.
Pair(Box<Node<Expr>>, Box<Expr>),
}
/// Prefix operators carried by `Expr::Unary`.
///
/// The enum is fieldless, so it is `Copy` and comparable; this lets callers
/// match, compare and reuse operator kinds without moving them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum UnaryType {
    /// Logical negation.
    Not,
    /// Arithmetic negation (prefix `-`).
    Negate,
    /// Shared reference (prefix `&`).
    Reference,
    /// Mutable reference (prefix `&` followed by `mut`).
    ReferenceMut,
    /// Dereference (prefix `*`).
    Dereference,
}
/// Infix operators carried by `Expr::Binary`.
///
/// The enum is fieldless, so it is `Copy` and comparable; this lets callers
/// match, compare and reuse operator kinds without moving them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BinaryType {
    /// `**`
    Exponentiate,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulo,
    /// `+`
    Add,
    /// `-`
    Subtract,
    /// `<`
    LT,
    /// `<=`
    LTE,
    /// `>`
    GT,
    /// `>=`
    GTE,
    /// `in`
    In,
    /// `==`
    Equal,
    /// `!=`
    Inequal,
    /// Logical conjunction.
    And,
    /// Logical disjunction.
    Or,
}
/// Error returned when parsing fails.
#[derive(Debug)]
struct ParserError {
/// Lexer position at the time the error was raised.
pos: Position,
/// Human-readable description, e.g. "expected value".
message: String,
}
/// Expression parser with one token of lookahead.
struct Parser<'a> {
/// Source text; `token_string` slices token text out of it.
text: &'a str,
/// Token source; also queried for the current position.
lexer: Lexer<'a>,
/// Lookahead token; `None` once the lexer is exhausted.
current_token: Option<Token>,
}
impl<'a> Parser<'a> {
pub fn new(text: &'a str, mut lexer: Lexer<'a>) -> Self {
Self {
text,
current_token: lexer.next(),
lexer,
}
}
pub fn parse_expr(&mut self) -> Result<Node<Expr>, ParserError> {
self.parse_prec_or()
}
fn parse_prec_or(&mut self) -> Result<Node<Expr>, ParserError> {
let mut left = self.parse_prec_and()?;
while !self.done() {
if self.current_is(TokenType::Or) {
self.step();
let right = self.parse_prec_and()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::Or,
left: Box::new(left),
right: Box::new(right),
});
} else {
break;
}
}
Ok(left)
}
fn parse_prec_and(&mut self) -> Result<Node<Expr>, ParserError> {
let mut left = self.parse_prec_equal_inequal()?;
while !self.done() {
if self.current_is(TokenType::And) {
self.step();
let right = self.parse_prec_equal_inequal()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::And,
left: Box::new(left),
right: Box::new(right),
});
} else {
break;
}
}
Ok(left)
}
fn parse_prec_equal_inequal(&mut self) -> Result<Node<Expr>, ParserError> {
let mut left = self.parse_prec_lt_lte_gt_gte_in()?;
while !self.done() {
if self.current_is(TokenType::EqualEqual) {
self.step();
let right = self.parse_prec_lt_lte_gt_gte_in()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::Equal,
left: Box::new(left),
right: Box::new(right),
});
} else if self.current_is(TokenType::ExclamationEqual) {
self.step();
let right = self.parse_prec_lt_lte_gt_gte_in()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::Inequal,
left: Box::new(left),
right: Box::new(right),
});
} else {
break;
}
}
Ok(left)
}
fn parse_prec_lt_lte_gt_gte_in(&mut self) -> Result<Node<Expr>, ParserError> {
let mut left = self.parse_prec_add_subtract()?;
while !self.done() {
if self.current_is(TokenType::LessThan) {
self.step();
let right = self.parse_prec_add_subtract()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::LT,
left: Box::new(left),
right: Box::new(right),
});
} else if self.current_is(TokenType::GreaterThan) {
self.step();
let right = self.parse_prec_add_subtract()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::GT,
left: Box::new(left),
right: Box::new(right),
});
} else if self.current_is(TokenType::LessThanEqual) {
self.step();
let right = self.parse_prec_add_subtract()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::LTE,
left: Box::new(left),
right: Box::new(right),
});
} else if self.current_is(TokenType::GreaterThanEqual) {
self.step();
let right = self.parse_prec_add_subtract()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::GTE,
left: Box::new(left),
right: Box::new(right),
});
} else if self.current_is(TokenType::In) {
self.step();
let right = self.parse_prec_add_subtract()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::In,
left: Box::new(left),
right: Box::new(right),
});
} else {
break;
}
}
Ok(left)
}
fn parse_prec_add_subtract(&mut self) -> Result<Node<Expr>, ParserError> {
let mut left = self.parse_prec_multiply_divide_modulo()?;
while !self.done() {
if self.current_is(TokenType::Plus) {
self.step();
let right = self.parse_prec_multiply_divide_modulo()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::Add,
left: Box::new(left),
right: Box::new(right),
});
} else if self.current_is(TokenType::Minus) {
self.step();
let right = self.parse_prec_multiply_divide_modulo()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::Subtract,
left: Box::new(left),
right: Box::new(right),
});
} else {
break;
}
}
Ok(left)
}
fn parse_prec_multiply_divide_modulo(&mut self) -> Result<Node<Expr>, ParserError> {
let mut left = self.parse_prec_unary()?;
while !self.done() {
if self.current_is(TokenType::Asterisk) {
self.step();
let right = self.parse_prec_unary()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::Multiply,
left: Box::new(left),
right: Box::new(right),
});
} else if self.current_is(TokenType::Slash) {
self.step();
let right = self.parse_prec_unary()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::Divide,
left: Box::new(left),
right: Box::new(right),
});
} else if self.current_is(TokenType::Percent) {
self.step();
let right = self.parse_prec_unary()?;
left = self.node(Expr::Binary {
binary_type: BinaryType::Modulo,
left: Box::new(left),
right: Box::new(right),
});
} else {
break;
}
}
Ok(left)
}
fn parse_prec_unary(&mut self) -> Result<Node<Expr>, ParserError> {
if !self.done() && self.current_is(TokenType::Not) {
self.step();
let subject = Box::new(self.parse_prec_unary()?);
self.ok_node(Expr::Unary {
unary_type: UnaryType::Not,
subject,
})
} else if !self.done() && self.current_is(TokenType::Minus) {
self.step();
let subject = Box::new(self.parse_prec_unary()?);
self.ok_node(Expr::Unary {
unary_type: UnaryType::Negate,
subject,
})
} else if !self.done() && self.current_is(TokenType::Ampersand) {
self.step();
if !self.done() && self.current_is(TokenType::Mut) {
self.step();
let subject = Box::new(self.parse_prec_unary()?);
self.ok_node(Expr::Unary {
unary_type: UnaryType::ReferenceMut,
subject,
})
} else {
let subject = Box::new(self.parse_prec_unary()?);
self.ok_node(Expr::Unary {
unary_type: UnaryType::Reference,
subject,
})
}
} else if !self.done() && self.current_is(TokenType::Asterisk) {
self.step();
let subject = Box::new(self.parse_prec_unary()?);
self.ok_node(Expr::Unary {
unary_type: UnaryType::Dereference,
subject,
})
} else {
self.parse_prec_exponentiate()
}
}
fn parse_prec_exponentiate(&mut self) -> Result<Node<Expr>, ParserError> {
let left = self.parse_prec_member_index_call()?;
if !self.done() && self.current_is(TokenType::AsteriskEqual) {
let right = self.parse_prec_exponentiate()?;
self.step_and_ok_node(Expr::Binary {
binary_type: BinaryType::Exponentiate,
left: Box::new(left),
right: Box::new(right),
})
} else {
Ok(left)
}
}
fn parse_prec_member_index_call(&mut self) -> Result<Node<Expr>, ParserError> {
let mut subject = self.parse_operand()?;
while !self.done() {
if self.current_is(TokenType::Dot) {
self.step();
if self.done() || self.current_is(TokenType::Id) {
return self.error("expected identifier");
}
let value = self.token_string(self.current());
self.step();
subject = self.node(Expr::Member {
subject: Box::new(subject),
value,
});
} else if self.current_is(TokenType::LBracket) {
self.step();
let value = self.parse_expr()?;
if self.done() || !self.current_is(TokenType::RBracket) {
return self.error("expected ']'");
}
subject = self.node(Expr::Index {
subject: Box::new(subject),
value: Box::new(value),
});
} else if self.current_is(TokenType::LParen) {
self.step();
let mut arguments = Vec::<Node<Expr>>::new();
if !self.done() && !self.current_is(TokenType::RParen) {
arguments.push(self.parse_expr()?);
while !self.done() && self.current_is(TokenType::Comma) {
self.step();
if self.done() || self.current_is(TokenType::RParen) {
self.step();
}
arguments.push(self.parse_expr()?);
}
}
if self.done() || !self.current_is(TokenType::RParen) {
return self.error("expected ')'");
}
self.step();
subject = self.node(Expr::Call {
subject: Box::new(subject),
arguments,
})
} else {
break;
}
}
Ok(subject)
}
fn parse_operand(&mut self) -> Result<Node<Expr>, ParserError> {
if self.done() {
return self.error("expected value, got eof");
}
match self.current().token_type {
TokenType::Id => self.step_and_ok_node(Expr::Id(self.token_string(self.current()))),
TokenType::Int => {
let mut value_string = self.token_string(self.current());
self.step();
if !self.done() && self.current_is(TokenType::Decimal) {
value_string.push_str(&self.token_string(self.current()));
self.step_and_ok_node(Expr::Float(
value_string.parse::<f64>().expect("valid f64"),
))
} else {
self.ok_node(Expr::Int((value_string).parse::<i64>().expect("valid i64")))
}
}
TokenType::Decimal => self.step_and_ok_node(Expr::Float(
self.token_string(self.current())
.parse::<f64>()
.expect("valid f64"),
)),
TokenType::False => self.step_and_ok_node(Expr::Bool(false)),
TokenType::True => self.step_and_ok_node(Expr::Bool(true)),
TokenType::LParen => self.parse_unit_group_or_tuple(),
TokenType::LBrace => self.parse_object(),
TokenType::LBracket => self.parse_array(),
TokenType::Fn => self.parse_function(),
_ => self.error("expected value"),
}
}
fn parse_unit_group_or_tuple(&mut self) -> Result<Node<Expr>, ParserError> {
self.step();
if !self.done() && !self.current_is(TokenType::LParen) {
todo!()
} else {
self.step_and_ok_node(Expr::Unit)
}
}
fn parse_object(&mut self) -> Result<Node<Expr>, ParserError> {
todo!()
}
fn parse_array(&mut self) -> Result<Node<Expr>, ParserError> {
todo!()
}
fn parse_function(&mut self) -> Result<Node<Expr>, ParserError> {
todo!()
}
fn token_string(&self, token: &Token) -> String {
self.text[token.pos.index..token.pos.index + token.length].to_string()
}
fn step_and_ok_node<T>(&mut self, value: T) -> Result<Node<T>, ParserError> {
self.step();
self.ok_node(value)
}
fn ok_node<T>(&self, value: T) -> Result<Node<T>, ParserError> {
Ok(Node {
value,
pos: self.lexer.pos(),
})
}
fn step_and_node<T>(&mut self, value: T) -> Node<T> {
self.step();
self.node(value)
}
fn node<T>(&self, value: T) -> Node<T> {
Node {
value,
pos: self.lexer.pos(),
}
}
fn step_and<T>(&mut self, value: T) -> T {
self.step();
value
}
fn error(&self, message: &str) -> Result<Node<Expr>, ParserError> {
Err(ParserError {
pos: self.lexer.pos(),
message: message.to_string(),
})
}
fn done(&self) -> bool {
self.current_token.is_none()
}
fn current_is(&self, token_type: TokenType) -> bool {
self.current().token_type == token_type
}
fn current(&self) -> &Token {
self.current_token.as_ref().expect("done() checked")
}
fn step(&mut self) {
self.current_token = self.lexer.next();
}
}
use crate::lexer::Lexer;
use crate::parser::Parser;
fn main() {
println!("tokens = [");

425
src/parser.rs Normal file
View File

@ -0,0 +1,425 @@
use crate::ast::{BinaryType, Expr, Node, UnaryType};
use crate::tokens::{Position, PositionKnowing, Token, TokenType};
/// Error returned when parsing fails.
#[derive(Debug)]
pub struct ParserError {
/// Position associated with the error.
pos: Position,
/// Human-readable description, e.g. "expected identifier".
message: String,
}
/// Expression parser with one token of lookahead, generic over any
/// position-aware iterator of tokens.
pub struct Parser<'a, Tokens>
where
Tokens: PositionKnowing + Iterator<Item = Token>,
{
/// Source text the tokens refer to.
text: &'a str,
/// Token source; `new` pulls the first token from it.
tokens: Tokens,
/// Lookahead token; `None` once the token source is exhausted.
current_token: Option<Token>,
}
impl<'a, Tokens> Parser<'a, Tokens>
where
Tokens: PositionKnowing + Iterator<Item = Token>,
{
/// Builds a parser over `text`, pulling the first token from `lexer` so the
/// lookahead is primed before any parsing starts.
pub fn new(text: &'a str, mut lexer: Tokens) -> Self {
    let current_token = lexer.next();
    Self {
        text,
        tokens: lexer,
        current_token,
    }
}
/// Parses one expression, starting at the lowest-precedence level (`or`).
pub fn parse_expr(&mut self) -> Result<Node<Expr>, ParserError> {
self.parse_prec_or()
}
/// Parses a left-associative chain of `or` operators over `and`-level operands.
fn parse_prec_or(&mut self) -> Result<Node<Expr>, ParserError> {
    let mut left = self.parse_prec_and()?;
    loop {
        // Stop at end of input or at the first token that is not `or`.
        let binary_type = match self.current_token.as_ref().map(|token| &token.token_type) {
            Some(TokenType::Or) => BinaryType::Or,
            _ => break,
        };
        self.step();
        let right = self.parse_prec_and()?;
        left = self.node(Expr::Binary {
            binary_type,
            left: Box::new(left),
            right: Box::new(right),
        });
    }
    Ok(left)
}
/// Parses a left-associative chain of `and` operators over equality-level operands.
fn parse_prec_and(&mut self) -> Result<Node<Expr>, ParserError> {
    let mut left = self.parse_prec_equal_inequal()?;
    loop {
        // Stop at end of input or at the first token that is not `and`.
        let binary_type = match self.current_token.as_ref().map(|token| &token.token_type) {
            Some(TokenType::And) => BinaryType::And,
            _ => break,
        };
        self.step();
        let right = self.parse_prec_equal_inequal()?;
        left = self.node(Expr::Binary {
            binary_type,
            left: Box::new(left),
            right: Box::new(right),
        });
    }
    Ok(left)
}
/// Parses a left-associative chain of `==` and `!=` over comparison-level operands.
fn parse_prec_equal_inequal(&mut self) -> Result<Node<Expr>, ParserError> {
    let mut left = self.parse_prec_lt_lte_gt_gte_in()?;
    loop {
        // Map the lookahead token to its operator; anything else ends the chain.
        let binary_type = match self.current_token.as_ref().map(|token| &token.token_type) {
            Some(TokenType::EqualEqual) => BinaryType::Equal,
            Some(TokenType::ExclamationEqual) => BinaryType::Inequal,
            _ => break,
        };
        self.step();
        let right = self.parse_prec_lt_lte_gt_gte_in()?;
        left = self.node(Expr::Binary {
            binary_type,
            left: Box::new(left),
            right: Box::new(right),
        });
    }
    Ok(left)
}
/// Parses a left-associative chain of `<`, `>`, `<=`, `>=` and `in` over
/// additive-level operands.
fn parse_prec_lt_lte_gt_gte_in(&mut self) -> Result<Node<Expr>, ParserError> {
    let mut left = self.parse_prec_add_subtract()?;
    loop {
        // Map the lookahead token to its operator; anything else ends the chain.
        let binary_type = match self.current_token.as_ref().map(|token| &token.token_type) {
            Some(TokenType::LessThan) => BinaryType::LT,
            Some(TokenType::GreaterThan) => BinaryType::GT,
            Some(TokenType::LessThanEqual) => BinaryType::LTE,
            Some(TokenType::GreaterThanEqual) => BinaryType::GTE,
            Some(TokenType::In) => BinaryType::In,
            _ => break,
        };
        self.step();
        let right = self.parse_prec_add_subtract()?;
        left = self.node(Expr::Binary {
            binary_type,
            left: Box::new(left),
            right: Box::new(right),
        });
    }
    Ok(left)
}
/// Parses a left-associative chain of `+` and `-` over multiplicative-level operands.
fn parse_prec_add_subtract(&mut self) -> Result<Node<Expr>, ParserError> {
    let mut left = self.parse_prec_multiply_divide_modulo()?;
    loop {
        // Map the lookahead token to its operator; anything else ends the chain.
        let binary_type = match self.current_token.as_ref().map(|token| &token.token_type) {
            Some(TokenType::Plus) => BinaryType::Add,
            Some(TokenType::Minus) => BinaryType::Subtract,
            _ => break,
        };
        self.step();
        let right = self.parse_prec_multiply_divide_modulo()?;
        left = self.node(Expr::Binary {
            binary_type,
            left: Box::new(left),
            right: Box::new(right),
        });
    }
    Ok(left)
}
/// Parses a left-associative chain of `*`, `/` and `%` over unary-level operands.
fn parse_prec_multiply_divide_modulo(&mut self) -> Result<Node<Expr>, ParserError> {
    let mut left = self.parse_prec_unary()?;
    loop {
        // Map the lookahead token to its operator; anything else ends the chain.
        let binary_type = match self.current_token.as_ref().map(|token| &token.token_type) {
            Some(TokenType::Asterisk) => BinaryType::Multiply,
            Some(TokenType::Slash) => BinaryType::Divide,
            Some(TokenType::Percent) => BinaryType::Modulo,
            _ => break,
        };
        self.step();
        let right = self.parse_prec_unary()?;
        left = self.node(Expr::Binary {
            binary_type,
            left: Box::new(left),
            right: Box::new(right),
        });
    }
    Ok(left)
}
/// Parses prefix operators (`not`, `-`, `&`, `&mut`, `*`); they nest
/// right-to-left. Falls through to exponentiation when none is present.
fn parse_prec_unary(&mut self) -> Result<Node<Expr>, ParserError> {
    let prefix = match self.current_token.as_ref().map(|token| &token.token_type) {
        Some(TokenType::Not) => UnaryType::Not,
        Some(TokenType::Minus) => UnaryType::Negate,
        Some(TokenType::Ampersand) => UnaryType::Reference,
        Some(TokenType::Asterisk) => UnaryType::Dereference,
        _ => return self.parse_prec_exponentiate(),
    };
    self.step();
    // `&` directly followed by `mut` forms a mutable reference.
    let unary_type = if matches!(prefix, UnaryType::Reference)
        && !self.done()
        && self.current_is(TokenType::Mut)
    {
        self.step();
        UnaryType::ReferenceMut
    } else {
        prefix
    };
    let subject = Box::new(self.parse_prec_unary()?);
    self.ok_node(Expr::Unary {
        unary_type,
        subject,
    })
}
/// Parses right-associative exponentiation (`**`) over postfix-level operands.
///
/// # Errors
/// Propagates any `ParserError` from parsing either operand.
fn parse_prec_exponentiate(&mut self) -> Result<Node<Expr>, ParserError> {
    let left = self.parse_prec_member_index_call()?;
    // The operator token is `**` (`DoubleAsterisk`), not `*=`, and it has to
    // be consumed before the right operand is parsed; otherwise the operand
    // parser sees the operator itself and fails.
    if !self.done() && self.current_is(TokenType::DoubleAsterisk) {
        self.step();
        let right = self.parse_prec_exponentiate()?;
        self.ok_node(Expr::Binary {
            binary_type: BinaryType::Exponentiate,
            left: Box::new(left),
            right: Box::new(right),
        })
    } else {
        Ok(left)
    }
}
/// Parses an operand followed by any number of postfix forms:
/// `.name` (member), `[expr]` (index) and `(args, ...)` (call).
///
/// # Errors
/// Returns a `ParserError` when a member name is not an identifier, when a
/// closing `]` or `)` is missing, or when a nested expression fails to parse.
fn parse_prec_member_index_call(&mut self) -> Result<Node<Expr>, ParserError> {
    let mut subject = self.parse_operand()?;
    while !self.done() {
        if self.current_is(TokenType::Dot) {
            self.step();
            // The member name must be an identifier.
            if self.done() || !self.current_is(TokenType::Id) {
                return self.error("expected identifier");
            }
            let value = self.token_string(self.current());
            self.step();
            subject = self.node(Expr::Member {
                subject: Box::new(subject),
                value,
            });
        } else if self.current_is(TokenType::LBracket) {
            self.step();
            let value = self.parse_expr()?;
            if self.done() || !self.current_is(TokenType::RBracket) {
                return self.error("expected ']'");
            }
            // Consume the closing bracket so further postfix forms can follow.
            self.step();
            subject = self.node(Expr::Index {
                subject: Box::new(subject),
                value: Box::new(value),
            });
        } else if self.current_is(TokenType::LParen) {
            self.step();
            let mut arguments = Vec::<Node<Expr>>::new();
            if !self.done() && !self.current_is(TokenType::RParen) {
                arguments.push(self.parse_expr()?);
                while !self.done() && self.current_is(TokenType::Comma) {
                    self.step();
                    // A trailing comma ends the list; the `)` check below
                    // reports a missing parenthesis at end of input.
                    if self.done() || self.current_is(TokenType::RParen) {
                        break;
                    }
                    arguments.push(self.parse_expr()?);
                }
            }
            if self.done() || !self.current_is(TokenType::RParen) {
                return self.error("expected ')'");
            }
            self.step();
            subject = self.node(Expr::Call {
                subject: Box::new(subject),
                arguments,
            })
        } else {
            break;
        }
    }
    Ok(subject)
}
/// Parses a single operand: an identifier, a numeric or boolean literal, or
/// one of the bracketed/keyword-introduced forms that are dispatched to
/// `parse_unit_group_or_tuple`, `parse_object`, `parse_array` and
/// `parse_function`.
///
/// An `Int` token immediately followed by a `Decimal` token is joined into a
/// single float literal.
///
/// # Errors
///
/// Returns a `ParserError` at end of input, on a token that cannot start a
/// value, or when a numeric literal's text cannot be converted (for example
/// an integer that does not fit in `i64`).
fn parse_operand(&mut self) -> Result<Node<Expr>, ParserError> {
    if self.done() {
        return self.error("expected value, got eof");
    }
    // NOTE(review): `TokenType::String` has no arm here, so string tokens
    // currently end up in the "expected value" fallback.
    match self.current().token_type {
        TokenType::Id => self.step_and_ok_node(Expr::Id(self.token_string(self.current()))),
        TokenType::Int => {
            let mut value_string = self.token_string(self.current());
            self.step();
            if !self.done() && self.current_is(TokenType::Decimal) {
                value_string.push_str(&self.token_string(self.current()));
                match value_string.parse::<f64>() {
                    Ok(value) => self.step_and_ok_node(Expr::Float(value)),
                    Err(_) => self.error("invalid float literal"),
                }
            } else {
                // The literal text comes from user input, so a conversion
                // failure is reported as a parse error instead of panicking.
                match value_string.parse::<i64>() {
                    Ok(value) => self.ok_node(Expr::Int(value)),
                    Err(_) => self.error("invalid int literal"),
                }
            }
        }
        TokenType::Decimal => match self.token_string(self.current()).parse::<f64>() {
            Ok(value) => self.step_and_ok_node(Expr::Float(value)),
            Err(_) => self.error("invalid float literal"),
        },
        TokenType::False => self.step_and_ok_node(Expr::Bool(false)),
        TokenType::True => self.step_and_ok_node(Expr::Bool(true)),
        TokenType::LParen => self.parse_unit_group_or_tuple(),
        TokenType::LBrace => self.parse_object(),
        TokenType::LBracket => self.parse_array(),
        TokenType::Fn => self.parse_function(),
        _ => self.error("expected value"),
    }
}
/// Parses the forms that start with `LParen`: the unit value `()`, a
/// parenthesised group, or a tuple.
///
/// Only the unit value is implemented; any other content after the opening
/// parenthesis still panics via `todo!()`.
///
/// # Errors
///
/// Returns a `ParserError` when the input ends right after the opening
/// parenthesis.
fn parse_unit_group_or_tuple(&mut self) -> Result<Node<Expr>, ParserError> {
    // Consume the opening parenthesis that `parse_operand` dispatched on.
    self.step();
    if self.done() {
        return self.error("expected ')'");
    }
    if self.current_is(TokenType::RParen) {
        // `()` is the unit value; consume the closing parenthesis.
        self.step_and_ok_node(Expr::Unit)
    } else {
        todo!()
    }
}
/// Parses an object literal; `parse_operand` calls this when the current
/// token is `LBrace`.
///
/// Not implemented yet: calling it panics via `todo!()`.
fn parse_object(&mut self) -> Result<Node<Expr>, ParserError> {
todo!()
}
/// Parses an array literal; `parse_operand` calls this when the current
/// token is `LBracket`.
///
/// Not implemented yet: calling it panics via `todo!()`.
fn parse_array(&mut self) -> Result<Node<Expr>, ParserError> {
todo!()
}
/// Parses a function expression; `parse_operand` calls this when the current
/// token is `Fn`.
///
/// Not implemented yet: calling it panics via `todo!()`.
fn parse_function(&mut self) -> Result<Node<Expr>, ParserError> {
todo!()
}
/// Returns an owned copy of the source text covered by `token`, i.e. the
/// slice starting at `token.pos.index` and spanning `token.length`.
fn token_string(&self, token: &Token) -> String {
    let start = token.pos.index;
    let end = start + token.length;
    String::from(&self.text[start..end])
}
/// Advances to the next token, then wraps `value` in a positioned node and
/// returns it as `Ok`.
fn step_and_ok_node<T>(&mut self, value: T) -> Result<Node<T>, ParserError> {
    self.step();
    let node = self.node(value);
    Ok(node)
}
/// Wraps `value` in a node stamped with the position currently reported by
/// `self.tokens`, returned as `Ok` for direct use as a parse result.
fn ok_node<T>(&self, value: T) -> Result<Node<T>, ParserError> {
    let pos = self.tokens.pos();
    Ok(Node { value, pos })
}
fn step_and_node<T>(&mut self, value: T) -> Node<T> {
self.step();
self.node(value)
}
/// Wraps `value` in a node stamped with the position currently reported by
/// `self.tokens`.
fn node<T>(&self, value: T) -> Node<T> {
    let pos = self.tokens.pos();
    Node { value, pos }
}
/// Advances to the next token and hands `value` back unchanged, so a step
/// can be combined with producing a result in one expression.
fn step_and<T>(&mut self, value: T) -> T {
    // Same effect as `step()`: pull the next token from the token source.
    self.current_token = self.tokens.next();
    value
}
/// Builds an `Err` carrying `message` and the position currently reported by
/// `self.tokens`.
fn error(&self, message: &str) -> Result<Node<Expr>, ParserError> {
    let failure = ParserError {
        pos: self.tokens.pos(),
        message: String::from(message),
    };
    Err(failure)
}
/// Reports whether the token stream is exhausted, i.e. there is no current token.
fn done(&self) -> bool {
    matches!(self.current_token, None)
}
/// Reports whether the current token has the given kind.
///
/// Panics when there is no current token (see `current`), so callers check
/// `done()` first.
fn current_is(&self, token_type: TokenType) -> bool {
    let token = self.current();
    token.token_type == token_type
}
/// Borrows the current token.
///
/// Panics with "done() checked" when there is no current token; callers are
/// expected to have ruled that out with `done()`.
fn current(&self) -> &Token {
    let token: Option<&Token> = self.current_token.as_ref();
    token.expect("done() checked")
}
/// Advances the parser by replacing the current token with the next one
/// produced by `self.tokens` (`None` once the source is exhausted).
fn step(&mut self) {
    let upcoming = self.tokens.next();
    self.current_token = upcoming;
}
}

88
src/tokens.rs Normal file
View File

@ -0,0 +1,88 @@
/// A location in the source text.
#[derive(Debug, Clone)]
pub struct Position {
    /// Offset into the source text; the parser uses it as the start of a
    /// token's slice.
    pub index: usize,
    /// Line number (the lexer starts counting at 1).
    pub line: i32,
    /// Column number (the lexer starts counting at 1).
    pub col: i32,
}

impl Position {
    /// Builds a position from its offset, line and column.
    pub fn new(index: usize, line: i32, col: i32) -> Self {
        Position { index, line, col }
    }
}
/// The kind of a lexed token.
///
/// Only `Debug` and `PartialEq` are derived; the parser compares kinds with `==`.
#[derive(Debug, PartialEq)]
pub enum TokenType {
// Lexing failures (going by the names — confirm against the lexer).
InvalidChar,
MalformedString,
MalformedComment,
// Identifiers and literal values.
Id,
Int,
Decimal,
String,
// Word-like tokens (presumably keywords — confirm against the lexer).
False,
True,
Let,
Mut,
If,
Else,
While,
For,
In,
Break,
Continue,
Fn,
Return,
End,
Not,
And,
Or,
Underscore,
// Operators.
Plus,
Minus,
Asterisk,
Slash,
Percent,
DoubleAsterisk,
Equal,
Exclamation,
LessThan,
GreaterThan,
// Operators followed by '=' (e.g. the lexer turns '+' then '=' into `PlusEqual`).
PlusEqual,
MinusEqual,
AsteriskEqual,
SlashEqual,
PercentEqual,
DoubleAsteriskEqual,
EqualEqual,
ExclamationEqual,
LessThanEqual,
GreaterThanEqual,
// Brackets and punctuation.
LParen,
RParen,
LBrace,
RBrace,
LBracket,
RBracket,
Dot,
Comma,
Colon,
Semicolon,
Ampersand,
}
/// A single lexed token: its kind plus the span of source text it covers.
///
/// The token does not own its text; the parser recovers it by slicing the
/// source as `text[pos.index..pos.index + length]`.
#[derive(Debug)]
pub struct Token {
/// Kind of the token.
pub token_type: TokenType,
/// Where the token starts; `pos.index` is the start offset of its text.
pub pos: Position,
/// Extent of the token's text, added to `pos.index` to find the end of the slice.
pub length: usize,
}
/// Implemented by types that can report a source [`Position`].
pub trait PositionKnowing {
/// Returns the position as an owned value.
fn pos(&self) -> Position;
}