extract into files and indirect lexer
This commit is contained in:
parent
c9ed3333f1
commit
8705272a77
75
src/ast.rs
Normal file
75
src/ast.rs
Normal file
@ -0,0 +1,75 @@
|
||||
use crate::tokens::Position;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Node<T> {
|
||||
pub value: T,
|
||||
pub pos: Position,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Expr {
|
||||
Unit,
|
||||
Id(String),
|
||||
Int(i64),
|
||||
Float(f64),
|
||||
String(String),
|
||||
Bool(bool),
|
||||
Array(Vec<Node<Expr>>),
|
||||
Object(Vec<ObjectEntry>),
|
||||
Tuple(Vec<Node<Expr>>),
|
||||
|
||||
Member {
|
||||
subject: Box<Node<Expr>>,
|
||||
value: String,
|
||||
},
|
||||
Index {
|
||||
subject: Box<Node<Expr>>,
|
||||
value: Box<Node<Expr>>,
|
||||
},
|
||||
Call {
|
||||
subject: Box<Node<Expr>>,
|
||||
arguments: Vec<Node<Expr>>,
|
||||
},
|
||||
Unary {
|
||||
unary_type: UnaryType,
|
||||
subject: Box<Node<Expr>>,
|
||||
},
|
||||
Binary {
|
||||
binary_type: BinaryType,
|
||||
left: Box<Node<Expr>>,
|
||||
right: Box<Node<Expr>>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ObjectEntry {
|
||||
Pair(Box<Node<Expr>>, Box<Expr>),
|
||||
}
|
||||
|
||||
/// The operator of an `Expr::Unary` node.
///
/// The enum carries no data, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryType {
    /// Logical negation.
    Not,
    /// Arithmetic negation.
    Negate,
    /// Taking a reference.
    Reference,
    /// Taking a mutable reference.
    ReferenceMut,
    /// Dereferencing.
    Dereference,
}
|
||||
|
||||
/// The operator of an `Expr::Binary` node.
///
/// The enum carries no data, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryType {
    /// Exponentiation.
    Exponentiate,
    /// Multiplication.
    Multiply,
    /// Division.
    Divide,
    /// Remainder.
    Modulo,
    /// Addition.
    Add,
    /// Subtraction.
    Subtract,
    /// Less than.
    LT,
    /// Less than or equal.
    LTE,
    /// Greater than.
    GT,
    /// Greater than or equal.
    GTE,
    /// The `in` operator.
    In,
    /// Equality.
    Equal,
    /// Inequality.
    Inequal,
    /// Logical and.
    And,
    /// Logical or.
    Or,
}
|
281
src/lexer.rs
Normal file
281
src/lexer.rs
Normal file
@ -0,0 +1,281 @@
|
||||
use crate::tokens::{Position, PositionKnowing, Token, TokenType};
|
||||
use std::str::Chars;
|
||||
|
||||
/// Turns source text into tokens, one character of lookahead at a time.
pub struct Lexer<'a> {
    /// The complete source text; token text is sliced out of it.
    text: &'a str,
    /// Iterator over the characters that follow `current_char`.
    chars: Chars<'a>,
    /// The character under the cursor, or `None` once the input is exhausted.
    current_char: Option<char>,
    /// Offset of the cursor from the start of `text`.
    index: usize,
    /// Line of the cursor; starts at 1.
    line: i32,
    /// Column of the cursor; starts at 1.
    col: i32,
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(text: &'a str) -> Self {
|
||||
let mut chars = text.chars();
|
||||
let first_char = chars.next();
|
||||
Self {
|
||||
text,
|
||||
chars,
|
||||
current_char: first_char,
|
||||
index: 0,
|
||||
line: 1,
|
||||
col: 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn next_token(&mut self) -> Option<Token> {
|
||||
if self.done() {
|
||||
return None;
|
||||
}
|
||||
match self.current() {
|
||||
' ' | '\t' | '\r' | '\n' => self.skip_whitespace(),
|
||||
'1'..='9' => Some(self.int_token()),
|
||||
'a'..='z' | 'A'..='Z' | '_' => Some(self.id_token()),
|
||||
'"' => Some(self.string_token()),
|
||||
'+' => {
|
||||
Some(self.single_or_double_char_token(TokenType::Plus, '=', TokenType::PlusEqual))
|
||||
}
|
||||
'-' => {
|
||||
Some(self.single_or_double_char_token(TokenType::Minus, '=', TokenType::MinusEqual))
|
||||
}
|
||||
'*' => Some(self.asterisk_token()),
|
||||
'/' => self.slash_token(),
|
||||
'%' => Some(self.single_or_double_char_token(
|
||||
TokenType::Percent,
|
||||
'=',
|
||||
TokenType::PercentEqual,
|
||||
)),
|
||||
'=' => {
|
||||
Some(self.single_or_double_char_token(TokenType::Equal, '=', TokenType::EqualEqual))
|
||||
}
|
||||
'!' => Some(self.single_or_double_char_token(
|
||||
TokenType::Exclamation,
|
||||
'=',
|
||||
TokenType::ExclamationEqual,
|
||||
)),
|
||||
'<' => Some(self.single_or_double_char_token(
|
||||
TokenType::LessThan,
|
||||
'=',
|
||||
TokenType::LessThanEqual,
|
||||
)),
|
||||
'>' => Some(self.single_or_double_char_token(
|
||||
TokenType::GreaterThan,
|
||||
'=',
|
||||
TokenType::GreaterThanEqual,
|
||||
)),
|
||||
'(' => Some(self.step_and_token(TokenType::LParen, self.pos())),
|
||||
')' => Some(self.step_and_token(TokenType::RParen, self.pos())),
|
||||
'{' => Some(self.step_and_token(TokenType::LBrace, self.pos())),
|
||||
'}' => Some(self.step_and_token(TokenType::RBrace, self.pos())),
|
||||
'[' => Some(self.step_and_token(TokenType::LBracket, self.pos())),
|
||||
']' => Some(self.step_and_token(TokenType::RBracket, self.pos())),
|
||||
'.' => Some(self.dot_token()),
|
||||
',' => Some(self.step_and_token(TokenType::Comma, self.pos())),
|
||||
':' => Some(self.step_and_token(TokenType::Colon, self.pos())),
|
||||
';' => Some(self.step_and_token(TokenType::Semicolon, self.pos())),
|
||||
'&' => Some(self.step_and_token(TokenType::Ampersand, self.pos())),
|
||||
_ => Some(self.step_and_token(TokenType::InvalidChar, self.pos())),
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) -> Option<Token> {
|
||||
while !self.done() && matches!(self.current(), ' ' | '\t' | '\r' | '\n') {
|
||||
self.step()
|
||||
}
|
||||
self.next_token()
|
||||
}
|
||||
|
||||
fn int_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
while !self.done() && matches!(self.current(), '0'..='9') {
|
||||
self.step();
|
||||
}
|
||||
self.token(TokenType::Int, start)
|
||||
}
|
||||
|
||||
fn string_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
let mut escaped = false;
|
||||
while !self.done() && (self.current() != '"' || escaped) {
|
||||
escaped = self.current() == '\\' && !escaped;
|
||||
self.step();
|
||||
}
|
||||
if self.done() || self.current() != '"' {
|
||||
self.step_and_token(TokenType::MalformedString, start)
|
||||
} else {
|
||||
self.step_and_token(TokenType::String, start)
|
||||
}
|
||||
}
|
||||
|
||||
fn id_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
while !self.done() && matches!(self.current(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
|
||||
self.step();
|
||||
}
|
||||
self.token(
|
||||
match &self.text[start.index..self.index] {
|
||||
"false" => TokenType::False,
|
||||
"true" => TokenType::True,
|
||||
"let" => TokenType::Let,
|
||||
"mut" => TokenType::Mut,
|
||||
"if" => TokenType::If,
|
||||
"else" => TokenType::Else,
|
||||
"while" => TokenType::While,
|
||||
"for" => TokenType::For,
|
||||
"in" => TokenType::In,
|
||||
"break" => TokenType::Break,
|
||||
"continue" => TokenType::Continue,
|
||||
"fn" => TokenType::Fn,
|
||||
"return" => TokenType::Return,
|
||||
"end" => TokenType::End,
|
||||
"_" => TokenType::Underscore,
|
||||
_ => TokenType::Id,
|
||||
},
|
||||
start,
|
||||
)
|
||||
}
|
||||
|
||||
fn single_or_double_char_token(
|
||||
&mut self,
|
||||
single_type: TokenType,
|
||||
double_char: char,
|
||||
double_type: TokenType,
|
||||
) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && self.current() == double_char {
|
||||
self.step_and_token(double_type, start)
|
||||
} else {
|
||||
self.token(single_type, start)
|
||||
}
|
||||
}
|
||||
|
||||
fn asterisk_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && self.current() == '*' {
|
||||
self.step();
|
||||
if !self.done() && self.current() == '=' {
|
||||
self.step_and_token(TokenType::DoubleAsteriskEqual, start)
|
||||
} else {
|
||||
self.token(TokenType::DoubleAsterisk, start)
|
||||
}
|
||||
} else if !self.done() && self.current() == '=' {
|
||||
self.step_and_token(TokenType::AsteriskEqual, start)
|
||||
} else {
|
||||
self.token(TokenType::Asterisk, start)
|
||||
}
|
||||
}
|
||||
|
||||
fn slash_token(&mut self) -> Option<Token> {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && self.current() == '/' {
|
||||
self.step();
|
||||
while !self.done() && self.current() != '\n' {
|
||||
self.step();
|
||||
}
|
||||
self.next_token()
|
||||
} else if !self.done() && self.current() == '*' {
|
||||
self.step();
|
||||
let mut depth = 1;
|
||||
let mut last_char: Option<char> = None;
|
||||
while !self.done() {
|
||||
match (last_char, self.current()) {
|
||||
(Some('/'), '*') => {
|
||||
depth += 1;
|
||||
}
|
||||
(Some('*'), '/') => {
|
||||
depth -= 1;
|
||||
if depth == 0 {
|
||||
self.step();
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
last_char = Some(self.current());
|
||||
self.step();
|
||||
}
|
||||
if depth != 0 {
|
||||
Some(self.token(TokenType::MalformedComment, start))
|
||||
} else {
|
||||
self.next_token()
|
||||
}
|
||||
} else if !self.done() && self.current() == '=' {
|
||||
return Some(self.step_and_token(TokenType::SlashEqual, start));
|
||||
} else {
|
||||
return Some(self.token(TokenType::Slash, start));
|
||||
}
|
||||
}
|
||||
|
||||
fn dot_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && matches!(self.current(), '0'..='9') {
|
||||
self.step();
|
||||
while !self.done() && matches!(self.current(), '0'..='9') {
|
||||
self.step();
|
||||
}
|
||||
self.token(TokenType::Decimal, start)
|
||||
} else {
|
||||
self.token(TokenType::Dot, start)
|
||||
}
|
||||
}
|
||||
|
||||
fn step_and_token(&mut self, token_type: TokenType, start: Position) -> Token {
|
||||
self.step();
|
||||
self.token(token_type, start)
|
||||
}
|
||||
|
||||
fn token(&self, token_type: TokenType, start: Position) -> Token {
|
||||
Token {
|
||||
token_type,
|
||||
length: self.index - start.index,
|
||||
pos: start,
|
||||
}
|
||||
}
|
||||
|
||||
fn done(&self) -> bool {
|
||||
self.current_char.is_none()
|
||||
}
|
||||
|
||||
fn current(&self) -> char {
|
||||
self.current_char.expect("done() checked")
|
||||
}
|
||||
|
||||
fn step(&mut self) {
|
||||
self.index += 1;
|
||||
if !self.done() {
|
||||
if self.current() == '\n' {
|
||||
self.line += 1;
|
||||
self.col = 1;
|
||||
} else {
|
||||
self.col += 1;
|
||||
}
|
||||
}
|
||||
self.current_char = self.chars.next();
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Lexer<'a> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.next_token()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> PositionKnowing for Lexer<'a> {
|
||||
fn pos(&self) -> Position {
|
||||
Position {
|
||||
index: self.index,
|
||||
line: self.line,
|
||||
col: self.col,
|
||||
}
|
||||
}
|
||||
}
|
859
src/main.rs
859
src/main.rs
@ -1,859 +1,12 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
use std::str::Chars;
|
||||
mod ast;
|
||||
mod lexer;
|
||||
mod parser;
|
||||
mod tokens;
|
||||
|
||||
/// A location in the source text.
///
/// The type is three plain integers, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Position {
    /// Offset from the start of the source text.
    pub index: usize,
    /// Line number.
    pub line: i32,
    /// Column number.
    pub col: i32,
}

impl Position {
    /// Builds a position from its three components.
    pub fn new(index: usize, line: i32, col: i32) -> Self {
        Self { index, line, col }
    }
}
|
||||
|
||||
/// The kind of a lexed token.
///
/// The enum carries no data, so it is `Copy` and fully comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenType {
    // Lexing errors, reported as tokens.
    InvalidChar,
    MalformedString,
    MalformedComment,

    // Identifiers and literals.
    Id,
    Int,
    Decimal,
    String,

    // Reserved words and the lone underscore.
    False,
    True,
    Let,
    Mut,
    If,
    Else,
    While,
    For,
    In,
    Break,
    Continue,
    Fn,
    Return,
    End,
    Not,
    And,
    Or,
    Underscore,

    // Operators.
    Plus,
    Minus,
    Asterisk,
    Slash,
    Percent,
    DoubleAsterisk,
    Equal,
    Exclamation,
    LessThan,
    GreaterThan,

    // The operators above followed by `=`.
    PlusEqual,
    MinusEqual,
    AsteriskEqual,
    SlashEqual,
    PercentEqual,
    DoubleAsteriskEqual,
    EqualEqual,
    ExclamationEqual,
    LessThanEqual,
    GreaterThanEqual,

    // Brackets and punctuation.
    LParen,
    RParen,
    LBrace,
    RBrace,
    LBracket,
    RBracket,
    Dot,
    Comma,
    Colon,
    Semicolon,
    Ampersand,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Token {
|
||||
pub token_type: TokenType,
|
||||
pub pos: Position,
|
||||
pub length: usize,
|
||||
}
|
||||
|
||||
/// Turns source text into tokens, one character of lookahead at a time.
struct Lexer<'a> {
    /// The complete source text; token text is sliced out of it.
    text: &'a str,
    /// Iterator over the characters that follow `current_char`.
    chars: Chars<'a>,
    /// The character under the cursor, or `None` once the input is exhausted.
    current_char: Option<char>,
    /// Offset of the cursor from the start of `text`.
    index: usize,
    /// Line of the cursor; starts at 1.
    line: i32,
    /// Column of the cursor; starts at 1.
    col: i32,
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(text: &'a str) -> Self {
|
||||
let mut chars = text.chars();
|
||||
let first_char = chars.next();
|
||||
Self {
|
||||
text,
|
||||
chars,
|
||||
current_char: first_char,
|
||||
index: 0,
|
||||
line: 1,
|
||||
col: 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn next_token(&mut self) -> Option<Token> {
|
||||
if self.done() {
|
||||
return None;
|
||||
}
|
||||
match self.current() {
|
||||
' ' | '\t' | '\r' | '\n' => self.skip_whitespace(),
|
||||
'1'..='9' => Some(self.int_token()),
|
||||
'a'..='z' | 'A'..='Z' | '_' => Some(self.id_token()),
|
||||
'"' => Some(self.string_token()),
|
||||
'+' => {
|
||||
Some(self.single_or_double_char_token(TokenType::Plus, '=', TokenType::PlusEqual))
|
||||
}
|
||||
'-' => {
|
||||
Some(self.single_or_double_char_token(TokenType::Minus, '=', TokenType::MinusEqual))
|
||||
}
|
||||
'*' => Some(self.asterisk_token()),
|
||||
'/' => self.slash_token(),
|
||||
'%' => Some(self.single_or_double_char_token(
|
||||
TokenType::Percent,
|
||||
'=',
|
||||
TokenType::PercentEqual,
|
||||
)),
|
||||
'=' => {
|
||||
Some(self.single_or_double_char_token(TokenType::Equal, '=', TokenType::EqualEqual))
|
||||
}
|
||||
'!' => Some(self.single_or_double_char_token(
|
||||
TokenType::Exclamation,
|
||||
'=',
|
||||
TokenType::ExclamationEqual,
|
||||
)),
|
||||
'<' => Some(self.single_or_double_char_token(
|
||||
TokenType::LessThan,
|
||||
'=',
|
||||
TokenType::LessThanEqual,
|
||||
)),
|
||||
'>' => Some(self.single_or_double_char_token(
|
||||
TokenType::GreaterThan,
|
||||
'=',
|
||||
TokenType::GreaterThanEqual,
|
||||
)),
|
||||
'(' => Some(self.step_and_token(TokenType::LParen, self.pos())),
|
||||
')' => Some(self.step_and_token(TokenType::RParen, self.pos())),
|
||||
'{' => Some(self.step_and_token(TokenType::LBrace, self.pos())),
|
||||
'}' => Some(self.step_and_token(TokenType::RBrace, self.pos())),
|
||||
'[' => Some(self.step_and_token(TokenType::LBracket, self.pos())),
|
||||
']' => Some(self.step_and_token(TokenType::RBracket, self.pos())),
|
||||
'.' => Some(self.dot_token()),
|
||||
',' => Some(self.step_and_token(TokenType::Comma, self.pos())),
|
||||
':' => Some(self.step_and_token(TokenType::Colon, self.pos())),
|
||||
';' => Some(self.step_and_token(TokenType::Semicolon, self.pos())),
|
||||
'&' => Some(self.step_and_token(TokenType::Ampersand, self.pos())),
|
||||
_ => Some(self.step_and_token(TokenType::InvalidChar, self.pos())),
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_whitespace(&mut self) -> Option<Token> {
|
||||
while !self.done() && matches!(self.current(), ' ' | '\t' | '\r' | '\n') {
|
||||
self.step()
|
||||
}
|
||||
self.next_token()
|
||||
}
|
||||
|
||||
fn int_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
while !self.done() && matches!(self.current(), '0'..='9') {
|
||||
self.step();
|
||||
}
|
||||
self.token(TokenType::Int, start)
|
||||
}
|
||||
|
||||
fn string_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
let mut escaped = false;
|
||||
while !self.done() && (self.current() != '"' || escaped) {
|
||||
escaped = self.current() == '\\' && !escaped;
|
||||
self.step();
|
||||
}
|
||||
if self.done() || self.current() != '"' {
|
||||
self.step_and_token(TokenType::MalformedString, start)
|
||||
} else {
|
||||
self.step_and_token(TokenType::String, start)
|
||||
}
|
||||
}
|
||||
|
||||
fn id_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
while !self.done() && matches!(self.current(), 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') {
|
||||
self.step();
|
||||
}
|
||||
self.token(
|
||||
match &self.text[start.index..self.index] {
|
||||
"false" => TokenType::False,
|
||||
"true" => TokenType::True,
|
||||
"let" => TokenType::Let,
|
||||
"mut" => TokenType::Mut,
|
||||
"if" => TokenType::If,
|
||||
"else" => TokenType::Else,
|
||||
"while" => TokenType::While,
|
||||
"for" => TokenType::For,
|
||||
"in" => TokenType::In,
|
||||
"break" => TokenType::Break,
|
||||
"continue" => TokenType::Continue,
|
||||
"fn" => TokenType::Fn,
|
||||
"return" => TokenType::Return,
|
||||
"end" => TokenType::End,
|
||||
"_" => TokenType::Underscore,
|
||||
_ => TokenType::Id,
|
||||
},
|
||||
start,
|
||||
)
|
||||
}
|
||||
|
||||
fn single_or_double_char_token(
|
||||
&mut self,
|
||||
single_type: TokenType,
|
||||
double_char: char,
|
||||
double_type: TokenType,
|
||||
) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && self.current() == double_char {
|
||||
self.step_and_token(double_type, start)
|
||||
} else {
|
||||
self.token(single_type, start)
|
||||
}
|
||||
}
|
||||
|
||||
fn asterisk_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && self.current() == '*' {
|
||||
self.step();
|
||||
if !self.done() && self.current() == '=' {
|
||||
self.step_and_token(TokenType::DoubleAsteriskEqual, start)
|
||||
} else {
|
||||
self.token(TokenType::DoubleAsterisk, start)
|
||||
}
|
||||
} else if !self.done() && self.current() == '=' {
|
||||
self.step_and_token(TokenType::AsteriskEqual, start)
|
||||
} else {
|
||||
self.token(TokenType::Asterisk, start)
|
||||
}
|
||||
}
|
||||
|
||||
fn slash_token(&mut self) -> Option<Token> {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && self.current() == '/' {
|
||||
self.step();
|
||||
while !self.done() && self.current() != '\n' {
|
||||
self.step();
|
||||
}
|
||||
self.next_token()
|
||||
} else if !self.done() && self.current() == '*' {
|
||||
self.step();
|
||||
let mut depth = 1;
|
||||
let mut last_char: Option<char> = None;
|
||||
while !self.done() {
|
||||
match (last_char, self.current()) {
|
||||
(Some('/'), '*') => {
|
||||
depth += 1;
|
||||
}
|
||||
(Some('*'), '/') => {
|
||||
depth -= 1;
|
||||
if depth == 0 {
|
||||
self.step();
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
last_char = Some(self.current());
|
||||
self.step();
|
||||
}
|
||||
if depth != 0 {
|
||||
Some(self.token(TokenType::MalformedComment, start))
|
||||
} else {
|
||||
self.next_token()
|
||||
}
|
||||
} else if !self.done() && self.current() == '=' {
|
||||
return Some(self.step_and_token(TokenType::SlashEqual, start));
|
||||
} else {
|
||||
return Some(self.token(TokenType::Slash, start));
|
||||
}
|
||||
}
|
||||
|
||||
fn dot_token(&mut self) -> Token {
|
||||
let start = self.pos();
|
||||
self.step();
|
||||
if !self.done() && matches!(self.current(), '0'..='9') {
|
||||
self.step();
|
||||
while !self.done() && matches!(self.current(), '0'..='9') {
|
||||
self.step();
|
||||
}
|
||||
self.token(TokenType::Decimal, start)
|
||||
} else {
|
||||
self.token(TokenType::Dot, start)
|
||||
}
|
||||
}
|
||||
|
||||
fn step_and_token(&mut self, token_type: TokenType, start: Position) -> Token {
|
||||
self.step();
|
||||
self.token(token_type, start)
|
||||
}
|
||||
|
||||
fn token(&self, token_type: TokenType, start: Position) -> Token {
|
||||
Token {
|
||||
token_type,
|
||||
length: self.index - start.index,
|
||||
pos: start,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pos(&self) -> Position {
|
||||
Position {
|
||||
index: self.index,
|
||||
line: self.line,
|
||||
col: self.col,
|
||||
}
|
||||
}
|
||||
|
||||
fn done(&self) -> bool {
|
||||
self.current_char.is_none()
|
||||
}
|
||||
|
||||
fn current(&self) -> char {
|
||||
self.current_char.expect("done() checked")
|
||||
}
|
||||
|
||||
fn step(&mut self) {
|
||||
self.index += 1;
|
||||
if !self.done() {
|
||||
if self.current() == '\n' {
|
||||
self.line += 1;
|
||||
self.col = 1;
|
||||
} else {
|
||||
self.col += 1;
|
||||
}
|
||||
}
|
||||
self.current_char = self.chars.next();
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Lexer<'a> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.next_token()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Node<T> {
|
||||
pub value: T,
|
||||
pub pos: Position,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum Expr {
|
||||
Unit,
|
||||
Id(String),
|
||||
Int(i64),
|
||||
Float(f64),
|
||||
String(String),
|
||||
Bool(bool),
|
||||
Array(Vec<Node<Expr>>),
|
||||
Object(Vec<ObjectEntry>),
|
||||
Tuple(Vec<Node<Expr>>),
|
||||
|
||||
Member {
|
||||
subject: Box<Node<Expr>>,
|
||||
value: String,
|
||||
},
|
||||
Index {
|
||||
subject: Box<Node<Expr>>,
|
||||
value: Box<Node<Expr>>,
|
||||
},
|
||||
Call {
|
||||
subject: Box<Node<Expr>>,
|
||||
arguments: Vec<Node<Expr>>,
|
||||
},
|
||||
Unary {
|
||||
unary_type: UnaryType,
|
||||
subject: Box<Node<Expr>>,
|
||||
},
|
||||
Binary {
|
||||
binary_type: BinaryType,
|
||||
left: Box<Node<Expr>>,
|
||||
right: Box<Node<Expr>>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum ObjectEntry {
|
||||
Pair(Box<Node<Expr>>, Box<Expr>),
|
||||
}
|
||||
|
||||
/// The operator of an `Expr::Unary` node.
///
/// The enum carries no data, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum UnaryType {
    /// Logical negation.
    Not,
    /// Arithmetic negation.
    Negate,
    /// Taking a reference.
    Reference,
    /// Taking a mutable reference.
    ReferenceMut,
    /// Dereferencing.
    Dereference,
}
|
||||
|
||||
/// The operator of an `Expr::Binary` node.
///
/// The enum carries no data, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BinaryType {
    /// Exponentiation.
    Exponentiate,
    /// Multiplication.
    Multiply,
    /// Division.
    Divide,
    /// Remainder.
    Modulo,
    /// Addition.
    Add,
    /// Subtraction.
    Subtract,
    /// Less than.
    LT,
    /// Less than or equal.
    LTE,
    /// Greater than.
    GT,
    /// Greater than or equal.
    GTE,
    /// The `in` operator.
    In,
    /// Equality.
    Equal,
    /// Inequality.
    Inequal,
    /// Logical and.
    And,
    /// Logical or.
    Or,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ParserError {
|
||||
pos: Position,
|
||||
message: String,
|
||||
}
|
||||
|
||||
struct Parser<'a> {
|
||||
text: &'a str,
|
||||
lexer: Lexer<'a>,
|
||||
current_token: Option<Token>,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub fn new(text: &'a str, mut lexer: Lexer<'a>) -> Self {
|
||||
Self {
|
||||
text,
|
||||
current_token: lexer.next(),
|
||||
lexer,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_expr(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
self.parse_prec_or()
|
||||
}
|
||||
|
||||
fn parse_prec_or(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_and()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::Or) {
|
||||
self.step();
|
||||
let right = self.parse_prec_and()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Or,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_and(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_equal_inequal()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::And) {
|
||||
self.step();
|
||||
let right = self.parse_prec_equal_inequal()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::And,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_equal_inequal(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_lt_lte_gt_gte_in()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::EqualEqual) {
|
||||
self.step();
|
||||
let right = self.parse_prec_lt_lte_gt_gte_in()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Equal,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::ExclamationEqual) {
|
||||
self.step();
|
||||
let right = self.parse_prec_lt_lte_gt_gte_in()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Inequal,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_lt_lte_gt_gte_in(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_add_subtract()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::LessThan) {
|
||||
self.step();
|
||||
let right = self.parse_prec_add_subtract()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::LT,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::GreaterThan) {
|
||||
self.step();
|
||||
let right = self.parse_prec_add_subtract()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::GT,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::LessThanEqual) {
|
||||
self.step();
|
||||
let right = self.parse_prec_add_subtract()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::LTE,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::GreaterThanEqual) {
|
||||
self.step();
|
||||
let right = self.parse_prec_add_subtract()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::GTE,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::In) {
|
||||
self.step();
|
||||
let right = self.parse_prec_add_subtract()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::In,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_add_subtract(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_multiply_divide_modulo()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::Plus) {
|
||||
self.step();
|
||||
let right = self.parse_prec_multiply_divide_modulo()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Add,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::Minus) {
|
||||
self.step();
|
||||
let right = self.parse_prec_multiply_divide_modulo()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Subtract,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_multiply_divide_modulo(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_unary()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::Asterisk) {
|
||||
self.step();
|
||||
let right = self.parse_prec_unary()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Multiply,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::Slash) {
|
||||
self.step();
|
||||
let right = self.parse_prec_unary()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Divide,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::Percent) {
|
||||
self.step();
|
||||
let right = self.parse_prec_unary()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Modulo,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_unary(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
if !self.done() && self.current_is(TokenType::Not) {
|
||||
self.step();
|
||||
let subject = Box::new(self.parse_prec_unary()?);
|
||||
self.ok_node(Expr::Unary {
|
||||
unary_type: UnaryType::Not,
|
||||
subject,
|
||||
})
|
||||
} else if !self.done() && self.current_is(TokenType::Minus) {
|
||||
self.step();
|
||||
let subject = Box::new(self.parse_prec_unary()?);
|
||||
self.ok_node(Expr::Unary {
|
||||
unary_type: UnaryType::Negate,
|
||||
subject,
|
||||
})
|
||||
} else if !self.done() && self.current_is(TokenType::Ampersand) {
|
||||
self.step();
|
||||
if !self.done() && self.current_is(TokenType::Mut) {
|
||||
self.step();
|
||||
let subject = Box::new(self.parse_prec_unary()?);
|
||||
self.ok_node(Expr::Unary {
|
||||
unary_type: UnaryType::ReferenceMut,
|
||||
subject,
|
||||
})
|
||||
} else {
|
||||
let subject = Box::new(self.parse_prec_unary()?);
|
||||
self.ok_node(Expr::Unary {
|
||||
unary_type: UnaryType::Reference,
|
||||
subject,
|
||||
})
|
||||
}
|
||||
} else if !self.done() && self.current_is(TokenType::Asterisk) {
|
||||
self.step();
|
||||
let subject = Box::new(self.parse_prec_unary()?);
|
||||
self.ok_node(Expr::Unary {
|
||||
unary_type: UnaryType::Dereference,
|
||||
subject,
|
||||
})
|
||||
} else {
|
||||
self.parse_prec_exponentiate()
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_prec_exponentiate(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let left = self.parse_prec_member_index_call()?;
|
||||
if !self.done() && self.current_is(TokenType::AsteriskEqual) {
|
||||
let right = self.parse_prec_exponentiate()?;
|
||||
self.step_and_ok_node(Expr::Binary {
|
||||
binary_type: BinaryType::Exponentiate,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
})
|
||||
} else {
|
||||
Ok(left)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_prec_member_index_call(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut subject = self.parse_operand()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::Dot) {
|
||||
self.step();
|
||||
if self.done() || self.current_is(TokenType::Id) {
|
||||
return self.error("expected identifier");
|
||||
}
|
||||
let value = self.token_string(self.current());
|
||||
self.step();
|
||||
subject = self.node(Expr::Member {
|
||||
subject: Box::new(subject),
|
||||
value,
|
||||
});
|
||||
} else if self.current_is(TokenType::LBracket) {
|
||||
self.step();
|
||||
let value = self.parse_expr()?;
|
||||
if self.done() || !self.current_is(TokenType::RBracket) {
|
||||
return self.error("expected ']'");
|
||||
}
|
||||
subject = self.node(Expr::Index {
|
||||
subject: Box::new(subject),
|
||||
value: Box::new(value),
|
||||
});
|
||||
} else if self.current_is(TokenType::LParen) {
|
||||
self.step();
|
||||
let mut arguments = Vec::<Node<Expr>>::new();
|
||||
if !self.done() && !self.current_is(TokenType::RParen) {
|
||||
arguments.push(self.parse_expr()?);
|
||||
while !self.done() && self.current_is(TokenType::Comma) {
|
||||
self.step();
|
||||
if self.done() || self.current_is(TokenType::RParen) {
|
||||
self.step();
|
||||
}
|
||||
arguments.push(self.parse_expr()?);
|
||||
}
|
||||
}
|
||||
if self.done() || !self.current_is(TokenType::RParen) {
|
||||
return self.error("expected ')'");
|
||||
}
|
||||
self.step();
|
||||
subject = self.node(Expr::Call {
|
||||
subject: Box::new(subject),
|
||||
arguments,
|
||||
})
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(subject)
|
||||
}
|
||||
|
||||
fn parse_operand(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
if self.done() {
|
||||
return self.error("expected value, got eof");
|
||||
}
|
||||
match self.current().token_type {
|
||||
TokenType::Id => self.step_and_ok_node(Expr::Id(self.token_string(self.current()))),
|
||||
TokenType::Int => {
|
||||
let mut value_string = self.token_string(self.current());
|
||||
self.step();
|
||||
if !self.done() && self.current_is(TokenType::Decimal) {
|
||||
value_string.push_str(&self.token_string(self.current()));
|
||||
self.step_and_ok_node(Expr::Float(
|
||||
value_string.parse::<f64>().expect("valid f64"),
|
||||
))
|
||||
} else {
|
||||
self.ok_node(Expr::Int((value_string).parse::<i64>().expect("valid i64")))
|
||||
}
|
||||
}
|
||||
TokenType::Decimal => self.step_and_ok_node(Expr::Float(
|
||||
self.token_string(self.current())
|
||||
.parse::<f64>()
|
||||
.expect("valid f64"),
|
||||
)),
|
||||
TokenType::False => self.step_and_ok_node(Expr::Bool(false)),
|
||||
TokenType::True => self.step_and_ok_node(Expr::Bool(true)),
|
||||
TokenType::LParen => self.parse_unit_group_or_tuple(),
|
||||
TokenType::LBrace => self.parse_object(),
|
||||
TokenType::LBracket => self.parse_array(),
|
||||
TokenType::Fn => self.parse_function(),
|
||||
_ => self.error("expected value"),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_unit_group_or_tuple(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
self.step();
|
||||
if !self.done() && !self.current_is(TokenType::LParen) {
|
||||
todo!()
|
||||
} else {
|
||||
self.step_and_ok_node(Expr::Unit)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses an object expression; reached from `parse_operand` on a `{` token.
///
/// # Panics
///
/// Always panics: not implemented yet.
fn parse_object(&mut self) -> Result<Node<Expr>, ParserError> {
    todo!()
}
|
||||
|
||||
/// Parses an array expression; reached from `parse_operand` on a `[` token.
///
/// # Panics
///
/// Always panics: not implemented yet.
fn parse_array(&mut self) -> Result<Node<Expr>, ParserError> {
    todo!()
}
|
||||
|
||||
/// Parses a function expression; reached from `parse_operand` on a `fn` token.
///
/// # Panics
///
/// Always panics: not implemented yet.
fn parse_function(&mut self) -> Result<Node<Expr>, ParserError> {
    todo!()
}
|
||||
|
||||
/// Returns an owned copy of the source text covered by `token`.
///
/// The slice runs from `token.pos.index` for `token.length` bytes of
/// `self.text`; slicing panics if that range is not valid for the text.
fn token_string(&self, token: &Token) -> String {
    let start = token.pos.index;
    let end = start + token.length;
    self.text[start..end].to_string()
}
|
||||
|
||||
/// Advances to the next token, then wraps `value` in an `Ok(Node)` stamped
/// with the position reported after the step.
fn step_and_ok_node<T>(&mut self, value: T) -> Result<Node<T>, ParserError> {
    let node = self.step_and_node(value);
    Ok(node)
}
|
||||
|
||||
/// Wraps `value` in an `Ok(Node)` stamped with the lexer's current position.
fn ok_node<T>(&self, value: T) -> Result<Node<T>, ParserError> {
    Ok(self.node(value))
}
|
||||
|
||||
fn step_and_node<T>(&mut self, value: T) -> Node<T> {
|
||||
self.step();
|
||||
self.node(value)
|
||||
}
|
||||
|
||||
fn node<T>(&self, value: T) -> Node<T> {
|
||||
Node {
|
||||
value,
|
||||
pos: self.lexer.pos(),
|
||||
}
|
||||
}
|
||||
|
||||
fn step_and<T>(&mut self, value: T) -> T {
|
||||
self.step();
|
||||
value
|
||||
}
|
||||
|
||||
fn error(&self, message: &str) -> Result<Node<Expr>, ParserError> {
|
||||
Err(ParserError {
|
||||
pos: self.lexer.pos(),
|
||||
message: message.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
fn done(&self) -> bool {
|
||||
self.current_token.is_none()
|
||||
}
|
||||
|
||||
/// Returns whether the current token has type `token_type`.
///
/// # Panics
///
/// Panics (through `current`) when there is no current token; callers check
/// `done()` first.
fn current_is(&self, token_type: TokenType) -> bool {
    let token = self.current();
    token.token_type == token_type
}
|
||||
|
||||
/// Borrows the current token.
///
/// # Panics
///
/// Panics when there is no current token; callers are expected to have
/// checked `done()` beforehand.
fn current(&self) -> &Token {
    let token = self.current_token.as_ref();
    token.expect("done() checked")
}
|
||||
|
||||
fn step(&mut self) {
|
||||
self.current_token = self.lexer.next();
|
||||
}
|
||||
}
|
||||
use crate::lexer::Lexer;
|
||||
use crate::parser::Parser;
|
||||
|
||||
fn main() {
|
||||
println!("tokens = [");
|
||||
|
425
src/parser.rs
Normal file
425
src/parser.rs
Normal file
@ -0,0 +1,425 @@
|
||||
use crate::ast::{BinaryType, Expr, Node, UnaryType};
|
||||
use crate::tokens::{Position, PositionKnowing, Token, TokenType};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ParserError {
|
||||
pos: Position,
|
||||
message: String,
|
||||
}
|
||||
|
||||
pub struct Parser<'a, Tokens>
|
||||
where
|
||||
Tokens: PositionKnowing + Iterator<Item = Token>,
|
||||
{
|
||||
text: &'a str,
|
||||
tokens: Tokens,
|
||||
current_token: Option<Token>,
|
||||
}
|
||||
|
||||
impl<'a, Tokens> Parser<'a, Tokens>
|
||||
where
|
||||
Tokens: PositionKnowing + Iterator<Item = Token>,
|
||||
{
|
||||
pub fn new(text: &'a str, mut lexer: Tokens) -> Self {
|
||||
Self {
|
||||
text,
|
||||
current_token: lexer.next(),
|
||||
tokens: lexer,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_expr(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
self.parse_prec_or()
|
||||
}
|
||||
|
||||
fn parse_prec_or(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_and()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::Or) {
|
||||
self.step();
|
||||
let right = self.parse_prec_and()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Or,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_and(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_equal_inequal()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::And) {
|
||||
self.step();
|
||||
let right = self.parse_prec_equal_inequal()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::And,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_equal_inequal(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_lt_lte_gt_gte_in()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::EqualEqual) {
|
||||
self.step();
|
||||
let right = self.parse_prec_lt_lte_gt_gte_in()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Equal,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::ExclamationEqual) {
|
||||
self.step();
|
||||
let right = self.parse_prec_lt_lte_gt_gte_in()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Inequal,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_lt_lte_gt_gte_in(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_add_subtract()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::LessThan) {
|
||||
self.step();
|
||||
let right = self.parse_prec_add_subtract()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::LT,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::GreaterThan) {
|
||||
self.step();
|
||||
let right = self.parse_prec_add_subtract()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::GT,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::LessThanEqual) {
|
||||
self.step();
|
||||
let right = self.parse_prec_add_subtract()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::LTE,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::GreaterThanEqual) {
|
||||
self.step();
|
||||
let right = self.parse_prec_add_subtract()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::GTE,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::In) {
|
||||
self.step();
|
||||
let right = self.parse_prec_add_subtract()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::In,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_add_subtract(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_multiply_divide_modulo()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::Plus) {
|
||||
self.step();
|
||||
let right = self.parse_prec_multiply_divide_modulo()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Add,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::Minus) {
|
||||
self.step();
|
||||
let right = self.parse_prec_multiply_divide_modulo()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Subtract,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_multiply_divide_modulo(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut left = self.parse_prec_unary()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::Asterisk) {
|
||||
self.step();
|
||||
let right = self.parse_prec_unary()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Multiply,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::Slash) {
|
||||
self.step();
|
||||
let right = self.parse_prec_unary()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Divide,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else if self.current_is(TokenType::Percent) {
|
||||
self.step();
|
||||
let right = self.parse_prec_unary()?;
|
||||
left = self.node(Expr::Binary {
|
||||
binary_type: BinaryType::Modulo,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn parse_prec_unary(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
if !self.done() && self.current_is(TokenType::Not) {
|
||||
self.step();
|
||||
let subject = Box::new(self.parse_prec_unary()?);
|
||||
self.ok_node(Expr::Unary {
|
||||
unary_type: UnaryType::Not,
|
||||
subject,
|
||||
})
|
||||
} else if !self.done() && self.current_is(TokenType::Minus) {
|
||||
self.step();
|
||||
let subject = Box::new(self.parse_prec_unary()?);
|
||||
self.ok_node(Expr::Unary {
|
||||
unary_type: UnaryType::Negate,
|
||||
subject,
|
||||
})
|
||||
} else if !self.done() && self.current_is(TokenType::Ampersand) {
|
||||
self.step();
|
||||
if !self.done() && self.current_is(TokenType::Mut) {
|
||||
self.step();
|
||||
let subject = Box::new(self.parse_prec_unary()?);
|
||||
self.ok_node(Expr::Unary {
|
||||
unary_type: UnaryType::ReferenceMut,
|
||||
subject,
|
||||
})
|
||||
} else {
|
||||
let subject = Box::new(self.parse_prec_unary()?);
|
||||
self.ok_node(Expr::Unary {
|
||||
unary_type: UnaryType::Reference,
|
||||
subject,
|
||||
})
|
||||
}
|
||||
} else if !self.done() && self.current_is(TokenType::Asterisk) {
|
||||
self.step();
|
||||
let subject = Box::new(self.parse_prec_unary()?);
|
||||
self.ok_node(Expr::Unary {
|
||||
unary_type: UnaryType::Dereference,
|
||||
subject,
|
||||
})
|
||||
} else {
|
||||
self.parse_prec_exponentiate()
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_prec_exponentiate(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let left = self.parse_prec_member_index_call()?;
|
||||
if !self.done() && self.current_is(TokenType::AsteriskEqual) {
|
||||
let right = self.parse_prec_exponentiate()?;
|
||||
self.step_and_ok_node(Expr::Binary {
|
||||
binary_type: BinaryType::Exponentiate,
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
})
|
||||
} else {
|
||||
Ok(left)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_prec_member_index_call(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
let mut subject = self.parse_operand()?;
|
||||
while !self.done() {
|
||||
if self.current_is(TokenType::Dot) {
|
||||
self.step();
|
||||
if self.done() || self.current_is(TokenType::Id) {
|
||||
return self.error("expected identifier");
|
||||
}
|
||||
let value = self.token_string(self.current());
|
||||
self.step();
|
||||
subject = self.node(Expr::Member {
|
||||
subject: Box::new(subject),
|
||||
value,
|
||||
});
|
||||
} else if self.current_is(TokenType::LBracket) {
|
||||
self.step();
|
||||
let value = self.parse_expr()?;
|
||||
if self.done() || !self.current_is(TokenType::RBracket) {
|
||||
return self.error("expected ']'");
|
||||
}
|
||||
subject = self.node(Expr::Index {
|
||||
subject: Box::new(subject),
|
||||
value: Box::new(value),
|
||||
});
|
||||
} else if self.current_is(TokenType::LParen) {
|
||||
self.step();
|
||||
let mut arguments = Vec::<Node<Expr>>::new();
|
||||
if !self.done() && !self.current_is(TokenType::RParen) {
|
||||
arguments.push(self.parse_expr()?);
|
||||
while !self.done() && self.current_is(TokenType::Comma) {
|
||||
self.step();
|
||||
if self.done() || self.current_is(TokenType::RParen) {
|
||||
self.step();
|
||||
}
|
||||
arguments.push(self.parse_expr()?);
|
||||
}
|
||||
}
|
||||
if self.done() || !self.current_is(TokenType::RParen) {
|
||||
return self.error("expected ')'");
|
||||
}
|
||||
self.step();
|
||||
subject = self.node(Expr::Call {
|
||||
subject: Box::new(subject),
|
||||
arguments,
|
||||
})
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(subject)
|
||||
}
|
||||
|
||||
fn parse_operand(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
if self.done() {
|
||||
return self.error("expected value, got eof");
|
||||
}
|
||||
match self.current().token_type {
|
||||
TokenType::Id => self.step_and_ok_node(Expr::Id(self.token_string(self.current()))),
|
||||
TokenType::Int => {
|
||||
let mut value_string = self.token_string(self.current());
|
||||
self.step();
|
||||
if !self.done() && self.current_is(TokenType::Decimal) {
|
||||
value_string.push_str(&self.token_string(self.current()));
|
||||
self.step_and_ok_node(Expr::Float(
|
||||
value_string.parse::<f64>().expect("valid f64"),
|
||||
))
|
||||
} else {
|
||||
self.ok_node(Expr::Int((value_string).parse::<i64>().expect("valid i64")))
|
||||
}
|
||||
}
|
||||
TokenType::Decimal => self.step_and_ok_node(Expr::Float(
|
||||
self.token_string(self.current())
|
||||
.parse::<f64>()
|
||||
.expect("valid f64"),
|
||||
)),
|
||||
TokenType::False => self.step_and_ok_node(Expr::Bool(false)),
|
||||
TokenType::True => self.step_and_ok_node(Expr::Bool(true)),
|
||||
TokenType::LParen => self.parse_unit_group_or_tuple(),
|
||||
TokenType::LBrace => self.parse_object(),
|
||||
TokenType::LBracket => self.parse_array(),
|
||||
TokenType::Fn => self.parse_function(),
|
||||
_ => self.error("expected value"),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_unit_group_or_tuple(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
self.step();
|
||||
if !self.done() && !self.current_is(TokenType::LParen) {
|
||||
todo!()
|
||||
} else {
|
||||
self.step_and_ok_node(Expr::Unit)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_object(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn parse_array(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn parse_function(&mut self) -> Result<Node<Expr>, ParserError> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn token_string(&self, token: &Token) -> String {
|
||||
self.text[token.pos.index..token.pos.index + token.length].to_string()
|
||||
}
|
||||
|
||||
fn step_and_ok_node<T>(&mut self, value: T) -> Result<Node<T>, ParserError> {
|
||||
self.step();
|
||||
self.ok_node(value)
|
||||
}
|
||||
|
||||
fn ok_node<T>(&self, value: T) -> Result<Node<T>, ParserError> {
|
||||
Ok(Node {
|
||||
value,
|
||||
pos: self.tokens.pos(),
|
||||
})
|
||||
}
|
||||
|
||||
fn step_and_node<T>(&mut self, value: T) -> Node<T> {
|
||||
self.step();
|
||||
self.node(value)
|
||||
}
|
||||
|
||||
fn node<T>(&self, value: T) -> Node<T> {
|
||||
Node {
|
||||
value,
|
||||
pos: self.tokens.pos(),
|
||||
}
|
||||
}
|
||||
|
||||
fn step_and<T>(&mut self, value: T) -> T {
|
||||
self.step();
|
||||
value
|
||||
}
|
||||
|
||||
fn error(&self, message: &str) -> Result<Node<Expr>, ParserError> {
|
||||
Err(ParserError {
|
||||
pos: self.tokens.pos(),
|
||||
message: message.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
fn done(&self) -> bool {
|
||||
self.current_token.is_none()
|
||||
}
|
||||
|
||||
fn current_is(&self, token_type: TokenType) -> bool {
|
||||
self.current().token_type == token_type
|
||||
}
|
||||
|
||||
fn current(&self) -> &Token {
|
||||
self.current_token.as_ref().expect("done() checked")
|
||||
}
|
||||
|
||||
fn step(&mut self) {
|
||||
self.current_token = self.tokens.next();
|
||||
}
|
||||
}
|
88
src/tokens.rs
Normal file
88
src/tokens.rs
Normal file
@ -0,0 +1,88 @@
|
||||
/// A location in the source text.
#[derive(Debug, Clone)]
pub struct Position {
    /// Offset into the source text; the parser uses it as the start of a
    /// byte-range slice when extracting a token's lexeme.
    pub index: usize,
    /// Line number of the location.
    pub line: i32,
    /// Column number of the location.
    pub col: i32,
}
|
||||
|
||||
impl Position {
|
||||
pub fn new(index: usize, line: i32, col: i32) -> Self {
|
||||
Self { index, line, col }
|
||||
}
|
||||
}
|
||||
|
||||
/// Kind of a lexed token.
#[derive(Debug, PartialEq)]
pub enum TokenType {
    // Lexing failures.
    InvalidChar,
    MalformedString,
    MalformedComment,

    // Identifiers and literals.
    Id,
    Int,
    Decimal,
    String,

    // Word-like tokens.
    False,
    True,
    Let,
    Mut,
    If,
    Else,
    While,
    For,
    In,
    Break,
    Continue,
    Fn,
    Return,
    End,
    Not,
    And,
    Or,
    Underscore,

    // Operators.
    Plus,
    Minus,
    Asterisk,
    Slash,
    Percent,
    DoubleAsterisk,
    Equal,
    Exclamation,
    LessThan,
    GreaterThan,

    // The operators above with an `Equal` suffix.
    PlusEqual,
    MinusEqual,
    AsteriskEqual,
    SlashEqual,
    PercentEqual,
    DoubleAsteriskEqual,
    EqualEqual,
    ExclamationEqual,
    LessThanEqual,
    GreaterThanEqual,

    // Brackets and punctuation.
    LParen,
    RParen,
    LBrace,
    RBrace,
    LBracket,
    RBracket,
    Dot,
    Comma,
    Colon,
    Semicolon,
    Ampersand,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Token {
|
||||
pub token_type: TokenType,
|
||||
pub pos: Position,
|
||||
pub length: usize,
|
||||
}
|
||||
|
||||
pub trait PositionKnowing {
|
||||
fn pos(&self) -> Position;
|
||||
}
|
Loading…
Reference in New Issue
Block a user