From ae5ead91da065a8117d621afd519d09ea46af593 Mon Sep 17 00:00:00 2001 From: SimonFJ20 Date: Fri, 17 Mar 2023 07:45:38 +0100 Subject: [PATCH] add compiler + runtime --- Cargo.toml | 4 - src/ast.rs | 229 ++++++++++++++- src/builtins.rs | 25 ++ src/bytecode.rs | 81 +++++- src/compiler.rs | 351 +++++++++++++++++++++++ src/main.rs | 63 +++-- src/parser.rs | 70 ++++- src/runtime.rs | 722 ++++++++++++++++++++++++++++++++++++++++++++++++ src/tokens.rs | 16 ++ 9 files changed, 1507 insertions(+), 54 deletions(-) create mode 100644 src/builtins.rs create mode 100644 src/runtime.rs diff --git a/Cargo.toml b/Cargo.toml index 1ad33b6..3a2f603 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,3 @@ name = "thiselang" version = "0.1.0" edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] diff --git a/src/ast.rs b/src/ast.rs index 3102b6f..16c2de7 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,11 +1,21 @@ use crate::tokens::Position; +pub trait AstNode { + fn fancy_string(&self, depth: usize) -> String; +} + #[derive(Debug)] -pub struct Node { +pub struct Node { pub value: T, pub pos: Position, } +impl Node { + pub fn fancy_string(&self, depth: usize) -> String { + self.value.fancy_string(depth) + } +} + #[derive(Debug)] pub enum Expr { Error(String), @@ -148,3 +158,220 @@ pub enum Parameter { Id { name: String, mutable: bool }, Spread(Box>), } + +fn indent(depth: usize) -> String { + "┊ ".repeat(depth) +} + +pub fn escape_string(text: &str) -> String { + let mut result = String::new(); + for c in text.chars() { + match c { + '\\' => result.push_str("\\\\"), + '\0' => result.push_str("\\0"), + '\t' => result.push_str("\\t"), + '\r' => result.push_str("\\r"), + '\n' => result.push_str("\\n"), + '\'' => result.push_str("\\\'"), + '\"' => result.push_str("\\\""), + c => result.push(c), + }; + } + return result; +} + +impl AstNode for Expr { + fn fancy_string(&self, depth: usize) -> String { + match 
self { + Expr::Error(message) => format!("Error({message})"), + Expr::Unit => format!("Unit"), + Expr::Id(v) => format!("Id(`{v}`)"), + Expr::Int(v) => format!("Int({v})"), + Expr::Float(v) => format!("Float({v})"), + Expr::String(v) => format!("String(\"{}\")", escape_string(v)), + Expr::Bool(v) => format!("Bool({v})"), + Expr::Array(values) => format!( + "Array\n{}", + values + .iter() + .map(|v| format!( + "{}{}\n", + indent(depth + 1), + (*v).value.fancy_string(depth + 1) + )) + .reduce(|mut acc, v| { + acc.push_str(&v); + acc + }) + .unwrap_or("".to_string()), + ), + Expr::Object(_) => format!("Object <...>"), + Expr::Tuple(_) => format!("Tuple <...>"), + Expr::If { + condition, + truthy, + falsy, + } => format!( + "If\n{}condition: {}\n{}truthy: {}\n{}falsy: {}", + indent(depth + 1), + condition.fancy_string(depth + 1), + indent(depth + 1), + truthy.fancy_string(depth + 1), + indent(depth + 1), + match falsy { + Some(expr) => expr.fancy_string(depth + 1), + None => "None".to_string(), + }, + ), + Expr::FunctionValue { parameters, body } => format!("FunctionValue <...>"), + Expr::Member { subject, value } => format!("Member <...>"), + Expr::Index { subject, value } => format!("Index <...>"), + Expr::Call { subject, arguments } => format!( + "Call\n{}subject: {}\n{}arguments:\n{}", + indent(depth + 1), + subject.fancy_string(depth + 1), + indent(depth + 1), + arguments + .iter() + .map(|v| format!( + "{}{}\n", + indent(depth + 2), + (*v).value.fancy_string(depth + 2) + )) + .reduce(|mut acc, v| { + acc.push_str(&v); + acc + }) + .unwrap_or("".to_string()), + ), + Expr::Unary { + unary_type, + subject, + } => format!( + "Unary\n{}unary_type: {:?}\n{}subject: {}", + indent(depth + 1), + unary_type, + indent(depth + 1), + subject.fancy_string(depth + 1), + ), + Expr::Binary { + binary_type, + left, + right, + } => format!( + "Binary\n{}binary_type: {:?}\n{}left: {}\n{}right: {}", + indent(depth + 1), + binary_type, + indent(depth + 1), + left.fancy_string(depth + 
1), + indent(depth + 1), + right.fancy_string(depth + 1), + ), + Expr::RangeExclusive { begin, end } => format!("RangeExclusive <...>"), + Expr::RangeInclusive { begin, end } => format!("RangeInclusive <...>"), + Expr::Assign { + assign_type, + subject, + value, + } => format!("Assign <...>"), + Expr::Let { subject, value } => format!( + "Let {{\n{}subject: {}\n{}value: {}\n{}}}", + indent(depth + 1), + subject.fancy_string(depth + 1), + indent(depth + 1), + match value { + Some(expr) => expr.fancy_string(depth + 1), + None => "None".to_string(), + }, + indent(depth), + ), + Expr::Continue => format!("Continue"), + Expr::Break => format!("Break"), + Expr::While { condition, body } => format!("While <...>"), + Expr::For { + subject, + value, + body, + } => format!("For <...>"), + Expr::Return(value) => format!( + "Return{}", + match value { + Some(v) => format!( + "\n{}value: {}", + indent(depth + 1), + v.fancy_string(depth + 1) + ), + None => "".to_string(), + } + ), + Expr::Function { + name, + parameters, + body, + } => format!( + "Function\n{}name: {}\n{}parameters:\n{}{}body: {}", + indent(depth + 1), + name, + indent(depth + 1), + parameters + .iter() + .map(|v| format!( + "{}{}\n", + indent(depth + 2), + (*v).value.fancy_string(depth + 2) + )) + .reduce(|mut acc, v| { + acc.push_str(&v); + acc + }) + .unwrap_or("".to_string()), + indent(depth + 1), + body.fancy_string(depth + 1) + ), + Expr::Block { statements, value } => format!( + "Block\n{}statements:\n{}{}value: {}", + indent(depth + 1), + statements + .iter() + .map(|v| format!( + "{}{}\n", + indent(depth + 2), + (*v).value.fancy_string(depth + 2) + )) + .reduce(|mut acc, v| { + acc.push_str(&v); + acc + }) + .unwrap_or("".to_string()), + indent(depth + 1), + match value { + Some(expr) => expr.fancy_string(depth + 1), + None => "None".to_string(), + }, + ), + Expr::Spread(_) => format!("Spread <...>"), + } + } +} + +impl AstNode for ObjectEntry { + fn fancy_string(&self, depth: usize) -> String { + match 
self { + ObjectEntry::Error(message) => format!("Error({message})"), + ObjectEntry::Pair { key, value } => format!("Pair <...>"), + ObjectEntry::Spread(_) => format!("Spread <...>"), + } + } +} + +impl AstNode for Parameter { + fn fancy_string(&self, depth: usize) -> String { + match self { + Parameter::Error(message) => format!("Error({message})"), + Parameter::Id { name, mutable } => { + format!("Id(`{name}`{})", if *mutable { ", mutable" } else { "" }) + } + Parameter::Spread(_) => format!("Spread <...>"), + } + } +} diff --git a/src/builtins.rs b/src/builtins.rs new file mode 100644 index 0000000..5f933dc --- /dev/null +++ b/src/builtins.rs @@ -0,0 +1,25 @@ +use crate::runtime::{RuntimeError, Value}; + +pub fn builtin_print(stack: &mut Vec) -> Result { + print!( + "{}", + match stack.pop().ok_or_else(|| RuntimeError { + message: "expected 1 argument".to_string() + })? { + Value::Unit => format!("()"), + Value::Int(value) => format!("{value}"), + Value::Float(value) => format!("{value}"), + Value::String(value) => format!("{value}"), + Value::Bool(value) => format!("{value}"), + Value::FunctionReference(addr) => format!(""), + Value::HeapReference(addr, gen) => format!(""), + }, + ); + Ok(Value::Unit) +} + +pub fn builtin_println(stack: &mut Vec) -> Result { + builtin_print(stack)?; + println!(""); + Ok(Value::Unit) +} diff --git a/src/bytecode.rs b/src/bytecode.rs index 65f9ccf..ed2db21 100644 --- a/src/bytecode.rs +++ b/src/bytecode.rs @@ -1,25 +1,29 @@ #[derive(Debug)] -pub enum Instructions { +pub enum Instruction { Duplicate, Pop, Jump(usize), JumpIf(usize), + Call, + Return, - Reserve(usize), - Load(usize), - Store(usize), + LoadLocal(usize), + StoreLocal(usize), + + HeapLoad, + HeapStore, + HeapAlloc, PushUnit, PushInt(i64), - PushFloat(i64), + PushFloat(f64), PushString(String), PushBool(bool), + PushFunctionReference(usize), Not, Negate, - Reference, - Dereference, Add, Subtract, @@ -38,3 +42,66 @@ pub enum Instructions { In, NotIn, } + 
+#[derive(Debug)] +pub struct Module { + pub functions: Vec<Function>, + pub entry_function: usize, +} + +#[derive(Debug)] +pub enum Function { + BuiltIn { + arguments: usize, + }, + UserDefined { + instructions: Vec<Instruction>, + arguments: usize, + locals_reserve: usize, + }, +} + +impl Module { + pub fn fancy_string(&self) -> String { + format!( + "Module\n{}", + self.functions + .iter() + .map(|f| f.fancy_string()) + .reduce(|mut acc, v| { + acc.push_str(&v); + acc + }) + .unwrap_or("".to_string()) + ) + } +} + +impl Function { + pub fn instructions(&self) -> &Vec<Instruction> { + match self { + Self::UserDefined { instructions, .. } => instructions, + _ => panic!("expected user defined function"), + } + } + pub fn arguments(&self) -> usize { + match self { + Self::BuiltIn { arguments } | + Self::UserDefined { arguments, .. } => *arguments, + } + } + pub fn locals_reserve(&self) -> usize { + match self { + Self::UserDefined { locals_reserve, .. } => *locals_reserve, + _ => panic!("expected user defined function"), + } + } + + pub fn fancy_string(&self) -> String { + match self { + Self::BuiltIn { arguments } => format!(" - BuiltIn Function\n arguments: {}", arguments), + Self::UserDefined { instructions, arguments, locals_reserve } => + format!(" - Function\n arguments: {}\n locals_reserve: {}\n instructions:\n{}", arguments, locals_reserve, instructions.iter().map(|i| format!(" - {:?}\n", i)).reduce(|mut acc, v|{acc.push_str(&v); acc}).unwrap_or("".to_string())) + } + } +} diff --git a/src/compiler.rs b/src/compiler.rs index 8b13789..bfa1cb3 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1 +1,352 @@ +use std::collections::HashMap; +use crate::{ + ast::{BinaryType, Expr, Node, Parameter, UnaryType}, + bytecode::{Function, Instruction, Module}, + tokens::Position, +}; + +#[derive(Debug)] +pub struct CompileError { + message: String, + pos: Position, +} + +pub struct ModuleContext { + functions: Vec<Function>, + global_functions: HashMap<String, Local>, +} + +pub fn compile_module(ast: Node<Expr>) -> Result<Module, CompileError> { + let mut
context = ModuleContext { + functions: vec![], + global_functions: HashMap::new(), + }; + + context.functions.push(Function::BuiltIn { arguments: 1 }); + context.global_functions.insert( + "print".to_string(), + Local { + address: 0, + mutable: false, + }, + ); + context.functions.push(Function::BuiltIn { arguments: 1 }); + context.global_functions.insert( + "println".to_string(), + Local { + address: 1, + mutable: false, + }, + ); + + let start_function = FunctionCompiler::new(&mut context).compile(Vec::new(), ast)?; + let start_function_index = context.functions.len(); + context.functions.push(start_function); + Ok(Module { + functions: context.functions, + entry_function: start_function_index, + }) +} + +struct Local { + address: usize, + mutable: bool, +} + +pub struct FunctionCompiler<'a> { + instructions: Vec<Instruction>, + context: &'a mut ModuleContext, + locals: Vec<HashMap<String, Local>>, + local_counter: usize, +} + +impl<'a> FunctionCompiler<'a> { + pub fn new(context: &'a mut ModuleContext) -> Self { + Self { + instructions: Vec::new(), + context, + locals: vec![HashMap::new()], + local_counter: 0, + } + } + + pub fn compile( + mut self, + parameters: Vec<Node<Parameter>>, + body: Node<Expr>, + ) -> Result<Function, CompileError> { + let mut param_stores = Vec::<Instruction>::new(); + for param in &parameters { + match &param.value { + Parameter::Error(_) => todo!(), + Parameter::Id { name, mutable } => { + let addr = match self.define_local(name.to_string(), *mutable) { + Ok(addr) => addr, + Err(error) => { + return Err(CompileError { + message: error, + pos: param.pos.clone(), + }) + } + }; + param_stores.push(Instruction::StoreLocal(addr)); + } + Parameter::Spread(_) => todo!(), + } + } + for store in param_stores.into_iter().rev() { + self.push(store); + } + self.compile_expr(body)?; + self.push(Instruction::Return); + Ok(Function::UserDefined { + arguments: parameters.len(), + locals_reserve: self.local_counter, + instructions: self.instructions, + }) + } + + fn compile_expr(&mut self, expr: Node<Expr>) -> Result<(), CompileError> { + match expr.value
{ + Expr::Error(message) => Err(CompileError { + message, + pos: expr.pos, + }), + Expr::Unit => todo!(), + Expr::Id(name) => match self.get_local(&name) { + Some(local) => { + self.push(Instruction::LoadLocal(local.address)); + Ok(()) + } + None => { + return Err(CompileError { + message: format!("id \"{}\" not defined", name), + pos: expr.pos, + }) + } + }, + Expr::Int(value) => { + self.push(Instruction::PushInt(value)); + Ok(()) + } + Expr::Float(value) => { + self.push(Instruction::PushFloat(value)); + Ok(()) + } + Expr::String(value) => { + self.push(Instruction::PushString(value)); + Ok(()) + } + Expr::Bool(value) => { + self.push(Instruction::PushBool(value)); + Ok(()) + } + Expr::Array(_) => todo!(), + Expr::Object(_) => todo!(), + Expr::Tuple(_) => todo!(), + Expr::If { + condition, + truthy, + falsy, + } => { + self.compile_expr(*condition)?; + self.push(Instruction::Not); + let from_then_to_else = self.instructions.len(); + self.push(Instruction::JumpIf(0)); + self.compile_expr(*truthy)?; + if let Some(expr) = falsy { + self.instructions[from_then_to_else] = + Instruction::JumpIf(self.instructions.len() + 1); + let from_else_to_end = self.instructions.len(); + self.push(Instruction::Jump(0)); + self.compile_expr(*expr)?; + self.instructions[from_else_to_end] = + Instruction::Jump(self.instructions.len()); + } else { + self.instructions[from_then_to_else] = + Instruction::JumpIf(self.instructions.len()); + } + Ok(()) + } + Expr::FunctionValue { parameters, body } => todo!(), + Expr::Member { subject, value } => todo!(), + Expr::Index { subject, value } => todo!(), + Expr::Call { subject, arguments } => { + for arg in arguments { + self.compile_expr(arg)?; + } + self.compile_expr(*subject)?; + self.push(Instruction::Call); + Ok(()) + } + Expr::Unary { + unary_type, + subject, + } => { + self.compile_expr(*subject)?; + self.push(match unary_type { + UnaryType::Not => Instruction::Not, + UnaryType::Negate => Instruction::Negate, + UnaryType::Reference => 
todo!(), + UnaryType::ReferenceMut => todo!(), + UnaryType::Dereference => todo!(), + }); + Ok(()) + } + Expr::Binary { + binary_type, + left, + right, + } => { + self.compile_expr(*left)?; + self.compile_expr(*right)?; + self.push(match binary_type { + BinaryType::Exponentiate => Instruction::Exponentiate, + BinaryType::Multiply => Instruction::Multiply, + BinaryType::Divide => Instruction::Divide, + BinaryType::Modulo => Instruction::Modulo, + BinaryType::Add => Instruction::Add, + BinaryType::Subtract => Instruction::Subtract, + BinaryType::LT => Instruction::LT, + BinaryType::LTE => Instruction::LTE, + BinaryType::GT => Instruction::GT, + BinaryType::GTE => Instruction::GTE, + BinaryType::In => Instruction::In, + BinaryType::NotIn => Instruction::NotIn, + BinaryType::Equal => Instruction::Equal, + BinaryType::Inequal => Instruction::Inequal, + BinaryType::And => Instruction::And, + BinaryType::Or => Instruction::Or, + }); + Ok(()) + } + Expr::RangeExclusive { begin, end } => todo!(), + Expr::RangeInclusive { begin, end } => todo!(), + Expr::Assign { + assign_type, + subject, + value, + } => todo!(), + Expr::Let { subject, value } => match subject.value { + Parameter::Error(_) => todo!(), + Parameter::Id { name, mutable } => { + let addr = match self.define_local(name, mutable) { + Ok(addr) => addr, + Err(error) => { + return Err(CompileError { + message: error, + pos: expr.pos, + }) + } + }; + match value { + Some(expr) => { + self.compile_expr(*expr)?; + self.push(Instruction::StoreLocal(addr)); + } + None => {} + } + self.push(Instruction::PushUnit); + Ok(()) + } + Parameter::Spread(_) => todo!(), + }, + Expr::Continue => todo!(), + Expr::Break => todo!(), + Expr::While { condition, body } => todo!(), + Expr::For { + subject, + value, + body, + } => todo!(), + Expr::Return(value) => { + if let Some(value) = value { + self.compile_expr(*value)?; + } else { + self.push(Instruction::PushUnit); + } + self.push(Instruction::Return); + Ok(()) + } + Expr::Function { 
+ name, + parameters, + body, + } => { + let function_compiler = FunctionCompiler::new(self.context); + let function = function_compiler.compile(parameters, *body)?; + let function_addr = self.context.functions.len(); + self.context.functions.push(function); + let local_addr = match self.define_local(name, false) { + Ok(addr) => addr, + Err(message) => { + return Err(CompileError { + message, + pos: expr.pos, + }) + } + }; + self.push(Instruction::PushFunctionReference(function_addr)); + self.push(Instruction::StoreLocal(local_addr)); + self.push(Instruction::PushUnit); + Ok(()) + } + Expr::Block { statements, value } => { + self.enter_scope(); + for s in statements { + self.compile_expr(s)?; + self.instructions.pop().expect("pushed"); + } + if let Some(value) = value { + self.compile_expr(*value)?; + } else { + self.push(Instruction::PushUnit); + }; + self.leave_scope(); + Ok(()) + } + Expr::Spread(_) => todo!(), + } + } + + fn enter_scope(&mut self) { + self.locals.push(HashMap::new()) + } + + fn leave_scope(&mut self) { + self.locals.pop().unwrap(); + } + + fn define_local(&mut self, name: String, mutable: bool) -> Result<usize, String> { + if self.locals.last().unwrap().contains_key(&name) { + Err(format!("identifier \"{}\" already defined", name)) + } else { + let address = self.local_counter; + self.local_counter += 1; + self.locals + .last_mut() + .unwrap() + .insert(name, Local { address, mutable }); + Ok(address) + } + } + + fn get_local(&self, name: &str) -> Option<&Local> { + for table in self.locals.iter().rev() { + if table.contains_key(name) { + return table.get(name); + } + } + if self.context.global_functions.contains_key(name) { + if let Some(local) = self.context.global_functions.get(name) { + return Some(local); + } + } + None + } + + fn push(&mut self, instruction: Instruction) { + self.instructions.push(instruction); + } +} diff --git a/src/main.rs b/src/main.rs index d383b40..2f2c6ac 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,44 +1,51 @@
#![allow(dead_code)] mod ast; +mod builtins; mod bytecode; mod compiler; mod lexer; mod parser; +mod runtime; mod tokens; +use crate::ast::AstNode; +use crate::compiler::compile_module; use crate::lexer::Lexer; use crate::parser::Parser; +use crate::runtime::Runtime; fn main() { - // println!("tokens = ["); - // let text = "** 3.14 \"foo\" false true ( ) + += /* 1 /* 2 */ 3 */ // 4 \n 5"; - // let lexer = Lexer::new(text); - // lexer.for_each(|token| { - // println!( - // " {:?} = `{}`,", - // token.token_type, - // &text[token.pos.index..token.pos.index + token.length] - // ); - // }); - // println!("]"); + let text = r#" + { + + fn myfunc(a, b) { + return a + b; + } + myfunc(1, 2); + println("hello world"); - let text2 = "[1, 1..=2, ...a]"; - let lexer2 = Lexer::new(text2); - println!("tokens = ["); - lexer2.for_each(|token| { - println!( - " ({:?}, `{}`, [{}], {}, {}:{}),", - token.token_type, - &text2[token.pos.index..token.pos.index + token.length], - token.pos.index, - token.length, - token.pos.line, - token.pos.col, - ); - }); - println!("]"); - let mut parser = Parser::new(text2, Lexer::new(text2)); + }; +"#; + println!("=== text ===\n {}", text.replace("\n", " \n")); + println!("=== tokenizing... ==="); + let lexer = Lexer::new(text); + println!("=== tokens ==="); + lexer.for_each(|token| println!(" {}", token.fancy_string(text))); + println!("=== parsing... ==="); + let mut parser = Parser::new(text, Lexer::new(text)); let expr = parser.parse_expr(); - println!("ast = {:#?}", expr); + println!("=== ast ===\n{}", expr.value.fancy_string(0)); + println!("=== compiling... ==="); + let module = match compile_module(expr) { + Ok(module) => module, + Err(error) => return println!("{:#?}", error), + }; + println!("=== module ===\n{}", module.fancy_string()); + println!("=== executing... 
==="); + let runtime = Runtime::new(module); + match runtime.run() { + Ok(value) => println!("=== result ===\n{:?}", value), + Err(error) => println!("{:#?}", error), + } } diff --git a/src/parser.rs b/src/parser.rs index deb8264..53e9bd2 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,7 @@ use std::iter::Peekable; use std::str::Chars; -use crate::ast::{AssignType, BinaryType, Expr, Node, ObjectEntry, Parameter, UnaryType}; +use crate::ast::{AssignType, AstNode, BinaryType, Expr, Node, ObjectEntry, Parameter, UnaryType}; use crate::tokens::{PositionKnowing, Token, TokenType}; pub struct Parser<'a, Tokens> @@ -25,7 +25,7 @@ where } } - pub fn parse_top_level_statements(&mut self) -> Vec> { + pub fn parse(mut self) -> Vec> { let mut statements = Vec::>::new(); while !self.done() { statements.push(self.parse_statement()); @@ -45,6 +45,8 @@ where } if self.done() || !self.current_is(TokenType::RBrace) { statements.push(self.error("expected '}'")); + } else { + self.step(); } self.node(Expr::Block { statements, @@ -58,13 +60,53 @@ where } match self.current().token_type { TokenType::Fn => self.parse_function(), - TokenType::Return => self.parse_return(), + TokenType::Return => { + let statement = self.parse_return(); + if self.done() || !self.current_is(TokenType::Semicolon) { + self.error("expected ';'") + } else { + self.step(); + statement + } + } TokenType::For => self.parse_for(), TokenType::While => self.parse_while(), - TokenType::Break => self.parse_break(), - TokenType::Continue => self.parse_continue(), - TokenType::Let => self.parse_let(), - _ => self.parse_assign(), + TokenType::Break => { + let statement = self.parse_break(); + if self.done() || !self.current_is(TokenType::Semicolon) { + self.error("expected ';'") + } else { + self.step(); + statement + } + } + TokenType::Continue => { + let statement = self.parse_continue(); + if self.done() || !self.current_is(TokenType::Semicolon) { + self.error("expected ';'") + } else { + self.step(); + 
statement + } + } + TokenType::Let => { + let statement = self.parse_let(); + if self.done() || !self.current_is(TokenType::Semicolon) { + self.error("expected ';'") + } else { + self.step(); + statement + } + } + _ => { + let statement = self.parse_assign(); + if self.done() || !self.current_is(TokenType::Semicolon) { + self.error("expected ';'") + } else { + self.step(); + statement + } + } } } @@ -91,7 +133,7 @@ where return Err(self.error("expected '('")); } self.step(); - let mut parameters = self.parse_function_parameters(); + let parameters = self.parse_function_parameters(); if self.done() || !self.current_is(TokenType::RParen) { return Err(self.error("expected ')'")); } @@ -102,7 +144,7 @@ where self.step(); self.parse_expr() } else { - return Err(self.error("expected '{'")); + return Err(self.error("expected '{' or '=>'")); }; Ok((parameters, body)) } @@ -684,7 +726,7 @@ where } self.step(); let mut values = Vec::>::new(); - if !self.done() && !self.current_is(TokenType::RBracket) { + if !self.done() && !self.current_is(TokenType::RBrace) { if self.current_is(TokenType::DotDotDot) { values.push(self.parse_object_entry()); } else { @@ -702,8 +744,8 @@ where } } } - if self.done() || !self.current_is(TokenType::RBracket) { - return self.error("expected ']'"); + if self.done() || !self.current_is(TokenType::RBrace) { + return self.error("expected '}'"); } self.step_and_node(Expr::Object(values)) } @@ -798,11 +840,11 @@ where self.text[token.pos.index..token.pos.index + token.length].to_string() } - fn step_and_node(&mut self, value: T) -> Node { + fn step_and_node(&mut self, value: T) -> Node { self.step_and(self.node(value)) } - fn node(&self, value: T) -> Node { + fn node(&self, value: T) -> Node { Node { value, pos: self.tokens.pos(), diff --git a/src/runtime.rs b/src/runtime.rs new file mode 100644 index 0000000..cf595cf --- /dev/null +++ b/src/runtime.rs @@ -0,0 +1,722 @@ +use std::collections::VecDeque; + +use crate::{ + builtins::{builtin_print, 
builtin_println}, + bytecode::{Function, Instruction, Module}, +}; + +#[derive(Debug, Clone)] +pub enum Value { + Unit, + Int(i64), + Float(f64), + String(String), + Bool(bool), + FunctionReference(usize), + HeapReference(usize, usize), +} + +#[derive(Debug)] +pub struct RuntimeError { + pub message: String, +} + +struct Call { + return_addr: usize, + instruction_ptr: usize, + stack_base: usize, + locals_base: usize, +} + +struct HeapEntry { + value: Value, + generation: usize, + gc_marker: bool, +} + +pub struct Runtime { + functions: Vec, + call_stack: Vec, + stack: Vec, + stack_base: usize, + locals: Vec, + locals_base: usize, + heap: Vec>, + heap_generation: usize, + deallocated_addrs: VecDeque, + function_ptr: usize, + instruction_ptr: usize, +} + +impl Runtime { + pub fn new(module: Module) -> Self { + Self { + functions: module.functions, + call_stack: Vec::new(), + stack: Vec::new(), + stack_base: 0, + locals: Vec::new(), + locals_base: 0, + heap: Vec::new(), + heap_generation: 0, + deallocated_addrs: VecDeque::new(), + function_ptr: module.entry_function, + instruction_ptr: 0, + } + } + + pub fn run(mut self) -> Result { + self.call_stack.push(Call { + return_addr: 0, + instruction_ptr: 0, + stack_base: 0, + locals_base: 0, + }); + for _ in 0..self.functions[self.function_ptr].locals_reserve() { + self.locals.push(Value::Unit); + } + while self.call_stack.len() > 0 { + let debug = false; + if debug { + println!("function_ptr = {}", self.function_ptr); + println!("instruction_ptr = {}", self.instruction_ptr); + println!("ins = {:?}", self.current_instruction()); + println!("stack before = {:?}", self.stack); + println!("locals before = {:?}", self.locals); + } + self.run_instruction()?; + if debug { + println!("stack after = {:?}", self.stack); + println!("locals after = {:?}", self.locals); + println!("\n"); + } + } + self.stack + .pop() + .ok_or_else(|| self.error_value("stack empty")) + } + + fn run_instruction(&mut self) -> Result<(), RuntimeError> { + 
if self.instruction_ptr >= self.current_function().instructions().len() { + return self.error("exceeded function end"); + } + match &self.functions[self.function_ptr].instructions()[self.instruction_ptr] { + Instruction::Duplicate => { + self.assert_stack_amount(1)?; + self.stack + .push(self.stack.last().expect("stack size asserted").clone()); + self.step_and_ok() + } + Instruction::Pop => { + self.assert_stack_amount(1)?; + if self.stack.len() == self.stack_base {} + let _ = self.stack.pop().expect("stack size asserted"); + self.step_and_ok() + } + Instruction::Jump(addr) => { + if !(0..self.current_function().instructions().len()).contains(&addr) { + return self.error("address outside function"); + } + self.instruction_ptr = *addr; + Ok(()) + } + Instruction::JumpIf(addr) => { + self.assert_stack_amount(1)?; + let value = self.stack.pop().expect("stack size asserted"); + let should_jump = match value { + Value::Bool(should_jump) => should_jump, + _ => return self.error("expected bool value"), + }; + if !(0..self.current_function().instructions().len()).contains(&addr) { + return self.error("instruction address out of range"); + } + if should_jump { + self.instruction_ptr = *addr; + } else { + self.step(); + } + Ok(()) + } + Instruction::Call => { + self.assert_stack_amount(1)?; + let value = self.stack.pop().expect("stack size asserted"); + let addr = match value { + Value::FunctionReference(addr) => addr, + _ => return self.error("expected function reference"), + }; + if !(0..self.functions.len()).contains(&addr) { + return self.error("function address out of range"); + } + let function = &self.functions[addr]; + if let Function::BuiltIn { .. 
} = function { + let value = match addr { + 0 => builtin_print(&mut self.stack)?, + 1 => builtin_println(&mut self.stack)?, + _ => return self.error("unknown builtin function"), + }; + self.stack.push(value); + return Ok(()); + } + if self.stack.len() < self.stack_base + function.arguments() { + return self.error(&format!( + "function expects {} arguments, {} present", + function.arguments(), + self.stack.len() - self.stack_base + )); + } + self.call_stack.push(Call { + return_addr: self.function_ptr, + instruction_ptr: self.instruction_ptr + 1, + stack_base: self.stack_base, + locals_base: self.locals_base, + }); + self.stack_base = self.stack.len() - function.arguments(); + self.locals_base = self.locals.len(); + self.locals + .reserve(self.locals_base + function.locals_reserve()); + for _ in 0..function.locals_reserve() { + self.locals.push(Value::Unit); + } + self.function_ptr = addr; + self.instruction_ptr = 0; + Ok(()) + } + Instruction::Return => { + let call = self + .call_stack + .pop() + .ok_or_else(|| self.error_value("call stack underflow"))?; + if self.stack.len() != self.stack_base + 1 { + return self.error(&format!( + "expected 1 value on stack on return, got {}", + self.stack.len() - self.stack_base + )); + } + self.stack_base = call.stack_base; + for _ in 0..self.locals.len() - self.locals_base { + self.locals.pop().expect("size asserted"); + } + self.locals_base = call.locals_base; + self.function_ptr = call.return_addr; + self.instruction_ptr = call.instruction_ptr; + Ok(()) + } + Instruction::LoadLocal(addr) => { + if *addr == 0 { + self.stack.push(Value::FunctionReference(0)); + return self.step_and_ok(); + } else if *addr == 1 { + self.stack.push(Value::FunctionReference(1)); + return self.step_and_ok(); + } + if !(0..self.locals.len() - self.locals_base).contains(&addr) { + return self.error("local address out of range"); + } + self.stack + .push(self.locals[self.locals_base + addr].clone()); + self.step_and_ok() + } + 
Instruction::StoreLocal(addr) => { + self.assert_stack_amount(1)?; + if !(0..self.locals.len() - self.locals_base).contains(&addr) { + return self.error("local address out of range"); + } + self.locals[self.locals_base + *addr] = self.stack.pop().expect("value"); + self.step_and_ok() + } + Instruction::HeapLoad => { + self.assert_stack_amount(1)?; + let (addr, generation) = match self.stack.pop() { + Some(Value::HeapReference(addr, generation)) => (addr, generation), + _ => return self.error("expected heap address"), + }; + if !(0..self.heap.len()).contains(&addr) { + return self.error("heap address out of range"); + }; + let entry = match &self.heap[addr] { + Some(entry) if entry.generation == generation => entry, + _ => return self.error("invalid heap address"), + }; + self.stack.push(entry.value.clone()); + self.step_and_ok() + } + Instruction::HeapStore => { + self.assert_stack_amount(2)?; + let (addr, generation) = match self.stack.pop() { + Some(Value::HeapReference(addr, generation)) => (addr, generation), + _ => return self.error("expected heap address"), + }; + if !(0..self.heap.len()).contains(&addr) { + return self.error("heap address out of range"); + }; + let entry = match &mut self.heap[addr] { + Some(entry) if entry.generation == generation => entry, + _ => return self.error("invalid heap address"), + }; + entry.value = self.stack.pop().expect("stack size asserted"); + Ok(()) + } + Instruction::HeapAlloc => { + let addr = if let Some(addr) = self.deallocated_addrs.pop_front() { + self.heap[addr] = Some(HeapEntry { + value: Value::Unit, + generation: self.heap_generation, + gc_marker: false, + }); + addr + } else { + let addr = self.heap.len(); + self.heap.push(Some(HeapEntry { + value: Value::Unit, + generation: self.heap_generation, + gc_marker: false, + })); + addr + }; + self.stack + .push(Value::HeapReference(addr, self.heap_generation)); + self.heap_generation = self.heap_generation.wrapping_add(1); + self.step_and_ok() + } + 
Instruction::PushUnit => { + self.stack.push(Value::Unit); + self.step_and_ok() + } + Instruction::PushInt(value) => { + self.stack.push(Value::Int(*value)); + self.step_and_ok() + } + Instruction::PushFloat(value) => { + self.stack.push(Value::Float(*value)); + self.step_and_ok() + } + Instruction::PushString(value) => { + self.stack.push(Value::String(value.clone())); + self.step_and_ok() + } + Instruction::PushBool(value) => { + self.stack.push(Value::Bool(*value)); + self.step_and_ok() + } + Instruction::PushFunctionReference(value) => { + self.stack.push(Value::FunctionReference(*value)); + self.step_and_ok() + } + Instruction::Not => { + self.assert_stack_amount(1)?; + match self.stack.pop().expect("stack size asserted") { + Value::Bool(value) => { + self.stack.push(Value::Bool(!value)); + self.step_and_ok() + } + _ => self.error("invalid type for 'not', expected bool"), + } + } + Instruction::Negate => { + self.assert_stack_amount(1)?; + match self.stack.pop().expect("stack size asserted") { + Value::Int(value) => { + self.stack.push(Value::Int(-value)); + self.step_and_ok() + } + Value::Float(value) => { + self.stack.push(Value::Float(-value)); + self.step_and_ok() + } + _ => self.error("invalid type for 'negate', expected int or float"), + } + } + Instruction::Add => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + self.stack.pop().expect("stack size asserted"), + ) { + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Int(left + right)); + self.step_and_ok() + } + (Value::Int(right), Value::Float(left)) => { + self.stack.push(Value::Float(left + right as f64)); + self.step_and_ok() + } + (Value::Float(right), Value::Int(left)) => { + self.stack.push(Value::Float((left as f64) + right)); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => { + self.stack.push(Value::Float(left + right)); + self.step_and_ok() + } + _ => self.error("invalid type for 'add', expected 
(int|float, int|float)"), + } + } + Instruction::Subtract => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + self.stack.pop().expect("stack size asserted"), + ) { + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Int(left - right)); + self.step_and_ok() + } + (Value::Int(right), Value::Float(left)) => { + self.stack.push(Value::Float(left - right as f64)); + self.step_and_ok() + } + (Value::Float(right), Value::Int(left)) => { + self.stack.push(Value::Float((left as f64) - right)); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => { + self.stack.push(Value::Float(left - right)); + self.step_and_ok() + } + _ => self.error("invalid type for 'subtract', expected (int|float, int|float)"), + } + } + Instruction::Multiply => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + self.stack.pop().expect("stack size asserted"), + ) { + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Int(left * right)); + self.step_and_ok() + } + (Value::Int(right), Value::Float(left)) => { + self.stack.push(Value::Float(left * right as f64)); + self.step_and_ok() + } + (Value::Float(right), Value::Int(left)) => { + self.stack.push(Value::Float((left as f64) * right)); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => { + self.stack.push(Value::Float(left * right)); + self.step_and_ok() + } + _ => self.error("invalid type for 'multiply', expected (int|float, int|float)"), + } + } + Instruction::Divide => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + self.stack.pop().expect("stack size asserted"), + ) { + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Int(left / right)); + self.step_and_ok() + } + (Value::Int(right), Value::Float(left)) => { + self.stack.push(Value::Float(left / right as f64)); + self.step_and_ok() + } + (Value::Float(right), 
Value::Int(left)) => { + self.stack.push(Value::Float((left as f64) / right)); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => { + self.stack.push(Value::Float(left / right)); + self.step_and_ok() + } + _ => self.error("invalid type for 'divide', expected (int|float, int|float)"), + } + } + Instruction::Modulo => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + self.stack.pop().expect("stack size asserted"), + ) { + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Int(left % right)); + self.step_and_ok() + } + (Value::Int(right), Value::Float(left)) => { + self.stack.push(Value::Float(left % right as f64)); + self.step_and_ok() + } + (Value::Float(right), Value::Int(left)) => { + self.stack.push(Value::Float((left as f64) % right)); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => { + self.stack.push(Value::Float(left % right)); + self.step_and_ok() + } + _ => self.error("invalid type for 'modulo', expected (int|float, int|float)"), + } + } + Instruction::Exponentiate => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + self.stack.pop().expect("stack size asserted"), + ) { + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Int(left.pow(right as u32))); + self.step_and_ok() + } + (Value::Int(right), Value::Float(left)) => { + self.stack.push(Value::Float(left.powf(right as f64))); + self.step_and_ok() + } + (Value::Float(right), Value::Int(left)) => { + self.stack.push(Value::Float((left as f64).powf(right))); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => { + self.stack.push(Value::Float(left.powf(right))); + self.step_and_ok() + } + _ => self + .error("invalid type for 'exponentiate', expected (int|float, int|float)"), + } + } + Instruction::LT => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + 
self.stack.pop().expect("stack size asserted"), + ) { + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Bool(left < right)); + self.step_and_ok() + } + (Value::Int(right), Value::Float(left)) => { + self.stack.push(Value::Bool(left < right as f64)); + self.step_and_ok() + } + (Value::Float(right), Value::Int(left)) => { + self.stack.push(Value::Bool((left as f64) < right)); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => { + self.stack.push(Value::Bool(left < right)); + self.step_and_ok() + } + _ => { + self.error("invalid type for 'less than', expected (int|float, int|float)") + } + } + } + Instruction::LTE => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + self.stack.pop().expect("stack size asserted"), + ) { + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Bool(left <= right)); + self.step_and_ok() + } + (Value::Int(right), Value::Float(left)) => { + self.stack.push(Value::Bool(left <= right as f64)); + self.step_and_ok() + } + (Value::Float(right), Value::Int(left)) => { + self.stack.push(Value::Bool((left as f64) <= right)); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => { + self.stack.push(Value::Bool(left <= right)); + self.step_and_ok() + } + _ => self.error( + "invalid type for 'less than or equal', expected (int|float, int|float)", + ), + } + } + Instruction::GT => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + self.stack.pop().expect("stack size asserted"), + ) { + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Bool(left > right)); + self.step_and_ok() + } + (Value::Int(right), Value::Float(left)) => { + self.stack.push(Value::Bool(left > right as f64)); + self.step_and_ok() + } + (Value::Float(right), Value::Int(left)) => { + self.stack.push(Value::Bool((left as f64) > right)); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => 
{ + self.stack.push(Value::Bool(left > right)); + self.step_and_ok() + } + _ => self + .error("invalid type for 'greater than', expected (int|float, int|float)"), + } + } + Instruction::GTE => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + self.stack.pop().expect("stack size asserted"), + ) { + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Bool(left >= right)); + self.step_and_ok() + } + (Value::Int(right), Value::Float(left)) => { + self.stack.push(Value::Bool(left >= right as f64)); + self.step_and_ok() + } + (Value::Float(right), Value::Int(left)) => { + self.stack.push(Value::Bool((left as f64) >= right)); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => { + self.stack.push(Value::Bool(left >= right)); + self.step_and_ok() + } + _ => self.error( + "invalid type for 'greater than or equal', expected (int|float, int|float)", + ), + } + } + Instruction::Equal => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + self.stack.pop().expect("stack size asserted"), + ) { + (Value::Unit, Value::Unit) => { + self.stack.push(Value::Bool(true)); + self.step_and_ok() + } + (Value::Int(right), Value::Int(left)) => { + self.stack.push(Value::Bool(left == right)); + self.step_and_ok() + } + (Value::Float(right), Value::Float(left)) => { + self.stack.push(Value::Bool(left == right)); + self.step_and_ok() + } + (Value::String(right), Value::String(left)) => { + self.stack.push(Value::Bool(left == right)); + self.step_and_ok() + } + (Value::Bool(right), Value::Bool(left)) => { + self.stack.push(Value::Bool(left == right)); + self.step_and_ok() + } + _ => { + self.error( + "invalid type for 'equal', expected (unit, unit), (int, int), (float, float), (string, string) or (bool, bool)" + ) + } + } + } + Instruction::Inequal => { + self.assert_stack_amount(2)?; + match ( + self.stack.pop().expect("stack size asserted"), + 
self.stack.pop().expect("stack size asserted"),
+                ) {
+                    (Value::Unit, Value::Unit) => {
+                        self.stack.push(Value::Bool(false));
+                        self.step_and_ok()
+                    }
+                    (Value::Int(right), Value::Int(left)) => {
+                        self.stack.push(Value::Bool(left != right));
+                        self.step_and_ok()
+                    }
+                    (Value::Float(right), Value::Float(left)) => {
+                        self.stack.push(Value::Bool(left != right));
+                        self.step_and_ok()
+                    }
+                    (Value::String(right), Value::String(left)) => {
+                        self.stack.push(Value::Bool(left != right));
+                        self.step_and_ok()
+                    }
+                    (Value::Bool(right), Value::Bool(left)) => {
+                        self.stack.push(Value::Bool(left != right));
+                        self.step_and_ok()
+                    }
+                    _ => {
+                        self.error(
+                            "invalid type for 'inequal', expected (unit, unit), (int, int), (float, float), (string, string) or (bool, bool)"
+                        )
+                    }
+                }
+            }
+            Instruction::And => {
+                self.assert_stack_amount(2)?;
+                match (
+                    self.stack.pop().expect("stack size asserted"),
+                    self.stack.pop().expect("stack size asserted"),
+                ) {
+                    (Value::Bool(right), Value::Bool(left)) => {
+                        self.stack.push(Value::Bool(left && right));
+                        self.step_and_ok()
+                    }
+                    _ => self.error("invalid type for 'and', expected (bool, bool)"),
+                }
+            }
+            Instruction::Or => {
+                self.assert_stack_amount(2)?;
+                match (
+                    self.stack.pop().expect("stack size asserted"),
+                    self.stack.pop().expect("stack size asserted"),
+                ) {
+                    (Value::Bool(right), Value::Bool(left)) => {
+                        self.stack.push(Value::Bool(left || right));
+                        self.step_and_ok()
+                    }
+                    _ => self.error("invalid type for 'or', expected (bool, bool)"),
+                }
+            }
+            Instruction::In => self.error("'in' not implemented"),
+            Instruction::NotIn => self.error("'not in' not implemented"),
+        }
+    }
+
+    fn error(&self, message: &str) -> Result<(), RuntimeError> {
+        Err(self.error_value(message)) // always Err, so callers can `return self.error("...")`
+    }
+
+    fn error_value(&self, message: &str) -> RuntimeError {
+        RuntimeError {
+            message: message.to_string(), // owned copy of the caller's text
+        }
+    }
+
+    fn assert_stack_amount(&self, amount: usize) -> Result<(), RuntimeError> {
+        if self.stack.len() < self.stack_base + amount { // fewer than `amount` values above `stack_base`
+            self.error(&format!(
+                "expected {} value(s) on the stack, {} present",
+                amount,
+                self.stack.len().saturating_sub(self.stack_base) // saturating: never panic while reporting
+            ))
+        } else {
+            Ok(())
+        }
+    }
+
+    fn step_and_ok(&mut self) -> Result<(), RuntimeError> {
+        self.step(); // advance first, then report success
+        Ok(())
+    }
+
+    fn step(&mut self) {
+        self.instruction_ptr += 1; // index used by `current_instruction`
+    }
+
+    fn current_instruction(&self) -> &Instruction {
+        &self.current_function().instructions()[self.instruction_ptr] // plain indexing, no range check here
+    }
+
+    fn current_function(&self) -> &Function {
+        &self.functions[self.function_ptr] // plain indexing, no range check here
+    }
+}
diff --git a/src/tokens.rs b/src/tokens.rs
index af7c1aa..7e3e0c0 100644
--- a/src/tokens.rs
+++ b/src/tokens.rs
@@ -1,3 +1,5 @@
+use crate::ast::escape_string;
+
 #[derive(Debug, Clone)]
 pub struct Position {
     pub index: usize,
@@ -90,3 +92,17 @@ pub struct Token {
 pub trait PositionKnowing {
     fn pos(&self) -> Position;
 }
+
+impl Token {
+    pub fn fancy_string(&self, text: &str) -> String {
+        format!(
+            "[{}:{}]\t{}:{}\t{:?}(`{}`)",
+            self.pos.index,
+            self.length,
+            self.pos.line,
+            self.pos.col,
+            self.token_type,
+            escape_string(&text[self.pos.index..self.pos.index + self.length]), // byte-range slice: panics if out of range or not on a char boundary
+        )
+    }
+}