From d55e9b3c70b8dc54ce703c55e4abfcae2f7c4ac7 Mon Sep 17 00:00:00 2001 From: SimonFJ20 Date: Thu, 30 May 2024 13:49:41 +0200 Subject: [PATCH] chewker done ^w^ --- Cargo.lock | 91 +++++++++ Cargo.toml | 2 + src/checked.rs | 3 +- src/checker.rs | 395 ++++++++++++++++++++++++++++++--------- src/lexer.rs | 2 +- src/main.rs | 2 +- src/parser.rs | 4 +- src/sym.rs | 100 ++++++++-- src/{hash.rs => util.rs} | 0 9 files changed, 486 insertions(+), 113 deletions(-) rename src/{hash.rs => util.rs} (100%) diff --git a/Cargo.lock b/Cargo.lock index 5e9bb7a..e91ffc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,97 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "diff" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "pretty_assertions" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +dependencies = [ + "diff", + "yansi", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + [[package]] name = "yapping" version = "0.1.0" +dependencies = [ + "pretty_assertions", + "rand", +] diff --git a/Cargo.toml b/Cargo.toml index d5eb827..6e4bc20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +pretty_assertions = "1.4.0" +rand = "0.8.5" diff --git a/src/checked.rs b/src/checked.rs index ae77e83..23bd74b 100644 --- a/src/checked.rs +++ b/src/checked.rs @@ -2,6 +2,7 @@ pub struct Node { pub kind: NodeKind, pub typ: Type, + pub table_id: usize, } #[derive(Clone, PartialEq, Debug)] @@ -36,7 +37,7 @@ pub enum NodeKind { Fn { subject: Box, params: Vec, - return_typ: Box, + return_typ: Type, body: Box, }, Return { diff --git a/src/checker.rs b/src/checker.rs index 42b1700..e898ccc 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -1,79 +1,130 @@ #![allow(unused_variables)] +use rand::random; + use crate::{ checked::{Node, NodeKind, Type}, - hash::hash, itertewls::Itertewls, parsed, sym::Syms, + util::hash, }; -pub struct Checker {} +pub trait IdGen { + fn new() -> Self; + fn gen(&mut self) -> u64; +} +pub struct RandIdGen; +impl IdGen for RandIdGen { + fn new() -> Self { + Self + } + fn gen(&mut self) -> u64 { + random() + } +} + +pub struct Checker { + syms: Syms, + fn_id_gen: FnIdGen, +} impl Checker { pub fn new() -> Self { - Self {} + Self { + syms: Syms::new(), + fn_id_gen: RandIdGen::new(), + } + } +} + +impl Checker { + pub fn new_with_fn_id_gen() -> Self { + Self { + syms: Syms::new(), + fn_id_gen: FnIdGen::new(), + } } - pub fn check(&mut self, ast: &Vec) -> Vec { - let mut syms = Syms::new(); - ast.into_iter() - .map(|stmt| self.check_expr(stmt, &mut syms)) - .collect() + pub fn check(&mut self, ast: &[parsed::Node]) -> Vec { + self.fn_scan(ast); + ast.iter().map(|stmt| self.check_expr(stmt)).collect() } - fn fn_scan<'syms>( - &mut self, - ast: &Vec, - syms: &mut Syms<'syms>, - ) -> Result<(), ()> { + fn fn_scan(&mut self, ast: &[parsed::Node]) { for node in ast { - match node { - parsed::Node::Fn { - subject, - params, - return_typ, - body, - } => { - let params = params.into_iter().map(|param| { - let parsed::Node::Param { subject, typ } = param else { unreachable!() }; - let parsed::Node::Id(id) = subject.as_ref() else { unreachable!() }; - let typ = self.check_type(typ.as_ref().ok_or(())?); - Ok((*id, self.node(NodeKind::Param { subject: Box::new(self.node(NodeKind::Id(*id), Type::Unit)), typ: Some(typ) }, Type::Unit))) - }).collect::, _>>()?; + if let parsed::Node::Fn { + subject, + params, + return_typ, + body, + } = node + { + let Ok(params) = self.fn_scan_params(params) else { + continue; + }; - if let Some(id) = params.iter().map(|(id, _)| *id).find_first_duplicate() { - self.error("redefinition param"); - return Err(()); - } - - let parsed::Node::Id(id) = subject.as_ref() else { unreachable!() }; - - if syms.defined_locally(*id) { - self.error("redefinition fn"); - return Err(()); - } - //syms.define(*id, typ.clone()); + if let Some(id) = params.iter().map(|(id, _)| *id).find_first_duplicate() { + self.error("redefinition param"); + continue; } - _ => {} + + let params = params.into_iter().map(|(_, param)| param).collect(); + + let parsed::Node::Id(id) = subject.as_ref() else { unreachable!() }; + + if self.syms.defined_locally(*id) { + self.error("redefinition fn"); + continue; + } + + let return_typ = self.check_type(return_typ); + + let typ = Type::Fn { + id: self.fn_id_gen.gen(), + params, + return_typ: Box::new(return_typ), + }; + + self.syms.define(*id, typ.clone()); } } - Ok(()) } - fn check_expr<'syms>(&mut self, node: &parsed::Node, syms: &mut Syms<'syms>) -> Node { + fn fn_scan_params(&self, params: &[parsed::Node]) -> Result, ()> { + params + .iter() + .map(|param| { + let parsed::Node::Param { subject, typ } = param else { unreachable!() }; + let parsed::Node::Id(id) = subject.as_ref() else { unreachable!() }; + let typ = self.check_type(typ.as_ref().ok_or(())?); + Ok(( + *id, + self.node( + NodeKind::Param { + subject: Box::new(self.node(NodeKind::Id(*id), Type::Unit)), + typ: Some(typ), + }, + Type::Unit, + ), + )) + }) + .collect::, _>>() + } + + fn check_expr(&mut self, node: &parsed::Node) -> Node { match node { parsed::Node::Error => self.node(NodeKind::Error, Type::Unit), parsed::Node::Id(id) => { - let Some(sym) = syms.get(*id) else { + let Some(sym) = self.syms.get(*id) else { self.error("undefined >~<"); return self.node(NodeKind::Error, Type::Error); }; - self.node(NodeKind::Id(*id), sym.typ) + self.node(NodeKind::Id(*id), sym.typ.clone()) } parsed::Node::Int(value) => self.node( NodeKind::Int(*value), - if *value as i32 > i32::MAX { + if *value > i32::MAX as i64 { Type::U32 } else { Type::I32 @@ -81,16 +132,17 @@ impl Checker { ), parsed::Node::String(value) => self.node(NodeKind::String(value.clone()), Type::String), parsed::Node::Group(expr) => { - let expr = self.check_expr(expr, syms); + let expr = self.check_expr(expr); let typ = expr.typ.clone(); self.node(NodeKind::Group(Box::new(expr)), typ) } parsed::Node::Block(stmts) => { - let mut child_syms = syms.child(); + self.syms.enter_scope(); let stmts = stmts - .into_iter() - .map(|stmt| self.check_expr(stmt, &mut child_syms)) + .iter() + .map(|stmt| self.check_expr(stmt)) .collect::>(); + self.syms.leave_scope().unwrap(); let typ = stmts .last() .map(|stmt| stmt.typ.clone()) @@ -98,11 +150,11 @@ impl Checker { self.node(NodeKind::Block(stmts), typ) } parsed::Node::Call { subject, args } => { - let subject = Box::new(self.check_expr(subject, syms)); + let subject = Box::new(self.check_expr(subject)); let args = args - .into_iter() - .map(|arg| self.check_expr(arg, syms)) + .iter() + .map(|arg| self.check_expr(arg)) .collect::>(); let typ = 'br: { match subject.typ.clone() { @@ -139,11 +191,9 @@ impl Checker { truthy, falsy, } => { - let cond = Box::new(self.check_expr(cond, syms)); - let truthy = Box::new(self.check_expr(truthy, syms)); - let falsy = falsy - .as_ref() - .map(|block| Box::new(self.check_expr(block, syms))); + let cond = Box::new(self.check_expr(cond)); + let truthy = Box::new(self.check_expr(truthy)); + let falsy = falsy.as_ref().map(|block| Box::new(self.check_expr(block))); let typ = 'br: { match falsy.as_ref().map(|block| block.typ.clone()) { Some(falsy_typ) => { @@ -166,14 +216,15 @@ impl Checker { ) } parsed::Node::Loop { body } => { - let body = Box::new(self.check_expr(body, &mut syms.child())); + self.syms.enter_scope(); + let body = Box::new(self.check_expr(body)); let typ = body.typ.clone(); self.node(NodeKind::Loop { body }, typ) } parsed::Node::Break => self.node(NodeKind::Break, Type::Unit), parsed::Node::Assign { subject, value } => { - let subject = Box::new(self.check_expr(subject, syms)); - let value = Box::new(self.check_expr(value, syms)); + let subject = Box::new(self.check_expr(subject)); + let value = Box::new(self.check_expr(value)); let typ = if !self.compatible(&subject.typ, &value.typ) { self.error("incompatible types #3"); @@ -191,7 +242,7 @@ impl Checker { _ => unreachable!(), }; - let value = Box::new(self.check_expr(value, syms)); + let value = Box::new(self.check_expr(value)); let typ = value.typ.clone(); if subject_typ @@ -205,11 +256,11 @@ impl Checker { let subject = match subject.as_ref() { parsed::Node::Id(id) => { - if syms.defined_locally(*id) { + if self.syms.defined_locally(*id) { self.error("redefinition"); return self.node(NodeKind::Error, Type::Error); } - syms.define(*id, typ.clone()); + self.syms.define(*id, typ.clone()); Box::new(self.node( NodeKind::Param { subject: Box::new(self.node(NodeKind::Id(*id), Type::Unit)), @@ -225,16 +276,53 @@ impl Checker { } parsed::Node::Fn { subject, - params, - return_typ, + params: _, + return_typ: _, body, } => { - todo!("symbol lookup"); + let parsed::Node::Id(id) = subject.as_ref() else { unreachable!() }; + + let Some(sym) = self.syms.get(*id).cloned() else { + // rejected in fn scanner + return self.node(NodeKind::Error,Type::Error); + }; + + let Type::Fn { id: fn_id, params, return_typ } = sym.typ else { + self.error("redefintion"); + return self.node(NodeKind::Error,Type::Error); + }; + + self.syms.enter_scope(); + + for param in ¶ms { + let NodeKind::Param { + ref subject, + typ: Some(ref typ), + } = param.kind else { unreachable!() }; + let NodeKind::Id(id) = subject.kind else { unreachable!() }; + self.syms.define(id, typ.clone()); + } + + let body = Box::new(self.check_expr(body)); + + if !self.compatible(&return_typ, &body.typ) { + self.error("return type violated"); + } + + self.syms.leave_scope().unwrap(); + + self.node( + NodeKind::Fn { + subject: Box::new(self.node(NodeKind::Id(*id), Type::Unit)), + params, + return_typ: *return_typ, + body, + }, + Type::Unit, + ) } parsed::Node::Return { value } => { - let value = value - .as_ref() - .map(|value| Box::new(self.check_expr(value, syms))); + let value = value.as_ref().map(|value| Box::new(self.check_expr(value))); let typ = value .as_ref() .map(|value| value.typ.clone()) @@ -266,7 +354,11 @@ impl Checker { } fn node(&self, kind: NodeKind, typ: Type) -> Node { - Node { kind, typ } + Node { + kind, + typ, + table_id: self.syms.table_id(), + } } fn error>(&mut self, msg: S) { @@ -278,10 +370,24 @@ impl Checker { #[test] fn test_checker() { use crate::parser::Parser; - use NodeKind::{Block, Id, Int, Let, Param}; + use pretty_assertions::assert_eq; + use NodeKind::{Block, Call, Error, Fn, Id, Int, Let, Param, Return}; use Type::{Unit, I32}; - let check = |text| Checker::new().check(&Parser::new(text).parse()); + struct SeqIdGen(u64); + impl IdGen for SeqIdGen { + fn new() -> Self { + Self(0) + } + + fn gen(&mut self) -> u64 { + let v = self.0; + self.0 += 1; + v + } + } + + let check = |text| Checker::::new_with_fn_id_gen().check(&Parser::new(text).parse()); assert_eq!( check("let a = 5; a;"), @@ -292,26 +398,69 @@ fn test_checker() { kind: Param { subject: Box::new(Node { kind: Id(hash("a")), - typ: Unit + typ: Unit, + table_id: 0, }), typ: Some(Unit) }, - typ: Unit + typ: Unit, + table_id: 0, }), value: Box::new(Node { kind: Int(5), - typ: I32 + typ: I32, + table_id: 0, }) }, - typ: I32 + typ: I32, + table_id: 0, }, Node { kind: Id(hash("a")), - typ: I32 + typ: I32, + table_id: 0, } ] ); + assert_eq!( + check("let a = 5; { a; }"), + vec![ + Node { + kind: Let { + subject: Box::new(Node { + kind: Param { + subject: Box::new(Node { + kind: Id(hash("a")), + typ: Unit, + table_id: 0, + }), + typ: Some(Unit) + }, + typ: Unit, + table_id: 0, + }), + value: Box::new(Node { + kind: Int(5), + typ: I32, + table_id: 0, + }) + }, + typ: I32, + table_id: 0, + }, + Node { + kind: Block(vec![Node { + kind: Id(hash("a")), + typ: I32, + table_id: 1, + },]), + typ: I32, + table_id: 0, + }, + ] + ); + assert_eq!( check("let a = 5; a; { a; let b = 5; b; } a; b;"), vec![ @@ -321,28 +470,34 @@ fn test_checker() { kind: Param { subject: Box::new(Node { kind: Id(hash("a")), - typ: Unit + typ: Unit, + table_id: 0, }), typ: Some(Unit) }, - typ: Unit + typ: Unit, + table_id: 0, }), value: Box::new(Node { kind: Int(5), - typ: I32 + typ: I32, + table_id: 0, }) }, - typ: I32 + typ: I32, + table_id: 0, }, Node { kind: Id(hash("a")), - typ: I32 + typ: I32, + table_id: 0, }, Node { kind: Block(vec![ Node { kind: Id(hash("a")), - typ: I32 + typ: I32, + table_id: 1, }, Node { kind: Let { @@ -350,34 +505,92 @@ fn test_checker() { kind: Param { subject: Box::new(Node { kind: Id(hash("b")), - typ: Unit + typ: Unit, + table_id: 1, }), typ: Some(Unit) }, - typ: Unit + typ: Unit, + table_id: 1, }), value: Box::new(Node { kind: Int(5), - typ: I32 + typ: I32, + table_id: 1, }) }, - typ: I32 + typ: I32, + table_id: 1, }, Node { kind: Id(hash("b")), - typ: I32 + typ: I32, + table_id: 1, } ]), - typ: I32 + typ: I32, + table_id: 0, }, Node { kind: Id(hash("a")), - typ: I32 + typ: I32, + table_id: 0, }, Node { - kind: NodeKind::Error, - typ: Type::Error + kind: Error, + typ: Type::Error, + table_id: 0, } ] ); + + assert_eq!( + check("fn a() -> i32 { return 0; } \n a();"), + vec![ + Node { + kind: Fn { + subject: Box::new(Node { + kind: Id(hash("a")), + typ: Unit, + table_id: 0, + }), + params: vec![], + return_typ: I32, + body: Box::new(Node { + kind: Block(vec![Node { + kind: Return { + value: Some(Box::new(Node { + kind: Int(0), + typ: I32, + table_id: 2, + }),), + }, + typ: I32, + table_id: 2, + },],), + typ: I32, + table_id: 1, + }), + }, + typ: Unit, + table_id: 0, + }, + Node { + kind: Call { + subject: Box::new(Node { + kind: Id(hash("a")), + typ: Type::Fn { + id: 0, + params: vec![], + return_typ: Box::new(I32) + }, + table_id: 0 + }), + args: vec![] + }, + typ: I32, + table_id: 0, + }, + ] + ); } diff --git a/src/lexer.rs b/src/lexer.rs index e074eb5..cbce116 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,6 +1,6 @@ use crate::{ - hash::hash, token::{Token, TokenKind, TokenValue}, + util::hash, }; use std::{collections::HashMap, str::Chars}; diff --git a/src/main.rs b/src/main.rs index b2fe783..7ec1052 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,13 +2,13 @@ mod checked; mod checker; -mod hash; mod itertewls; mod lexer; mod parsed; mod parser; mod sym; mod token; +mod util; fn main() { println!("Hello, world!"); diff --git a/src/parser.rs b/src/parser.rs index 82bd0a1..5b11b25 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -205,7 +205,7 @@ impl<'a> Parser<'a> { self.step(); let mut args = Vec::new(); match self.curr_kind() { - None | Some(TokenKind::LParen) => {} + None | Some(TokenKind::RParen) => {} Some(_) => { args.push(self.parse_expr()); while let Some(TokenKind::Comma) = self.curr_kind() { @@ -378,7 +378,7 @@ impl<'a> Parser<'a> { #[test] fn test_parser() { - use crate::hash::hash; + use crate::util::hash; use Node::*; let parse = |text| Parser::new(text).parse(); diff --git a/src/sym.rs b/src/sym.rs index 5b73808..698522e 100644 --- a/src/sym.rs +++ b/src/sym.rs @@ -9,40 +9,106 @@ pub struct Sym { pub typ: Type, } -pub struct Syms<'syms> { - parent: Option<&'syms Syms<'syms>>, - map: HashMap, +#[derive(Debug)] +pub struct Syms { + tables: Vec, + current_id: usize, } -impl<'syms> Syms<'syms> { +impl Syms { pub fn new() -> Self { Self { - parent: None, - map: HashMap::new(), + tables: vec![SymTable::new()], + current_id: 0, } } - pub fn child(&'syms self) -> Self { - Self { - parent: Some(self), - map: HashMap::new(), - } + pub fn enter_scope(&mut self) { + let new_id = self.tables.len(); + self.tables.push(SymTable::from(self.current_id)); + self.current_id = new_id; } - pub fn get(&self, id: u64) -> Option { - if let Some(sym) = self.map.get(&id) { - return Some(sym.clone()); - } - if let Some(parent) = self.parent { - return parent.get(id); + pub fn leave_scope(&mut self) -> Result<(), ()> { + let parent = self.tables[self.current_id].parent.ok_or(())?; + self.current_id = parent; + Ok(()) + } + + pub fn get(&self, id: u64) -> Option<&Sym> { + self.get_rec(self.current_id, id) + } + + fn get_rec(&self, table_id: usize, id: u64) -> Option<&Sym> { + if let Some(sym) = self.tables[table_id].get(id) { + return Some(sym); } + if let Some(parent_id) = self.tables[table_id].parent { + return self.get_rec(parent_id, id); + }; None } pub fn defined_locally(&self, id: u64) -> bool { + self.tables[self.current_id].defined(id) + } + + pub fn define(&mut self, id: u64, typ: Type) { + self.tables[self.current_id].define(id, typ); + } + + pub fn table_id(&self) -> usize { + self.current_id + } + + pub fn view(&self, table_id: usize) -> SymView { + SymView { + syms: self, + current_id: table_id, + } + } +} + +pub struct SymView<'a> { + syms: &'a Syms, + current_id: usize, +} + +impl<'a> SymView<'a> { + pub fn get(&self, id: u64) -> Option<&Sym> { + self.syms.get_rec(self.current_id, id) + } +} + +#[derive(Debug)] +struct SymTable { + map: HashMap, + parent: Option, +} + +impl SymTable { + pub fn new() -> Self { + Self { + map: HashMap::new(), + parent: None, + } + } + + pub fn from(parent: usize) -> Self { + Self { + map: HashMap::new(), + parent: Some(parent), + } + } + + pub fn defined(&self, id: u64) -> bool { self.map.contains_key(&id) } + pub fn get(&self, id: u64) -> Option<&Sym> { + self.map.get(&id) + } + pub fn define(&mut self, id: u64, typ: Type) { self.map.insert(id, Sym { id, typ }); } diff --git a/src/hash.rs b/src/util.rs similarity index 100% rename from src/hash.rs rename to src/util.rs