From 1331ba100948ca6b02af374543417c58132f3fe7 Mon Sep 17 00:00:00 2001 From: SimonFJ20 Date: Mon, 10 Jun 2024 15:49:16 +0200 Subject: [PATCH] add pos to stuff --- Cargo.lock | 7 + Cargo.toml | 1 + src/checker.rs | 163 ++++++++-------- src/itertewls.rs | 29 --- src/lexer.rs | 99 +++++----- src/main.rs | 7 +- src/parsed.rs | 16 +- src/parser.rs | 479 ++++++++++++++++++++++++++++++++--------------- src/pos.rs | 44 +++++ src/token.rs | 6 +- src/util.rs | 29 +++ 11 files changed, 574 insertions(+), 306 deletions(-) delete mode 100644 src/itertewls.rs create mode 100644 src/pos.rs diff --git a/Cargo.lock b/Cargo.lock index e91ffc8..ab213b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "assert_matches" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" + [[package]] name = "cfg-if" version = "1.0.0" @@ -93,6 +99,7 @@ checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" name = "yapping" version = "0.1.0" dependencies = [ + "assert_matches", "pretty_assertions", "rand", ] diff --git a/Cargo.toml b/Cargo.toml index 6e4bc20..e6c24a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,5 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +assert_matches = "1.5.0" pretty_assertions = "1.4.0" rand = "0.8.5" diff --git a/src/checker.rs b/src/checker.rs index c055cbe..8039806 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -1,11 +1,13 @@ +use std::{rc::Rc, sync::Mutex}; + use rand::random; use crate::{ checked::{Node, NodeKind, Type}, - itertewls::Itertewls, parsed, + pos::{Error, ErrorAcc, Pos}, sym::Syms, - util::hash, + util::{hash, Itertewls}, }; pub trait IdGen { @@ -26,22 +28,25 @@ impl IdGen for RandIdGen { pub struct Checker { syms: Syms, fn_id_gen: FnIdGen, + error_acc: Rc>, } impl Checker { - pub fn new() -> Self { + pub fn new(error_acc: Rc>) -> Self { Self { syms: Syms::new(), fn_id_gen: RandIdGen::new(), + error_acc, } } } impl Checker { - pub fn new_with_fn_id_gen() -> Self { + pub fn new_with_fn_id_gen(error_acc: Rc>) -> Self { Self { syms: Syms::new(), fn_id_gen: FnIdGen::new(), + error_acc, } } @@ -56,19 +61,20 @@ impl Checker { fn fn_scan(&mut self, ast: &[parsed::Node]) { for node in ast { - if let parsed::Node::Fn { + let pos = node.pos.clone(); + if let parsed::NodeKind::Fn { subject, params, return_typ, body: _, - } = node + } = &node.kind { - let Ok(params) = self.fn_scan_params(params) else { + let Ok(params) = self.fn_scan_params(¶ms) else { continue; }; if let Some(_) = params.iter().map(|(id, _)| *id).find_first_duplicate() { - self.error("redefinition param"); + self.error("redefinition param", pos.clone()); continue; } @@ -80,14 +86,14 @@ impl Checker { }) .collect::>(); - let parsed::Node::Id(id) = subject.as_ref() else { unreachable!() }; + let parsed::NodeKind::Id(id) = subject.as_ref().kind else { unreachable!() }; - if self.syms.defined_locally(*id) { - self.error("redefinition fn"); + if self.syms.defined_locally(id) { + self.error("redefinition fn", pos.clone()); continue; } - let return_typ = self.check_type(return_typ); + let return_typ = self.check_type(&return_typ); let typ = Type::Fn { id: self.fn_id_gen.gen(), @@ -95,7 +101,7 @@ impl Checker { return_typ: Box::new(return_typ), }; - self.syms.define(*id, typ.clone()); + self.syms.define(id, typ.clone()); } } } @@ -104,14 +110,14 @@ impl Checker { params .iter() .map(|param| { - let parsed::Node::Param { subject, typ } = param else { unreachable!() }; - let parsed::Node::Id(id) = subject.as_ref() else { unreachable!() }; + let parsed::NodeKind::Param { subject, typ } = ¶m.kind else { unreachable!() }; + let parsed::NodeKind::Id(id) = subject.as_ref().kind else { unreachable!() }; let typ = self.check_type(typ.as_ref().ok_or(())?); Ok(( - *id, + id, self.node( NodeKind::Param { - subject: Box::new(self.node(NodeKind::Id(*id), Type::Unit)), + subject: Box::new(self.node(NodeKind::Id(id), Type::Unit)), typ: Some(typ), }, Type::Unit, @@ -122,16 +128,17 @@ impl Checker { } fn check_expr(&mut self, node: &parsed::Node) -> Node { - match node { - parsed::Node::Error => self.node(NodeKind::Error, Type::Unit), - parsed::Node::Id(id) => { + let pos = node.pos.clone(); + match &node.kind { + parsed::NodeKind::Error => self.node(NodeKind::Error, Type::Unit), + parsed::NodeKind::Id(id) => { let Some(sym) = self.syms.get(*id) else { - self.error("undefined >~<"); + self.error("undefined >~<", pos.clone()); return self.node(NodeKind::Error, Type::Error); }; self.node(NodeKind::Id(*id), sym.typ.clone()) } - parsed::Node::Int(value) => self.node( + parsed::NodeKind::Int(value) => self.node( NodeKind::Int(*value), if *value > i32::MAX as i64 { Type::U32 @@ -139,13 +146,13 @@ impl Checker { Type::I32 }, ), - parsed::Node::Str(value) => self.node(NodeKind::Str(value.clone()), Type::Str), - parsed::Node::Group(expr) => { - let expr = self.check_expr(expr); + parsed::NodeKind::Str(value) => self.node(NodeKind::Str(value.clone()), Type::Str), + parsed::NodeKind::Group(expr) => { + let expr = self.check_expr(&expr); let typ = expr.typ.clone(); self.node(NodeKind::Group(Box::new(expr)), typ) } - parsed::Node::Block(stmts) => { + parsed::NodeKind::Block(stmts) => { self.syms.enter_scope(); let stmts = stmts .iter() @@ -158,8 +165,8 @@ impl Checker { .unwrap_or(Type::Unit); self.node(NodeKind::Block(stmts), typ) } - parsed::Node::Call { subject, args } => { - let subject = Box::new(self.check_expr(subject)); + parsed::NodeKind::Call { subject, args } => { + let subject = Box::new(self.check_expr(&subject)); let args = args .iter() @@ -173,7 +180,7 @@ impl Checker { return_typ, } => { if args.len() != params.len() { - self.error("too few/many args"); + self.error("too few/many args", pos.clone()); break 'br Type::Error; } if args @@ -182,32 +189,32 @@ impl Checker { .map(|(arg, param)| self.compatible(&arg.typ, ¶m)) .any(|is_compatible| !is_compatible) { - self.error("incorrect args"); + self.error("incorrect args", pos.clone()); break 'br Type::Error; } *return_typ } _ => { - self.error("not a function"); + self.error("not a function", pos.clone()); Type::Error } } }; self.node(NodeKind::Call { subject, args }, typ) } - parsed::Node::If { + parsed::NodeKind::If { cond, truthy, falsy, } => { - let cond = Box::new(self.check_expr(cond)); - let truthy = Box::new(self.check_expr(truthy)); + let cond = Box::new(self.check_expr(&cond)); + let truthy = Box::new(self.check_expr(&truthy)); let falsy = falsy.as_ref().map(|block| Box::new(self.check_expr(block))); let typ = 'br: { match falsy.as_ref().map(|block| block.typ.clone()) { Some(falsy_typ) => { if !self.compatible(&truthy.typ, &falsy_typ) { - self.error("incompatible types #2"); + self.error("incompatible types #2", pos.clone()); break 'br Type::Error; } falsy_typ @@ -224,16 +231,16 @@ impl Checker { typ, ) } - parsed::Node::Loop { body } => { + parsed::NodeKind::Loop { body } => { self.syms.enter_scope(); - let body = Box::new(self.check_expr(body)); + let body = Box::new(self.check_expr(&body)); let typ = body.typ.clone(); self.node(NodeKind::Loop { body }, typ) } - parsed::Node::Break => self.node(NodeKind::Break, Type::Unit), - parsed::Node::Assign { subject, value } => { - let subject = Box::new(self.check_expr(subject)); - let value = Box::new(self.check_expr(value)); + parsed::NodeKind::Break => self.node(NodeKind::Break, Type::Unit), + parsed::NodeKind::Assign { subject, value } => { + let subject = Box::new(self.check_expr(&subject)); + let value = Box::new(self.check_expr(&value)); match subject.kind { NodeKind::Error => { @@ -241,28 +248,28 @@ impl Checker { } NodeKind::Id(_) => {} _ => { - self.error("cannot assign to expr"); + self.error("cannot assign to expr", pos.clone()); return self.node(NodeKind::Error, Type::Error); } } let _typ = if !self.compatible(&subject.typ, &value.typ) { - self.error("incompatible types #3"); + self.error("incompatible types #3", pos.clone()); Type::Error } else { subject.typ.clone() }; self.node(NodeKind::Assign { subject, value }, Type::Unit) } - parsed::Node::Let { subject, value } => { - let (subject, subject_typ) = match subject.as_ref() { - parsed::Node::Param { subject, typ } => { + parsed::NodeKind::Let { subject, value } => { + let (subject, subject_typ) = match &subject.as_ref().kind { + parsed::NodeKind::Param { subject, typ } => { (subject, typ.as_ref().map(|typ| self.check_type(typ))) } _ => unreachable!(), }; - let value = Box::new(self.check_expr(value)); + let value = Box::new(self.check_expr(&value)); let typ = value.typ.clone(); if subject_typ @@ -270,20 +277,20 @@ impl Checker { .map(|subject_typ| !self.compatible(subject_typ, &typ)) .unwrap_or(false) { - self.error("incompatible types #1"); + self.error("incompatible types #1", pos.clone()); return self.node(NodeKind::Error, Type::Error); } - let subject = match subject.as_ref() { - parsed::Node::Id(id) => { - if self.syms.defined_locally(*id) { - self.error("redefinition"); + let subject = match subject.as_ref().kind { + parsed::NodeKind::Id(id) => { + if self.syms.defined_locally(id) { + self.error("redefinition", pos.clone()); return self.node(NodeKind::Error, Type::Error); } - self.syms.define(*id, typ.clone()); + self.syms.define(id, typ.clone()); Box::new(self.node( NodeKind::Param { - subject: Box::new(self.node(NodeKind::Id(*id), Type::Unit)), + subject: Box::new(self.node(NodeKind::Id(id), Type::Unit)), typ: Some(Type::Unit), }, Type::Unit, @@ -294,21 +301,21 @@ impl Checker { self.node(NodeKind::Let { subject, value }, Type::Unit) } - parsed::Node::Fn { + parsed::NodeKind::Fn { subject, params, return_typ: _, body, } => { - let parsed::Node::Id(id) = subject.as_ref() else { unreachable!() }; + let parsed::NodeKind::Id(id) = subject.as_ref().kind else { unreachable!() }; - let Some(sym) = self.syms.get(*id).cloned() else { + let Some(sym) = self.syms.get(id).cloned() else { // rejected in fn scanner return self.node(NodeKind::Error,Type::Error); }; let Type::Fn { id: fn_id, params: param_typs, return_typ } = sym.typ else { - self.error("redefintion"); + self.error("redefintion", pos.clone()); return self.node(NodeKind::Error,Type::Error); }; @@ -318,11 +325,11 @@ impl Checker { .iter() .zip(param_typs) .map(|(param, typ)| { - let parsed::Node::Param { subject, .. } = param else { unreachable!() }; - let parsed::Node::Id(id) = subject.as_ref() else { unreachable!() }; + let parsed::NodeKind::Param { subject, .. } = ¶m.kind else { unreachable!() }; + let parsed::NodeKind::Id(id) = subject.as_ref().kind else { unreachable!() }; self.node( NodeKind::Param { - subject: Box::new(self.node(NodeKind::Id(*id), Type::Unit)), + subject: Box::new(self.node(NodeKind::Id(id), Type::Unit)), typ: Some(typ), }, Type::Unit, @@ -336,17 +343,17 @@ impl Checker { self.syms.define(id, typ.as_ref().cloned().unwrap()); } - let body = Box::new(self.check_expr(body)); + let body = Box::new(self.check_expr(&body)); if !self.compatible(&return_typ, &body.typ) { - self.error("return type violated"); + self.error("return type violated", pos.clone()); } self.syms.leave_scope().unwrap(); self.node( NodeKind::Fn { - subject: Box::new(self.node(NodeKind::Id(*id), Type::Unit)), + subject: Box::new(self.node(NodeKind::Id(id), Type::Unit)), params, return_typ: *return_typ, body, @@ -355,7 +362,7 @@ impl Checker { Type::Unit, ) } - parsed::Node::Return { value } => { + parsed::NodeKind::Return { value } => { let value = value.as_ref().map(|value| Box::new(self.check_expr(value))); let typ = value .as_ref() @@ -363,17 +370,17 @@ impl Checker { .unwrap_or(Type::Unit); self.node(NodeKind::Return { value }, typ) } - parsed::Node::Param { .. } => unreachable!("handle elsewhere"), + parsed::NodeKind::Param { .. } => unreachable!("handle elsewhere"), } } fn check_type(&self, node: &parsed::Node) -> Type { - match node { - parsed::Node::Error => Type::Error, - parsed::Node::Id(value) => { - if *value == hash("i32") { + match node.kind { + parsed::NodeKind::Error => Type::Error, + parsed::NodeKind::Id(value) => { + if value == hash("i32") { Type::I32 - } else if *value == hash("u32") { + } else if value == hash("u32") { Type::U32 } else { todo!("symbol lookup idk") @@ -395,9 +402,13 @@ impl Checker { } } - fn error>(&mut self, msg: S) { + fn error>(&mut self, msg: S, pos: Pos) { let msg = msg.into(); - println!("checker error: {msg}"); + self.error_acc.lock().unwrap().add(Error { + kind: crate::pos::ErrorKind::CheckerError, + pos: Some(pos), + msg, + }); } } @@ -421,7 +432,11 @@ fn test_checker() { } } - let check = |text| Checker::::new_with_fn_id_gen().check(&Parser::new(text).parse()); + let check = |text| { + let error_acc = Rc::new(Mutex::new(ErrorAcc::new())); + Checker::::new_with_fn_id_gen(error_acc.clone()) + .check(&Parser::new(text, error_acc).parse()) + }; assert_eq!( check("let a = 5; a;"), diff --git a/src/itertewls.rs b/src/itertewls.rs deleted file mode 100644 index 76db65f..0000000 --- a/src/itertewls.rs +++ /dev/null @@ -1,29 +0,0 @@ -enum Duplicate { - None(std::collections::HashMap), - Found(T), -} - -pub trait Itertewls -where - Self: Iterator + Sized, -{ - fn find_first_duplicate(self) -> Option; -} - -impl Itertewls for I -where - I: Iterator + Sized, - Item: std::cmp::PartialEq + Clone, -{ - fn find_first_duplicate(mut self) -> Option { - self.try_fold(Vec::new(), |mut used, item| { - if used.contains(&item) { - Err(item) - } else { - used.push(item); - Ok(used) - } - }) - .err() - } -} diff --git a/src/lexer.rs b/src/lexer.rs index bef64de..e3a90a1 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,22 +1,24 @@ use crate::{ + pos::{Error, ErrorAcc, Pos}, token::{Token, TokenKind, TokenValue}, util::hash, }; -use std::{collections::HashMap, str::Chars}; +use std::{collections::HashMap, rc::Rc, str::Chars, sync::Mutex}; pub struct Lexer<'a> { text: &'a str, chars: Chars<'a>, current: Option, index: usize, - line: i32, - col: i32, + line: i64, + col: i64, symbols: HashMap, keywords: HashMap, + error_acc: Rc>, } impl<'a> Lexer<'a> { - pub fn new(text: &'a str) -> Self { + pub fn new(text: &'a str, error_acc: Rc>) -> Self { let mut chars = text.chars(); let current = chars.next(); Self { @@ -28,6 +30,7 @@ impl<'a> Lexer<'a> { col: 1, symbols: HashMap::new(), keywords: Self::make_keywords(), + error_acc, } } @@ -48,6 +51,7 @@ impl<'a> Lexer<'a> { } fn next_token(&mut self) -> Option { + let pos = self.pos(); match self.current { None => None, Some(' ' | '\t' | '\n') => { @@ -66,11 +70,11 @@ impl<'a> Lexer<'a> { } _ => { if let Some(kind) = self.keywords.get(&value) { - return self.token(kind.clone()); + return self.token(kind.clone(), pos); } let id = hash(&value); self.symbols.insert(id, value); - break self.token_with_value(TokenKind::Id, TokenValue::Id(id)); + break self.token_with_value(TokenKind::Id, TokenValue::Id(id), pos); } } } @@ -87,7 +91,11 @@ impl<'a> Lexer<'a> { } _ => { let value = value.replace('_', "").parse::().unwrap(); - break self.token_with_value(TokenKind::Int, TokenValue::Int(value)); + break self.token_with_value( + TokenKind::Int, + TokenValue::Int(value), + pos, + ); } } } @@ -106,22 +114,25 @@ impl<'a> Lexer<'a> { Some('0') => value.push('\0'), Some(ch) => value.push(ch), None => { - self.error("malformed string"); - break self.token(TokenKind::Error); + self.error("malformed string", pos.clone()); + break self.token(TokenKind::Error, pos); } } } Some('"') => { self.step(); - break self - .token_with_value(TokenKind::Str, TokenValue::Str(value)); + break self.token_with_value( + TokenKind::Str, + TokenValue::Str(value), + pos, + ); } Some(ch) => { value.push(ch); } _ => { - self.error("malformed string"); - break self.token(TokenKind::Error); + self.error("malformed string", pos.clone()); + break self.token(TokenKind::Error, pos); } } self.step() @@ -152,8 +163,8 @@ impl<'a> Lexer<'a> { self.step(); } (_, None) => { - self.error("malformed /**/ comment"); - break self.token(TokenKind::Error); + self.error("malformed /**/ comment", pos.clone()); + break self.token(TokenKind::Error, pos); } } } @@ -166,7 +177,7 @@ impl<'a> Lexer<'a> { match self.current { Some('>') => { self.step(); - self.token(TokenKind::MinusLt) + self.token(TokenKind::MinusLt, pos) } _ => todo!(), } @@ -174,21 +185,21 @@ impl<'a> Lexer<'a> { Some(ch @ ('0' | '(' | ')' | '{' | '}' | ':' | ',' | ';' | '=')) => { self.step(); match ch { - '0' => self.token_with_value(TokenKind::Int, TokenValue::Int(0)), - '(' => self.token(TokenKind::LParen), - ')' => self.token(TokenKind::RParen), - '{' => self.token(TokenKind::LBrace), - '}' => self.token(TokenKind::RBrace), - ':' => self.token(TokenKind::Colon), - ',' => self.token(TokenKind::Comma), - ';' => self.token(TokenKind::Semicolon), - '=' => self.token(TokenKind::Equal), + '0' => self.token_with_value(TokenKind::Int, TokenValue::Int(0), pos), + '(' => self.token(TokenKind::LParen, pos), + ')' => self.token(TokenKind::RParen, pos), + '{' => self.token(TokenKind::LBrace, pos), + '}' => self.token(TokenKind::RBrace, pos), + ':' => self.token(TokenKind::Colon, pos), + ',' => self.token(TokenKind::Comma, pos), + ';' => self.token(TokenKind::Semicolon, pos), + '=' => self.token(TokenKind::Equal, pos), _ => unreachable!(), } } Some(ch) => { - self.error(format!("unknown char '{ch}'")); - self.token(TokenKind::Error) + self.error(format!("unknown char '{ch}'"), pos.clone()); + self.token(TokenKind::Error, pos) } } } @@ -210,29 +221,33 @@ impl<'a> Lexer<'a> { } } - fn token(&self, kind: TokenKind) -> Option { + fn token(&self, kind: TokenKind, pos: Pos) -> Option { Some(Token { kind, value: TokenValue::None, - index: self.index, - line: self.line, - col: self.col, + pos, }) } - fn token_with_value(&self, kind: TokenKind, value: TokenValue) -> Option { - Some(Token { - kind, - value, - index: self.index, - line: self.line, - col: self.col, - }) + fn token_with_value(&self, kind: TokenKind, value: TokenValue, pos: Pos) -> Option { + Some(Token { kind, value, pos }) } - fn error>(&mut self, msg: S) { + fn error>(&mut self, msg: S, pos: Pos) { let msg = msg.into(); - println!("lexer error: {msg}, line {}", self.line) + self.error_acc.lock().unwrap().add(Error { + kind: crate::pos::ErrorKind::LexerError, + pos: Some(pos), + msg, + }); + } + + fn pos(&self) -> Pos { + Pos { + index: self.index, + line: self.line, + col: self.col, + } } fn done(&self) -> bool { @@ -254,7 +269,7 @@ fn test_lexer() { use TokenValue as TV; let lex = |text| { - Lexer::new(text) + Lexer::new(text, Rc::new(Mutex::new(ErrorAcc::new()))) .map(|token| (token.kind, token.value)) .collect::>() }; diff --git a/src/main.rs b/src/main.rs index 0c18c6a..e23754c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,16 +2,15 @@ mod checked; mod checker; -mod ir; -mod ir_compiler; -mod itertewls; +mod hir; +mod hir_compiler; mod lexer; mod parsed; mod parser; +mod pos; mod sym; mod token; mod util; -mod vm; fn main() { println!("Hello, world!"); diff --git a/src/parsed.rs b/src/parsed.rs index 23ee9b3..6fca65e 100644 --- a/src/parsed.rs +++ b/src/parsed.rs @@ -1,5 +1,19 @@ +use crate::pos::Pos; + #[derive(Clone, PartialEq, Debug)] -pub enum Node { +pub struct Node { + pub kind: NodeKind, + pub pos: Pos, +} + +impl Node { + pub fn new(kind: NodeKind, pos: Pos) -> Self { + Self { kind, pos } + } +} + +#[derive(Clone, PartialEq, Debug)] +pub enum NodeKind { Error, Id(u64), Int(i64), diff --git a/src/parser.rs b/src/parser.rs index 1bd41cd..74e53a7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,21 +1,27 @@ -use std::collections::HashMap; +use std::{collections::HashMap, rc::Rc, sync::Mutex}; use crate::{ lexer::Lexer, - parsed::Node, + parsed::{Node, NodeKind}, + pos::{Error, ErrorAcc, Pos}, token::{Token, TokenKind, TokenValue}, }; pub struct Parser<'a> { lexer: Lexer<'a>, current: Option, + error_acc: Rc>, } impl<'a> Parser<'a> { - pub fn new(text: &'a str) -> Self { - let mut lexer = Lexer::new(text); + pub fn new(text: &'a str, error_acc: Rc>) -> Self { + let mut lexer = Lexer::new(text, error_acc.clone()); let current = lexer.next(); - Self { lexer, current } + Self { + lexer, + current, + error_acc, + } } pub fn parse(&mut self) -> Vec { @@ -37,6 +43,7 @@ impl<'a> Parser<'a> { } fn parse_stmt(&mut self) -> Node { + let pos = self.pos().unwrap(); match self.curr_kind() { Some(TokenKind::LBrace) => self.parse_block(), Some(TokenKind::If) => self.parse_if(), @@ -47,7 +54,7 @@ impl<'a> Parser<'a> { Some(TokenKind::Let) => self.parse_let(), Some(TokenKind::Break) => { self.step(); - Node::Break + self.node(NodeKind::Break, pos.clone()) } Some(TokenKind::Return) => { self.step(); @@ -55,7 +62,7 @@ impl<'a> Parser<'a> { Some(TokenKind::Semicolon) => None, _ => Some(Box::new(self.parse_expr())), }; - Node::Return { value } + self.node(NodeKind::Return { value }, pos.clone()) } _ => self.parse_assign(), }; @@ -65,8 +72,8 @@ impl<'a> Parser<'a> { stmt } _ => { - self.error("expected ';'"); - Node::Error + self.error("expected ';'", pos.clone()); + self.error_node(pos) } } } @@ -74,85 +81,95 @@ impl<'a> Parser<'a> { } fn parse_fn(&mut self) -> Node { + let pos = self.pos().unwrap(); self.step(); if !self.curr_is(TokenKind::Id) { - self.error("expected id"); - return Node::Error; + self.error("expected id", pos.clone()); + return self.error_node(pos); } let subject = Box::new(self.parse_id()); if !self.curr_is(TokenKind::LParen) { - self.error("expected '('"); - return Node::Error; + self.error("expected '('", pos.clone()); + return self.error_node(pos); } let params = match self.parse_fn_params() { Ok(params) => params, Err(expr) => return expr, }; if !self.curr_is(TokenKind::MinusLt) { - self.error("expected '->'"); - return Node::Error; + self.error("expected '->'", pos.clone()); + return self.error_node(pos); } self.step(); let return_typ = Box::new(self.parse_typ()); if !self.curr_is(TokenKind::LBrace) { - self.error("expected '{'"); - return Node::Error; + self.error("expected '{'", pos.clone()); + return self.error_node(pos); } let body = Box::new(self.parse_block()); - Node::Fn { - subject, - params, - return_typ, - body, - } + self.node( + NodeKind::Fn { + subject, + params, + return_typ, + body, + }, + pos, + ) } fn parse_fn_params(&mut self) -> Result, Node> { + let pos = self.pos().unwrap(); self.step(); let mut params = Vec::new(); if !self.curr_is(TokenKind::RParen) { if !self.curr_is(TokenKind::Id) { - self.error("expected id"); - return Err(Node::Error); + self.error("expected id", pos.clone()); + return Err(self.error_node(pos)); } params.push(self.parse_param()); while let Some(TokenKind::Comma) = self.curr_kind() { self.step(); if self.curr_is(TokenKind::RParen) { - self.error("expected ')'"); + self.error("expected ')'", pos.clone()); break; } params.push(self.parse_param()); } } if !self.curr_is(TokenKind::RParen) { - self.error("expected ')'"); - return Err(Node::Error); + self.error("expected ')'", pos.clone()); + return Err(self.error_node(pos)); } self.step(); Ok(params) } fn parse_let(&mut self) -> Node { + let pos = self.pos().unwrap(); self.step(); if !self.curr_is(TokenKind::Id) { - self.error("expected id"); - return Node::Error; + self.error("expected id", pos.clone()); + return self.error_node(pos); } let subject = self.parse_param(); if !self.curr_is(TokenKind::Equal) { - self.error("expected '='"); - return Node::Error; + self.error("expected '='", pos.clone()); + return self.error_node(pos); } self.step(); let value = self.parse_expr(); - Node::Let { - subject: Box::new(subject), - value: Box::new(value), - } + self.node( + NodeKind::Let { + subject: Box::new(subject), + value: Box::new(value), + }, + pos, + ) } fn parse_param(&mut self) -> Node { + let pos = self.pos().unwrap(); let subject = Box::new(self.parse_id()); let typ = if let Some(TokenKind::Colon) = self.curr_kind() { self.step(); @@ -160,30 +177,35 @@ impl<'a> Parser<'a> { } else { None }; - Node::Param { subject, typ } + self.node(NodeKind::Param { subject, typ }, pos) } fn parse_typ(&mut self) -> Node { + let pos = self.pos().unwrap(); match self.curr_kind() { Some(TokenKind::Id) => self.parse_id(), _ => { - self.error("expected type"); + self.error("expected type", pos.clone()); self.step(); - Node::Error + self.error_node(pos) } } } fn parse_assign(&mut self) -> Node { + let pos = self.pos().unwrap(); let subject = self.parse_expr(); match self.curr_kind() { Some(TokenKind::Equal) => { self.step(); let value = self.parse_expr(); - Node::Assign { - subject: Box::new(subject), - value: Box::new(value), - } + self.node( + NodeKind::Assign { + subject: Box::new(subject), + value: Box::new(value), + }, + pos, + ) } _ => subject, } @@ -194,6 +216,7 @@ impl<'a> Parser<'a> { } fn parse_call(&mut self) -> Node { + let pos = self.pos().unwrap(); let mut subject = self.parse_value(); loop { match self.curr_kind() { @@ -216,15 +239,18 @@ impl<'a> Parser<'a> { match self.curr_kind() { Some(TokenKind::RParen) => {} _ => { - self.error("expected ')'"); - return Node::Error; + self.error("expected ')'", pos.clone()); + return self.error_node(pos); } } self.step(); - subject = Node::Call { - subject: Box::new(subject), - args, - }; + subject = self.node( + NodeKind::Call { + subject: Box::new(subject), + args, + }, + pos.clone(), + ); } _ => break subject, } @@ -232,6 +258,7 @@ impl<'a> Parser<'a> { } fn parse_value(&mut self) -> Node { + let pos = self.pos().unwrap(); match self.curr_kind() { Some(TokenKind::Id) => self.parse_id(), Some(TokenKind::Int) => self.parse_int(), @@ -241,14 +268,15 @@ impl<'a> Parser<'a> { Some(TokenKind::If) => self.parse_if(), Some(TokenKind::Loop) => self.parse_loop(), _ => { - self.error("expected value"); + self.error("expected value", pos.clone()); self.step(); - Node::Error + self.error_node(pos) } } } fn parse_id(&mut self) -> Node { + let pos = self.pos().unwrap(); let Some(Token { kind: TokenKind::Id, value: TokenValue::Id(value), @@ -258,10 +286,11 @@ impl<'a> Parser<'a> { unreachable!() }; self.step(); - Node::Id(value) + self.node(NodeKind::Id(value), pos) } fn parse_int(&mut self) -> Node { + let pos = self.pos().unwrap(); let Some(Token { kind: TokenKind::Int, value: TokenValue::Int(value), @@ -271,10 +300,11 @@ impl<'a> Parser<'a> { unreachable!() }; self.step(); - Node::Int(value) + self.node(NodeKind::Int(value), pos) } fn parse_string(&mut self) -> Node { + let pos = self.pos().unwrap(); let Some(Token { kind: TokenKind::Str, value: TokenValue::Str(value), @@ -284,32 +314,34 @@ impl<'a> Parser<'a> { unreachable!() }; self.step(); - Node::Str(value.clone()) + self.node(NodeKind::Str(value.clone()), pos) } fn parse_group(&mut self) -> Node { + let pos = self.pos().unwrap(); self.step(); let expr = Box::new(self.parse_expr()); if !self.curr_is(TokenKind::RParen) { - self.error("expected ')'"); - return Node::Error; + self.error("expected ')'", pos.clone()); + return self.error_node(pos); } self.step(); - Node::Group(expr) + self.node(NodeKind::Group(expr), pos) } fn parse_block(&mut self) -> Node { + let pos = self.pos().unwrap(); self.step(); let mut stmts = Vec::new(); loop { match self.curr_kind() { None => { - self.error("expected ')'"); - break Node::Error; + self.error("expected ')'", pos.clone()); + break self.error_node(pos); } Some(TokenKind::RBrace) => { self.step(); - break Node::Block(stmts); + break self.node(NodeKind::Block(stmts), pos); } _ => stmts.push(self.parse_stmt()), } @@ -317,56 +349,74 @@ impl<'a> Parser<'a> { } fn parse_if(&mut self) -> Node { + let pos = self.pos().unwrap(); self.step(); let cond = Box::new(self.parse_expr()); if !self.curr_is(TokenKind::LBrace) { - self.error("expected '}'"); - return Node::Error; + self.error("expected '}'", pos.clone()); + return self.error_node(pos); } let truthy = Box::new(self.parse_block()); let falsy = match self.curr_kind() { Some(TokenKind::Else) => { self.step(); if !self.curr_is(TokenKind::LBrace) { - self.error("expected '}'"); - return Node::Error; + self.error("expected '}'", pos.clone()); + return self.error_node(pos); } Some(Box::new(self.parse_block())) } _ => None, }; - Node::If { - cond, - truthy, - falsy, - } + self.node( + NodeKind::If { + cond, + truthy, + falsy, + }, + pos, + ) } fn parse_loop(&mut self) -> Node { + let pos = self.pos().unwrap(); self.step(); if !self.curr_is(TokenKind::LBrace) { - self.error("expected '}'"); - return Node::Error; + self.error("expected '}'", pos.clone()); + return self.error_node(pos); } let body = Box::new(self.parse_block()); - Node::Loop { body } + self.node(NodeKind::Loop { body }, pos) } - fn error>(&mut self, msg: S) { + fn error>(&mut self, msg: S, pos: Pos) { let msg = msg.into(); - println!( - "parser error: {msg}, line {}", - self.current - .as_ref() - .map(|t| t.line.to_string()) - .unwrap_or_else(|| "-".to_string()) - ) + self.error_acc.lock().unwrap().add(Error { + kind: crate::pos::ErrorKind::ParserError, + pos: Some(pos), + msg, + }); } fn step(&mut self) { self.current = self.lexer.next(); } + fn node(&self, kind: NodeKind, pos: Pos) -> Node { + Node { kind, pos } + } + + fn error_node(&self, pos: Pos) -> Node { + Node { + kind: NodeKind::Error, + pos, + } + } + + fn pos(&self) -> Option { + self.current.as_ref().map(|token| token.pos.clone()) + } + fn curr_is(&self, kind: TokenKind) -> bool { self.curr_kind() == Some(kind) } @@ -379,100 +429,223 @@ impl<'a> Parser<'a> { #[test] fn test_parser() { use crate::util::hash; - use Node::*; + use assert_matches::assert_matches; + // use pretty_assertions::assert_eq; + use NodeKind::*; - let parse = |text| Parser::new(text).parse(); + macro_rules! node { + ($kind:pat) => { + Node { kind: $kind, .. } + }; + } + + let parse = |text| Parser::new(text, Rc::new(Mutex::new(ErrorAcc::new()))).parse(); #[allow(non_snake_case)] fn B(v: T) -> Box { Box::new(v) } - assert_eq!(Parser::new("abc;").parse(), vec![Id(hash("abc"))]); - assert_eq!(Parser::new("123;").parse(), vec![Int(123)]); - assert_eq!( - Parser::new("\"hello\";").parse(), - vec![Str("hello".to_string())] - ); - assert_eq!(Parser::new("0;").parse(), vec![Int(0)]); - assert_eq!(Parser::new("0;abc;").parse(), vec![Int(0), Id(hash("abc"))]); + assert_matches!(parse("abc;")[..], [node!(Id(id))] if id == hash("abc")); + assert_matches!(parse("123;")[..], [node!(Int(123))]); + assert_matches!(&parse("\"hello\";")[..], [node!(Str(v))] if *v == "hello".to_string()); + assert_matches!(parse("0;")[..], [node!(Int(0))]); + assert_matches!(parse("0;abc;")[..], [node!(Int(0)), node!(Id(id))] if id == hash("abc")); assert_eq!( parse("add(mul(12, 34), 56);"), - vec![Call { - subject: B(Id(hash("add"))), - args: vec![ - Call { - subject: B(Id(hash("mul"))), - args: vec![Int(12), Int(34)] - }, - Int(56) - ] + vec![Node { + kind: Call { + subject: B(Node { + kind: Id(hash("add")), + pos: Pos { + index: 0, + line: 1, + col: 1 + } + }), + args: vec![ + Node { + kind: Call { + subject: B(Node { + kind: Id(14581412793212634142), + pos: Pos { + index: 4, + line: 1, + col: 5 + } + }), + args: vec![ + Node { + kind: Int(12), + pos: Pos { + index: 8, + line: 1, + col: 9 + } + }, + Node { + kind: Int(34), + pos: Pos { + index: 12, + line: 1, + col: 13 + } + } + ] + }, + pos: Pos { + index: 4, + line: 1, + col: 5 + } + }, + Node { + kind: Int(56), + pos: Pos { + index: 17, + line: 1, + col: 18 + } + } + ] + }, + pos: Pos { + index: 0, + line: 1, + col: 1 + } }] ); + assert_matches!( + &parse("a = 123;")[..], + [node!(Assign { + subject, + value + })] if matches!(subject.kind, Id(id) if id == hash("a")) && matches!(value.kind, Int(123)) + ); + assert_matches!(parse("break;")[..], [node!(Break)]); + assert_matches!(parse("return;")[..], [node!(Return { value: None })]); assert_eq!( - parse("a = 123;"), - vec![Assign { - subject: B(Id(hash("a"))), - value: B(Int(123)) + parse("return add(1, 2);")[..], + vec![Node { + kind: Return { + value: Some(B(Node { + kind: Call { + subject: B(Node { + kind: Id(hash("add")), + pos: Pos { + index: 7, + line: 1, + col: 8 + } + }), + args: vec![ + Node { + kind: Int(1), + pos: Pos { + index: 11, + line: 1, + col: 12 + } + }, + Node { + kind: Int(2), + pos: Pos { + index: 14, + line: 1, + col: 15 + } + } + ] + }, + pos: Pos { + index: 7, + line: 1, + col: 8 + } + })) + }, + pos: Pos { + index: 0, + line: 1, + col: 1 + } }] ); - assert_eq!(parse("break;"), vec![Break]); - assert_eq!(parse("return;"), vec![Return { value: None }]); - assert_eq!( - parse("return add(1, 2);"), - vec![Return { - value: Some(B(Call { - subject: B(Id(hash("add"))), - args: vec![Int(1), Int(2)] - })) - }] + assert_matches!( + &parse("a = 5;")[..], + [node!(Assign { + subject, + value + })] if matches!(subject.kind, Id(id) if id == hash("a")) && matches!(value.kind, Int(5)) ); assert_eq!( - parse("a = 5;"), - vec![Assign { - subject: B(Id(hash("a"))), - value: B(Int(5)) + parse("let a = 5;")[..], + vec![Node { + kind: Let { + subject: B(Node { + kind: Param { + subject: B(Node { + kind: Id(hash("a")), + pos: Pos { + index: 4, + line: 1, + col: 5 + } + }), + typ: None + }, + pos: Pos { + index: 4, + line: 1, + col: 5 + } + }), + value: B(Node { + kind: Int(5), + pos: Pos { + index: 8, + line: 1, + col: 9 + } + }) + }, + pos: Pos { + index: 0, + line: 1, + col: 1 + } }] ); - assert_eq!( - parse("let a = 5;"), - vec![Let { - subject: B(Param { - subject: B(Id(hash("a"))), - typ: None - }), - value: B(Int(5)) - }] + assert_matches!( + &parse("fn test() -> i32 {}")[..], + [node!(Fn { + subject, + params, + return_typ, + body + })] if subject.kind == Id(hash("test")) && *params == vec![] && return_typ.kind == Id(hash("i32")) && body.kind == Block(vec![]) ); - assert_eq!( - parse("fn test() -> i32 {}"), - vec![Fn { - subject: B(Id(hash("test"))), - params: vec![], - return_typ: B(Id(hash("i32"))), - body: B(Block(vec![])) - }] - ); - assert_eq!( - parse("if 0 {}"), - vec![If { - cond: B(Int(0)), - truthy: B(Block(vec![])), + assert_matches!( + &parse("if 0 {}")[..], + [node!(If { + cond, + truthy, falsy: None - }] + })] if matches!(cond.kind, Int(0)) && truthy.kind == Block(vec![]) ); - assert_eq!( - parse("if 0 {} else {}"), - vec![If { - cond: B(Int(0)), - truthy: B(Block(vec![])), - falsy: Some(B(Block(vec![]))), - }] + assert_matches!( + &parse("if 0 {} else {}")[..], + [node!(If { + cond, + truthy, + falsy: Some(falsy), + })] if matches!(cond.kind, Int(0)) && truthy.kind == Block(vec![]) && falsy.kind == Block(vec![]) ); - assert_eq!( - parse("loop {}"), - vec![Loop { - body: B(Block(vec![])), - }] + assert_matches!( + &parse("loop {}")[..], + [node!(Loop { + body, + })] if body.kind == Block(vec![]) ); } diff --git a/src/pos.rs b/src/pos.rs new file mode 100644 index 0000000..d4dc755 --- /dev/null +++ b/src/pos.rs @@ -0,0 +1,44 @@ +#[derive(Clone, PartialEq, Debug)] +pub struct Pos { + pub index: usize, + pub line: i64, + pub col: i64, +} + +impl Pos { + pub fn new(index: usize, line: i64, col: i64) -> Self { + Self { index, line, col } + } +} + +#[derive(Clone, Debug)] +pub struct Error { + pub kind: ErrorKind, + pub pos: Option, + pub msg: String, +} + +#[derive(Clone, Debug)] +pub enum ErrorKind { + LexerError, + ParserError, + CheckerError, +} + +pub struct ErrorAcc { + errors: Vec, +} + +impl ErrorAcc { + pub fn new() -> Self { + Self { errors: Vec::new() } + } + + pub fn ok(&self) -> bool { + self.errors.is_empty() + } + + pub fn add(&mut self, error: Error) { + self.errors.push(error) + } +} diff --git a/src/token.rs b/src/token.rs index 17f0f75..5f2a8ee 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1,10 +1,10 @@ +use crate::pos::Pos; + #[derive(Clone, PartialEq, Debug)] pub struct Token { pub kind: TokenKind, pub value: TokenValue, - pub index: usize, - pub line: i32, - pub col: i32, + pub pos: Pos, } #[derive(Clone, PartialEq, Debug)] diff --git a/src/util.rs b/src/util.rs index 4164ef7..7bcec60 100644 --- a/src/util.rs +++ b/src/util.rs @@ -5,3 +5,32 @@ pub fn hash(value: H) -> u64 { value.hash(&mut hasher); hasher.finish() } +enum Duplicate { + None(std::collections::HashMap), + Found(T), +} + +pub trait Itertewls +where + Self: Iterator + Sized, +{ + fn find_first_duplicate(self) -> Option; +} + +impl Itertewls for I +where + I: Iterator + Sized, + Item: std::cmp::PartialEq + Clone, +{ + fn find_first_duplicate(mut self) -> Option { + self.try_fold(Vec::new(), |mut used, item| { + if used.contains(&item) { + Err(item) + } else { + used.push(item); + Ok(used) + } + }) + .err() + } +}