use std::{collections::HashMap, rc::Rc, sync::Mutex}; use crate::{ lexer::Lexer, parsed::{Node, NodeKind}, pos::{Error, ErrorAcc, Pos}, token::{Token, TokenKind, TokenValue}, }; pub struct Parser<'a> { lexer: Lexer<'a>, current: Option, error_acc: Rc>, } impl<'a> Parser<'a> { pub fn new(text: &'a str, error_acc: Rc>) -> Self { let mut lexer = Lexer::new(text, error_acc.clone()); let current = lexer.next(); Self { lexer, current, error_acc, } } pub fn parse(&mut self) -> Vec { self.parse_file() } pub fn symbols(self) -> HashMap { self.lexer.symbols() } fn parse_file(&mut self) -> Vec { let mut stmts = Vec::new(); loop { match self.current { Some(_) => stmts.push(self.parse_stmt()), None => break stmts, } } } fn parse_stmt(&mut self) -> Node { let pos = self.pos().unwrap(); match self.curr_kind() { Some(TokenKind::LBrace) => self.parse_block(), Some(TokenKind::If) => self.parse_if(), Some(TokenKind::Loop) => self.parse_loop(), Some(TokenKind::Fn) => self.parse_fn(), _ => { let stmt = match self.curr_kind() { Some(TokenKind::Let) => self.parse_let(), Some(TokenKind::Break) => { self.step(); self.node(NodeKind::Break, pos.clone()) } Some(TokenKind::Return) => { self.step(); let value = match self.curr_kind() { Some(TokenKind::Semicolon) => None, _ => Some(Box::new(self.parse_expr())), }; self.node(NodeKind::Return { value }, pos.clone()) } _ => self.parse_assign(), }; match self.curr_kind() { Some(TokenKind::Semicolon) => { self.step(); stmt } _ => { self.error("expected ';'", pos.clone()); self.error_node(pos) } } } } } fn parse_fn(&mut self) -> Node { let pos = self.pos().unwrap(); self.step(); if !self.curr_is(TokenKind::Id) { self.error("expected id", pos.clone()); return self.error_node(pos); } let subject = Box::new(self.parse_id()); if !self.curr_is(TokenKind::LParen) { self.error("expected '('", pos.clone()); return self.error_node(pos); } let params = match self.parse_fn_params() { Ok(params) => params, Err(expr) => return expr, }; if !self.curr_is(TokenKind::MinusLt) { self.error("expected '->'", pos.clone()); return self.error_node(pos); } self.step(); let return_typ = Box::new(self.parse_typ()); if !self.curr_is(TokenKind::LBrace) { self.error("expected '{'", pos.clone()); return self.error_node(pos); } let body = Box::new(self.parse_block()); self.node( NodeKind::Fn { subject, params, return_typ, body, }, pos, ) } fn parse_fn_params(&mut self) -> Result, Node> { let pos = self.pos().unwrap(); self.step(); let mut params = Vec::new(); if !self.curr_is(TokenKind::RParen) { if !self.curr_is(TokenKind::Id) { self.error("expected id", pos.clone()); return Err(self.error_node(pos)); } params.push(self.parse_param()); while let Some(TokenKind::Comma) = self.curr_kind() { self.step(); if self.curr_is(TokenKind::RParen) { self.error("expected ')'", pos.clone()); break; } params.push(self.parse_param()); } } if !self.curr_is(TokenKind::RParen) { self.error("expected ')'", pos.clone()); return Err(self.error_node(pos)); } self.step(); Ok(params) } fn parse_let(&mut self) -> Node { let pos = self.pos().unwrap(); self.step(); if !self.curr_is(TokenKind::Id) { self.error("expected id", pos.clone()); return self.error_node(pos); } let subject = self.parse_param(); if !self.curr_is(TokenKind::Equal) { self.error("expected '='", pos.clone()); return self.error_node(pos); } self.step(); let value = self.parse_expr(); self.node( NodeKind::Let { subject: Box::new(subject), value: Box::new(value), }, pos, ) } fn parse_param(&mut self) -> Node { let pos = self.pos().unwrap(); let subject = Box::new(self.parse_id()); let typ = if let Some(TokenKind::Colon) = self.curr_kind() { self.step(); Some(Box::new(self.parse_typ())) } else { None }; self.node(NodeKind::Param { subject, typ }, pos) } fn parse_typ(&mut self) -> Node { let pos = self.pos().unwrap(); match self.curr_kind() { Some(TokenKind::Id) => self.parse_id(), _ => { self.error("expected type", pos.clone()); self.step(); self.error_node(pos) } } } fn parse_assign(&mut self) -> Node { let pos = self.pos().unwrap(); let subject = self.parse_expr(); match self.curr_kind() { Some(TokenKind::Equal) => { self.step(); let value = self.parse_expr(); self.node( NodeKind::Assign { subject: Box::new(subject), value: Box::new(value), }, pos, ) } _ => subject, } } fn parse_expr(&mut self) -> Node { self.parse_call() } fn parse_call(&mut self) -> Node { let pos = self.pos().unwrap(); let mut subject = self.parse_value(); loop { match self.curr_kind() { Some(TokenKind::LParen) => { self.step(); let mut args = Vec::new(); match self.curr_kind() { None | Some(TokenKind::RParen) => {} Some(_) => { args.push(self.parse_expr()); while let Some(TokenKind::Comma) = self.curr_kind() { self.step(); if let Some(TokenKind::RParen) = self.curr_kind() { break; } args.push(self.parse_expr()); } } } match self.curr_kind() { Some(TokenKind::RParen) => {} _ => { self.error("expected ')'", pos.clone()); return self.error_node(pos); } } self.step(); subject = self.node( NodeKind::Call { subject: Box::new(subject), args, }, pos.clone(), ); } _ => break subject, } } } fn parse_value(&mut self) -> Node { let pos = self.pos().unwrap(); match self.curr_kind() { Some(TokenKind::Id) => self.parse_id(), Some(TokenKind::Int) => self.parse_int(), Some(TokenKind::Str) => self.parse_string(), Some(TokenKind::LParen) => self.parse_group(), Some(TokenKind::LBrace) => self.parse_block(), Some(TokenKind::If) => self.parse_if(), Some(TokenKind::Loop) => self.parse_loop(), _ => { self.error("expected value", pos.clone()); self.step(); self.error_node(pos) } } } fn parse_id(&mut self) -> Node { let pos = self.pos().unwrap(); let Some(Token { kind: TokenKind::Id, value: TokenValue::Id(value), .. }) = self.current else { unreachable!() }; self.step(); self.node(NodeKind::Id(value), pos) } fn parse_int(&mut self) -> Node { let pos = self.pos().unwrap(); let Some(Token { kind: TokenKind::Int, value: TokenValue::Int(value), .. }) = self.current else { unreachable!() }; self.step(); self.node(NodeKind::Int(value), pos) } fn parse_string(&mut self) -> Node { let pos = self.pos().unwrap(); let Some(Token { kind: TokenKind::Str, value: TokenValue::Str(value), .. }) = self.current.clone() else { unreachable!() }; self.step(); self.node(NodeKind::Str(value.clone()), pos) } fn parse_group(&mut self) -> Node { let pos = self.pos().unwrap(); self.step(); let expr = Box::new(self.parse_expr()); if !self.curr_is(TokenKind::RParen) { self.error("expected ')'", pos.clone()); return self.error_node(pos); } self.step(); self.node(NodeKind::Group(expr), pos) } fn parse_block(&mut self) -> Node { let pos = self.pos().unwrap(); self.step(); let mut stmts = Vec::new(); loop { match self.curr_kind() { None => { self.error("expected ')'", pos.clone()); break self.error_node(pos); } Some(TokenKind::RBrace) => { self.step(); break self.node(NodeKind::Block(stmts), pos); } _ => stmts.push(self.parse_stmt()), } } } fn parse_if(&mut self) -> Node { let pos = self.pos().unwrap(); self.step(); let cond = Box::new(self.parse_expr()); if !self.curr_is(TokenKind::LBrace) { self.error("expected '}'", pos.clone()); return self.error_node(pos); } let truthy = Box::new(self.parse_block()); let falsy = match self.curr_kind() { Some(TokenKind::Else) => { self.step(); if !self.curr_is(TokenKind::LBrace) { self.error("expected '}'", pos.clone()); return self.error_node(pos); } Some(Box::new(self.parse_block())) } _ => None, }; self.node( NodeKind::If { cond, truthy, falsy, }, pos, ) } fn parse_loop(&mut self) -> Node { let pos = self.pos().unwrap(); self.step(); if !self.curr_is(TokenKind::LBrace) { self.error("expected '}'", pos.clone()); return self.error_node(pos); } let body = Box::new(self.parse_block()); self.node(NodeKind::Loop { body }, pos) } fn error>(&mut self, msg: S, pos: Pos) { let msg = msg.into(); self.error_acc.lock().unwrap().add(Error { kind: crate::pos::ErrorKind::ParserError, pos: Some(pos), msg, }); } fn step(&mut self) { self.current = self.lexer.next(); } fn node(&self, kind: NodeKind, pos: Pos) -> Node { Node { kind, pos } } fn error_node(&self, pos: Pos) -> Node { Node { kind: NodeKind::Error, pos, } } fn pos(&self) -> Option { self.current.as_ref().map(|token| token.pos.clone()) } fn curr_is(&self, kind: TokenKind) -> bool { self.curr_kind() == Some(kind) } fn curr_kind(&self) -> Option { self.current.as_ref().map(|t| t.kind.clone()) } } #[test] fn test_parser() { use crate::util::hash; use assert_matches::assert_matches; // use pretty_assertions::assert_eq; use NodeKind::*; macro_rules! node { ($kind:pat) => { Node { kind: $kind, .. } }; } let parse = |text| Parser::new(text, Rc::new(Mutex::new(ErrorAcc::new()))).parse(); #[allow(non_snake_case)] fn B(v: T) -> Box { Box::new(v) } assert_matches!(parse("abc;")[..], [node!(Id(id))] if id == hash("abc")); assert_matches!(parse("123;")[..], [node!(Int(123))]); assert_matches!(&parse("\"hello\";")[..], [node!(Str(v))] if *v == "hello".to_string()); assert_matches!(parse("0;")[..], [node!(Int(0))]); assert_matches!(parse("0;abc;")[..], [node!(Int(0)), node!(Id(id))] if id == hash("abc")); assert_eq!( parse("add(mul(12, 34), 56);"), vec![Node { kind: Call { subject: B(Node { kind: Id(hash("add")), pos: Pos { index: 0, line: 1, col: 1 } }), args: vec![ Node { kind: Call { subject: B(Node { kind: Id(14581412793212634142), pos: Pos { index: 4, line: 1, col: 5 } }), args: vec![ Node { kind: Int(12), pos: Pos { index: 8, line: 1, col: 9 } }, Node { kind: Int(34), pos: Pos { index: 12, line: 1, col: 13 } } ] }, pos: Pos { index: 4, line: 1, col: 5 } }, Node { kind: Int(56), pos: Pos { index: 17, line: 1, col: 18 } } ] }, pos: Pos { index: 0, line: 1, col: 1 } }] ); assert_matches!( &parse("a = 123;")[..], [node!(Assign { subject, value })] if matches!(subject.kind, Id(id) if id == hash("a")) && matches!(value.kind, Int(123)) ); assert_matches!(parse("break;")[..], [node!(Break)]); assert_matches!(parse("return;")[..], [node!(Return { value: None })]); assert_eq!( parse("return add(1, 2);")[..], vec![Node { kind: Return { value: Some(B(Node { kind: Call { subject: B(Node { kind: Id(hash("add")), pos: Pos { index: 7, line: 1, col: 8 } }), args: vec![ Node { kind: Int(1), pos: Pos { index: 11, line: 1, col: 12 } }, Node { kind: Int(2), pos: Pos { index: 14, line: 1, col: 15 } } ] }, pos: Pos { index: 7, line: 1, col: 8 } })) }, pos: Pos { index: 0, line: 1, col: 1 } }] ); assert_matches!( &parse("a = 5;")[..], [node!(Assign { subject, value })] if matches!(subject.kind, Id(id) if id == hash("a")) && matches!(value.kind, Int(5)) ); assert_eq!( parse("let a = 5;")[..], vec![Node { kind: Let { subject: B(Node { kind: Param { subject: B(Node { kind: Id(hash("a")), pos: Pos { index: 4, line: 1, col: 5 } }), typ: None }, pos: Pos { index: 4, line: 1, col: 5 } }), value: B(Node { kind: Int(5), pos: Pos { index: 8, line: 1, col: 9 } }) }, pos: Pos { index: 0, line: 1, col: 1 } }] ); assert_matches!( &parse("fn test() -> i32 {}")[..], [node!(Fn { subject, params, return_typ, body })] if subject.kind == Id(hash("test")) && *params == vec![] && return_typ.kind == Id(hash("i32")) && body.kind == Block(vec![]) ); assert_matches!( &parse("if 0 {}")[..], [node!(If { cond, truthy, falsy: None })] if matches!(cond.kind, Int(0)) && truthy.kind == Block(vec![]) ); assert_matches!( &parse("if 0 {} else {}")[..], [node!(If { cond, truthy, falsy: Some(falsy), })] if matches!(cond.kind, Int(0)) && truthy.kind == Block(vec![]) && falsy.kind == Block(vec![]) ); assert_matches!( &parse("loop {}")[..], [node!(Loop { body, })] if body.kind == Block(vec![]) ); }