yapping/src/parser.rs
2024-08-28 21:32:17 +02:00

652 lines
20 KiB
Rust

use std::{collections::HashMap, rc::Rc, sync::Mutex};
use crate::{
lexer::Lexer,
parsed::{Node, NodeKind},
pos::{Error, ErrorAcc, Pos},
token::{Token, TokenKind, TokenValue},
};
/// Parser that pulls tokens from a [`Lexer`] one at a time and builds [`Node`]s.
pub struct Parser<'a> {
    /// Token source; also provides the symbol table returned by `symbols()`.
    lexer: Lexer<'a>,
    /// The token currently being examined; `None` once the lexer yields no more tokens.
    current: Option<Token>,
    /// Accumulator that parse errors are added to; a clone of the handle is given to the lexer.
    error_acc: Rc<Mutex<ErrorAcc>>,
}
impl<'a> Parser<'a> {
/// Creates a parser over `text` and loads the first token.
///
/// The lexer is handed its own handle to `error_acc`, so both the lexer and
/// the parser hold the same accumulator.
pub fn new(text: &'a str, error_acc: Rc<Mutex<ErrorAcc>>) -> Self {
    let mut parser = Self {
        lexer: Lexer::new(text, Rc::clone(&error_acc)),
        current: None,
        error_acc,
    };
    // Prime `current` with the first token of the input.
    parser.step();
    parser
}
/// Parses the remaining input and returns the top-level statement nodes.
///
/// Errors are not returned here; they are added to the error accumulator
/// passed to `new`.
pub fn parse(&mut self) -> Vec<Node> {
    self.parse_file()
}
/// Consumes the parser and returns the lexer's symbol table.
// NOTE(review): presumably maps identifier hashes (the `u64` stored in
// `NodeKind::Id`) to their source text — confirm against `Lexer::symbols`.
pub fn symbols(self) -> HashMap<u64, String> {
    self.lexer.symbols()
}
/// Parses statements one after another until no token is left.
fn parse_file(&mut self) -> Vec<Node> {
    let mut nodes = Vec::new();
    while self.current.is_some() {
        nodes.push(self.parse_stmt());
    }
    nodes
}
/// Parses a single statement.
///
/// Blocks, `if`, `loop` and `fn` are returned as-is. `let`, `break`,
/// `return`, assignments and bare expressions must be followed by `;`;
/// when it is missing, "expected ';'" is reported at the statement's start
/// and an error node is returned in place of the statement.
///
/// # Panics
///
/// Panics if there is no current token; callers check for end of input first.
fn parse_stmt(&mut self) -> Node {
    let pos = self.pos().unwrap();
    let stmt = match self.curr_kind() {
        // Brace-delimited statements take no trailing ';'.
        Some(TokenKind::LBrace) => return self.parse_block(),
        Some(TokenKind::If) => return self.parse_if(),
        Some(TokenKind::Loop) => return self.parse_loop(),
        Some(TokenKind::Fn) => return self.parse_fn(),
        Some(TokenKind::Let) => self.parse_let(),
        Some(TokenKind::Break) => {
            self.step();
            self.node(NodeKind::Break, pos.clone())
        }
        Some(TokenKind::Return) => {
            self.step();
            let value = match self.curr_kind() {
                // End of input is treated like a bare `return`: the expression
                // parser requires a current token and would panic otherwise.
                // The missing ';' is still reported below.
                None | Some(TokenKind::Semicolon) => None,
                Some(_) => Some(Box::new(self.parse_expr())),
            };
            self.node(NodeKind::Return { value }, pos.clone())
        }
        _ => self.parse_assign(),
    };
    if self.curr_is(TokenKind::Semicolon) {
        self.step();
        stmt
    } else {
        self.error("expected ';'", pos.clone());
        self.error_node(pos)
    }
}
/// Parses a function definition: `fn`, a name, a parenthesised parameter
/// list, the return arrow, a return type and a block body.
///
/// Every error is reported at the position of the `fn` token and yields an
/// error node instead of a `NodeKind::Fn`.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_fn(&mut self) -> Node {
    let pos = self.pos().unwrap();
    // Skip the `fn` keyword.
    self.step();
    if !self.curr_is(TokenKind::Id) {
        self.error("expected id", pos.clone());
        return self.error_node(pos);
    }
    let subject = Box::new(self.parse_id());
    if !self.curr_is(TokenKind::LParen) {
        self.error("expected '('", pos.clone());
        return self.error_node(pos);
    }
    let params = match self.parse_fn_params() {
        Ok(params) => params,
        // The parameter parser has already reported the problem.
        Err(error_node) => return error_node,
    };
    if !self.curr_is(TokenKind::MinusLt) {
        self.error("expected '->'", pos.clone());
        return self.error_node(pos);
    }
    self.step();
    // The type parser needs a current token; report end of input here
    // instead of letting it panic.
    if self.current.is_none() {
        self.error("expected type", pos.clone());
        return self.error_node(pos);
    }
    let return_typ = Box::new(self.parse_typ());
    if !self.curr_is(TokenKind::LBrace) {
        self.error("expected '{'", pos.clone());
        return self.error_node(pos);
    }
    let body = Box::new(self.parse_block());
    self.node(
        NodeKind::Fn {
            subject,
            params,
            return_typ,
            body,
        },
        pos,
    )
}
/// Parses a parenthesised, comma-separated parameter list, starting at `(`.
///
/// Returns the parameter nodes, or `Err` with an error node when the list is
/// malformed. Errors are reported at the position of the opening `(`.
/// A trailing comma before `)` is reported as an error, but the parameters
/// parsed so far are still returned.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_fn_params(&mut self) -> Result<Vec<Node>, Node> {
    let pos = self.pos().unwrap();
    // Skip the opening '('.
    self.step();
    let mut params = Vec::new();
    if !self.curr_is(TokenKind::RParen) {
        if !self.curr_is(TokenKind::Id) {
            self.error("expected id", pos.clone());
            return Err(self.error_node(pos));
        }
        params.push(self.parse_param());
        while self.curr_is(TokenKind::Comma) {
            self.step();
            // A parameter must start with an id. Checking here keeps
            // `parse_param` from panicking on any other token or on end of
            // input.
            if !self.curr_is(TokenKind::Id) {
                self.error("expected id", pos.clone());
                if self.curr_is(TokenKind::RParen) {
                    // Trailing comma: recover and keep the parsed parameters.
                    break;
                }
                return Err(self.error_node(pos));
            }
            params.push(self.parse_param());
        }
    }
    if !self.curr_is(TokenKind::RParen) {
        self.error("expected ')'", pos.clone());
        return Err(self.error_node(pos));
    }
    self.step();
    Ok(params)
}
/// Parses `let <param> = <expr>` starting at the `let` token; the trailing
/// `;` is left for the caller.
///
/// Errors are reported at the position of `let` and yield an error node.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_let(&mut self) -> Node {
    let pos = self.pos().unwrap();
    // Skip the `let` keyword.
    self.step();
    if !self.curr_is(TokenKind::Id) {
        self.error("expected id", pos.clone());
        return self.error_node(pos);
    }
    let subject = self.parse_param();
    if !self.curr_is(TokenKind::Equal) {
        self.error("expected '='", pos.clone());
        return self.error_node(pos);
    }
    self.step();
    // The expression parser needs a current token; report end of input
    // here instead of letting it panic.
    if self.current.is_none() {
        self.error("expected value", pos.clone());
        return self.error_node(pos);
    }
    let value = self.parse_expr();
    self.node(
        NodeKind::Let {
            subject: Box::new(subject),
            value: Box::new(value),
        },
        pos,
    )
}
/// Parses `<id>` optionally followed by `: <type>` into a `NodeKind::Param`.
///
/// When `:` is followed by end of input, "expected type" is reported and the
/// type slot holds an error node.
///
/// # Panics
///
/// Panics if the current token is not an id; callers check this first.
fn parse_param(&mut self) -> Node {
    let pos = self.pos().unwrap();
    let subject = Box::new(self.parse_id());
    let typ = if self.curr_is(TokenKind::Colon) {
        self.step();
        let typ = if self.current.is_some() {
            self.parse_typ()
        } else {
            // The type parser needs a current token; report end of input
            // here instead of letting it panic.
            self.error("expected type", pos.clone());
            self.error_node(pos.clone())
        };
        Some(Box::new(typ))
    } else {
        None
    };
    self.node(NodeKind::Param { subject, typ }, pos)
}
/// Parses a type, which is currently just an id.
///
/// Any other token is reported as "expected type", skipped, and replaced by
/// an error node.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_typ(&mut self) -> Node {
    let start = self.pos().unwrap();
    if matches!(self.curr_kind(), Some(TokenKind::Id)) {
        return self.parse_id();
    }
    self.error("expected type", start.clone());
    // Skip the offending token so the parser keeps making progress.
    self.step();
    self.error_node(start)
}
/// Parses an expression and, if it is followed by `=`, an assignment to it.
///
/// Without `=` the expression itself is returned.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_assign(&mut self) -> Node {
    let pos = self.pos().unwrap();
    let subject = self.parse_expr();
    if !self.curr_is(TokenKind::Equal) {
        return subject;
    }
    self.step();
    // The expression parser needs a current token; report end of input
    // here instead of letting it panic.
    if self.current.is_none() {
        self.error("expected value", pos.clone());
        return self.error_node(pos);
    }
    let value = self.parse_expr();
    self.node(
        NodeKind::Assign {
            subject: Box::new(subject),
            value: Box::new(value),
        },
        pos,
    )
}
/// Parses an expression; currently this delegates directly to `parse_call`.
fn parse_expr(&mut self) -> Node {
    self.parse_call()
}
/// Parses a value followed by zero or more call argument lists, e.g. `f(a)(b)`.
///
/// Every `NodeKind::Call` produced here carries the position of the leading
/// value. A trailing comma before `)` is accepted. A missing `)` is reported
/// and yields an error node.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_call(&mut self) -> Node {
    let pos = self.pos().unwrap();
    let mut subject = self.parse_value();
    while self.curr_is(TokenKind::LParen) {
        // Skip the opening '('.
        self.step();
        let mut args = Vec::new();
        if !matches!(self.curr_kind(), None | Some(TokenKind::RParen)) {
            args.push(self.parse_expr());
            while self.curr_is(TokenKind::Comma) {
                self.step();
                // Stop on a trailing comma. Also stop at end of input, where
                // parsing another expression would panic; the missing ')' is
                // reported below.
                if matches!(self.curr_kind(), None | Some(TokenKind::RParen)) {
                    break;
                }
                args.push(self.parse_expr());
            }
        }
        if !self.curr_is(TokenKind::RParen) {
            self.error("expected ')'", pos.clone());
            return self.error_node(pos);
        }
        self.step();
        subject = self.node(
            NodeKind::Call {
                subject: Box::new(subject),
                args,
            },
            pos.clone(),
        );
    }
    subject
}
/// Parses a primary value: an id, an int, a string, a parenthesised group,
/// a block, an `if` or a `loop`.
///
/// Any other token is reported as "expected value", skipped, and replaced by
/// an error node.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_value(&mut self) -> Node {
    let start = self.pos().unwrap();
    // Dispatch on the leading token; fall through when nothing fits.
    match self.curr_kind() {
        Some(TokenKind::Id) => return self.parse_id(),
        Some(TokenKind::Int) => return self.parse_int(),
        Some(TokenKind::Str) => return self.parse_string(),
        Some(TokenKind::LParen) => return self.parse_group(),
        Some(TokenKind::LBrace) => return self.parse_block(),
        Some(TokenKind::If) => return self.parse_if(),
        Some(TokenKind::Loop) => return self.parse_loop(),
        _ => {}
    }
    self.error("expected value", start.clone());
    // Skip the offending token so the parser keeps making progress.
    self.step();
    self.error_node(start)
}
/// Consumes the current id token and wraps its value in a `NodeKind::Id`.
///
/// # Panics
///
/// Panics if the current token is not an id; callers check this first.
fn parse_id(&mut self) -> Node {
    let start = self.pos().unwrap();
    let id = match self.current {
        Some(Token {
            kind: TokenKind::Id,
            value: TokenValue::Id(id),
            ..
        }) => id,
        _ => unreachable!(),
    };
    self.step();
    self.node(NodeKind::Id(id), start)
}
/// Consumes the current int token and wraps its value in a `NodeKind::Int`.
///
/// # Panics
///
/// Panics if the current token is not an int; callers check this first.
fn parse_int(&mut self) -> Node {
    let start = self.pos().unwrap();
    let number = match self.current {
        Some(Token {
            kind: TokenKind::Int,
            value: TokenValue::Int(number),
            ..
        }) => number,
        _ => unreachable!(),
    };
    self.step();
    self.node(NodeKind::Int(number), start)
}
/// Consumes the current string token and wraps its text in a `NodeKind::Str`.
///
/// # Panics
///
/// Panics if the current token is not a string; callers check this first.
fn parse_string(&mut self) -> Node {
    let pos = self.pos().unwrap();
    // Borrow the token and copy only the string, rather than cloning the
    // whole token and then cloning the string a second time.
    let Some(Token {
        kind: TokenKind::Str,
        value: TokenValue::Str(value),
        ..
    }) = &self.current
    else {
        unreachable!()
    };
    let value = value.clone();
    self.step();
    self.node(NodeKind::Str(value), pos)
}
/// Parses a parenthesised expression, starting at `(`.
///
/// Errors are reported at the position of `(` and yield an error node.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_group(&mut self) -> Node {
    let pos = self.pos().unwrap();
    // Skip the opening '('.
    self.step();
    // The expression parser needs a current token; report end of input
    // here instead of letting it panic.
    if self.current.is_none() {
        self.error("expected value", pos.clone());
        return self.error_node(pos);
    }
    let expr = Box::new(self.parse_expr());
    if !self.curr_is(TokenKind::RParen) {
        self.error("expected ')'", pos.clone());
        return self.error_node(pos);
    }
    self.step();
    self.node(NodeKind::Group(expr), pos)
}
/// Parses a brace-delimited list of statements, starting at `{`.
///
/// If the input ends before the closing `}`, "expected '}'" is reported at
/// the position of `{` and an error node is returned.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_block(&mut self) -> Node {
    let pos = self.pos().unwrap();
    // Skip the opening '{'.
    self.step();
    let mut stmts = Vec::new();
    loop {
        match self.curr_kind() {
            None => {
                // A block is closed by '}', so that is the missing token.
                self.error("expected '}'", pos.clone());
                break self.error_node(pos);
            }
            Some(TokenKind::RBrace) => {
                self.step();
                break self.node(NodeKind::Block(stmts), pos);
            }
            Some(_) => stmts.push(self.parse_stmt()),
        }
    }
}
/// Parses `if <expr> { ... }` with an optional `else { ... }`, starting at `if`.
///
/// Errors are reported at the position of `if` and yield an error node.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_if(&mut self) -> Node {
    let pos = self.pos().unwrap();
    // Skip the `if` keyword.
    self.step();
    // The expression parser needs a current token; report end of input
    // here instead of letting it panic.
    if self.current.is_none() {
        self.error("expected value", pos.clone());
        return self.error_node(pos);
    }
    let cond = Box::new(self.parse_expr());
    if !self.curr_is(TokenKind::LBrace) {
        // The missing token is the opening brace of the branch.
        self.error("expected '{'", pos.clone());
        return self.error_node(pos);
    }
    let truthy = Box::new(self.parse_block());
    let falsy = if self.curr_is(TokenKind::Else) {
        self.step();
        if !self.curr_is(TokenKind::LBrace) {
            self.error("expected '{'", pos.clone());
            return self.error_node(pos);
        }
        Some(Box::new(self.parse_block()))
    } else {
        None
    };
    self.node(
        NodeKind::If {
            cond,
            truthy,
            falsy,
        },
        pos,
    )
}
/// Parses `loop { ... }`, starting at the `loop` token.
///
/// A missing `{` is reported at the position of `loop` and yields an error node.
///
/// # Panics
///
/// Panics if there is no current token.
fn parse_loop(&mut self) -> Node {
    let pos = self.pos().unwrap();
    // Skip the `loop` keyword.
    self.step();
    if !self.curr_is(TokenKind::LBrace) {
        // The missing token is the opening brace of the body.
        self.error("expected '{'", pos.clone());
        return self.error_node(pos);
    }
    let body = Box::new(self.parse_block());
    self.node(NodeKind::Loop { body }, pos)
}
fn error<S: Into<String>>(&mut self, msg: S, pos: Pos) {
let msg = msg.into();
self.error_acc.lock().unwrap().add(Error {
kind: crate::pos::ErrorKind::ParserError,
pos: Some(pos),
msg,
});
}
/// Advances to the next token; `current` becomes `None` once the lexer yields nothing more.
fn step(&mut self) {
    self.current = self.lexer.next();
}
/// Builds a node of the given kind at `pos`.
fn node(&self, kind: NodeKind, pos: Pos) -> Node {
    Node { kind, pos }
}
fn error_node(&self, pos: Pos) -> Node {
Node {
kind: NodeKind::Error,
pos,
}
}
/// Returns the position of the current token, or `None` when there is none.
fn pos(&self) -> Option<Pos> {
    let token = self.current.as_ref()?;
    Some(token.pos.clone())
}
/// Returns `true` when there is a current token and its kind equals `kind`.
fn curr_is(&self, kind: TokenKind) -> bool {
    matches!(self.curr_kind(), Some(current) if current == kind)
}
/// Returns a copy of the current token's kind, or `None` when there is no token.
fn curr_kind(&self) -> Option<TokenKind> {
    match &self.current {
        Some(token) => Some(token.kind.clone()),
        None => None,
    }
}
}
/// Parses small source snippets and checks the resulting trees.
#[test]
fn test_parser() {
    use crate::util::hash;
    use assert_matches::assert_matches;
    // use pretty_assertions::assert_eq;
    use NodeKind::*;

    // Pattern that matches a node by kind and ignores its position.
    macro_rules! node {
        ($kind:pat) => {
            Node { kind: $kind, .. }
        };
    }

    // Every snippet below fits on one line, so only `index` and `col` vary.
    let at = |index, col| Pos {
        index,
        line: 1,
        col,
    };
    let mk = |kind, pos| Node { kind, pos };
    let parse = |text| Parser::new(text, Rc::new(Mutex::new(ErrorAcc::new()))).parse();

    // Literals and identifiers.
    assert_matches!(parse("abc;")[..], [node!(Id(id))] if id == hash("abc"));
    assert_matches!(parse("123;")[..], [node!(Int(123))]);
    assert_matches!(&parse("\"hello\";")[..], [node!(Str(v))] if *v == "hello".to_string());
    assert_matches!(parse("0;")[..], [node!(Int(0))]);
    assert_matches!(parse("0;abc;")[..], [node!(Int(0)), node!(Id(id))] if id == hash("abc"));

    // Nested calls: each call node carries the position of its callee.
    let mul_call = mk(
        Call {
            subject: Box::new(mk(Id(14581412793212634142), at(4, 5))),
            args: vec![mk(Int(12), at(8, 9)), mk(Int(34), at(12, 13))],
        },
        at(4, 5),
    );
    let add_mul_call = mk(
        Call {
            subject: Box::new(mk(Id(hash("add")), at(0, 1))),
            args: vec![mul_call, mk(Int(56), at(17, 18))],
        },
        at(0, 1),
    );
    assert_eq!(parse("add(mul(12, 34), 56);"), vec![add_mul_call]);

    // Assignment.
    assert_matches!(
        &parse("a = 123;")[..],
        [node!(Assign {
            subject,
            value
        })] if matches!(subject.kind, Id(id) if id == hash("a")) && matches!(value.kind, Int(123))
    );

    // `break` and `return`.
    assert_matches!(parse("break;")[..], [node!(Break)]);
    assert_matches!(parse("return;")[..], [node!(Return { value: None })]);
    let returned_call = mk(
        Call {
            subject: Box::new(mk(Id(hash("add")), at(7, 8))),
            args: vec![mk(Int(1), at(11, 12)), mk(Int(2), at(14, 15))],
        },
        at(7, 8),
    );
    assert_eq!(
        parse("return add(1, 2);")[..],
        vec![mk(
            Return {
                value: Some(Box::new(returned_call))
            },
            at(0, 1)
        )]
    );

    assert_matches!(
        &parse("a = 5;")[..],
        [node!(Assign {
            subject,
            value
        })] if matches!(subject.kind, Id(id) if id == hash("a")) && matches!(value.kind, Int(5))
    );

    // `let` wraps its name in a `Param` without a type.
    let binding = mk(
        Param {
            subject: Box::new(mk(Id(hash("a")), at(4, 5))),
            typ: None,
        },
        at(4, 5),
    );
    assert_eq!(
        parse("let a = 5;")[..],
        vec![mk(
            Let {
                subject: Box::new(binding),
                value: Box::new(mk(Int(5), at(8, 9))),
            },
            at(0, 1)
        )]
    );

    // Function definitions and control flow.
    assert_matches!(
        &parse("fn test() -> i32 {}")[..],
        [node!(Fn {
            subject,
            params,
            return_typ,
            body
        })] if subject.kind == Id(hash("test")) && *params == vec![] && return_typ.kind == Id(hash("i32")) && body.kind == Block(vec![])
    );
    assert_matches!(
        &parse("if 0 {}")[..],
        [node!(If {
            cond,
            truthy,
            falsy: None
        })] if matches!(cond.kind, Int(0)) && truthy.kind == Block(vec![])
    );
    assert_matches!(
        &parse("if 0 {} else {}")[..],
        [node!(If {
            cond,
            truthy,
            falsy: Some(falsy),
        })] if matches!(cond.kind, Int(0)) && truthy.kind == Block(vec![]) && falsy.kind == Block(vec![])
    );
    assert_matches!(
        &parse("loop {}")[..],
        [node!(Loop {
            body,
        })] if body.kind == Block(vec![])
    );
}