652 lines
20 KiB
Rust
652 lines
20 KiB
Rust
use std::{collections::HashMap, rc::Rc, sync::Mutex};
|
|
|
|
use crate::{
|
|
lexer::Lexer,
|
|
parsed::{Node, NodeKind},
|
|
pos::{Error, ErrorAcc, Pos},
|
|
token::{Token, TokenKind, TokenValue},
|
|
};
|
|
|
|
pub struct Parser<'a> {
|
|
lexer: Lexer<'a>,
|
|
current: Option<Token>,
|
|
error_acc: Rc<Mutex<ErrorAcc>>,
|
|
}
|
|
|
|
impl<'a> Parser<'a> {
|
|
pub fn new(text: &'a str, error_acc: Rc<Mutex<ErrorAcc>>) -> Self {
|
|
let mut lexer = Lexer::new(text, error_acc.clone());
|
|
let current = lexer.next();
|
|
Self {
|
|
lexer,
|
|
current,
|
|
error_acc,
|
|
}
|
|
}
|
|
|
|
/// Parses the remaining input and returns its top-level statements.
///
/// Public entry point; all work is delegated to `parse_file`.
pub fn parse(&mut self) -> Vec<Node> {
    self.parse_file()
}
|
|
|
|
pub fn symbols(self) -> HashMap<u64, String> {
|
|
self.lexer.symbols()
|
|
}
|
|
|
|
/// Parses statements until no token is left and returns them in source order.
fn parse_file(&mut self) -> Vec<Node> {
    let mut stmts = Vec::new();
    while self.current.is_some() {
        stmts.push(self.parse_stmt());
    }
    stmts
}
|
|
|
|
fn parse_stmt(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
match self.curr_kind() {
|
|
Some(TokenKind::LBrace) => self.parse_block(),
|
|
Some(TokenKind::If) => self.parse_if(),
|
|
Some(TokenKind::Loop) => self.parse_loop(),
|
|
Some(TokenKind::Fn) => self.parse_fn(),
|
|
_ => {
|
|
let stmt = match self.curr_kind() {
|
|
Some(TokenKind::Let) => self.parse_let(),
|
|
Some(TokenKind::Break) => {
|
|
self.step();
|
|
self.node(NodeKind::Break, pos.clone())
|
|
}
|
|
Some(TokenKind::Return) => {
|
|
self.step();
|
|
let value = match self.curr_kind() {
|
|
Some(TokenKind::Semicolon) => None,
|
|
_ => Some(Box::new(self.parse_expr())),
|
|
};
|
|
self.node(NodeKind::Return { value }, pos.clone())
|
|
}
|
|
_ => self.parse_assign(),
|
|
};
|
|
match self.curr_kind() {
|
|
Some(TokenKind::Semicolon) => {
|
|
self.step();
|
|
stmt
|
|
}
|
|
_ => {
|
|
self.error("expected ';'", pos.clone());
|
|
self.error_node(pos)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_fn(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
self.step();
|
|
if !self.curr_is(TokenKind::Id) {
|
|
self.error("expected id", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
let subject = Box::new(self.parse_id());
|
|
if !self.curr_is(TokenKind::LParen) {
|
|
self.error("expected '('", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
let params = match self.parse_fn_params() {
|
|
Ok(params) => params,
|
|
Err(expr) => return expr,
|
|
};
|
|
if !self.curr_is(TokenKind::MinusLt) {
|
|
self.error("expected '->'", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
self.step();
|
|
let return_typ = Box::new(self.parse_typ());
|
|
if !self.curr_is(TokenKind::LBrace) {
|
|
self.error("expected '{'", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
let body = Box::new(self.parse_block());
|
|
self.node(
|
|
NodeKind::Fn {
|
|
subject,
|
|
params,
|
|
return_typ,
|
|
body,
|
|
},
|
|
pos,
|
|
)
|
|
}
|
|
|
|
fn parse_fn_params(&mut self) -> Result<Vec<Node>, Node> {
|
|
let pos = self.pos().unwrap();
|
|
self.step();
|
|
let mut params = Vec::new();
|
|
if !self.curr_is(TokenKind::RParen) {
|
|
if !self.curr_is(TokenKind::Id) {
|
|
self.error("expected id", pos.clone());
|
|
return Err(self.error_node(pos));
|
|
}
|
|
params.push(self.parse_param());
|
|
while let Some(TokenKind::Comma) = self.curr_kind() {
|
|
self.step();
|
|
if self.curr_is(TokenKind::RParen) {
|
|
self.error("expected ')'", pos.clone());
|
|
break;
|
|
}
|
|
params.push(self.parse_param());
|
|
}
|
|
}
|
|
if !self.curr_is(TokenKind::RParen) {
|
|
self.error("expected ')'", pos.clone());
|
|
return Err(self.error_node(pos));
|
|
}
|
|
self.step();
|
|
Ok(params)
|
|
}
|
|
|
|
fn parse_let(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
self.step();
|
|
if !self.curr_is(TokenKind::Id) {
|
|
self.error("expected id", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
let subject = self.parse_param();
|
|
if !self.curr_is(TokenKind::Equal) {
|
|
self.error("expected '='", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
self.step();
|
|
let value = self.parse_expr();
|
|
self.node(
|
|
NodeKind::Let {
|
|
subject: Box::new(subject),
|
|
value: Box::new(value),
|
|
},
|
|
pos,
|
|
)
|
|
}
|
|
|
|
fn parse_param(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
let subject = Box::new(self.parse_id());
|
|
let typ = if let Some(TokenKind::Colon) = self.curr_kind() {
|
|
self.step();
|
|
Some(Box::new(self.parse_typ()))
|
|
} else {
|
|
None
|
|
};
|
|
self.node(NodeKind::Param { subject, typ }, pos)
|
|
}
|
|
|
|
fn parse_typ(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
match self.curr_kind() {
|
|
Some(TokenKind::Id) => self.parse_id(),
|
|
_ => {
|
|
self.error("expected type", pos.clone());
|
|
self.step();
|
|
self.error_node(pos)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_assign(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
let subject = self.parse_expr();
|
|
match self.curr_kind() {
|
|
Some(TokenKind::Equal) => {
|
|
self.step();
|
|
let value = self.parse_expr();
|
|
self.node(
|
|
NodeKind::Assign {
|
|
subject: Box::new(subject),
|
|
value: Box::new(value),
|
|
},
|
|
pos,
|
|
)
|
|
}
|
|
_ => subject,
|
|
}
|
|
}
|
|
|
|
/// Parses an expression.
///
/// The only expression level at the moment is the call level, so this simply
/// forwards to `parse_call`.
fn parse_expr(&mut self) -> Node {
    self.parse_call()
}
|
|
|
|
fn parse_call(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
let mut subject = self.parse_value();
|
|
loop {
|
|
match self.curr_kind() {
|
|
Some(TokenKind::LParen) => {
|
|
self.step();
|
|
let mut args = Vec::new();
|
|
match self.curr_kind() {
|
|
None | Some(TokenKind::RParen) => {}
|
|
Some(_) => {
|
|
args.push(self.parse_expr());
|
|
while let Some(TokenKind::Comma) = self.curr_kind() {
|
|
self.step();
|
|
if let Some(TokenKind::RParen) = self.curr_kind() {
|
|
break;
|
|
}
|
|
args.push(self.parse_expr());
|
|
}
|
|
}
|
|
}
|
|
match self.curr_kind() {
|
|
Some(TokenKind::RParen) => {}
|
|
_ => {
|
|
self.error("expected ')'", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
}
|
|
self.step();
|
|
subject = self.node(
|
|
NodeKind::Call {
|
|
subject: Box::new(subject),
|
|
args,
|
|
},
|
|
pos.clone(),
|
|
);
|
|
}
|
|
_ => break subject,
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_value(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
match self.curr_kind() {
|
|
Some(TokenKind::Id) => self.parse_id(),
|
|
Some(TokenKind::Int) => self.parse_int(),
|
|
Some(TokenKind::Str) => self.parse_string(),
|
|
Some(TokenKind::LParen) => self.parse_group(),
|
|
Some(TokenKind::LBrace) => self.parse_block(),
|
|
Some(TokenKind::If) => self.parse_if(),
|
|
Some(TokenKind::Loop) => self.parse_loop(),
|
|
_ => {
|
|
self.error("expected value", pos.clone());
|
|
self.step();
|
|
self.error_node(pos)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_id(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
let Some(Token {
|
|
kind: TokenKind::Id,
|
|
value: TokenValue::Id(value),
|
|
..
|
|
}) = self.current
|
|
else {
|
|
unreachable!()
|
|
};
|
|
self.step();
|
|
self.node(NodeKind::Id(value), pos)
|
|
}
|
|
|
|
fn parse_int(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
let Some(Token {
|
|
kind: TokenKind::Int,
|
|
value: TokenValue::Int(value),
|
|
..
|
|
}) = self.current
|
|
else {
|
|
unreachable!()
|
|
};
|
|
self.step();
|
|
self.node(NodeKind::Int(value), pos)
|
|
}
|
|
|
|
fn parse_string(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
let Some(Token {
|
|
kind: TokenKind::Str,
|
|
value: TokenValue::Str(value),
|
|
..
|
|
}) = self.current.clone()
|
|
else {
|
|
unreachable!()
|
|
};
|
|
self.step();
|
|
self.node(NodeKind::Str(value.clone()), pos)
|
|
}
|
|
|
|
fn parse_group(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
self.step();
|
|
let expr = Box::new(self.parse_expr());
|
|
if !self.curr_is(TokenKind::RParen) {
|
|
self.error("expected ')'", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
self.step();
|
|
self.node(NodeKind::Group(expr), pos)
|
|
}
|
|
|
|
fn parse_block(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
self.step();
|
|
let mut stmts = Vec::new();
|
|
loop {
|
|
match self.curr_kind() {
|
|
None => {
|
|
self.error("expected ')'", pos.clone());
|
|
break self.error_node(pos);
|
|
}
|
|
Some(TokenKind::RBrace) => {
|
|
self.step();
|
|
break self.node(NodeKind::Block(stmts), pos);
|
|
}
|
|
_ => stmts.push(self.parse_stmt()),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_if(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
self.step();
|
|
let cond = Box::new(self.parse_expr());
|
|
if !self.curr_is(TokenKind::LBrace) {
|
|
self.error("expected '}'", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
let truthy = Box::new(self.parse_block());
|
|
let falsy = match self.curr_kind() {
|
|
Some(TokenKind::Else) => {
|
|
self.step();
|
|
if !self.curr_is(TokenKind::LBrace) {
|
|
self.error("expected '}'", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
Some(Box::new(self.parse_block()))
|
|
}
|
|
_ => None,
|
|
};
|
|
self.node(
|
|
NodeKind::If {
|
|
cond,
|
|
truthy,
|
|
falsy,
|
|
},
|
|
pos,
|
|
)
|
|
}
|
|
|
|
fn parse_loop(&mut self) -> Node {
|
|
let pos = self.pos().unwrap();
|
|
self.step();
|
|
if !self.curr_is(TokenKind::LBrace) {
|
|
self.error("expected '}'", pos.clone());
|
|
return self.error_node(pos);
|
|
}
|
|
let body = Box::new(self.parse_block());
|
|
self.node(NodeKind::Loop { body }, pos)
|
|
}
|
|
|
|
fn error<S: Into<String>>(&mut self, msg: S, pos: Pos) {
|
|
let msg = msg.into();
|
|
self.error_acc.lock().unwrap().add(Error {
|
|
kind: crate::pos::ErrorKind::ParserError,
|
|
pos: Some(pos),
|
|
msg,
|
|
});
|
|
}
|
|
|
|
fn step(&mut self) {
|
|
self.current = self.lexer.next();
|
|
}
|
|
|
|
fn node(&self, kind: NodeKind, pos: Pos) -> Node {
|
|
Node { kind, pos }
|
|
}
|
|
|
|
fn error_node(&self, pos: Pos) -> Node {
|
|
Node {
|
|
kind: NodeKind::Error,
|
|
pos,
|
|
}
|
|
}
|
|
|
|
/// Returns a copy of the current token's position, or `None` without a current token.
fn pos(&self) -> Option<Pos> {
    let token = self.current.as_ref()?;
    Some(token.pos.clone())
}
|
|
|
|
/// Returns `true` when there is a current token and its kind equals `kind`.
fn curr_is(&self, kind: TokenKind) -> bool {
    matches!(self.curr_kind(), Some(current) if current == kind)
}
|
|
|
|
/// Returns a copy of the current token's kind, or `None` without a current token.
fn curr_kind(&self) -> Option<TokenKind> {
    let token = self.current.as_ref()?;
    Some(token.kind.clone())
}
|
|
}
|
|
|
|
/// Checks the parser on small single-line programs: literals, calls, assignment,
/// `break`/`return`, `let`, `fn`, `if`/`else` and `loop`.
#[test]
fn test_parser() {
    use crate::util::hash;
    use assert_matches::assert_matches;
    use NodeKind::*;

    // Pattern for a node of the given kind at any position.
    macro_rules! node {
        ($kind:pat) => {
            Node { kind: $kind, .. }
        };
    }

    // Parses `text` with a fresh error accumulator.
    fn parse(text: &str) -> Vec<Node> {
        Parser::new(text, Rc::new(Mutex::new(ErrorAcc::new()))).parse()
    }

    // Every input below is on line 1, so only index and column vary.
    let at = |index, col| Pos {
        index,
        line: 1,
        col,
    };
    let mk = |kind, pos| Node { kind, pos };

    // Literals and identifiers as expression statements.
    assert_matches!(parse("abc;")[..], [node!(Id(id))] if id == hash("abc"));
    assert_matches!(parse("123;")[..], [node!(Int(123))]);
    assert_matches!(&parse("\"hello\";")[..], [node!(Str(v))] if *v == "hello".to_string());
    assert_matches!(parse("0;")[..], [node!(Int(0))]);
    assert_matches!(parse("0;abc;")[..], [node!(Int(0)), node!(Id(id))] if id == hash("abc"));

    // Nested calls, including exact positions.
    assert_eq!(
        parse("add(mul(12, 34), 56);"),
        vec![mk(
            Call {
                subject: Box::new(mk(Id(hash("add")), at(0, 1))),
                args: vec![
                    mk(
                        Call {
                            // Presumably hash("mul") — confirm.
                            subject: Box::new(mk(Id(14581412793212634142), at(4, 5))),
                            args: vec![mk(Int(12), at(8, 9)), mk(Int(34), at(12, 13))],
                        },
                        at(4, 5),
                    ),
                    mk(Int(56), at(17, 18)),
                ],
            },
            at(0, 1),
        )]
    );

    assert_matches!(
        &parse("a = 123;")[..],
        [node!(Assign {
            subject,
            value
        })] if matches!(subject.kind, Id(id) if id == hash("a")) && matches!(value.kind, Int(123))
    );

    // `break` and `return`, with and without a value.
    assert_matches!(parse("break;")[..], [node!(Break)]);
    assert_matches!(parse("return;")[..], [node!(Return { value: None })]);
    assert_eq!(
        parse("return add(1, 2);")[..],
        vec![mk(
            Return {
                value: Some(Box::new(mk(
                    Call {
                        subject: Box::new(mk(Id(hash("add")), at(7, 8))),
                        args: vec![mk(Int(1), at(11, 12)), mk(Int(2), at(14, 15))],
                    },
                    at(7, 8),
                ))),
            },
            at(0, 1),
        )]
    );

    assert_matches!(
        &parse("a = 5;")[..],
        [node!(Assign {
            subject,
            value
        })] if matches!(subject.kind, Id(id) if id == hash("a")) && matches!(value.kind, Int(5))
    );

    // `let` wraps its subject in an untyped `Param`.
    assert_eq!(
        parse("let a = 5;")[..],
        vec![mk(
            Let {
                subject: Box::new(mk(
                    Param {
                        subject: Box::new(mk(Id(hash("a")), at(4, 5))),
                        typ: None,
                    },
                    at(4, 5),
                )),
                value: Box::new(mk(Int(5), at(8, 9))),
            },
            at(0, 1),
        )]
    );

    // Items and control flow with empty bodies.
    assert_matches!(
        &parse("fn test() -> i32 {}")[..],
        [node!(Fn {
            subject,
            params,
            return_typ,
            body
        })] if subject.kind == Id(hash("test")) && *params == vec![] && return_typ.kind == Id(hash("i32")) && body.kind == Block(vec![])
    );
    assert_matches!(
        &parse("if 0 {}")[..],
        [node!(If {
            cond,
            truthy,
            falsy: None
        })] if matches!(cond.kind, Int(0)) && truthy.kind == Block(vec![])
    );
    assert_matches!(
        &parse("if 0 {} else {}")[..],
        [node!(If {
            cond,
            truthy,
            falsy: Some(falsy),
        })] if matches!(cond.kind, Int(0)) && truthy.kind == Block(vec![]) && falsy.kind == Block(vec![])
    );
    assert_matches!(
        &parse("loop {}")[..],
        [node!(Loop {
            body,
        })] if body.kind == Block(vec![])
    );
}
|