fixup parsing

This commit is contained in:
SimonFJ20 2024-09-01 02:09:24 +02:00
parent 5d563527fd
commit 21857307d7
8 changed files with 422 additions and 237 deletions

View File

@ -1,11 +1,12 @@
fn b() {
a();
a()
}
fn a() {
let abc = 123;
abc
}

101
src/ast.rs Normal file
View File

@ -0,0 +1,101 @@
use crate::pos::Pos;
/// A statement in the syntax tree: a [`StmtKind`] together with the source
/// position it was parsed at.
#[derive(Clone, PartialEq, Debug)]
pub struct Stmt {
    /// Which form of statement this is, including its payload.
    pub kind: StmtKind,
    /// Source position recorded for the statement.
    pub pos: Pos,
}

impl Stmt {
    /// Creates a statement from its kind and position.
    pub fn new(kind: StmtKind, pos: Pos) -> Self {
        Stmt { kind, pos }
    }
}
/// The different forms a statement can take.
#[derive(Clone, PartialEq, Debug)]
pub enum StmtKind {
/// Placeholder emitted by the parser when a statement could not be parsed.
Error,
/// `let` binding: a parameter-like subject (identifier with optional type)
/// and the expression after `=`.
Let {
subject: Box<Param>,
value: Box<Expr>,
},
/// Function definition: name, parameter list, optional return type and
/// the body block.
Fn {
subject: Box<Expr>,
params: Vec<Param>,
return_typ: Option<Box<Typ>>,
body: Box<Expr>,
},
/// `return`, with `expr` being `None` when no value is given.
Return {
expr: Option<Box<Expr>>,
},
/// `break`, with `expr` being `None` when no value is given.
Break {
expr: Option<Box<Expr>>,
},
/// Assignment of `value` to `subject`.
Assign {
subject: Box<Expr>,
value: Box<Expr>,
},
/// An expression used in statement position.
Expr(Box<Expr>),
}
/// An expression in the syntax tree: an [`ExprKind`] together with the source
/// position it was parsed at.
#[derive(Clone, PartialEq, Debug)]
pub struct Expr {
    /// Which form of expression this is, including its payload.
    pub kind: ExprKind,
    /// Source position recorded for the expression.
    pub pos: Pos,
}

impl Expr {
    /// Creates an expression from its kind and position.
    pub fn new(kind: ExprKind, pos: Pos) -> Self {
        Expr { kind, pos }
    }
}
/// The different forms an expression can take.
#[derive(Clone, PartialEq, Debug)]
pub enum ExprKind {
/// Placeholder emitted by the parser when an expression could not be parsed.
Error,
/// Identifier, carrying the numeric id taken from the identifier token.
Ident(u64),
/// Integer literal.
Int(i64),
/// String literal.
Str(String),
/// Parenthesised expression.
Group(Box<Expr>),
/// `{ ... }` block: its statements plus an optional trailing expression
/// (the one directly before the closing `}`).
Block {
stmts: Vec<Stmt>,
expr: Option<Box<Expr>>,
},
/// Call of `subject` with the given arguments.
Call {
subject: Box<Expr>,
args: Vec<Expr>,
},
/// `if` expression; `falsy` is the `else` block when one is present.
If {
cond: Box<Expr>,
truthy: Box<Expr>,
falsy: Option<Box<Expr>>,
},
/// `loop` expression with its body block.
Loop {
body: Box<Expr>,
},
}
/// A type annotation in the syntax tree: a [`TypKind`] together with the
/// source position it was parsed at.
#[derive(Clone, PartialEq, Debug)]
pub struct Typ {
    /// Which form of type this is, including its payload.
    pub kind: TypKind,
    /// Source position recorded for the type.
    pub pos: Pos,
}

impl Typ {
    /// Creates a type node from its kind and position.
    pub fn new(kind: TypKind, pos: Pos) -> Self {
        Typ { kind, pos }
    }
}
/// The different forms a type annotation can take.
#[derive(Clone, PartialEq, Debug)]
pub enum TypKind {
/// Placeholder emitted by the parser when a type could not be parsed.
Error,
/// Named type, carrying the numeric id taken from the identifier token.
Ident(u64),
}
/// A name with an optional type annotation, used for function parameters and
/// as the subject of `let` statements.
#[derive(Clone, PartialEq, Debug)]
pub struct Param {
/// The identifier expression naming the parameter.
pub subject: Box<Expr>,
/// The type written after `:`, if any.
pub typ: Option<Box<Typ>>,
/// Source position recorded where the parameter starts.
pub pos: Pos,
}

View File

@ -1,7 +1,6 @@
use crate::{
pos::{Error, ErrorAcc, Pos},
token::{Token, TokenKind, TokenValue},
util::hash,
};
use std::{collections::HashMap, rc::Rc, str::Chars, sync::Mutex};
@ -72,9 +71,13 @@ impl<'a> Lexer<'a> {
if let Some(kind) = self.keywords.get(&value) {
return self.token(kind.clone(), pos);
}
let id = hash(&value);
let id = self.symbols.len() as u64;
self.symbols.insert(id, value);
break self.token_with_value(TokenKind::Id, TokenValue::Id(id), pos);
break self.token_with_value(
TokenKind::Ident,
TokenValue::Ident(id),
pos,
);
}
}
}
@ -242,7 +245,7 @@ impl<'a> Lexer<'a> {
});
}
fn pos(&self) -> Pos {
pub fn pos(&self) -> Pos {
Pos {
index: self.index,
line: self.line,

View File

@ -1,13 +1,39 @@
#![allow(dead_code)]
use std::{rc::Rc, sync::Mutex};
use parser::Parser;
use pos::{ErrorAcc, Pos};
mod ast;
mod lexer;
mod parsed;
mod parser;
mod pos;
mod sym;
mod token;
mod util;
/// Entry point: parses the file named by the first command-line argument,
/// prints the resulting AST with debug formatting and then lists every error
/// collected while parsing.
///
/// Exits with status 1 (after printing a message to stderr) when no file name
/// is given or the file cannot be read, instead of panicking.
fn main() {
    let Some(filename) = std::env::args().nth(1) else {
        eprintln!("error: expected a source file path as the first argument");
        std::process::exit(1);
    };
    let text = match std::fs::read_to_string(&filename) {
        Ok(text) => text,
        Err(err) => {
            eprintln!("error: could not read '{filename}': {err}");
            std::process::exit(1);
        }
    };

    // The accumulator is shared with the parser, which records errors into it.
    let error_acc = Rc::new(Mutex::new(ErrorAcc::new()));

    let mut parser = Parser::new(&text, error_acc.clone());
    let ast = parser.parse();
    println!("{ast:#?}");

    let error_acc = error_acc
        .lock()
        .expect("error accumulator mutex should not be poisoned");
    if !error_acc.ok() {
        println!("Compilation failed");
        for error in error_acc.iter() {
            match error.pos {
                Some(Pos { line, col, .. }) => {
                    println!("{:?} at {line}:{col}: {}", error.kind, error.msg)
                }
                None => println!("{:?}: {}", error.kind, error.msg),
            }
        }
    }
}

View File

@ -1,57 +0,0 @@
use crate::pos::Pos;
/// A node of the untyped parse tree: a [`NodeKind`] together with the source
/// position it was parsed at.
#[derive(Clone, PartialEq, Debug)]
pub struct Node {
    /// Which form of node this is, including its payload.
    pub kind: NodeKind,
    /// Source position recorded for the node.
    pub pos: Pos,
}

impl Node {
    /// Creates a node from its kind and position.
    pub fn new(kind: NodeKind, pos: Pos) -> Self {
        Node { kind, pos }
    }
}
/// The different forms a parse-tree node can take. Statements, expressions,
/// types and parameters all share this single enum.
#[derive(Clone, PartialEq, Debug)]
pub enum NodeKind {
/// Placeholder emitted by the parser when something could not be parsed.
Error,
/// Identifier, carrying the numeric id taken from the identifier token.
Id(u64),
/// Integer literal.
Int(i64),
/// String literal.
Str(String),
/// Parenthesised expression.
Group(Box<Node>),
/// `{ ... }` block holding its statements.
Block(Vec<Node>),
/// Call of `subject` with the given arguments.
Call {
subject: Box<Node>,
args: Vec<Node>,
},
/// `if`; `falsy` is the `else` block when one is present.
If {
cond: Box<Node>,
truthy: Box<Node>,
falsy: Option<Box<Node>>,
},
/// `loop` with its body block.
Loop {
body: Box<Node>,
},
/// `break`, which carries no value in this representation.
Break,
/// Assignment of `value` to `subject`.
Assign {
subject: Box<Node>,
value: Box<Node>,
},
/// `let` binding of `value` to `subject`.
Let {
subject: Box<Node>,
value: Box<Node>,
},
/// Function definition; the return type is mandatory in this representation.
Fn {
subject: Box<Node>,
params: Vec<Node>,
return_typ: Box<Node>,
body: Box<Node>,
},
/// `return`, with `value` being `None` when no value is given.
Return {
value: Option<Box<Node>>,
},
/// A name with an optional type annotation.
Param {
subject: Box<Node>,
typ: Option<Box<Node>>,
},
}

View File

@ -1,10 +1,10 @@
use std::{collections::HashMap, rc::Rc, sync::Mutex};
use crate::{
ast::{Expr, ExprKind as EK, Param, Stmt, StmtKind as SK, Typ, TypKind},
lexer::Lexer,
parsed::{Node, NodeKind},
pos::{Error, ErrorAcc, Pos},
token::{Token, TokenKind, TokenValue},
token::{Token, TokenKind as TK, TokenValue},
};
pub struct Parser<'a> {
@ -24,7 +24,7 @@ impl<'a> Parser<'a> {
}
}
pub fn parse(&mut self) -> Vec<Node> {
pub fn parse(&mut self) -> Vec<Stmt> {
self.parse_file()
}
@ -32,7 +32,7 @@ impl<'a> Parser<'a> {
self.lexer.symbols()
}
fn parse_file(&mut self) -> Vec<Node> {
fn parse_file(&mut self) -> Vec<Stmt> {
let mut stmts = Vec::new();
loop {
match self.current {
@ -42,73 +42,66 @@ impl<'a> Parser<'a> {
}
}
fn parse_stmt(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_stmt(&mut self) -> Stmt {
use TK::*;
let pos = self.pos();
match self.curr_kind() {
Some(TokenKind::LBrace) => self.parse_block(),
Some(TokenKind::If) => self.parse_if(),
Some(TokenKind::Loop) => self.parse_loop(),
Some(TokenKind::Fn) => self.parse_fn(),
_ => {
let stmt = match self.curr_kind() {
Some(TokenKind::Let) => self.parse_let(),
Some(TokenKind::Break) => {
self.step();
self.node(NodeKind::Break, pos.clone())
}
Some(TokenKind::Return) => {
self.step();
let value = match self.curr_kind() {
Some(TokenKind::Semicolon) => None,
_ => Some(Box::new(self.parse_expr())),
Some(Fn) => self.parse_fn(),
Some(kind @ (LBrace | If | Loop)) => Stmt::new(
SK::Expr(Box::new(match kind {
LBrace => self.parse_block(),
If => self.parse_if(),
Loop => self.parse_loop(),
_ => unreachable!(),
})),
pos,
),
Some(kind) => {
let stmt = match kind {
Let => self.parse_let(),
Return => self.parse_return(),
Break => self.parse_break(),
_ => match self.parse_assign() {
ParsedAssign::Assign(stmt) => stmt,
ParsedAssign::Expr(expr, pos) => Stmt::new(SK::Expr(Box::new(expr)), pos),
},
};
self.node(NodeKind::Return { value }, pos.clone())
}
_ => self.parse_assign(),
};
match self.curr_kind() {
Some(TokenKind::Semicolon) => {
self.step();
self.eat_semicolon();
stmt
}
_ => {
self.error("expected ';'", pos.clone());
self.error_node(pos)
}
}
}
_ => unreachable!(),
}
}
fn parse_fn(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_fn(&mut self) -> Stmt {
let pos = self.pos();
self.step();
if !self.curr_is(TokenKind::Id) {
if !self.curr_is(TK::Ident) {
self.error("expected id", pos.clone());
return self.error_node(pos);
return self.error_stmt(pos);
}
let subject = Box::new(self.parse_id());
if !self.curr_is(TokenKind::LParen) {
let subject = Box::new(self.parse_ident_expr());
if !self.curr_is(TK::LParen) {
self.error("expected '('", pos.clone());
return self.error_node(pos);
return self.error_stmt(pos);
}
let params = match self.parse_fn_params() {
Ok(params) => params,
Err(expr) => return expr,
Err(pos) => return self.error_stmt(pos),
};
if !self.curr_is(TokenKind::MinusLt) {
self.error("expected '->'", pos.clone());
return self.error_node(pos);
}
let return_typ = if self.curr_is(TK::MinusLt) {
self.step();
let return_typ = Box::new(self.parse_typ());
if !self.curr_is(TokenKind::LBrace) {
Some(Box::new(self.parse_typ()))
} else {
None
};
if !self.curr_is(TK::LBrace) {
self.error("expected '{'", pos.clone());
return self.error_node(pos);
return self.error_stmt(pos);
}
let body = Box::new(self.parse_block());
self.node(
NodeKind::Fn {
Stmt::new(
SK::Fn {
subject,
params,
return_typ,
@ -118,49 +111,49 @@ impl<'a> Parser<'a> {
)
}
fn parse_fn_params(&mut self) -> Result<Vec<Node>, Node> {
let pos = self.pos().unwrap();
fn parse_fn_params(&mut self) -> Result<Vec<Param>, Pos> {
let pos = self.pos();
self.step();
let mut params = Vec::new();
if !self.curr_is(TokenKind::RParen) {
if !self.curr_is(TokenKind::Id) {
if !self.curr_is(TK::RParen) {
if !self.curr_is(TK::Ident) {
self.error("expected id", pos.clone());
return Err(self.error_node(pos));
return Err(pos);
}
params.push(self.parse_param());
while let Some(TokenKind::Comma) = self.curr_kind() {
while let Some(TK::Comma) = self.curr_kind() {
self.step();
if self.curr_is(TokenKind::RParen) {
if self.curr_is(TK::RParen) {
self.error("expected ')'", pos.clone());
break;
}
params.push(self.parse_param());
}
}
if !self.curr_is(TokenKind::RParen) {
if !self.curr_is(TK::RParen) {
self.error("expected ')'", pos.clone());
return Err(self.error_node(pos));
return Err(pos);
}
self.step();
Ok(params)
}
fn parse_let(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_let(&mut self) -> Stmt {
let pos = self.pos();
self.step();
if !self.curr_is(TokenKind::Id) {
if !self.curr_is(TK::Ident) {
self.error("expected id", pos.clone());
return self.error_node(pos);
return self.error_stmt(pos);
}
let subject = self.parse_param();
if !self.curr_is(TokenKind::Equal) {
if !self.curr_is(TK::Equal) {
self.error("expected '='", pos.clone());
return self.error_node(pos);
return self.error_stmt(pos);
}
self.step();
let value = self.parse_expr();
self.node(
NodeKind::Let {
Stmt::new(
SK::Let {
subject: Box::new(subject),
value: Box::new(value),
},
@ -168,68 +161,64 @@ impl<'a> Parser<'a> {
)
}
fn parse_param(&mut self) -> Node {
let pos = self.pos().unwrap();
let subject = Box::new(self.parse_id());
let typ = if let Some(TokenKind::Colon) = self.curr_kind() {
fn parse_return(&mut self) -> Stmt {
let pos = self.pos();
self.step();
Some(Box::new(self.parse_typ()))
} else {
None
let expr = match self.curr_kind() {
Some(TK::Semicolon) => None,
_ => Some(Box::new(self.parse_expr())),
};
self.node(NodeKind::Param { subject, typ }, pos)
Stmt::new(SK::Return { expr }, pos.clone())
}
fn parse_typ(&mut self) -> Node {
let pos = self.pos().unwrap();
match self.curr_kind() {
Some(TokenKind::Id) => self.parse_id(),
_ => {
self.error("expected type", pos.clone());
fn parse_break(&mut self) -> Stmt {
let pos = self.pos();
self.step();
self.error_node(pos)
}
}
let expr = match self.curr_kind() {
Some(TK::Semicolon) => None,
_ => Some(Box::new(self.parse_expr())),
};
Stmt::new(SK::Break { expr }, pos.clone())
}
fn parse_assign(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_assign(&mut self) -> ParsedAssign {
let pos = self.pos();
let subject = self.parse_expr();
match self.curr_kind() {
Some(TokenKind::Equal) => {
Some(TK::Equal) => {
self.step();
let value = self.parse_expr();
self.node(
NodeKind::Assign {
ParsedAssign::Assign(Stmt::new(
SK::Assign {
subject: Box::new(subject),
value: Box::new(value),
},
pos,
)
))
}
_ => subject,
_ => ParsedAssign::Expr(subject, pos),
}
}
fn parse_expr(&mut self) -> Node {
fn parse_expr(&mut self) -> Expr {
self.parse_call()
}
fn parse_call(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_call(&mut self) -> Expr {
let pos = self.pos();
let mut subject = self.parse_value();
loop {
match self.curr_kind() {
Some(TokenKind::LParen) => {
Some(TK::LParen) => {
self.step();
let mut args = Vec::new();
match self.curr_kind() {
None | Some(TokenKind::RParen) => {}
None | Some(TK::RParen) => {}
Some(_) => {
args.push(self.parse_expr());
while let Some(TokenKind::Comma) = self.curr_kind() {
while let Some(TK::Comma) = self.curr_kind() {
self.step();
if let Some(TokenKind::RParen) = self.curr_kind() {
if let Some(TK::RParen) = self.curr_kind() {
break;
}
args.push(self.parse_expr());
@ -237,15 +226,15 @@ impl<'a> Parser<'a> {
}
}
match self.curr_kind() {
Some(TokenKind::RParen) => {}
Some(TK::RParen) => {}
_ => {
self.error("expected ')'", pos.clone());
return self.error_node(pos);
return self.error_expr(pos);
}
}
self.step();
subject = self.node(
NodeKind::Call {
subject = Expr::new(
EK::Call {
subject: Box::new(subject),
args,
},
@ -257,42 +246,42 @@ impl<'a> Parser<'a> {
}
}
fn parse_value(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_value(&mut self) -> Expr {
let pos = self.pos();
match self.curr_kind() {
Some(TokenKind::Id) => self.parse_id(),
Some(TokenKind::Int) => self.parse_int(),
Some(TokenKind::Str) => self.parse_string(),
Some(TokenKind::LParen) => self.parse_group(),
Some(TokenKind::LBrace) => self.parse_block(),
Some(TokenKind::If) => self.parse_if(),
Some(TokenKind::Loop) => self.parse_loop(),
Some(TK::Ident) => self.parse_ident_expr(),
Some(TK::Int) => self.parse_int(),
Some(TK::Str) => self.parse_string(),
Some(TK::LParen) => self.parse_group(),
Some(TK::LBrace) => self.parse_block(),
Some(TK::If) => self.parse_if(),
Some(TK::Loop) => self.parse_loop(),
_ => {
self.error("expected value", pos.clone());
self.step();
self.error_node(pos)
self.error_expr(pos)
}
}
}
fn parse_id(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_ident_expr(&mut self) -> Expr {
let pos = self.pos();
let Some(Token {
kind: TokenKind::Id,
value: TokenValue::Id(value),
kind: TK::Ident,
value: TokenValue::Ident(value),
..
}) = self.current
else {
unreachable!()
};
self.step();
self.node(NodeKind::Id(value), pos)
Expr::new(EK::Ident(value), pos)
}
fn parse_int(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_int(&mut self) -> Expr {
let pos = self.pos();
let Some(Token {
kind: TokenKind::Int,
kind: TK::Int,
value: TokenValue::Int(value),
..
}) = self.current
@ -300,13 +289,13 @@ impl<'a> Parser<'a> {
unreachable!()
};
self.step();
self.node(NodeKind::Int(value), pos)
Expr::new(EK::Int(value), pos)
}
fn parse_string(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_string(&mut self) -> Expr {
let pos = self.pos();
let Some(Token {
kind: TokenKind::Str,
kind: TK::Str,
value: TokenValue::Str(value),
..
}) = self.current.clone()
@ -314,62 +303,121 @@ impl<'a> Parser<'a> {
unreachable!()
};
self.step();
self.node(NodeKind::Str(value.clone()), pos)
Expr::new(EK::Str(value.clone()), pos)
}
fn parse_group(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_group(&mut self) -> Expr {
let pos = self.pos();
self.step();
let expr = Box::new(self.parse_expr());
if !self.curr_is(TokenKind::RParen) {
if !self.curr_is(TK::RParen) {
self.error("expected ')'", pos.clone());
return self.error_node(pos);
return self.error_expr(pos);
}
self.step();
self.node(NodeKind::Group(expr), pos)
Expr::new(EK::Group(expr), pos)
}
fn parse_block(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_block(&mut self) -> Expr {
use TK::*;
let pos = self.pos();
self.step();
let mut stmts = Vec::new();
loop {
match self.curr_kind() {
None => {
self.error("expected ')'", pos.clone());
break self.error_node(pos);
self.error("expected '}'", pos.clone());
break self.error_expr(pos);
}
Some(TokenKind::RBrace) => {
Some(RBrace) => {
self.step();
break self.node(NodeKind::Block(stmts), pos);
break Expr::new(EK::Block { stmts, expr: None }, pos);
}
_ => stmts.push(self.parse_stmt()),
Some(Fn) => stmts.push(self.parse_fn()),
Some(kind @ (Let | Return | Break)) => {
let stmt = match kind {
Let => self.parse_let(),
Return => self.parse_return(),
Break => self.parse_break(),
_ => unreachable!(),
};
self.eat_semicolon();
stmts.push(stmt);
}
Some(kind @ (LBrace | If | Loop)) => {
let expr = match kind {
LBrace => self.parse_block(),
If => self.parse_if(),
Loop => self.parse_loop(),
_ => unreachable!(),
};
match self.curr_kind() {
Some(TK::Semicolon) => {
self.step();
stmts.push(Stmt::new(SK::Expr(Box::new(expr)), pos.clone()))
}
Some(TK::RBrace) => {
self.step();
break Expr::new(
EK::Block {
stmts,
expr: Some(Box::new(expr)),
},
pos,
);
}
_ => {}
}
}
Some(_) => match self.parse_assign() {
ParsedAssign::Assign(stmt) => {
self.eat_semicolon();
stmts.push(stmt);
}
ParsedAssign::Expr(expr, pos) => match self.curr_kind() {
Some(TK::Semicolon) => {
self.step();
stmts.push(Stmt::new(SK::Expr(Box::new(expr)), pos))
}
Some(TK::RBrace) => {
self.step();
break Expr::new(
EK::Block {
stmts,
expr: Some(Box::new(expr)),
},
pos,
);
}
_ => {}
},
},
}
}
}
fn parse_if(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_if(&mut self) -> Expr {
let pos = self.pos();
self.step();
let cond = Box::new(self.parse_expr());
if !self.curr_is(TokenKind::LBrace) {
if !self.curr_is(TK::LBrace) {
self.error("expected '}'", pos.clone());
return self.error_node(pos);
return self.error_expr(pos);
}
let truthy = Box::new(self.parse_block());
let falsy = match self.curr_kind() {
Some(TokenKind::Else) => {
Some(TK::Else) => {
self.step();
if !self.curr_is(TokenKind::LBrace) {
if !self.curr_is(TK::LBrace) {
self.error("expected '}'", pos.clone());
return self.error_node(pos);
return self.error_expr(pos);
}
Some(Box::new(self.parse_block()))
}
_ => None,
};
self.node(
NodeKind::If {
Expr::new(
EK::If {
cond,
truthy,
falsy,
@ -378,15 +426,65 @@ impl<'a> Parser<'a> {
)
}
fn parse_loop(&mut self) -> Node {
let pos = self.pos().unwrap();
fn parse_loop(&mut self) -> Expr {
let pos = self.pos();
self.step();
if !self.curr_is(TokenKind::LBrace) {
if !self.curr_is(TK::LBrace) {
self.error("expected '}'", pos.clone());
return self.error_node(pos);
return self.error_expr(pos);
}
let body = Box::new(self.parse_block());
self.node(NodeKind::Loop { body }, pos)
Expr::new(EK::Loop { body }, pos)
}
/// Parses a type annotation.
///
/// Only identifier types are accepted. Any other token is reported as
/// "expected type", skipped, and replaced by an error type node.
fn parse_typ(&mut self) -> Typ {
    let start = self.pos();
    if self.curr_is(TK::Ident) {
        return self.parse_ident_typ();
    }
    self.error("expected type", start.clone());
    self.step();
    self.error_typ(start)
}
/// Parses an identifier type and consumes its token.
///
/// The current token must be an identifier carrying an identifier value;
/// callers check this beforehand, so any other token is treated as a bug.
fn parse_ident_typ(&mut self) -> Typ {
    let start = self.pos();
    let id = match self.current {
        Some(Token {
            kind: TK::Ident,
            value: TokenValue::Ident(id),
            ..
        }) => id,
        _ => unreachable!(),
    };
    self.step();
    Typ::new(TypKind::Ident(id), start)
}
/// Parses `name` or `name: type`. The current token must be an identifier.
fn parse_param(&mut self) -> Param {
    let start = self.pos();
    let subject = Box::new(self.parse_ident_expr());
    // The type annotation is optional and introduced by `:`.
    let typ = match self.curr_kind() {
        Some(TK::Colon) => {
            self.step();
            Some(Box::new(self.parse_typ()))
        }
        _ => None,
    };
    Param {
        subject,
        typ,
        pos: start,
    }
}
/// Consumes a `;` if it is the current token; otherwise reports
/// "expected ';'" at the current position without consuming anything.
fn eat_semicolon(&mut self) {
    if self.curr_is(TK::Semicolon) {
        self.step();
    } else {
        let at = self.pos();
        self.error("expected ';'", at);
    }
}
fn error<S: Into<String>>(&mut self, msg: S, pos: Pos) {
@ -402,26 +500,35 @@ impl<'a> Parser<'a> {
self.current = self.lexer.next();
}
fn node(&self, kind: NodeKind, pos: Pos) -> Node {
Node { kind, pos }
fn error_stmt(&self, pos: Pos) -> Stmt {
Stmt::new(SK::Error, pos)
}
fn error_node(&self, pos: Pos) -> Node {
Node {
kind: NodeKind::Error,
pos,
}
fn error_expr(&self, pos: Pos) -> Expr {
Expr::new(EK::Error, pos)
}
fn pos(&self) -> Option<Pos> {
self.current.as_ref().map(|token| token.pos.clone())
fn error_typ(&self, pos: Pos) -> Typ {
Typ::new(TypKind::Error, pos)
}
fn curr_is(&self, kind: TokenKind) -> bool {
/// Returns the position of the current token, falling back to the lexer's
/// position when there is no current token.
fn pos(&self) -> Pos {
    match &self.current {
        Some(token) => token.pos.clone(),
        None => self.lexer.pos(),
    }
}
/// Returns `true` when there is a current token and it has the given kind.
fn curr_is(&self, kind: TK) -> bool {
    matches!(self.curr_kind(), Some(current) if current == kind)
}
fn curr_kind(&self) -> Option<TokenKind> {
/// Returns a copy of the current token's kind, or `None` when there is no
/// current token.
fn curr_kind(&self) -> Option<TK> {
    match &self.current {
        Some(token) => Some(token.kind.clone()),
        None => None,
    }
}
}
/// Result of `parse_assign`: either a complete assignment statement, or the
/// bare expression (with its start position) when no `=` followed it, so the
/// caller can decide how to use that expression.
enum ParsedAssign {
/// An assignment statement was parsed.
Assign(Stmt),
/// Only an expression was parsed; the `Pos` is where it started.
Expr(Expr, Pos),
}

View File

@ -41,4 +41,8 @@ impl ErrorAcc {
pub fn add(&mut self, error: Error) {
self.errors.push(error)
}
/// Returns an iterator yielding a reference to each error stored in this
/// accumulator.
pub fn iter(&self) -> impl Iterator<Item = &Error> {
self.errors.iter()
}
}

View File

@ -10,7 +10,7 @@ pub struct Token {
#[derive(Clone, PartialEq, Debug)]
pub enum TokenKind {
Error,
Id,
Ident,
Int,
Str,
If,
@ -34,7 +34,7 @@ pub enum TokenKind {
#[derive(Clone, PartialEq, Debug)]
pub enum TokenValue {
None,
Id(u64),
Ident(u64),
Int(i64),
Str(String),
}