From 0fde8066df58e8136bcc86b88285d89d4993813b Mon Sep 17 00:00:00 2001 From: SimonFJ20 Date: Sat, 7 Sep 2024 02:32:27 +0200 Subject: [PATCH] =?UTF-8?q?symbol=20resolution=20works=20=F0=9F=8E=89?= =?UTF-8?q?=F0=9F=8E=89=F0=9F=8E=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/ast_lower.rs | 358 +++++++++++++++++++++++++++++++++++++++++++++++ src/hir.rs | 164 ++++++++++++++++++++++ src/lexer.rs | 14 +- src/main.rs | 14 +- src/parser.rs | 2 +- src/pos.rs | 6 +- 6 files changed, 549 insertions(+), 9 deletions(-) create mode 100644 src/ast_lower.rs create mode 100644 src/hir.rs diff --git a/src/ast_lower.rs b/src/ast_lower.rs new file mode 100644 index 0000000..57a1d11 --- /dev/null +++ b/src/ast_lower.rs @@ -0,0 +1,358 @@ +use std::{collections::HashMap, rc::Rc, sync::Mutex}; + +use crate::{ + ast::{self}, + hir::{ + Expr, ExprId, ExprKind, OwnerId, Pack, Param, ParamId, Stmt, StmtId, StmtKind, Typ, TypId, + TypKind, + }, + pos::{Error, ErrorAcc, ErrorKind}, +}; + +pub struct AstLower { + error_acc: Rc>, + stmts: Vec, + exprs: Vec, + typs: Vec, + params: Vec, + owners: HashMap, + next_owner_id: u64, + ribs: Vec, + rib: RibId, +} + +impl AstLower { + pub fn new(error_acc: Rc>) -> Self { + let ribs = vec![Rib::new_root()]; + Self { + error_acc, + stmts: Vec::new(), + exprs: Vec::new(), + typs: Vec::new(), + params: Vec::new(), + owners: HashMap::new(), + next_owner_id: 0, + ribs, + rib: RibId(0), + } + } + + pub fn lower_ast(&mut self, ast: &[ast::Stmt]) { + self.lower_stmts(ast); + } + + pub fn pack_it_up(self) -> Pack { + Pack::new(self.stmts, self.exprs, self.typs, self.params, self.owners) + } + + fn lower_stmts(&mut self, stmts: &[ast::Stmt]) { + self.find_fns(stmts); + for stmt in stmts { + self.lower_stmt(stmt); + } + } + + fn find_fns(&mut self, stmts: &[ast::Stmt]) { + for stmt in stmts { + if let ast::StmtKind::Fn { + subject, + params: _, + return_typ: _, + body: _, + } = &stmt.kind + { + let ast::Expr { + kind: ast::ExprKind::Ident(ident_id), + pos: _, + } = subject.as_ref() + else { + unreachable!(); + }; + let owner_id = self.next_owner_id(); + self.ribs[self.rib.0] + .define_item(*ident_id, owner_id) + .unwrap() + } + } + } + + fn lower_stmt(&mut self, stmt: &ast::Stmt) -> StmtId { + let ast::Stmt { kind, pos } = stmt; + match &kind { + ast::StmtKind::Error => self.intern_stmt(Stmt::new(StmtKind::Error, pos.clone())), + ast::StmtKind::Let { subject, value } => { + self.make_rib(); + let owner_id = self.next_owner_id(); + let subject = self.lower_param(subject, owner_id); + let value = self.lower_expr(value); + let stmt_id = + self.intern_stmt(Stmt::new(StmtKind::Let { subject, value }, pos.clone())); + self.owners.insert(owner_id, stmt_id); + stmt_id + } + ast::StmtKind::Fn { + subject, + params, + return_typ, + body, + } => { + let ast::ExprKind::Ident(ident_id) = subject.kind else { + unreachable!(); + }; + let owner_id = self.next_owner_id(); + let params = params + .iter() + .map(|param| self.lower_param(param, owner_id)) + .collect::>(); + let return_typ = return_typ.as_ref().map(|typ| self.lower_typ(typ)); + let body = self.lower_expr(body); + self.intern_stmt(Stmt::new( + StmtKind::Fn { + subject_ident_id: ident_id, + params, + return_typ, + body, + }, + pos.clone(), + )) + } + ast::StmtKind::Return { expr } => { + let expr = expr.as_ref().map(|expr| self.lower_expr(expr)); + self.intern_stmt(Stmt::new(StmtKind::Return { expr }, pos.clone())) + } + ast::StmtKind::Break { expr } => { + let expr = expr.as_ref().map(|expr| self.lower_expr(expr)); + self.intern_stmt(Stmt::new(StmtKind::Return { expr }, pos.clone())) + } + ast::StmtKind::Assign { subject, value } => { + let subject = self.lower_expr(subject); + let value = self.lower_expr(value); + self.intern_stmt(Stmt::new(StmtKind::Assign { subject, value }, pos.clone())) + } + ast::StmtKind::Expr(expr) => { + let expr = self.lower_expr(expr); + self.intern_stmt(Stmt::new(StmtKind::Expr(expr), pos.clone())) + } + } + } + + fn lower_expr(&mut self, expr: &ast::Expr) -> ExprId { + let ast::Expr { kind, pos } = expr; + match kind { + ast::ExprKind::Error => self.intern_expr(Expr::new(ExprKind::Error, pos.clone())), + ast::ExprKind::Ident(ident_id) => { + let Some(owner) = self.rib().ident_owner(&self.ribs, *ident_id) else { + println!("ribs: {:#?}", self.ribs); + self.error_acc.lock().unwrap().add(Error { + kind: ErrorKind::Checker, + pos: Some(pos.clone()), + msg: format!("undefined identifier '{ident_id}'"), + }); + return self.intern_expr(Expr::new(ExprKind::Error, pos.clone())); + }; + self.intern_expr(Expr::new( + ExprKind::Ident { + ident_id: *ident_id, + owner, + }, + pos.clone(), + )) + } + ast::ExprKind::Int(value) => { + self.intern_expr(Expr::new(ExprKind::Int(*value), pos.clone())) + } + ast::ExprKind::Str(value) => { + self.intern_expr(Expr::new(ExprKind::Str(value.clone()), pos.clone())) + } + ast::ExprKind::Group(expr) => self.lower_expr(expr), + ast::ExprKind::Block { stmts, expr } => { + let outer_rib = self.rib; + self.rib = self.make_rib(); + let stmts = stmts + .iter() + .map(|stmt| self.lower_stmt(stmt)) + .collect::>(); + let expr = expr.as_ref().map(|expr| self.lower_expr(expr)); + self.rib = outer_rib; + self.intern_expr(Expr::new(ExprKind::Block { stmts, expr }, pos.clone())) + } + ast::ExprKind::Call { subject, args } => { + let subject = self.lower_expr(subject); + let args = args + .iter() + .map(|arg| self.lower_expr(arg)) + .collect::>(); + self.intern_expr(Expr::new(ExprKind::Call { subject, args }, pos.clone())) + } + ast::ExprKind::If { + cond, + truthy, + falsy, + } => { + let cond = self.lower_expr(cond); + let truthy = self.lower_expr(truthy); + let falsy = falsy.as_ref().map(|expr| self.lower_expr(expr)); + self.intern_expr(Expr::new( + ExprKind::If { + cond, + truthy, + falsy, + }, + pos.clone(), + )) + } + ast::ExprKind::Loop { body } => { + let body = self.lower_expr(body); + self.intern_expr(Expr::new(ExprKind::Loop { body }, pos.clone())) + } + } + } + + fn lower_typ(&mut self, typ: &ast::Typ) -> TypId { + let ast::Typ { kind, pos } = typ; + match kind { + ast::TypKind::Error => self.intern_typ(Typ::new(TypKind::Error, pos.clone())), + ast::TypKind::Ident(ident_id) => { + let Some(owner) = self.rib().ident_owner(&self.ribs, *ident_id) else { + todo!(); + }; + self.intern_typ(Typ::new( + TypKind::Ident { + ident_id: *ident_id, + owner, + }, + pos.clone(), + )) + } + } + } + + fn lower_param(&mut self, param: &ast::Param, owner_id: OwnerId) -> ParamId { + let ast::Param { subject, typ, pos } = param; + let ast::ExprKind::Ident(ident_id) = subject.kind else { + unreachable!(); + }; + let typ = typ.as_ref().map(|typ| self.lower_typ(typ.as_ref())); + let param_id = self.intern_param(Param { + subject_ident_id: ident_id, + typ, + pos: pos.clone(), + }); + self.rib_mut().define_param(ident_id, owner_id, param_id); + param_id + } + + fn intern_stmt(&mut self, stmt: Stmt) -> StmtId { + match self.stmts.iter().position(|v| *v == stmt) { + Some(id) => StmtId(id), + None => { + let id = self.stmts.len(); + self.stmts.push(stmt); + StmtId(id) + } + } + } + + fn intern_expr(&mut self, expr: Expr) -> ExprId { + match self.exprs.iter().position(|v| *v == expr) { + Some(id) => ExprId(id), + None => { + let id = self.exprs.len(); + self.exprs.push(expr); + ExprId(id) + } + } + } + + fn intern_typ(&mut self, typ: Typ) -> TypId { + match self.typs.iter().position(|v| *v == typ) { + Some(id) => TypId(id), + None => { + let id = self.typs.len(); + self.typs.push(typ); + TypId(id) + } + } + } + + fn intern_param(&mut self, param: Param) -> ParamId { + match self.params.iter().position(|v| *v == param) { + Some(id) => ParamId(id), + None => { + let id = self.params.len(); + self.params.push(param); + ParamId(id) + } + } + } + + fn next_owner_id(&mut self) -> OwnerId { + let id = OwnerId(self.next_owner_id); + self.next_owner_id += 1; + id + } + + fn make_rib(&mut self) -> RibId { + let id = RibId(self.ribs.len()); + self.ribs.push(Rib::new(self.rib)); + id + } + + fn rib_mut(&mut self) -> &mut Rib { + &mut self.ribs[self.rib.0] + } + + fn rib(&self) -> &Rib { + &self.ribs[self.rib.0] + } +} + +#[derive(Clone, Copy, Debug)] +struct RibId(usize); + +#[derive(Debug)] +struct Rib { + parent: Option, + defs_owner: HashMap, + defs_params: HashMap>, +} + +impl Rib { + pub fn new(parent: RibId) -> Self { + Self { + parent: Some(parent), + defs_owner: HashMap::new(), + defs_params: HashMap::new(), + } + } + + pub fn new_root() -> Self { + Self { + parent: None, + defs_owner: HashMap::new(), + defs_params: HashMap::new(), + } + } + + pub fn define_item(&mut self, ident_id: u64, owner_id: OwnerId) -> Result<(), ()> { + if self.parent.is_some() && self.defs_owner.contains_key(&ident_id) { + return Err(()); + } + self.defs_owner.insert(ident_id, owner_id); + self.defs_params.insert(ident_id, None); + Ok(()) + } + + pub fn define_param(&mut self, ident_id: u64, owner_id: OwnerId, param_id: ParamId) { + self.defs_owner.insert(ident_id, owner_id); + self.defs_params.insert(ident_id, Some(param_id)); + } + + pub fn ident_owner(&self, ribs: &[Rib], ident_id: u64) -> Option { + if let Some(owner_id) = self.defs_owner.get(&ident_id) { + return Some(*owner_id); + } + let parent_id = self.parent?; + ribs[parent_id.0].ident_owner(ribs, ident_id) + } +} diff --git a/src/hir.rs b/src/hir.rs new file mode 100644 index 0000000..bc4a99b --- /dev/null +++ b/src/hir.rs @@ -0,0 +1,164 @@ +use std::collections::HashMap; + +use crate::pos::Pos; + +#[derive(Clone, PartialEq, Debug)] +pub struct Pack { + stmts: Vec, + exprs: Vec, + typs: Vec, + params: Vec, + owners: HashMap, +} + +impl<'pack> Pack { + pub fn new( + stmts: Vec, + exprs: Vec, + typs: Vec, + params: Vec, + owners: HashMap, + ) -> Self { + Self { + stmts, + exprs, + typs, + params, + owners, + } + } + + pub fn stmt(&'pack self, id: StmtId) -> &'pack Stmt { + &self.stmts[id.0] + } + + pub fn expr(&'pack self, id: ExprId) -> &'pack Expr { + &self.exprs[id.0] + } + + pub fn typ(&'pack self, id: TypId) -> &'pack Typ { + &self.typs[id.0] + } + + pub fn param(&'pack self, id: ParamId) -> &'pack Param { + &self.params[id.0] + } +} + +#[derive(Clone, Copy, PartialEq, Debug)] +pub struct StmtId(pub usize); + +#[derive(Clone, Copy, PartialEq, Debug)] +pub struct ExprId(pub usize); + +#[derive(Clone, Copy, PartialEq, Debug)] +pub struct TypId(pub usize); + +#[derive(Clone, Copy, PartialEq, Debug)] +pub struct ParamId(pub usize); + +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct OwnerId(pub u64); + +#[derive(Clone, PartialEq, Debug)] +pub struct Stmt { + pub kind: StmtKind, + pub pos: Pos, +} + +impl Stmt { + pub fn new(kind: StmtKind, pos: Pos) -> Self { + Self { kind, pos } + } +} + +#[derive(Clone, PartialEq, Debug)] +pub enum StmtKind { + Error, + Let { + subject: ParamId, + value: ExprId, + }, + Fn { + subject_ident_id: u64, + params: Vec, + return_typ: Option, + body: ExprId, + }, + Return { + expr: Option, + }, + Break { + expr: Option, + }, + Assign { + subject: ExprId, + value: ExprId, + }, + Expr(ExprId), +} + +#[derive(Clone, PartialEq, Debug)] +pub struct Expr { + pub kind: ExprKind, + pub pos: Pos, +} + +impl Expr { + pub fn new(kind: ExprKind, pos: Pos) -> Self { + Self { kind, pos } + } +} + +#[derive(Clone, PartialEq, Debug)] +pub enum ExprKind { + Error, + Ident { + ident_id: u64, + owner: OwnerId, + }, + Int(i64), + Str(String), + Group(ExprId), + Block { + stmts: Vec, + expr: Option, + }, + Call { + subject: ExprId, + args: Vec, + }, + If { + cond: ExprId, + truthy: ExprId, + falsy: Option, + }, + Loop { + body: ExprId, + }, +} + +#[derive(Clone, PartialEq, Debug)] +pub struct Typ { + pub kind: TypKind, + pub pos: Pos, +} + +impl Typ { + pub fn new(kind: TypKind, pos: Pos) -> Self { + Self { kind, pos } + } +} + +#[derive(Clone, PartialEq, Debug)] +pub enum TypKind { + Error, + Ident { ident_id: u64, owner: OwnerId }, +} + +#[derive(Clone, PartialEq, Debug)] +pub struct Param { + pub subject_ident_id: u64, + pub typ: Option, + pub pos: Pos, +} diff --git a/src/lexer.rs b/src/lexer.rs index b88a17f..460a7f3 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -12,6 +12,7 @@ pub struct Lexer<'a> { line: i64, col: i64, symbols: HashMap, + symbol_ids: HashMap, keywords: HashMap, error_acc: Rc>, } @@ -28,6 +29,7 @@ impl<'a> Lexer<'a> { line: 1, col: 1, symbols: HashMap::new(), + symbol_ids: HashMap::new(), keywords: Self::make_keywords(), error_acc, } @@ -71,8 +73,14 @@ impl<'a> Lexer<'a> { if let Some(kind) = self.keywords.get(&value) { return self.token(kind.clone(), pos); } - let id = self.symbols.len() as u64; - self.symbols.insert(id, value); + let id = if let Some(id) = self.symbol_ids.get(&value) { + *id + } else { + let id = self.symbols.len() as u64; + self.symbols.insert(id, value.clone()); + self.symbol_ids.insert(value, id); + id + }; break self.token_with_value( TokenKind::Ident, TokenValue::Ident(id), @@ -239,7 +247,7 @@ impl<'a> Lexer<'a> { fn error>(&mut self, msg: S, pos: Pos) { let msg = msg.into(); self.error_acc.lock().unwrap().add(Error { - kind: crate::pos::ErrorKind::LexerError, + kind: crate::pos::ErrorKind::Lexer, pos: Some(pos), msg, }); diff --git a/src/main.rs b/src/main.rs index 68e16b8..53bfb2e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,10 +2,13 @@ use std::{rc::Rc, sync::Mutex}; +use ast_lower::AstLower; use parser::Parser; use pos::{ErrorAcc, Pos}; mod ast; +mod ast_lower; +mod hir; mod lexer; mod parser; mod pos; @@ -19,10 +22,17 @@ fn main() { let error_acc = Rc::new(Mutex::new(ErrorAcc::new())); let mut parser = Parser::new(&text, error_acc.clone()); - let ast = parser.parse(); - println!("{ast:#?}"); + let mut ast_lower = AstLower::new(error_acc.clone()); + ast_lower.lower_ast(&ast); + let pack = ast_lower.pack_it_up(); + + println!("text: {text}"); + // println!("ast: {ast:#?}"); + // println!("hir: {pack:#?}"); + let symbols = parser.symbols(); + println!("symbols: {symbols:#?}"); let error_acc = error_acc.lock().unwrap(); if !error_acc.ok() { diff --git a/src/parser.rs b/src/parser.rs index 83b89c3..a5df8f1 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -490,7 +490,7 @@ impl<'a> Parser<'a> { fn error>(&mut self, msg: S, pos: Pos) { let msg = msg.into(); self.error_acc.lock().unwrap().add(Error { - kind: crate::pos::ErrorKind::ParserError, + kind: crate::pos::ErrorKind::Parser, pos: Some(pos), msg, }); diff --git a/src/pos.rs b/src/pos.rs index 446ff39..a9839ab 100644 --- a/src/pos.rs +++ b/src/pos.rs @@ -20,9 +20,9 @@ pub struct Error { #[derive(Clone, Debug)] pub enum ErrorKind { - LexerError, - ParserError, - CheckerError, + Lexer, + Parser, + Checker, } pub struct ErrorAcc {