This commit is contained in:
SimonFJ20 2024-05-29 15:08:07 +02:00
parent 63a0975020
commit 5f41342a16
11 changed files with 1391 additions and 498 deletions

11
example.yapp Normal file
View File

@ -0,0 +1,11 @@
fn b() {
a();
}
fn a() {
}

63
src/checked.rs Normal file
View File

@ -0,0 +1,63 @@
/// A checked AST node: the construct itself (`kind`) together with the type
/// attached to it (`typ`).
#[derive(Clone, PartialEq, Debug)]
pub struct Node {
/// Which construct this node is, including its child nodes.
pub kind: NodeKind,
/// Type recorded for this node.
pub typ: Type,
}
/// The constructs a checked [`Node`] can be. The variants mirror the parsed
/// AST, except that `Param::typ` holds a resolved [`Type`] instead of a node.
#[derive(Clone, PartialEq, Debug)]
pub enum NodeKind {
/// Placeholder for a construct that could not be parsed or checked.
Error,
/// Identifier, stored as a numeric id.
Id(u64),
/// Integer literal.
Int(i64),
/// String literal.
String(String),
/// Parenthesized expression.
Group(Box<Node>),
/// Sequence of statements.
Block(Vec<Node>),
/// Call of `subject` with `args`.
Call {
subject: Box<Node>,
args: Vec<Node>,
},
/// Conditional; `falsy` is the optional `else` branch.
If {
cond: Box<Node>,
truthy: Box<Node>,
falsy: Option<Box<Node>>,
},
Loop {
body: Box<Node>,
},
Break,
/// Assignment of `value` to `subject`.
Assign {
subject: Box<Node>,
value: Box<Node>,
},
/// `let` binding; `subject` is a `Param` node.
Let {
subject: Box<Node>,
value: Box<Node>,
},
/// Function definition.
Fn {
subject: Box<Node>,
params: Vec<Node>,
return_typ: Box<Node>,
body: Box<Node>,
},
/// `return`, with an optional value.
Return {
value: Option<Box<Node>>,
},
/// A name (`subject`) with an optional type.
Param {
subject: Box<Node>,
typ: Option<Type>,
},
}
/// Types that can be attached to a checked node.
#[derive(Clone, PartialEq, Debug)]
pub enum Type {
/// Type of a node whose checking failed.
Error,
Unit,
I32,
U32,
String,
/// Function type.
// NOTE(review): `id` presumably identifies the function symbol — confirm once
// function types are actually constructed.
Fn {
id: u64,
params: Vec<Node>,
return_typ: Box<Type>,
},
}

383
src/checker.rs Normal file
View File

@ -0,0 +1,383 @@
#![allow(unused_variables)]
use crate::{
checked::{Node, NodeKind, Type},
hash::hash,
itertewls::Itertewls,
parsed,
sym::Syms,
};
/// Turns the parsed AST into the checked AST; currently holds no state.
pub struct Checker {}
impl Checker {
pub fn new() -> Self {
Self {}
}
/// Checks every top-level statement of `ast`, in order, against one fresh
/// root scope and returns the checked nodes.
pub fn check(&mut self, ast: &Vec<parsed::Node>) -> Vec<Node> {
    let mut syms = Syms::new();
    let mut checked = Vec::new();
    for stmt in ast {
        checked.push(self.check_expr(stmt, &mut syms));
    }
    checked
}
/// Validates the signature of every `fn` node directly inside `ast`.
///
/// Returns `Err(())` when a parameter lacks a type annotation, when two
/// parameters share a name, or when the function name is already defined in
/// `syms`. Nodes other than `Fn` are ignored.
fn fn_scan<'syms>(
    &mut self,
    ast: &Vec<parsed::Node>,
    syms: &mut Syms<'syms>,
) -> Result<(), ()> {
    for node in ast {
        let parsed::Node::Fn {
            subject, params, ..
        } = node
        else {
            continue;
        };

        // Check each parameter, remembering its id for the duplicate test.
        let mut checked_params = Vec::new();
        for param in params {
            let parsed::Node::Param { subject, typ } = param else {
                unreachable!()
            };
            let parsed::Node::Id(id) = subject.as_ref() else {
                unreachable!()
            };
            let typ = self.check_type(typ.as_ref().ok_or(())?);
            let name = self.node(NodeKind::Id(*id), Type::Unit);
            let checked = self.node(
                NodeKind::Param {
                    subject: Box::new(name),
                    typ: Some(typ),
                },
                Type::Unit,
            );
            checked_params.push((*id, checked));
        }

        let has_duplicate = checked_params
            .iter()
            .map(|(id, _)| *id)
            .find_first_duplicate()
            .is_some();
        if has_duplicate {
            self.error("redefinition param");
            return Err(());
        }

        let parsed::Node::Id(id) = subject.as_ref() else {
            unreachable!()
        };
        if syms.defined_locally(*id) {
            self.error("redefinition fn");
            return Err(());
        }
        // TODO: the function symbol is not registered in `syms` yet.
    }
    Ok(())
}
/// Type-checks one parsed node and returns its checked counterpart.
///
/// `syms` is the scope used for identifier lookups and `let` definitions;
/// blocks and loop bodies are checked against a fresh child scope. Problems
/// are reported through `self.error`, and the affected node gets
/// `Type::Error` (or becomes a `NodeKind::Error` node).
///
/// # Panics
///
/// `Fn` nodes are not implemented yet (`todo!`). A `Param` node outside the
/// subject of a `Let`, or a `Let` whose subject is not `Param(Id)`, hits
/// `unreachable!`.
fn check_expr<'syms>(&mut self, node: &parsed::Node, syms: &mut Syms<'syms>) -> Node {
    match node {
        parsed::Node::Error => self.node(NodeKind::Error, Type::Unit),
        parsed::Node::Id(id) => {
            let Some(sym) = syms.get(*id) else {
                self.error("undefined >~<");
                return self.node(NodeKind::Error, Type::Error);
            };
            self.node(NodeKind::Id(*id), sym.typ)
        }
        parsed::Node::Int(value) => {
            // Compare in i64. Casting the literal to i32 first truncates it,
            // so the result could never exceed i32::MAX and U32 was never
            // selected.
            let typ = if *value > i64::from(i32::MAX) {
                Type::U32
            } else {
                Type::I32
            };
            self.node(NodeKind::Int(*value), typ)
        }
        parsed::Node::String(value) => self.node(NodeKind::String(value.clone()), Type::String),
        parsed::Node::Group(expr) => {
            let expr = self.check_expr(expr, syms);
            let typ = expr.typ.clone();
            self.node(NodeKind::Group(Box::new(expr)), typ)
        }
        parsed::Node::Block(stmts) => {
            let mut child_syms = syms.child();
            let stmts = stmts
                .into_iter()
                .map(|stmt| self.check_expr(stmt, &mut child_syms))
                .collect::<Vec<_>>();
            // A block has the type of its last statement, or Unit when empty.
            let typ = stmts
                .last()
                .map(|stmt| stmt.typ.clone())
                .unwrap_or(Type::Unit);
            self.node(NodeKind::Block(stmts), typ)
        }
        parsed::Node::Call { subject, args } => {
            let subject = Box::new(self.check_expr(subject, syms));
            let args = args
                .into_iter()
                .map(|arg| self.check_expr(arg, syms))
                .collect::<Vec<_>>();
            let typ = 'br: {
                match subject.typ.clone() {
                    Type::Fn {
                        id: _,
                        params,
                        return_typ,
                    } => {
                        if args.len() != params.len() {
                            self.error("too few/many args");
                            break 'br Type::Error;
                        }
                        // NOTE(review): this compares against the parameter
                        // node's own `typ`, not the type stored inside
                        // `NodeKind::Param` — confirm which one is intended
                        // once function types are constructed.
                        if args
                            .iter()
                            .zip(params)
                            .map(|(arg, param)| self.compatible(&arg.typ, &param.typ))
                            .any(|is_compatible| !is_compatible)
                        {
                            self.error("incorrect args");
                            break 'br Type::Error;
                        }
                        *return_typ
                    }
                    _ => {
                        self.error("not a function");
                        Type::Error
                    }
                }
            };
            self.node(NodeKind::Call { subject, args }, typ)
        }
        parsed::Node::If {
            cond,
            truthy,
            falsy,
        } => {
            let cond = Box::new(self.check_expr(cond, syms));
            let truthy = Box::new(self.check_expr(truthy, syms));
            let falsy = falsy
                .as_ref()
                .map(|block| Box::new(self.check_expr(block, syms)));
            // With an `else`, both branches must agree; without one the `if`
            // is Unit.
            let typ = 'br: {
                match falsy.as_ref().map(|block| block.typ.clone()) {
                    Some(falsy_typ) => {
                        if !self.compatible(&truthy.typ, &falsy_typ) {
                            self.error("incompatible types #2");
                            break 'br Type::Error;
                        }
                        falsy_typ
                    }
                    None => Type::Unit,
                }
            };
            self.node(
                NodeKind::If {
                    cond,
                    truthy,
                    falsy,
                },
                typ,
            )
        }
        parsed::Node::Loop { body } => {
            let body = Box::new(self.check_expr(body, &mut syms.child()));
            let typ = body.typ.clone();
            self.node(NodeKind::Loop { body }, typ)
        }
        parsed::Node::Break => self.node(NodeKind::Break, Type::Unit),
        parsed::Node::Assign { subject, value } => {
            let subject = Box::new(self.check_expr(subject, syms));
            let value = Box::new(self.check_expr(value, syms));
            let typ = if !self.compatible(&subject.typ, &value.typ) {
                self.error("incompatible types #3");
                Type::Error
            } else {
                subject.typ.clone()
            };
            self.node(NodeKind::Assign { subject, value }, typ)
        }
        parsed::Node::Let { subject, value } => {
            let (subject, subject_typ) = match subject.as_ref() {
                parsed::Node::Param { subject, typ } => {
                    (subject, typ.as_ref().map(|typ| self.check_type(typ)))
                }
                _ => unreachable!(),
            };
            let value = Box::new(self.check_expr(value, syms));
            let typ = value.typ.clone();
            // An explicit annotation, when present, must match the value.
            if subject_typ
                .as_ref()
                .map(|subject_typ| !self.compatible(subject_typ, &typ))
                .unwrap_or(false)
            {
                self.error("incompatible types #1");
                return self.node(NodeKind::Error, Type::Error);
            }
            let subject = match subject.as_ref() {
                parsed::Node::Id(id) => {
                    if syms.defined_locally(*id) {
                        self.error("redefinition");
                        return self.node(NodeKind::Error, Type::Error);
                    }
                    // The binding takes the type of its value.
                    syms.define(*id, typ.clone());
                    Box::new(self.node(
                        NodeKind::Param {
                            subject: Box::new(self.node(NodeKind::Id(*id), Type::Unit)),
                            typ: Some(Type::Unit),
                        },
                        Type::Unit,
                    ))
                }
                _ => unreachable!(),
            };
            self.node(NodeKind::Let { subject, value }, typ)
        }
        parsed::Node::Fn { .. } => {
            todo!("symbol lookup");
        }
        parsed::Node::Return { value } => {
            let value = value
                .as_ref()
                .map(|value| Box::new(self.check_expr(value, syms)));
            let typ = value
                .as_ref()
                .map(|value| value.typ.clone())
                .unwrap_or(Type::Unit);
            self.node(NodeKind::Return { value }, typ)
        }
        parsed::Node::Param { .. } => unreachable!("handle elsewhere"),
    }
}
/// Resolves a parsed type expression to a [`Type`].
///
/// Only the names `i32` and `u32` are resolved; any other identifier hits
/// `todo!`, and any node other than `Error` or `Id` is `unreachable!`.
fn check_type(&self, node: &parsed::Node) -> Type {
    match node {
        parsed::Node::Error => Type::Error,
        parsed::Node::Id(value) if *value == hash("i32") => Type::I32,
        parsed::Node::Id(value) if *value == hash("u32") => Type::U32,
        parsed::Node::Id(_) => todo!("symbol lookup idk"),
        _ => unreachable!(),
    }
}
/// Reports whether two types are compatible; currently plain equality.
fn compatible(&self, typ_a: &Type, typ_b: &Type) -> bool {
    typ_a.eq(typ_b)
}
/// Builds a checked node from its kind and type.
fn node(&self, kind: NodeKind, typ: Type) -> Node {
    let node = Node { kind, typ };
    node
}
/// Reports a checker error by printing it to stdout.
fn error<S: Into<String>>(&mut self, msg: S) {
    let text: String = msg.into();
    println!("checker error: {}", text);
}
}
/// Checks `let` bindings, identifier lookup, and block scoping.
#[test]
fn test_checker() {
    use crate::parser::Parser;
    use NodeKind::{Block, Id, Int, Let, Param};
    use Type::{Unit, I32};

    let check = |text: &str| Checker::new().check(&Parser::new(text).parse());

    // Expected node for `let <name> = 5;`.
    let let_five = |name: &str| Node {
        kind: Let {
            subject: Box::new(Node {
                kind: Param {
                    subject: Box::new(Node {
                        kind: Id(hash(name)),
                        typ: Unit,
                    }),
                    typ: Some(Unit),
                },
                typ: Unit,
            }),
            value: Box::new(Node {
                kind: Int(5),
                typ: I32,
            }),
        },
        typ: I32,
    };
    // Expected node for a use of an `i32` variable.
    let use_of = |name: &str| Node {
        kind: Id(hash(name)),
        typ: I32,
    };

    assert_eq!(check("let a = 5; a;"), vec![let_five("a"), use_of("a")]);

    // `b` is visible inside the block that defines it, but not after it.
    assert_eq!(
        check("let a = 5; a; { a; let b = 5; b; } a; b;"),
        vec![
            let_five("a"),
            use_of("a"),
            Node {
                kind: Block(vec![use_of("a"), let_five("b"), use_of("b")]),
                typ: I32,
            },
            use_of("a"),
            Node {
                kind: NodeKind::Error,
                typ: Type::Error,
            },
        ]
    );
}

7
src/hash.rs Normal file
View File

@ -0,0 +1,7 @@
use std::hash::{DefaultHasher, Hash, Hasher};
/// Hashes `value` with a freshly created [`DefaultHasher`] and returns the
/// 64-bit result.
///
/// Equal values give equal results within one build. The standard library
/// does not specify the algorithm behind `DefaultHasher`, so the numbers
/// should not be persisted.
pub fn hash<H: Hash>(value: H) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(&value, &mut state);
    Hasher::finish(&state)
}

29
src/itertewls.rs Normal file
View File

@ -0,0 +1,29 @@
// NOTE(review): nothing in the visible part of this file uses this enum;
// `find_first_duplicate` tracks seen items in a `Vec` instead. Presumably a
// leftover from an earlier design — confirm before removing.
enum Duplicate<T: std::hash::Hash> {
None(std::collections::HashMap<T, T>),
Found(T),
}
/// Extra iterator adaptors.
pub trait Itertewls<T>
where
    Self: Iterator<Item = T> + Sized,
{
    /// Consumes the iterator and returns the first item that equals an item
    /// seen earlier, or `None` when all items are distinct.
    fn find_first_duplicate(self) -> Option<T>;
}

/// Blanket implementation for every iterator whose items can be compared.
///
/// Only `PartialEq` is required: items are moved, never cloned. Seen items
/// are kept in a `Vec` and searched linearly, so the cost is quadratic in the
/// number of distinct items.
impl<I, Item> Itertewls<Item> for I
where
    I: Iterator<Item = Item> + Sized,
    Item: PartialEq,
{
    fn find_first_duplicate(mut self) -> Option<Item> {
        // `try_fold` stops at the first `Err`, which carries the duplicate.
        self.try_fold(Vec::new(), |mut used, item| {
            if used.contains(&item) {
                Err(item)
            } else {
                used.push(item);
                Ok(used)
            }
        })
        .err()
    }
}

279
src/lexer.rs Normal file
View File

@ -0,0 +1,279 @@
use crate::{
hash::hash,
token::{Token, TokenKind, TokenValue},
};
use std::{collections::HashMap, str::Chars};
/// Splits source text into tokens; also usable as an iterator of `Token`.
pub struct Lexer<'a> {
/// The source text the lexer was created with.
text: &'a str,
/// Remaining characters after `current`.
chars: Chars<'a>,
/// Character currently being looked at, `None` at end of input.
current: Option<char>,
/// Advanced by one for each character stepped to.
index: usize,
/// 1-based line of `current`.
line: i32,
/// 1-based column of `current`.
col: i32,
/// Identifier names seen so far, keyed by their hashed id.
symbols: HashMap<u64, String>,
/// Reserved words and the token kind each one produces.
keywords: HashMap<String, TokenKind>,
}
impl<'a> Lexer<'a> {
/// Creates a lexer positioned on the first character of `text`.
pub fn new(text: &'a str) -> Self {
    let mut lexer = Self {
        text,
        chars: text.chars(),
        current: None,
        index: 0,
        line: 1,
        col: 1,
        symbols: HashMap::new(),
        keywords: Self::make_keywords(),
    };
    // Prime the one-character lookahead.
    lexer.current = lexer.chars.next();
    lexer
}
/// Builds the table mapping each reserved word to its token kind.
fn make_keywords() -> HashMap<String, TokenKind> {
    let table = [
        ("if", TokenKind::If),
        ("else", TokenKind::Else),
        ("loop", TokenKind::Loop),
        ("break", TokenKind::Break),
        ("let", TokenKind::Let),
        ("fn", TokenKind::Fn),
        ("return", TokenKind::Return),
    ];
    let mut keywords = HashMap::new();
    for (word, kind) in table.iter() {
        keywords.insert(word.to_string(), kind.clone());
    }
    keywords
}
pub fn symbols(self) -> HashMap<u64, String> {
self.symbols
}
fn next_token(&mut self) -> Option<Token> {
match self.current {
None => None,
Some(' ' | '\t' | '\n') => {
self.step();
self.next_token()
}
Some(ch @ ('a'..='z' | 'A'..='Z' | '_')) => {
let mut value = String::new();
value.push(ch);
self.step();
loop {
match self.current {
Some(ch @ ('a'..='z' | 'A'..='Z' | '0'..='9' | '_')) => {
value.push(ch);
self.step();
}
_ => {
if let Some(kind) = self.keywords.get(&value) {
return self.token(kind.clone());
}
let id = hash(&value);
self.symbols.insert(id, value);
break self.token_with_value(TokenKind::Id, TokenValue::Id(id));
}
}
}
}
Some(ch @ ('1'..='9')) => {
let mut value = String::new();
value.push(ch);
self.step();
loop {
match self.current {
Some(ch @ ('0'..='9' | '_')) => {
value.push(ch);
self.step();
}
_ => {
let value = value.replace('_', "").parse::<i64>().unwrap();
break self.token_with_value(TokenKind::Int, TokenValue::Int(value));
}
}
}
}
Some('"') => {
self.step();
let mut value = String::new();
loop {
match self.current {
Some('\\') => {
self.step();
match self.current {
Some('n') => value.push('\n'),
Some('r') => value.push('\r'),
Some('t') => value.push('\t'),
Some('0') => value.push('\0'),
Some(ch) => value.push(ch),
None => {
self.error("malformed string");
break self.token(TokenKind::Error);
}
}
}
Some('"') => {
self.step();
break self
.token_with_value(TokenKind::String, TokenValue::String(value));
}
Some(ch) => {
value.push(ch);
}
_ => {
self.error("malformed string");
break self.token(TokenKind::Error);
}
}
self.step()
}
}
Some('/') => {
self.step();
match self.current {
Some('/') => {
self.step();
loop {
match self.current {
None | Some('\n') => break self.next_token(),
_ => {
self.step();
}
}
}
}
Some('*') => {
self.step();
let mut lch = self.current;
loop {
match (lch, self.current) {
(Some('*'), Some('/')) => break self.next_token(),
(_, Some(ch)) => {
lch = Some(ch);
self.step();
}
(_, None) => {
self.error("malformed /**/ comment");
break self.token(TokenKind::Error);
}
}
}
}
_ => todo!(),
}
}
Some('-') => {
self.step();
match self.current {
Some('>') => {
self.step();
self.token(TokenKind::MinusLt)
}
_ => todo!(),
}
}
Some(ch @ ('0' | '(' | ')' | '{' | '}' | ':' | ',' | ';' | '=')) => {
self.step();
match ch {
'0' => self.token_with_value(TokenKind::Int, TokenValue::Int(0)),
'(' => self.token(TokenKind::LParen),
')' => self.token(TokenKind::RParen),
'{' => self.token(TokenKind::LBrace),
'}' => self.token(TokenKind::RBrace),
':' => self.token(TokenKind::Colon),
',' => self.token(TokenKind::Comma),
';' => self.token(TokenKind::Semicolon),
'=' => self.token(TokenKind::Equal),
_ => unreachable!(),
}
}
Some(ch) => {
self.error(format!("unknown char '{ch}'"));
self.token(TokenKind::Error)
}
}
}
/// Advances to the next character, updating line, column and index.
fn step(&mut self) {
    // Position bookkeeping is based on the character being left behind.
    if let Some(ch) = self.current {
        if ch == '\n' {
            self.line += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
    }
    self.current = self.chars.next();
    // The index only moves while there is still a character to look at.
    match self.current {
        Some(_) => self.index += 1,
        None => {}
    }
}
/// Builds a value-less token of `kind` at the lexer's current position.
fn token(&self, kind: TokenKind) -> Option<Token> {
    let token = Token {
        kind,
        value: TokenValue::None,
        index: self.index,
        line: self.line,
        col: self.col,
    };
    Some(token)
}
/// Builds a token of `kind` carrying `value` at the lexer's current position.
fn token_with_value(&self, kind: TokenKind, value: TokenValue) -> Option<Token> {
    let (index, line, col) = (self.index, self.line, self.col);
    Some(Token {
        kind,
        value,
        index,
        line,
        col,
    })
}
/// Reports a lexer error, with the current line, by printing it to stdout.
fn error<S: Into<String>>(&mut self, msg: S) {
    let msg: String = msg.into();
    let line = self.line;
    println!("lexer error: {msg}, line {line}")
}
/// Reports whether the end of the input has been reached.
fn done(&self) -> bool {
    matches!(self.current, None)
}
}
impl Iterator for Lexer<'_> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
self.next_token()
}
}
/// Each input must lex to exactly one token with the given kind and value.
#[test]
fn test_lexer() {
    use TokenKind as TK;
    use TokenValue as TV;

    let lex = |text: &str| {
        Lexer::new(text)
            .map(|token| (token.kind, token.value))
            .collect::<Vec<_>>()
    };

    let cases = vec![
        ("abc", (TK::Id, TV::Id(hash("abc")))),
        ("123", (TK::Int, TV::Int(123))),
        ("\"\"", (TK::String, TV::String("".to_string()))),
        ("\"hello\"", (TK::String, TV::String("hello".to_string()))),
        (
            "\"new\\nline\"",
            (TK::String, TV::String("new\nline".to_string())),
        ),
        (
            "\"backslash\\\\\"",
            (TK::String, TV::String("backslash\\".to_string())),
        ),
        ("->", (TK::MinusLt, TV::None)),
        ("let", (TK::Let, TV::None)),
    ];
    for (text, expected) in cases {
        assert_eq!(lex(text), vec![expected]);
    }
}

View File

@ -1,503 +1,14 @@
#![allow(dead_code)] #![allow(dead_code)]
use std::{collections::HashMap, str::Chars}; mod checked;
mod checker;
#[derive(Clone, PartialEq, Debug)] mod hash;
enum Expr { mod itertewls;
Error, mod lexer;
Id(u64), mod parsed;
Int(i64), mod parser;
String(String), mod sym;
Block(Vec<Expr>), mod token;
Call {
subject: Box<Expr>,
args: Vec<Expr>,
},
If {
cond: Box<Expr>,
truthy: Box<Expr>,
falsy: Option<Box<Expr>>,
},
Loop {
body: Box<Expr>,
},
Break,
Assign {
subject: Box<Expr>,
value: Box<Expr>,
},
Let {
subject: Box<Expr>,
value: Box<Expr>,
},
Fn {
subject: Box<Expr>,
value: Box<Expr>,
},
Return {
value: Option<Box<Expr>>,
},
}
struct Parser<'a> {
lexer: Lexer<'a>,
current: Option<Token>,
}
impl<'a> Parser<'a> {
pub fn new(text: &'a str) -> Self {
let mut lexer = Lexer::new(text);
let current = lexer.next();
Self { lexer, current }
}
pub fn parse(&mut self) -> Vec<Expr> {
self.parse_file()
}
fn parse_file(&mut self) -> Vec<Expr> {
let mut stmts = Vec::new();
loop {
match self.current {
Some(_) => stmts.push(self.parse_stmt()),
None => break stmts,
}
}
}
fn parse_stmt(&mut self) -> Expr {
match self.curr_kind() {
Some(TokenKind::If) => todo!(),
Some(TokenKind::Loop) => todo!(),
Some(TokenKind::Fn) => todo!(),
_ => {
let stmt = match self.curr_kind() {
Some(TokenKind::Let) => todo!(),
Some(TokenKind::Break) => {
self.step();
Expr::Break
}
Some(TokenKind::Return) => {
self.step();
let value = match self.curr_kind() {
Some(TokenKind::Semicolon) => None,
_ => Some(Box::new(self.parse_expr())),
};
Expr::Return { value }
}
_ => self.parse_assign(),
};
match self.curr_kind() {
Some(TokenKind::Semicolon) => {
self.step();
stmt
}
_ => {
self.error("expected ';'");
Expr::Error
}
}
}
}
}
fn parse_assign(&mut self) -> Expr {
let subject = self.parse_expr();
match self.curr_kind() {
Some(TokenKind::Equal) => {
self.step();
let value = self.parse_expr();
Expr::Assign {
subject: Box::new(subject),
value: Box::new(value),
}
}
_ => subject,
}
}
fn parse_expr(&mut self) -> Expr {
self.parse_call()
}
fn parse_call(&mut self) -> Expr {
let mut subject = self.parse_value();
loop {
match self.curr_kind() {
Some(TokenKind::LParen) => {
self.step();
let mut args = Vec::new();
match self.curr_kind() {
None | Some(TokenKind::LParen) => {}
Some(_) => {
args.push(self.parse_expr());
while let Some(TokenKind::Comma) = self.curr_kind() {
self.step();
if let Some(TokenKind::RParen) = self.curr_kind() {
break;
}
args.push(self.parse_expr());
}
}
}
match self.curr_kind() {
Some(TokenKind::RParen) => {}
_ => {
self.error("expected ')'");
return Expr::Error;
}
}
self.step();
subject = Expr::Call {
subject: Box::new(subject),
args,
};
}
_ => break subject,
}
}
}
fn parse_value(&mut self) -> Expr {
match self.curr_kind() {
Some(TokenKind::Id) => {
let Some(Token {
value: TokenValue::Id(value),
..
}) = self.current
else {
unreachable!()
};
self.step();
Expr::Id(value)
}
Some(TokenKind::Int) => {
let Some(Token {
value: TokenValue::Int(value),
..
}) = self.current
else {
unreachable!()
};
self.step();
Expr::Int(value)
}
Some(TokenKind::String) => {
self.step();
let Some(Token {
value: TokenValue::String(value),
..
}) = self.current.clone()
else {
unreachable!()
};
Expr::String(value.clone())
}
_ => {
self.step();
self.error("expected value");
Expr::Error
}
}
}
fn error<S: Into<String>>(&mut self, msg: S) {
let msg = msg.into();
println!(
"parser error: {msg}, line {}",
self.current
.as_ref()
.map(|t| t.line.to_string())
.unwrap_or_else(|| "-".to_string())
)
}
fn step(&mut self) {
self.current = self.lexer.next();
}
fn curr_kind(&self) -> Option<TokenKind> {
self.current.as_ref().map(|t| t.kind.clone())
}
}
#[test]
fn test_parser() {
use Expr::*;
assert_eq!(Parser::new("abc;").parse(), vec![Id(0)]);
assert_eq!(Parser::new("123;").parse(), vec![Int(123)]);
assert_eq!(Parser::new("0;").parse(), vec![Int(0)]);
assert_eq!(Parser::new("0;abc;").parse(), vec![Int(0), Id(0)]);
assert_eq!(
Parser::new("add(mul(12, 34), 56);").parse(),
vec![Call {
subject: Box::new(Id(0)),
args: vec![
Call {
subject: Box::new(Id(1)),
args: vec![Int(12), Int(34)]
},
Int(56)
]
}]
);
assert_eq!(Parser::new("break;").parse(), vec![Break]);
assert_eq!(Parser::new("return;").parse(), vec![Return { value: None }]);
assert_eq!(
Parser::new("return add(1, 2);").parse(),
vec![Return {
value: Some(Box::new(Call {
subject: Box::new(Id(0)),
args: vec![Int(1), Int(2)]
}))
}]
);
}
#[derive(Clone, PartialEq, Debug)]
struct Token {
kind: TokenKind,
value: TokenValue,
index: usize,
line: i32,
col: i32,
}
#[derive(Clone, PartialEq, Debug)]
enum TokenKind {
Error,
Id,
Int,
String,
If,
Loop,
Break,
Let,
Fn,
Return,
LParen,
RParen,
LBrace,
RBrace,
Comma,
Semicolon,
Equal,
}
#[derive(Clone, PartialEq, Debug)]
enum TokenValue {
None,
Id(u64),
Int(i64),
String(String),
}
struct Lexer<'a> {
text: &'a str,
chars: Chars<'a>,
current: Option<char>,
index: usize,
line: i32,
col: i32,
symbol_counter: u64,
symbols: HashMap<String, u64>,
keywords: HashMap<String, TokenKind>,
}
impl<'a> Lexer<'a> {
pub fn new(text: &'a str) -> Self {
let mut chars = text.chars();
let current = chars.next();
Self {
text,
chars,
current,
index: 0,
line: 1,
col: 1,
symbol_counter: 0,
symbols: HashMap::new(),
keywords: Self::make_keywords(),
}
}
fn make_keywords() -> HashMap<String, TokenKind> {
let mut keywords = HashMap::new();
keywords.insert("if".to_string(), TokenKind::If);
keywords.insert("loop".to_string(), TokenKind::Loop);
keywords.insert("break".to_string(), TokenKind::Break);
keywords.insert("let".to_string(), TokenKind::Let);
keywords.insert("fn".to_string(), TokenKind::Fn);
keywords.insert("return".to_string(), TokenKind::Return);
keywords
}
fn next_token(&mut self) -> Option<Token> {
match self.current {
None => None,
Some(' ' | '\t' | '\n') => {
self.step();
self.next_token()
}
Some(ch @ ('a'..='z' | 'A'..='Z' | '_')) => {
let mut value = String::new();
value.push(ch);
self.step();
loop {
match self.current {
Some(ch @ ('a'..='z' | 'A'..='Z' | '0'..='9' | '_')) => {
value.push(ch);
self.step();
}
_ => {
if let Some(kind) = self.keywords.get(&value) {
return self.token(kind.clone());
}
let id = self.symbol_counter;
self.symbol_counter += 1;
self.symbols.insert(value, id);
break self.token_with_value(TokenKind::Id, TokenValue::Id(id));
}
}
}
}
Some(ch @ ('1'..='9')) => {
let mut value = String::new();
value.push(ch);
self.step();
loop {
match self.current {
Some(ch @ ('0'..='9' | '_')) => {
value.push(ch);
self.step();
}
_ => {
let value = value.replace('_', "").parse::<i64>().unwrap();
break self.token_with_value(TokenKind::Int, TokenValue::Int(value));
}
}
}
}
Some('/') => {
self.step();
match self.current {
Some('/') => {
self.step();
loop {
match self.current {
None | Some('\n') => break self.next_token(),
_ => {
self.step();
}
}
}
}
Some('*') => {
self.step();
let mut lch = self.current;
loop {
match (lch, self.current) {
(Some('*'), Some('/')) => break self.next_token(),
(_, Some(ch)) => {
lch = Some(ch);
self.step();
}
(_, None) => {
self.error("malformed /**/ comment");
break self.token(TokenKind::Error);
}
}
}
}
_ => todo!(),
}
}
Some(ch @ ('0' | '(' | ')' | '{' | '}' | ',' | ';' | '=')) => {
self.step();
match ch {
'0' => self.token_with_value(TokenKind::Int, TokenValue::Int(0)),
'(' => self.token(TokenKind::LParen),
')' => self.token(TokenKind::RParen),
'{' => self.token(TokenKind::LBrace),
'}' => self.token(TokenKind::RBrace),
',' => self.token(TokenKind::Comma),
';' => self.token(TokenKind::Semicolon),
'=' => self.token(TokenKind::Equal),
_ => unreachable!(),
}
}
_ => todo!(),
}
}
fn step(&mut self) {
match self.current {
Some('\n') => {
self.line += 1;
self.col = 1;
}
Some(_) => {
self.col += 1;
}
_ => {}
}
self.current = self.chars.next();
if self.current.is_some() {
self.index += 1;
}
}
fn token(&self, kind: TokenKind) -> Option<Token> {
Some(Token {
kind,
value: TokenValue::None,
index: self.index,
line: self.line,
col: self.col,
})
}
fn token_with_value(&self, kind: TokenKind, value: TokenValue) -> Option<Token> {
Some(Token {
kind,
value,
index: self.index,
line: self.line,
col: self.col,
})
}
fn error<S: Into<String>>(&mut self, msg: S) {
let msg = msg.into();
println!("lexer error: {msg}, line {}", self.line)
}
fn done(&self) -> bool {
self.current.is_none()
}
}
impl Iterator for Lexer<'_> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
self.next_token()
}
}
#[test]
fn test_lexer() {
assert_eq!(
Lexer::new("123").collect::<Vec<_>>(),
vec![Token {
kind: TokenKind::Int,
value: TokenValue::Int(123),
index: 2,
line: 1,
col: 4
}]
);
}
fn main() { fn main() {
println!("Hello, world!"); println!("Hello, world!");

43
src/parsed.rs Normal file
View File

@ -0,0 +1,43 @@
/// Untyped AST node produced by the parser.
#[derive(Clone, PartialEq, Debug)]
pub enum Node {
/// Placeholder emitted where parsing failed.
Error,
/// Identifier, stored as a numeric id.
Id(u64),
/// Integer literal.
Int(i64),
/// String literal.
String(String),
/// Parenthesized expression.
Group(Box<Node>),
/// `{ ... }` sequence of statements.
Block(Vec<Node>),
/// Call of `subject` with `args`.
Call {
subject: Box<Node>,
args: Vec<Node>,
},
/// Conditional; `falsy` is the optional `else` block.
If {
cond: Box<Node>,
truthy: Box<Node>,
falsy: Option<Box<Node>>,
},
Loop {
body: Box<Node>,
},
Break,
/// Assignment of `value` to `subject`.
Assign {
subject: Box<Node>,
value: Box<Node>,
},
/// `let` binding; `subject` is a `Param` node.
Let {
subject: Box<Node>,
value: Box<Node>,
},
/// Function definition; `params` are `Param` nodes.
Fn {
subject: Box<Node>,
params: Vec<Node>,
return_typ: Box<Node>,
body: Box<Node>,
},
/// `return`, with an optional value.
Return {
value: Option<Box<Node>>,
},
/// A name (`subject`) with an optional type expression.
Param {
subject: Box<Node>,
typ: Option<Box<Node>>,
},
}

478
src/parser.rs Normal file
View File

@ -0,0 +1,478 @@
use std::collections::HashMap;
use crate::{
lexer::Lexer,
parsed::Node,
token::{Token, TokenKind, TokenValue},
};
/// Parser that pulls tokens from a `Lexer` with one token of lookahead.
pub struct Parser<'a> {
/// Token source.
lexer: Lexer<'a>,
/// Lookahead token; `None` once the lexer is exhausted.
current: Option<Token>,
}
impl<'a> Parser<'a> {
/// Creates a parser over `text` with the first token already loaded.
pub fn new(text: &'a str) -> Self {
    let mut parser = Self {
        lexer: Lexer::new(text),
        current: None,
    };
    parser.current = parser.lexer.next();
    parser
}
/// Parses the whole input and returns its top-level statements.
pub fn parse(&mut self) -> Vec<Node> {
    let stmts = self.parse_file();
    stmts
}
pub fn symbols(self) -> HashMap<u64, String> {
self.lexer.symbols()
}
/// Parses statements until the token stream is exhausted.
fn parse_file(&mut self) -> Vec<Node> {
    let mut stmts = Vec::new();
    while self.current.is_some() {
        stmts.push(self.parse_stmt());
    }
    stmts
}
/// Parses one statement.
///
/// Blocks, `if`, `loop` and `fn` stand on their own. Every other statement
/// (`let`, `break`, `return`, assignment or expression) must be followed by
/// `;`, otherwise an error is reported and `Node::Error` is returned.
fn parse_stmt(&mut self) -> Node {
    let kind = self.curr_kind();

    // Statements that take no terminating ';'.
    match kind {
        Some(TokenKind::LBrace) => return self.parse_block(),
        Some(TokenKind::If) => return self.parse_if(),
        Some(TokenKind::Loop) => return self.parse_loop(),
        Some(TokenKind::Fn) => return self.parse_fn(),
        _ => {}
    }

    let stmt = match kind {
        Some(TokenKind::Let) => self.parse_let(),
        Some(TokenKind::Break) => {
            self.step();
            Node::Break
        }
        Some(TokenKind::Return) => {
            self.step();
            let value = if self.curr_is(TokenKind::Semicolon) {
                None
            } else {
                Some(Box::new(self.parse_expr()))
            };
            Node::Return { value }
        }
        _ => self.parse_assign(),
    };

    if self.curr_is(TokenKind::Semicolon) {
        self.step();
        stmt
    } else {
        self.error("expected ';'");
        Node::Error
    }
}
/// Parses `fn <id> ( <params> ) -> <type> { ... }`; the current token is
/// `fn`. Reports an error and returns `Node::Error` at the first missing
/// piece.
fn parse_fn(&mut self) -> Node {
    self.step();

    if self.curr_kind() != Some(TokenKind::Id) {
        self.error("expected id");
        return Node::Error;
    }
    let subject = Box::new(self.parse_id());

    if self.curr_kind() != Some(TokenKind::LParen) {
        self.error("expected '('");
        return Node::Error;
    }
    let params = match self.parse_fn_params() {
        Err(error_node) => return error_node,
        Ok(list) => list,
    };

    if self.curr_kind() != Some(TokenKind::MinusLt) {
        self.error("expected '->'");
        return Node::Error;
    }
    self.step();
    let return_typ = Box::new(self.parse_typ());

    if self.curr_kind() != Some(TokenKind::LBrace) {
        self.error("expected '{'");
        return Node::Error;
    }
    let body = Box::new(self.parse_block());

    Node::Fn {
        subject,
        params,
        return_typ,
        body,
    }
}
/// Parses a parenthesized, comma-separated parameter list; the current token
/// is `(`. A trailing comma before `)` is accepted, as in call arguments.
///
/// Returns the parameter nodes, or `Err(Node::Error)` after reporting an
/// error when a parameter does not start with an identifier or the closing
/// `)` is missing.
fn parse_fn_params(&mut self) -> Result<Vec<Node>, Node> {
    self.step();
    let mut params = Vec::new();
    if !self.curr_is(TokenKind::RParen) {
        // A second "is it ')'" test used to sit here and rejected every
        // non-empty parameter list.
        if !self.curr_is(TokenKind::Id) {
            self.error("expected id");
            return Err(Node::Error);
        }
        params.push(self.parse_param());
        while self.curr_is(TokenKind::Comma) {
            self.step();
            if self.curr_is(TokenKind::RParen) {
                // Trailing comma.
                break;
            }
            // `parse_param` requires the current token to be an identifier.
            if !self.curr_is(TokenKind::Id) {
                self.error("expected id");
                return Err(Node::Error);
            }
            params.push(self.parse_param());
        }
    }
    if !self.curr_is(TokenKind::RParen) {
        self.error("expected ')'");
        return Err(Node::Error);
    }
    self.step();
    Ok(params)
}
/// Parses `let <param> = <expr>`; the current token is `let`. The trailing
/// `;` is left for the caller.
fn parse_let(&mut self) -> Node {
    self.step();
    if self.curr_kind() != Some(TokenKind::Id) {
        self.error("expected id");
        return Node::Error;
    }
    let subject = Box::new(self.parse_param());
    if self.curr_kind() != Some(TokenKind::Equal) {
        self.error("expected '='");
        return Node::Error;
    }
    self.step();
    let value = Box::new(self.parse_expr());
    Node::Let { subject, value }
}
/// Parses `<id>` or `<id> : <type>` into a `Node::Param`.
///
/// The current token must be an identifier; callers check this first.
fn parse_param(&mut self) -> Node {
    let subject = Box::new(self.parse_id());
    // The annotation is introduced by ':'. Testing for ',' here made
    // `a, b` parse as `a` annotated with the type `b`.
    let typ = if self.curr_is(TokenKind::Colon) {
        self.step();
        Some(Box::new(self.parse_typ()))
    } else {
        None
    };
    Node::Param { subject, typ }
}
/// Parses a type, which is currently a single identifier. Anything else is
/// reported, skipped, and returned as `Node::Error`.
fn parse_typ(&mut self) -> Node {
    if let Some(TokenKind::Id) = self.curr_kind() {
        return self.parse_id();
    }
    self.error("expected type");
    self.step();
    Node::Error
}
/// Parses an expression and, when it is followed by `=`, the assigned value.
fn parse_assign(&mut self) -> Node {
    let subject = self.parse_expr();
    if self.curr_kind() != Some(TokenKind::Equal) {
        return subject;
    }
    self.step();
    let value = self.parse_expr();
    Node::Assign {
        subject: Box::new(subject),
        value: Box::new(value),
    }
}
/// Parses an expression; the call level is the entry point of the
/// expression grammar.
fn parse_expr(&mut self) -> Node {
    let expr = self.parse_call();
    expr
}
/// Parses a value followed by any number of call suffixes `( args )`.
///
/// Arguments are comma-separated expressions; an empty list and a trailing
/// comma are both accepted. A missing `)` is reported and yields
/// `Node::Error`.
fn parse_call(&mut self) -> Node {
    let mut subject = self.parse_value();
    loop {
        match self.curr_kind() {
            Some(TokenKind::LParen) => {
                self.step();
                let mut args = Vec::new();
                match self.curr_kind() {
                    // An empty argument list ends with ')'. Testing for '('
                    // here made `f()` fail with "expected value".
                    None | Some(TokenKind::RParen) => {}
                    Some(_) => {
                        args.push(self.parse_expr());
                        while let Some(TokenKind::Comma) = self.curr_kind() {
                            self.step();
                            if let Some(TokenKind::RParen) = self.curr_kind() {
                                break;
                            }
                            args.push(self.parse_expr());
                        }
                    }
                }
                match self.curr_kind() {
                    Some(TokenKind::RParen) => {}
                    _ => {
                        self.error("expected ')'");
                        return Node::Error;
                    }
                }
                self.step();
                subject = Node::Call {
                    subject: Box::new(subject),
                    args,
                };
            }
            _ => break subject,
        }
    }
}
/// Parses a primary expression, dispatching on the current token. Any other
/// token is reported, skipped, and returned as `Node::Error`.
fn parse_value(&mut self) -> Node {
    let kind = self.curr_kind();
    match kind {
        Some(TokenKind::Id) => self.parse_id(),
        Some(TokenKind::Int) => self.parse_int(),
        Some(TokenKind::String) => self.parse_string(),
        Some(TokenKind::LParen) => self.parse_group(),
        Some(TokenKind::LBrace) => self.parse_block(),
        Some(TokenKind::If) => self.parse_if(),
        Some(TokenKind::Loop) => self.parse_loop(),
        _ => {
            self.error("expected value");
            self.step();
            Node::Error
        }
    }
}
/// Consumes the current identifier token. Callers must have checked the
/// token kind; anything else is `unreachable!`.
fn parse_id(&mut self) -> Node {
    let value = match self.current {
        Some(Token {
            kind: TokenKind::Id,
            value: TokenValue::Id(value),
            ..
        }) => value,
        _ => unreachable!(),
    };
    self.step();
    Node::Id(value)
}
/// Consumes the current integer token. Callers must have checked the token
/// kind; anything else is `unreachable!`.
fn parse_int(&mut self) -> Node {
    let value = match self.current {
        Some(Token {
            kind: TokenKind::Int,
            value: TokenValue::Int(value),
            ..
        }) => value,
        _ => unreachable!(),
    };
    self.step();
    Node::Int(value)
}
/// Consumes the current string token. Callers must have checked the token
/// kind; anything else is `unreachable!`.
fn parse_string(&mut self) -> Node {
    let value = match &self.current {
        Some(Token {
            kind: TokenKind::String,
            value: TokenValue::String(value),
            ..
        }) => value.clone(),
        _ => unreachable!(),
    };
    self.step();
    Node::String(value)
}
fn parse_group(&mut self) -> Node {
self.step();
let expr = Box::new(self.parse_expr());
if !self.curr_is(TokenKind::RParen) {
self.error("expected ')'");
return Node::Error;
}
self.step();
Node::Group(expr)
}
fn parse_block(&mut self) -> Node {
self.step();
let mut stmts = Vec::new();
loop {
match self.curr_kind() {
Some(TokenKind::RBrace) => {
self.step();
break Node::Block(stmts);
}
_ => stmts.push(self.parse_stmt()),
}
}
}
/// Parses `if <expr> { ... }` with an optional `else { ... }`; the current
/// token is `if`. A missing `{` is reported and yields `Node::Error`.
fn parse_if(&mut self) -> Node {
    self.step();
    let cond = Box::new(self.parse_expr());
    if !self.curr_is(TokenKind::LBrace) {
        // The token required here is the opening brace.
        self.error("expected '{'");
        return Node::Error;
    }
    let truthy = Box::new(self.parse_block());
    let falsy = match self.curr_kind() {
        Some(TokenKind::Else) => {
            self.step();
            if !self.curr_is(TokenKind::LBrace) {
                self.error("expected '{'");
                return Node::Error;
            }
            Some(Box::new(self.parse_block()))
        }
        _ => None,
    };
    Node::If {
        cond,
        truthy,
        falsy,
    }
}
/// Parses `loop { ... }`; the current token is `loop`. A missing `{` is
/// reported and yields `Node::Error`.
fn parse_loop(&mut self) -> Node {
    self.step();
    if !self.curr_is(TokenKind::LBrace) {
        // The token required here is the opening brace.
        self.error("expected '{'");
        return Node::Error;
    }
    let body = Box::new(self.parse_block());
    Node::Loop { body }
}
/// Reports a parser error by printing it to stdout with the line of the
/// current token, or `-` at end of input.
fn error<S: Into<String>>(&mut self, msg: S) {
    let msg = msg.into();
    let line = match &self.current {
        Some(token) => token.line.to_string(),
        None => "-".to_string(),
    };
    println!("parser error: {msg}, line {line}")
}
/// Advances the lookahead to the next token from the lexer.
fn step(&mut self) {
    let next = self.lexer.next();
    self.current = next;
}
/// Reports whether the current token exists and has the given kind.
fn curr_is(&self, kind: TokenKind) -> bool {
    match self.curr_kind() {
        Some(current) => current == kind,
        None => false,
    }
}
/// Returns the kind of the current token, or `None` at end of input.
fn curr_kind(&self) -> Option<TokenKind> {
    let token = self.current.as_ref()?;
    Some(token.kind.clone())
}
}
/// Parses small snippets and compares the resulting AST with the expected
/// one.
#[test]
fn test_parser() {
    use crate::hash::hash;
    use Node::*;

    let parse = |text: &str| Parser::new(text).parse();
    // Shorthand for boxing child nodes.
    fn bx<T>(v: T) -> Box<T> {
        Box::new(v)
    }

    // Literals and identifiers.
    assert_eq!(parse("abc;"), vec![Id(hash("abc"))]);
    assert_eq!(parse("123;"), vec![Int(123)]);
    assert_eq!(parse("\"hello\";"), vec![String("hello".to_string())]);
    assert_eq!(parse("0;"), vec![Int(0)]);
    assert_eq!(parse("0;abc;"), vec![Int(0), Id(hash("abc"))]);

    // Calls, including nested ones.
    assert_eq!(
        parse("add(mul(12, 34), 56);"),
        vec![Call {
            subject: bx(Id(hash("add"))),
            args: vec![
                Call {
                    subject: bx(Id(hash("mul"))),
                    args: vec![Int(12), Int(34)],
                },
                Int(56),
            ],
        }]
    );

    // Simple statements.
    assert_eq!(
        parse("a = 123;"),
        vec![Assign {
            subject: bx(Id(hash("a"))),
            value: bx(Int(123)),
        }]
    );
    assert_eq!(parse("break;"), vec![Break]);
    assert_eq!(parse("return;"), vec![Return { value: None }]);
    assert_eq!(
        parse("return add(1, 2);"),
        vec![Return {
            value: Some(bx(Call {
                subject: bx(Id(hash("add"))),
                args: vec![Int(1), Int(2)],
            })),
        }]
    );
    assert_eq!(
        parse("a = 5;"),
        vec![Assign {
            subject: bx(Id(hash("a"))),
            value: bx(Int(5)),
        }]
    );
    assert_eq!(
        parse("let a = 5;"),
        vec![Let {
            subject: bx(Param {
                subject: bx(Id(hash("a"))),
                typ: None,
            }),
            value: bx(Int(5)),
        }]
    );

    // Block-like statements.
    assert_eq!(
        parse("fn test() -> i32 {}"),
        vec![Fn {
            subject: bx(Id(hash("test"))),
            params: vec![],
            return_typ: bx(Id(hash("i32"))),
            body: bx(Block(vec![])),
        }]
    );
    assert_eq!(
        parse("if 0 {}"),
        vec![If {
            cond: bx(Int(0)),
            truthy: bx(Block(vec![])),
            falsy: None,
        }]
    );
    assert_eq!(
        parse("if 0 {} else {}"),
        vec![If {
            cond: bx(Int(0)),
            truthy: bx(Block(vec![])),
            falsy: Some(bx(Block(vec![]))),
        }]
    );
    assert_eq!(
        parse("loop {}"),
        vec![Loop {
            body: bx(Block(vec![])),
        }]
    );
}

49
src/sym.rs Normal file
View File

@ -0,0 +1,49 @@
#![allow(unused_variables)]
use std::collections::HashMap;
use crate::checked::Type;
/// A defined symbol: its id and the type it was defined with.
#[derive(Clone, PartialEq, Debug)]
pub struct Sym {
pub id: u64,
pub typ: Type,
}
/// One scope of the symbol table, optionally linked to an enclosing scope.
pub struct Syms<'syms> {
/// Enclosing scope consulted when a lookup misses locally; `None` for the root.
parent: Option<&'syms Syms<'syms>>,
/// Symbols defined directly in this scope, keyed by id.
map: HashMap<u64, Sym>,
}
impl<'syms> Syms<'syms> {
    /// Creates an empty root scope.
    pub fn new() -> Self {
        let map = HashMap::new();
        Self { parent: None, map }
    }

    /// Creates an empty scope whose lookups fall back to `self`.
    pub fn child(&'syms self) -> Self {
        let parent = Some(self);
        Self {
            parent,
            map: HashMap::new(),
        }
    }

    /// Looks `id` up in this scope and then in each enclosing scope,
    /// returning a clone of the first match.
    pub fn get(&self, id: u64) -> Option<Sym> {
        let mut scope = self;
        loop {
            if let Some(sym) = scope.map.get(&id) {
                return Some(sym.clone());
            }
            // Move outwards; running out of parents means "not found".
            scope = scope.parent?;
        }
    }

    /// Reports whether `id` is defined in this scope itself, ignoring
    /// enclosing scopes.
    pub fn defined_locally(&self, id: u64) -> bool {
        self.map.get(&id).is_some()
    }

    /// Defines `id` with type `typ` in this scope, replacing any previous
    /// local definition.
    pub fn define(&mut self, id: u64, typ: Type) {
        let sym = Sym { id, typ };
        self.map.insert(id, sym);
    }
}

40
src/token.rs Normal file
View File

@ -0,0 +1,40 @@
/// A lexed token with its payload and the lexer position recorded when it
/// was built.
#[derive(Clone, PartialEq, Debug)]
pub struct Token {
pub kind: TokenKind,
/// Payload for identifier, integer and string tokens; `TokenValue::None` otherwise.
pub value: TokenValue,
// NOTE(review): the lexer fills the three position fields after it has
// consumed the token's characters, so they appear to describe the position
// just past the token — confirm before using them for diagnostics.
pub index: usize,
pub line: i32,
pub col: i32,
}
/// The kinds of token the lexer can produce.
#[derive(Clone, PartialEq, Debug)]
pub enum TokenKind {
/// Produced for malformed input.
Error,
Id,
Int,
String,
If,
Else,
Loop,
Break,
Let,
Fn,
Return,
LParen,
RParen,
LBrace,
RBrace,
Colon,
Comma,
Semicolon,
Equal,
/// The arrow `->`: despite the name, the lexer emits this for `-` followed by `>`.
MinusLt,
}
/// Payload carried by a token.
#[derive(Clone, PartialEq, Debug)]
pub enum TokenValue {
/// The token has no payload.
None,
/// Identifier id.
Id(u64),
/// Value of an integer literal.
Int(i64),
/// Contents of a string literal, with escapes already resolved by the lexer.
String(String),
}