This commit is contained in:
SimonFJ20 2024-05-28 04:22:09 +02:00
commit 63a0975020
4 changed files with 520 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

7
Cargo.lock generated Normal file
View File

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "yapping"
version = "0.1.0"

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "yapping"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

504
src/main.rs Normal file
View File

@ -0,0 +1,504 @@
#![allow(dead_code)]
use std::{collections::HashMap, str::Chars};
/// A node of the syntax tree produced by `Parser`.
///
/// Statements and expressions share this one type; nested nodes are boxed.
#[derive(Clone, PartialEq, Debug)]
enum Expr {
/// Placeholder the parser returns after it has reported a syntax error.
Error,
/// Identifier, carried as the numeric id taken from the `Id` token.
Id(u64),
/// Integer literal.
Int(i64),
/// String literal.
String(String),
/// Sequence of expressions. No code visible in this file constructs it yet.
Block(Vec<Expr>),
/// Call of `subject` with the given `args`, e.g. `add(1, 2)`.
Call {
subject: Box<Expr>,
args: Vec<Expr>,
},
/// Conditional with an optional `falsy` branch. The parser branch for `if`
/// is still `todo!()`.
If {
cond: Box<Expr>,
truthy: Box<Expr>,
falsy: Option<Box<Expr>>,
},
/// Loop over `body`. The parser branch for `loop` is still `todo!()`.
Loop {
body: Box<Expr>,
},
/// `break` statement.
Break,
/// Assignment `subject = value`.
Assign {
subject: Box<Expr>,
value: Box<Expr>,
},
/// `let` binding. The parser branch for `let` is still `todo!()`.
Let {
subject: Box<Expr>,
value: Box<Expr>,
},
/// Function definition. The parser branch for `fn` is still `todo!()`.
Fn {
subject: Box<Expr>,
value: Box<Expr>,
},
/// `return` statement with an optional value.
Return {
value: Option<Box<Expr>>,
},
}
/// Parser state: a token source plus a single token of lookahead.
struct Parser<'a> {
/// Lexer the tokens are pulled from.
lexer: Lexer<'a>,
/// Token currently being examined; `None` once the lexer is exhausted.
current: Option<Token>,
}
/// Recursive-descent parsing methods.
///
/// Syntax errors are printed through `Parser::error` and show up in the
/// result as `Expr::Error`; `parse_file` keeps going until the lexer runs out
/// of tokens.
impl<'a> Parser<'a> {
    /// Creates a parser over `text` and primes it with the first token.
    pub fn new(text: &'a str) -> Self {
        let mut lexer = Lexer::new(text);
        let current = lexer.next();
        Self { lexer, current }
    }

    /// Parses the whole input and returns its statements in source order.
    pub fn parse(&mut self) -> Vec<Expr> {
        self.parse_file()
    }

    /// Parses statements until no token is left.
    fn parse_file(&mut self) -> Vec<Expr> {
        let mut stmts = Vec::new();
        while self.current.is_some() {
            stmts.push(self.parse_stmt());
        }
        stmts
    }

    /// Parses one statement.
    ///
    /// `if`, `loop`, `fn` and `let` are not implemented yet and panic via
    /// `todo!()`. Every other statement must be followed by `;`; when it is
    /// not, an error is reported and `Expr::Error` is returned instead of the
    /// parsed statement.
    fn parse_stmt(&mut self) -> Expr {
        match self.curr_kind() {
            // Kept apart from the `;`-terminated statements below, as in the
            // original layout (presumably because they end in a block).
            Some(TokenKind::If) => todo!(),
            Some(TokenKind::Loop) => todo!(),
            Some(TokenKind::Fn) => todo!(),
            _ => {
                let stmt = match self.curr_kind() {
                    Some(TokenKind::Let) => todo!(),
                    Some(TokenKind::Break) => {
                        self.step();
                        Expr::Break
                    }
                    Some(TokenKind::Return) => {
                        self.step();
                        // A bare `return;` carries no value.
                        let value = match self.curr_kind() {
                            Some(TokenKind::Semicolon) => None,
                            _ => Some(Box::new(self.parse_expr())),
                        };
                        Expr::Return { value }
                    }
                    _ => self.parse_assign(),
                };
                if matches!(self.curr_kind(), Some(TokenKind::Semicolon)) {
                    self.step();
                    stmt
                } else {
                    self.error("expected ';'");
                    Expr::Error
                }
            }
        }
    }

    /// Parses `expr` or `expr = expr`.
    fn parse_assign(&mut self) -> Expr {
        let subject = self.parse_expr();
        if !matches!(self.curr_kind(), Some(TokenKind::Equal)) {
            return subject;
        }
        self.step();
        let value = self.parse_expr();
        Expr::Assign {
            subject: Box::new(subject),
            value: Box::new(value),
        }
    }

    /// Parses an expression; currently the call level is the top level.
    fn parse_expr(&mut self) -> Expr {
        self.parse_call()
    }

    /// Parses a value followed by any number of call suffixes `(args...)`.
    ///
    /// An argument list may be empty and may end with a trailing comma.
    /// Returns `Expr::Error` after reporting when the closing `)` is missing.
    fn parse_call(&mut self) -> Expr {
        let mut subject = self.parse_value();
        while matches!(self.curr_kind(), Some(TokenKind::LParen)) {
            self.step();
            let mut args = Vec::new();
            // An immediate `)` (or end of input) means there are no arguments.
            // The original tested for `(` here, which made `f()` unparsable.
            if !matches!(self.curr_kind(), None | Some(TokenKind::RParen)) {
                args.push(self.parse_expr());
                while matches!(self.curr_kind(), Some(TokenKind::Comma)) {
                    self.step();
                    // A `)` right after a comma is a trailing comma.
                    if matches!(self.curr_kind(), Some(TokenKind::RParen)) {
                        break;
                    }
                    args.push(self.parse_expr());
                }
            }
            if !matches!(self.curr_kind(), Some(TokenKind::RParen)) {
                self.error("expected ')'");
                return Expr::Error;
            }
            self.step();
            subject = Expr::Call {
                subject: Box::new(subject),
                args,
            };
        }
        subject
    }

    /// Parses an identifier, integer or string token into its `Expr`.
    ///
    /// Any other token (or end of input) is reported as "expected value",
    /// skipped, and turned into `Expr::Error`.
    fn parse_value(&mut self) -> Expr {
        match self.curr_kind() {
            Some(TokenKind::Id) => {
                let Some(Token {
                    value: TokenValue::Id(value),
                    ..
                }) = self.current
                else {
                    unreachable!()
                };
                self.step();
                Expr::Id(value)
            }
            Some(TokenKind::Int) => {
                let Some(Token {
                    value: TokenValue::Int(value),
                    ..
                }) = self.current
                else {
                    unreachable!()
                };
                self.step();
                Expr::Int(value)
            }
            Some(TokenKind::String) => {
                // Take the payload out of the current token *before* stepping;
                // stepping first would inspect the following token instead.
                let Some(Token {
                    value: TokenValue::String(value),
                    ..
                }) = self.current.take()
                else {
                    unreachable!()
                };
                self.step();
                Expr::String(value)
            }
            _ => {
                // Report while the offending token is still current so the
                // printed line number belongs to it, then skip it.
                self.error("expected value");
                self.step();
                Expr::Error
            }
        }
    }

    /// Prints `msg` together with the line of the current token, or `-` when
    /// there is no current token.
    fn error<S: Into<String>>(&mut self, msg: S) {
        let msg = msg.into();
        let line = self
            .current
            .as_ref()
            .map_or_else(|| "-".to_string(), |t| t.line.to_string());
        println!("parser error: {msg}, line {line}")
    }

    /// Advances to the next token from the lexer.
    fn step(&mut self) {
        self.current = self.lexer.next();
    }

    /// Kind of the current token, or `None` at end of input.
    fn curr_kind(&self) -> Option<TokenKind> {
        self.current.as_ref().map(|t| t.kind.clone())
    }
}
/// Checks the parser on literals, identifiers, nested calls, `break` and
/// `return` statements.
#[test]
fn test_parser() {
    use Expr::*;

    // Small local helpers to keep the expectations readable.
    let parse = |src: &str| Parser::new(src).parse();
    let call = |subject: Expr, args: Vec<Expr>| Call {
        subject: Box::new(subject),
        args,
    };

    // Single values.
    assert_eq!(parse("abc;"), vec![Id(0)]);
    assert_eq!(parse("123;"), vec![Int(123)]);
    assert_eq!(parse("0;"), vec![Int(0)]);

    // Several statements in a row.
    assert_eq!(parse("0;abc;"), vec![Int(0), Id(0)]);

    // Nested calls.
    let inner = call(Id(1), vec![Int(12), Int(34)]);
    assert_eq!(
        parse("add(mul(12, 34), 56);"),
        vec![call(Id(0), vec![inner, Int(56)])]
    );

    // Keyword statements.
    assert_eq!(parse("break;"), vec![Break]);
    assert_eq!(parse("return;"), vec![Return { value: None }]);
    let returned = call(Id(0), vec![Int(1), Int(2)]);
    assert_eq!(
        parse("return add(1, 2);"),
        vec![Return {
            value: Some(Box::new(returned))
        }]
    );
}
/// A lexical token together with the lexer position recorded when it was built.
#[derive(Clone, PartialEq, Debug)]
struct Token {
/// Which kind of token this is.
kind: TokenKind,
/// Payload for identifier, integer and string tokens; `TokenValue::None` otherwise.
value: TokenValue,
/// Copy of the lexer's `index` counter at the time the token was created.
index: usize,
/// Copy of the lexer's `line` counter at the time the token was created.
line: i32,
/// Copy of the lexer's `col` counter at the time the token was created.
col: i32,
}
/// The kinds of token the lexer and parser distinguish.
#[derive(Clone, PartialEq, Debug)]
enum TokenKind {
/// Emitted by the lexer after it has reported a lexing error.
Error,
/// Identifier; the token value holds its numeric id.
Id,
/// Integer literal; the token value holds the number.
Int,
/// String literal. No lexer rule visible in this file produces it yet.
String,
/// Keyword `if`.
If,
/// Keyword `loop`.
Loop,
/// Keyword `break`.
Break,
/// Keyword `let`.
Let,
/// Keyword `fn`.
Fn,
/// Keyword `return`.
Return,
/// `(`
LParen,
/// `)`
RParen,
/// `{`
LBrace,
/// `}`
RBrace,
/// `,`
Comma,
/// `;`
Semicolon,
/// `=`
Equal,
}
/// Payload attached to a `Token`.
#[derive(Clone, PartialEq, Debug)]
enum TokenValue {
/// The token carries no payload (keywords and punctuation).
None,
/// Numeric id assigned to an identifier by the lexer.
Id(u64),
/// Value of an integer literal.
Int(i64),
/// Contents of a string literal.
String(String),
}
/// Lexer state: a character cursor over the input plus the symbol and keyword tables.
struct Lexer<'a> {
/// The full input text, as passed to `Lexer::new`.
text: &'a str,
/// Iterator over the characters that have not been read yet.
chars: Chars<'a>,
/// Character under the cursor; `None` at end of input.
current: Option<char>,
/// Counter bumped by `step` each time it lands on another character (starts at 0).
index: usize,
/// Line counter, starting at 1; bumped when `step` moves past a `'\n'`.
line: i32,
/// Column counter, starting at 1; reset to 1 when `step` moves past a `'\n'`.
col: i32,
/// Next numeric id to hand out for an identifier.
symbol_counter: u64,
/// Maps identifier text to the numeric id assigned to it.
symbols: HashMap<String, u64>,
/// Maps keyword text to its token kind.
keywords: HashMap<String, TokenKind>,
}
impl<'a> Lexer<'a> {
pub fn new(text: &'a str) -> Self {
let mut chars = text.chars();
let current = chars.next();
Self {
text,
chars,
current,
index: 0,
line: 1,
col: 1,
symbol_counter: 0,
symbols: HashMap::new(),
keywords: Self::make_keywords(),
}
}
fn make_keywords() -> HashMap<String, TokenKind> {
let mut keywords = HashMap::new();
keywords.insert("if".to_string(), TokenKind::If);
keywords.insert("loop".to_string(), TokenKind::Loop);
keywords.insert("break".to_string(), TokenKind::Break);
keywords.insert("let".to_string(), TokenKind::Let);
keywords.insert("fn".to_string(), TokenKind::Fn);
keywords.insert("return".to_string(), TokenKind::Return);
keywords
}
fn next_token(&mut self) -> Option<Token> {
match self.current {
None => None,
Some(' ' | '\t' | '\n') => {
self.step();
self.next_token()
}
Some(ch @ ('a'..='z' | 'A'..='Z' | '_')) => {
let mut value = String::new();
value.push(ch);
self.step();
loop {
match self.current {
Some(ch @ ('a'..='z' | 'A'..='Z' | '0'..='9' | '_')) => {
value.push(ch);
self.step();
}
_ => {
if let Some(kind) = self.keywords.get(&value) {
return self.token(kind.clone());
}
let id = self.symbol_counter;
self.symbol_counter += 1;
self.symbols.insert(value, id);
break self.token_with_value(TokenKind::Id, TokenValue::Id(id));
}
}
}
}
Some(ch @ ('1'..='9')) => {
let mut value = String::new();
value.push(ch);
self.step();
loop {
match self.current {
Some(ch @ ('0'..='9' | '_')) => {
value.push(ch);
self.step();
}
_ => {
let value = value.replace('_', "").parse::<i64>().unwrap();
break self.token_with_value(TokenKind::Int, TokenValue::Int(value));
}
}
}
}
Some('/') => {
self.step();
match self.current {
Some('/') => {
self.step();
loop {
match self.current {
None | Some('\n') => break self.next_token(),
_ => {
self.step();
}
}
}
}
Some('*') => {
self.step();
let mut lch = self.current;
loop {
match (lch, self.current) {
(Some('*'), Some('/')) => break self.next_token(),
(_, Some(ch)) => {
lch = Some(ch);
self.step();
}
(_, None) => {
self.error("malformed /**/ comment");
break self.token(TokenKind::Error);
}
}
}
}
_ => todo!(),
}
}
Some(ch @ ('0' | '(' | ')' | '{' | '}' | ',' | ';' | '=')) => {
self.step();
match ch {
'0' => self.token_with_value(TokenKind::Int, TokenValue::Int(0)),
'(' => self.token(TokenKind::LParen),
')' => self.token(TokenKind::RParen),
'{' => self.token(TokenKind::LBrace),
'}' => self.token(TokenKind::RBrace),
',' => self.token(TokenKind::Comma),
';' => self.token(TokenKind::Semicolon),
'=' => self.token(TokenKind::Equal),
_ => unreachable!(),
}
}
_ => todo!(),
}
}
fn step(&mut self) {
match self.current {
Some('\n') => {
self.line += 1;
self.col = 1;
}
Some(_) => {
self.col += 1;
}
_ => {}
}
self.current = self.chars.next();
if self.current.is_some() {
self.index += 1;
}
}
fn token(&self, kind: TokenKind) -> Option<Token> {
Some(Token {
kind,
value: TokenValue::None,
index: self.index,
line: self.line,
col: self.col,
})
}
fn token_with_value(&self, kind: TokenKind, value: TokenValue) -> Option<Token> {
Some(Token {
kind,
value,
index: self.index,
line: self.line,
col: self.col,
})
}
fn error<S: Into<String>>(&mut self, msg: S) {
let msg = msg.into();
println!("lexer error: {msg}, line {}", self.line)
}
fn done(&self) -> bool {
self.current.is_none()
}
}
impl Iterator for Lexer<'_> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
self.next_token()
}
}
/// Checks that a lone integer literal lexes to a single `Int` token and pins
/// the position recorded on it.
#[test]
fn test_lexer() {
    let expected = Token {
        kind: TokenKind::Int,
        value: TokenValue::Int(123),
        index: 2,
        line: 1,
        col: 4,
    };
    let tokens: Vec<Token> = Lexer::new("123").collect();
    assert_eq!(tokens, vec![expected]);
}
/// Program entry point. For now it only prints a greeting; it does not call
/// the lexer or the parser.
fn main() {
println!("Hello, world!");
}