// Reconstructed `src/main.rs` of the `yapping` crate (version 0.1.0, edition 2021) as added by
// commit 63a09750203f986e3eb0feb93674ce929186ff63 ("init", SimonFJ20, 2024-05-28).
// The same commit also adds `.gitignore` (containing `/target`), a `Cargo.lock` listing only the
// `yapping` package, and a `Cargo.toml` with an empty `[dependencies]` table.
//
// `main` does not use the lexer or parser yet, so the items below carry `#[allow(dead_code)]`
// (scoped per item instead of crate-wide so the file does not depend on attribute placement).

use std::{collections::HashMap, str::Chars};

/// A node of the syntax tree produced by [`Parser`].
#[allow(dead_code)]
#[derive(Clone, PartialEq, Debug)]
enum Expr {
    /// Placeholder emitted where parsing failed; the error itself is printed by the parser.
    Error,
    /// Identifier, stored as the symbol id assigned by the lexer.
    Id(u64),
    Int(i64),
    String(String),
    Block(Vec<Expr>),
    Call {
        subject: Box<Expr>,
        args: Vec<Expr>,
    },
    If {
        cond: Box<Expr>,
        truthy: Box<Expr>,
        falsy: Option<Box<Expr>>,
    },
    Loop {
        body: Box<Expr>,
    },
    Break,
    Assign {
        subject: Box<Expr>,
        value: Box<Expr>,
    },
    Let {
        subject: Box<Expr>,
        value: Box<Expr>,
    },
    Fn {
        subject: Box<Expr>,
        value: Box<Expr>,
    },
    Return {
        value: Option<Box<Expr>>,
    },
}

/// Recursive-descent parser with one token of lookahead (`current`).
#[allow(dead_code)]
struct Parser<'a> {
    lexer: Lexer<'a>,
    current: Option<Token>,
}

#[allow(dead_code)]
impl<'a> Parser<'a> {
    /// Creates a parser over `text` and loads the first token.
    pub fn new(text: &'a str) -> Self {
        let mut lexer = Lexer::new(text);
        let current = lexer.next();
        Self { lexer, current }
    }

    /// Parses the whole input into a list of statements.
    ///
    /// Errors are printed to stdout and represented as [`Expr::Error`] in the result.
    ///
    /// # Panics
    ///
    /// Panics via `todo!()` on `if`, `loop`, `fn` and `let` statements, which are not
    /// implemented yet.
    pub fn parse(&mut self) -> Vec<Expr> {
        self.parse_file()
    }

    /// Parses statements until the token stream is exhausted.
    fn parse_file(&mut self) -> Vec<Expr> {
        let mut stmts = Vec::new();
        while self.current.is_some() {
            stmts.push(self.parse_stmt());
        }
        stmts
    }

    /// Parses one statement. `break`, `return` and expression statements must end with `;`.
    fn parse_stmt(&mut self) -> Expr {
        match self.curr_kind() {
            Some(TokenKind::If) => todo!(),
            Some(TokenKind::Loop) => todo!(),
            Some(TokenKind::Fn) => todo!(),
            _ => {
                let stmt = match self.curr_kind() {
                    Some(TokenKind::Let) => todo!(),
                    Some(TokenKind::Break) => {
                        self.step();
                        Expr::Break
                    }
                    Some(TokenKind::Return) => {
                        self.step();
                        let value = match self.curr_kind() {
                            Some(TokenKind::Semicolon) => None,
                            _ => Some(Box::new(self.parse_expr())),
                        };
                        Expr::Return { value }
                    }
                    _ => self.parse_assign(),
                };
                match self.curr_kind() {
                    Some(TokenKind::Semicolon) => {
                        self.step();
                        stmt
                    }
                    _ => {
                        // The offending token is left in place; the next statement starts there.
                        self.error("expected ';'");
                        Expr::Error
                    }
                }
            }
        }
    }

    /// Parses `expr` or `expr = expr`.
    fn parse_assign(&mut self) -> Expr {
        let subject = self.parse_expr();
        if self.curr_kind() != Some(TokenKind::Equal) {
            return subject;
        }
        self.step();
        let value = self.parse_expr();
        Expr::Assign {
            subject: Box::new(subject),
            value: Box::new(value),
        }
    }

    fn parse_expr(&mut self) -> Expr {
        self.parse_call()
    }

    /// Parses a value followed by any number of call suffixes `(arg, ...)`.
    ///
    /// A trailing comma before `)` is accepted.
    fn parse_call(&mut self) -> Expr {
        let mut subject = self.parse_value();
        while self.curr_kind() == Some(TokenKind::LParen) {
            self.step();
            let mut args = Vec::new();
            match self.curr_kind() {
                // Empty argument list (or end of input, reported below as a missing ')').
                None | Some(TokenKind::RParen) => {}
                Some(_) => {
                    args.push(self.parse_expr());
                    while self.curr_kind() == Some(TokenKind::Comma) {
                        self.step();
                        if self.curr_kind() == Some(TokenKind::RParen) {
                            break;
                        }
                        args.push(self.parse_expr());
                    }
                }
            }
            match self.curr_kind() {
                Some(TokenKind::RParen) => self.step(),
                _ => {
                    self.error("expected ')'");
                    return Expr::Error;
                }
            }
            subject = Expr::Call {
                subject: Box::new(subject),
                args,
            };
        }
        subject
    }

    /// Parses an identifier, integer or string token.
    ///
    /// Any other token (or end of input) is reported, skipped, and yields [`Expr::Error`].
    fn parse_value(&mut self) -> Expr {
        match self.curr_kind() {
            Some(TokenKind::Id) => {
                let Some(Token {
                    value: TokenValue::Id(id),
                    ..
                }) = &self.current
                else {
                    unreachable!("Id token without an Id value")
                };
                let id = *id;
                self.step();
                Expr::Id(id)
            }
            Some(TokenKind::Int) => {
                let Some(Token {
                    value: TokenValue::Int(int),
                    ..
                }) = &self.current
                else {
                    unreachable!("Int token without an Int value")
                };
                let int = *int;
                self.step();
                Expr::Int(int)
            }
            Some(TokenKind::String) => {
                // Take the payload out of the current token *before* advancing; reading it
                // after `step()` would inspect the following token instead.
                let Some(Token {
                    value: TokenValue::String(text),
                    ..
                }) = self.current.take()
                else {
                    unreachable!("String token without a String value")
                };
                self.step();
                Expr::String(text)
            }
            _ => {
                // Report first so the printed line belongs to the offending token.
                self.error("expected value");
                self.step();
                Expr::Error
            }
        }
    }

    /// Prints a parser error together with the line of the current token (`-` at end of input).
    fn error<S: Into<String>>(&mut self, msg: S) {
        let msg = msg.into();
        println!(
            "parser error: {msg}, line {}",
            self.current
                .as_ref()
                .map(|t| t.line.to_string())
                .unwrap_or_else(|| "-".to_string())
        )
    }

    /// Advances to the next token.
    fn step(&mut self) {
        self.current = self.lexer.next();
    }

    /// Kind of the current token, or `None` at end of input.
    fn curr_kind(&self) -> Option<TokenKind> {
        self.current.as_ref().map(|t| t.kind)
    }
}

#[test]
fn test_parser() {
    use Expr::*;
    assert_eq!(Parser::new("abc;").parse(), vec![Id(0)]);
    assert_eq!(Parser::new("123;").parse(), vec![Int(123)]);
    assert_eq!(Parser::new("0;").parse(), vec![Int(0)]);
    assert_eq!(Parser::new("0;abc;").parse(), vec![Int(0), Id(0)]);
    assert_eq!(
        Parser::new("add(mul(12, 34), 56);").parse(),
        vec![Call {
            subject: Box::new(Id(0)),
            args: vec![
                Call {
                    subject: Box::new(Id(1)),
                    args: vec![Int(12), Int(34)]
                },
                Int(56)
            ]
        }]
    );
    assert_eq!(Parser::new("break;").parse(), vec![Break]);
    assert_eq!(Parser::new("return;").parse(), vec![Return { value: None }]);
    assert_eq!(
        Parser::new("return add(1, 2);").parse(),
        vec![Return {
            value: Some(Box::new(Call {
                subject: Box::new(Id(0)),
                args: vec![Int(1), Int(2)]
            }))
        }]
    );
    // Regression: an empty argument list is a valid call.
    assert_eq!(
        Parser::new("f();").parse(),
        vec![Call {
            subject: Box::new(Id(0)),
            args: vec![]
        }]
    );
    // Regression: repeated identifiers share one symbol id.
    assert_eq!(Parser::new("a;b;a;").parse(), vec![Id(0), Id(1), Id(0)]);
}

/// A lexed token.
///
/// `index`, `line` and `col` are the lexer's position at the moment the token is emitted,
/// i.e. after the token's characters have been consumed.
#[allow(dead_code)]
#[derive(Clone, PartialEq, Debug)]
struct Token {
    kind: TokenKind,
    value: TokenValue,
    index: usize,
    line: i32,
    col: i32,
}

#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum TokenKind {
    Error,
    Id,
    Int,
    String,
    If,
    Loop,
    Break,
    Let,
    Fn,
    Return,
    LParen,
    RParen,
    LBrace,
    RBrace,
    Comma,
    Semicolon,
    Equal,
}

/// Payload attached to a [`Token`]; `None` for tokens that carry no data.
#[allow(dead_code)]
#[derive(Clone, PartialEq, Debug)]
enum TokenValue {
    None,
    Id(u64),
    Int(i64),
    String(String),
}

/// Character-level tokenizer; use it through its [`Iterator`] implementation.
#[allow(dead_code)]
struct Lexer<'a> {
    text: &'a str,
    chars: Chars<'a>,
    current: Option<char>,
    index: usize,
    line: i32,
    col: i32,
    /// Next unused symbol id.
    symbol_counter: u64,
    /// Identifier name -> symbol id.
    symbols: HashMap<String, u64>,
    keywords: HashMap<String, TokenKind>,
}

#[allow(dead_code)]
impl<'a> Lexer<'a> {
    /// Creates a lexer positioned on the first character of `text`.
    pub fn new(text: &'a str) -> Self {
        let mut chars = text.chars();
        let current = chars.next();
        Self {
            text,
            chars,
            current,
            index: 0,
            line: 1,
            col: 1,
            symbol_counter: 0,
            symbols: HashMap::new(),
            keywords: Self::make_keywords(),
        }
    }

    /// Builds the reserved-word table.
    fn make_keywords() -> HashMap<String, TokenKind> {
        HashMap::from([
            ("if".to_string(), TokenKind::If),
            ("loop".to_string(), TokenKind::Loop),
            ("break".to_string(), TokenKind::Break),
            ("let".to_string(), TokenKind::Let),
            ("fn".to_string(), TokenKind::Fn),
            ("return".to_string(), TokenKind::Return),
        ])
    }

    /// Produces the next token, skipping whitespace and comments; `None` at end of input.
    ///
    /// Malformed input (unknown character, lone `/`, unterminated block comment, integer
    /// literal that does not fit `i64`) is reported via [`Lexer::error`] and yields a
    /// [`TokenKind::Error`] token instead of panicking.
    fn next_token(&mut self) -> Option<Token> {
        // A loop (not recursion) so long runs of whitespace/comments cannot grow the stack.
        loop {
            let ch = self.current?;
            match ch {
                ' ' | '\t' | '\r' | '\n' => self.step(),
                'a'..='z' | 'A'..='Z' | '_' => return self.lex_word(),
                '1'..='9' => return self.lex_int(),
                '/' => {
                    self.step();
                    match self.current {
                        Some('/') => self.skip_line_comment(),
                        Some('*') => {
                            if !self.skip_block_comment() {
                                self.error("malformed /**/ comment");
                                return self.token(TokenKind::Error);
                            }
                        }
                        _ => {
                            self.error("unexpected character '/'");
                            return self.token(TokenKind::Error);
                        }
                    }
                }
                '0' | '(' | ')' | '{' | '}' | ',' | ';' | '=' => {
                    self.step();
                    return match ch {
                        '0' => self.token_with_value(TokenKind::Int, TokenValue::Int(0)),
                        '(' => self.token(TokenKind::LParen),
                        ')' => self.token(TokenKind::RParen),
                        '{' => self.token(TokenKind::LBrace),
                        '}' => self.token(TokenKind::RBrace),
                        ',' => self.token(TokenKind::Comma),
                        ';' => self.token(TokenKind::Semicolon),
                        '=' => self.token(TokenKind::Equal),
                        _ => unreachable!(),
                    };
                }
                other => {
                    // Consume the character so the caller always makes progress.
                    self.step();
                    self.error(format!("unexpected character {other:?}"));
                    return self.token(TokenKind::Error);
                }
            }
        }
    }

    /// Lexes a keyword or identifier; `current` must be on its first character.
    fn lex_word(&mut self) -> Option<Token> {
        let mut word = String::new();
        while let Some(ch @ ('a'..='z' | 'A'..='Z' | '0'..='9' | '_')) = self.current {
            word.push(ch);
            self.step();
        }
        if let Some(kind) = self.keywords.get(word.as_str()).copied() {
            return self.token(kind);
        }
        let id = self.intern(word);
        self.token_with_value(TokenKind::Id, TokenValue::Id(id))
    }

    /// Returns the symbol id for `name`, allocating a new one only on first sight.
    fn intern(&mut self, name: String) -> u64 {
        if let Some(&id) = self.symbols.get(&name) {
            return id;
        }
        let id = self.symbol_counter;
        self.symbol_counter += 1;
        self.symbols.insert(name, id);
        id
    }

    /// Lexes a decimal integer (with optional `_` separators) starting at a digit `1..=9`.
    fn lex_int(&mut self) -> Option<Token> {
        let mut digits = String::new();
        while let Some(ch @ ('0'..='9' | '_')) = self.current {
            if ch != '_' {
                digits.push(ch);
            }
            self.step();
        }
        // `digits` is non-empty and all ASCII digits, so parsing can only fail on overflow.
        match digits.parse::<i64>() {
            Ok(value) => self.token_with_value(TokenKind::Int, TokenValue::Int(value)),
            Err(_) => {
                self.error("integer literal out of range");
                self.token(TokenKind::Error)
            }
        }
    }

    /// Skips to the end of the line (the newline itself is left for the whitespace rule).
    fn skip_line_comment(&mut self) {
        while !matches!(self.current, None | Some('\n')) {
            self.step();
        }
    }

    /// Skips a block comment; `current` must be on the `*` of the opening `/*`.
    ///
    /// Returns `true` once the closing `*/` has been consumed, `false` if the input ended first.
    fn skip_block_comment(&mut self) -> bool {
        self.step();
        // `prev` starts empty so the `*` of the opener cannot double as the closer (`/*/`).
        let mut prev = None;
        while let Some(ch) = self.current {
            self.step();
            if prev == Some('*') && ch == '/' {
                return true;
            }
            prev = Some(ch);
        }
        false
    }

    /// Advances one character, updating `line`/`col`.
    ///
    /// `index` only advances while another character follows, so at end of input it stays
    /// on the last character.
    fn step(&mut self) {
        match self.current {
            Some('\n') => {
                self.line += 1;
                self.col = 1;
            }
            Some(_) => {
                self.col += 1;
            }
            None => {}
        }
        self.current = self.chars.next();
        if self.current.is_some() {
            self.index += 1;
        }
    }

    /// Builds a token without payload at the current position.
    fn token(&self, kind: TokenKind) -> Option<Token> {
        self.token_with_value(kind, TokenValue::None)
    }

    /// Builds a token with payload at the current position.
    fn token_with_value(&self, kind: TokenKind, value: TokenValue) -> Option<Token> {
        Some(Token {
            kind,
            value,
            index: self.index,
            line: self.line,
            col: self.col,
        })
    }

    /// Prints a lexer error together with the current line.
    fn error<S: Into<String>>(&mut self, msg: S) {
        let msg = msg.into();
        println!("lexer error: {msg}, line {}", self.line)
    }

    /// Whether all input has been consumed.
    fn done(&self) -> bool {
        self.current.is_none()
    }
}

impl Iterator for Lexer<'_> {
    type Item = Token;

    fn next(&mut self) -> Option<Token> {
        self.next_token()
    }
}

#[test]
fn test_lexer() {
    assert_eq!(
        Lexer::new("123").collect::<Vec<_>>(),
        vec![Token {
            kind: TokenKind::Int,
            value: TokenValue::Int(123),
            index: 2,
            line: 1,
            col: 4
        }]
    );
    // Regression: block comments are skipped including their closing '/'.
    assert_eq!(
        Lexer::new("/* c */1").map(|t| t.kind).collect::<Vec<_>>(),
        vec![TokenKind::Int]
    );
    // Regression: oversized literals produce an error token instead of panicking.
    assert_eq!(
        Lexer::new("99999999999999999999")
            .map(|t| t.kind)
            .collect::<Vec<_>>(),
        vec![TokenKind::Error]
    );
}

fn main() {
    println!("Hello, world!");
}