class Lexer(TokenIterator):
    """Hand-written lexer that turns source text into `Token`s on demand.

    Call `next()` repeatedly; once the input is exhausted every further call
    returns an `Eof` token. Whitespace, `//` line comments and (nestable)
    `/* ... */` block comments are skipped. Malformed input never raises: it
    is reported through the `InvalidChar` / `Malformed*` token types.
    """

    # Characters skipped between tokens.
    _WHITESPACE = " \t\r\n"
    # An identifier must start with a lowercase ASCII letter or underscore...
    # NOTE(review): continuation characters are lower()-ed before the check,
    # so `fooBar` is one identifier while `Foo` starts with an InvalidChar.
    # Confirm whether rejecting a leading uppercase letter is intended.
    _ID_START = "abcdefghijklmnopqrstuvwxyz_"
    # ...and may continue with (case-insensitive) letters, digits, underscore.
    _ID_CONTINUE = "1234567890abcdefghijklmnopqrstuvwxyz_"

    # Characters that always form a one-character token on their own.
    _SINGLE_CHAR_TOKENS = {
        "(": TokenType.LParen,
        ")": TokenType.RParen,
        "{": TokenType.LBrace,
        "}": TokenType.RBrace,
        "[": TokenType.LBracket,
        "]": TokenType.RBracket,
        ",": TokenType.Comma,
        ";": TokenType.Semicolon,
        "&": TokenType.Ampersand,
    }

    # Characters that are a token alone and a different token when directly
    # followed by "=": char -> (alone, with "=").
    _OPTIONAL_EQUAL_TOKENS = {
        "+": (TokenType.Plus, TokenType.PlusEqual),
        "%": (TokenType.Percent, TokenType.PercentEqual),
        "!": (TokenType.Exclamation, TokenType.ExclamationEqual),
        "<": (TokenType.LT, TokenType.LTEqual),
        ">": (TokenType.GT, TokenType.GTEqual),
    }

    # Identifier-shaped lexemes that are not plain identifiers.
    _KEYWORDS = {
        "_": TokenType.Underscore,
        "false": TokenType.KwFalse,
        "true": TokenType.KwTrue,
        "not": TokenType.KwNot,
        "in": TokenType.KwIn,
        "and": TokenType.KwAnd,
        "or": TokenType.KwOr,
        "xor": TokenType.KwXor,
        "let": TokenType.KwLet,
        "mut": TokenType.KwMut,
        "if": TokenType.KwIf,
        "else": TokenType.KwElse,
        "while": TokenType.KwWhile,
        "for": TokenType.KwFor,
        "loop": TokenType.KwLoop,
        "break": TokenType.KwBreak,
        "continue": TokenType.KwContinue,
        "fn": TokenType.KwFn,
        "return": TokenType.KwReturn,
        "match": TokenType.KwMatch,
    }

    def __init__(self, text: str) -> None:
        """Start lexing `text` from its first character (line 1, column 1)."""
        self.text = text
        self.index = 0
        self.line = 1
        self.col = 1

    def next(self) -> Token:
        """Return the next token, skipping whitespace and comments.

        Trivia is skipped in a loop rather than by recursing, so inputs with
        very many consecutive comments cannot exhaust the recursion limit.
        """
        while True:
            if self.done():
                return self.token(TokenType.Eof, self.pos(), self.pos())
            char = self.current()
            if char in self._WHITESPACE:
                self.step()
                continue
            if char in self._SINGLE_CHAR_TOKENS:
                return self.single(self._SINGLE_CHAR_TOKENS[char])
            if char in self._OPTIONAL_EQUAL_TOKENS:
                alone, with_equal = self._OPTIONAL_EQUAL_TOKENS[char]
                return self.single_or_double(alone, "=", with_equal)
            if char == ".":
                return self._lex_dot()
            if char == ":":
                return self._lex_colon()
            if char == "-":
                return self._lex_minus()
            if char == "*":
                return self._lex_asterisk()
            if char == "/":
                slash_token = self._lex_slash()
                if slash_token is None:
                    # It was a comment; keep looking for a real token.
                    continue
                return slash_token
            if char == "=":
                return self._lex_equal()
            if char == "'":
                return self._lex_char()
            if char == "\"":
                return self._lex_string()
            if char == "0":
                # A leading zero is always its own Int token.
                return self.single(TokenType.Int)
            if char in "123456789":
                return self._lex_int()
            if char in self._ID_START:
                return self._lex_id_or_keyword()
            return self.single(TokenType.InvalidChar)

    def _extend(self, token_type: TokenType, start: Position) -> Token:
        """Consume the current character as the last character of a token."""
        end = self.pos()
        self.step()
        return self.token(token_type, start, end)

    def _lex_dot(self) -> Token:
        """Lex `.`, `..`, `...` or `..=`."""
        start = self.pos()
        self.step()
        if not self.current_is("."):
            return self.token(TokenType.Dot, start, start)
        end = self.pos()
        self.step()
        if self.current_is("."):
            return self._extend(TokenType.DotDotDot, start)
        if self.current_is("="):
            return self._extend(TokenType.DotDotEqual, start)
        return self.token(TokenType.DotDot, start, end)

    def _lex_colon(self) -> Token:
        """Lex `:`, `::` or `::<`."""
        start = self.pos()
        self.step()
        if not self.current_is(":"):
            # A lone colon is a Colon token (it used to be reported as Comma).
            return self.token(TokenType.Colon, start, start)
        end = self.pos()
        self.step()
        if self.current_is("<"):
            return self._extend(TokenType.ColonColonLT, start)
        return self.token(TokenType.ColonColon, start, end)

    def _lex_minus(self) -> Token:
        """Lex `-`, `-=` or `->` (the latter is named `MinusLT` in TokenType)."""
        start = self.pos()
        self.step()
        if self.current_is("="):
            return self._extend(TokenType.MinusEqual, start)
        if self.current_is(">"):
            return self._extend(TokenType.MinusLT, start)
        return self.token(TokenType.Minus, start, start)

    def _lex_asterisk(self) -> Token:
        """Lex `*`, `*=` or `**`."""
        start = self.pos()
        self.step()
        if self.current_is("="):
            return self._extend(TokenType.AsteriskEqual, start)
        if self.current_is("*"):
            # Needed by the parser's exponent rule, which looks for this token.
            return self._extend(TokenType.AsteriskAsterisk, start)
        return self.token(TokenType.Asterisk, start, start)

    def _lex_slash(self) -> Optional[Token]:
        """Lex `/` or `/=`, or skip a comment.

        Returns None when a complete comment was skipped, so the caller keeps
        scanning; returns a MalformedComment token for an unterminated block
        comment.
        """
        start = self.pos()
        self.step()
        if self.current_is("="):
            # `/=` is SlashEqual (it used to be reported as AsteriskEqual).
            return self._extend(TokenType.SlashEqual, start)
        if self.current_is("/"):
            # Line comment: skip up to, but not including, the newline.
            while not self.done() and self.current() != "\n":
                self.step()
            return None
        if self.current_is("*"):
            return self._skip_block_comment(start)
        return self.token(TokenType.Slash, start, start)

    def _skip_block_comment(self, start: Position) -> Optional[Token]:
        """Skip a block comment whose `/` was consumed and whose `*` is current.

        Block comments nest. Returns None once the outermost comment closes,
        or a MalformedComment token if the input ends first.
        """
        end = self.pos()
        self.step()
        depth = 1
        last_char: Optional[str] = None
        while not self.done():
            char = self.current()
            end = self.pos()
            self.step()
            if last_char == "/" and char == "*":
                depth += 1
                # Forget the delimiter so `/*/` is not also read as a closer.
                last_char = None
            elif last_char == "*" and char == "/":
                depth -= 1
                last_char = None
                if depth == 0:
                    return None
            else:
                last_char = char
        return self.token(TokenType.MalformedComment, start, end)

    def _lex_equal(self) -> Token:
        """Lex `=`, `==` or `=>` (the latter is named `EqualLT` in TokenType)."""
        start = self.pos()
        self.step()
        if self.current_is("="):
            return self._extend(TokenType.EqualEqual, start)
        if self.current_is(">"):
            return self._extend(TokenType.EqualLT, start)
        return self.token(TokenType.Equal, start, start)

    def _lex_char(self) -> Token:
        """Lex a character literal: one character or one backslash escape.

        Anything else, including a literal cut off by the end of input,
        yields a MalformedChar token.
        """
        start = self.pos()
        self.step()
        end = start
        # Guard against input ending right after the opening quote; reading
        # the current character there would raise IndexError.
        if not self.done():
            end = self.pos()
            first = self.current()
            self.step()
            if not self.done() and first == "\\":
                # The character after a backslash belongs to the escape.
                end = self.pos()
                self.step()
        if not self.current_is("'"):
            return self.token(TokenType.MalformedChar, start, end)
        return self._extend(TokenType.Char, start)

    def _lex_string(self) -> Token:
        """Lex a double-quoted string literal with backslash escapes.

        Yields a MalformedString token if the closing quote is missing.
        """
        start = self.pos()
        end = start
        self.step()
        while not self.done() and self.current() != "\"":
            end = self.pos()
            first = self.current()
            self.step()
            if not self.done() and first == "\\":
                # Skip the escaped character so `\"` does not end the string.
                end = self.pos()
                self.step()
        if not self.current_is("\""):
            return self.token(TokenType.MalformedString, start, end)
        return self._extend(TokenType.String, start)

    def _lex_int(self) -> Token:
        """Lex a run of decimal digits starting with a non-zero digit."""
        start = self.pos()
        end = start
        self.step()
        while not self.done() and self.current() in "1234567890":
            end = self.pos()
            self.step()
        return self.token(TokenType.Int, start, end)

    def _lex_id_or_keyword(self) -> Token:
        """Lex an identifier, then reclassify it if it is `_` or a keyword."""
        start = self.pos()
        end = start
        self.step()
        while not self.done() and self.current().lower() in self._ID_CONTINUE:
            end = self.pos()
            self.step()
        value = self.text[start.index:self.index]
        return self.token(self._KEYWORDS.get(value, TokenType.Id), start, end)

    def single(self, token_type: TokenType) -> Token:
        """Consume one character and return it as a `token_type` token."""
        start = self.pos()
        self.step()
        return self.token(token_type, start, start)

    def single_or_double(self, type1: TokenType, char2: str, type2: TokenType) -> Token:
        """Consume one character, plus `char2` if it follows directly.

        Returns a `type2` token when `char2` was consumed, else `type1`.
        """
        start = self.pos()
        self.step()
        if not self.done() and self.current() == char2:
            end = self.pos()
            self.step()
            return self.token(type2, start, end)
        else:
            return self.token(type1, start, start)

    def token(self, token_type: TokenType, start: Position, end: Position) -> Token:
        """Build a token running from `start` up to the current index.

        `end` is the position of the token's last character and is only used
        for the span; the length comes from how far the lexer has advanced.
        """
        return Token(token_type, start.index, self.index - start.index, Span(start, end))

    def pos(self) -> Position:
        """Return the position (index, line, column) of the current character."""
        return Position(self.index, self.line, self.col)

    def step(self) -> None:
        """Advance one character, keeping line and column in sync.

        The line/column update is driven by the character being left: leaving
        a newline starts the next line at column 1. (Deciding by the character
        being entered put the newline itself on the following line and shifted
        every later column on that line by one.)
        """
        if not self.done():
            if self.current() == "\n":
                self.line += 1
                self.col = 1
            else:
                self.col += 1
        self.index += 1

    def current_is(self, value: str) -> bool:
        """Return True if input remains and the current character is `value`."""
        return not self.done() and self.current() == value

    def done(self) -> bool:
        """Return True once every character has been consumed."""
        return self.index >= len(self.text)

    def current(self) -> str:
        """Return the current character; callers must check `done()` first."""
        return self.text[self.index]
class Expr:
    """Base class of every expression node produced by the parser."""

    def __str__(self) -> str:
        raise NotImplementedError()


class ExprError(Expr):
    """Placeholder node recording a parse error and its message."""

    def __init__(self, message: str) -> None:
        super().__init__()
        self.message = message

    def __str__(self) -> str:
        return f"ErrorExpr({self.message})"


class Id(Expr):
    """Identifier reference; `value` is the identifier text."""

    def __init__(self, value: str) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return f"Id({self.value})"


class Int(Expr):
    """Integer literal."""

    def __init__(self, value: int) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return f"Int({self.value})"


class Char(Expr):
    """Character literal; `value` is stored exactly as the parser passes it."""

    def __init__(self, value: str) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return f"Char('{self.value}')"


class String(Expr):
    """String literal; `value` is stored exactly as the parser passes it."""

    def __init__(self, value: str) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return f"String(\"{self.value}\")"


class Bool(Expr):
    """Boolean literal, printed as `true` / `false`."""

    def __init__(self, value: bool) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        value = "true" if self.value else "false"
        return f"Bool({value})"


class Unit(Expr):
    """The unit value."""

    def __init__(self) -> None:
        super().__init__()

    def __str__(self) -> str:
        return "Unit"


class Block(Expr):
    """Brace-delimited block: leading statements plus an optional final value."""

    def __init__(self, statements: List[Node[Expr]], value: Optional[Node[Expr]]) -> None:
        super().__init__()
        self.statements = statements
        self.value = value

    def __str__(self) -> str:
        statements = ", ".join(str(node) for node in self.statements)
        return f"Block {{ statements: [{statements}], value: {self.value} }}"


class If(Expr):
    """Conditional; `falsy` is None when there is no `else` branch."""

    def __init__(self, condition: Node[Expr], truthy: Node[Expr], falsy: Optional[Node[Expr]]) -> None:
        super().__init__()
        self.condition = condition
        self.truthy = truthy
        self.falsy = falsy

    def __str__(self) -> str:
        return f"If {{ condition: {self.condition}, truthy: {self.truthy}, falsy: {self.falsy} }}"


class MatchArm:
    """One arm of a `Match`: a pattern and the expression it selects."""

    def __init__(self, pattern: Node[Pattern], expr: Node[Expr]) -> None:
        self.pattern = pattern
        self.expr = expr

    def __str__(self) -> str:
        return f"MatchArm {{ pattern: {self.pattern}, expr: {self.expr} }}"


class Match(Expr):
    """Match expression: the matched value and its arms."""

    def __init__(self, value: Node[Expr], arms: List[Node[MatchArm]]) -> None:
        super().__init__()
        self.value = value
        self.arms = arms

    def __str__(self) -> str:
        # Without this override, printing a Match falls through to
        # Expr.__str__ and raises NotImplementedError.
        arms = ", ".join(str(node) for node in self.arms)
        return f"Match {{ value: {self.value}, arms: [{arms}] }}"


class Loop(Expr):
    """Unconditional loop."""

    def __init__(self, body: Node[Expr]) -> None:
        super().__init__()
        self.body = body

    def __str__(self) -> str:
        return f"Loop {{ body: {self.body} }}"


class While(Expr):
    """Loop guarded by a condition."""

    def __init__(self, condition: Node[Expr], body: Node[Expr]) -> None:
        super().__init__()
        self.condition = condition
        self.body = body

    def __str__(self) -> str:
        return f"While {{ condition: {self.condition}, body: {self.body} }}"


class For(Expr):
    """`for subject in value body` loop; `subject` is a pattern."""

    def __init__(self, subject: Node[Pattern], value: Node[Expr], body: Node[Expr]) -> None:
        super().__init__()
        self.subject = subject
        self.value = value
        self.body = body

    def __str__(self) -> str:
        return f"For {{ subject: {self.subject}, value: {self.value}, body: {self.body} }}"


class StructMember(Expr):
    """Member access by name: `subject.member_id`."""

    def __init__(self, subject: Node[Expr], member_id: str) -> None:
        super().__init__()
        self.subject = subject
        self.member_id = member_id

    def __str__(self) -> str:
        return f"StructMember {{ subject: {self.subject}, member_id: {self.member_id} }}"


class TupleMember(Expr):
    """Member access by position: `subject.member_index`."""

    def __init__(self, subject: Node[Expr], member_index: int) -> None:
        super().__init__()
        self.subject = subject
        self.member_index = member_index

    def __str__(self) -> str:
        # Prints its own name; it used to print "StructMember".
        return f"TupleMember {{ subject: {self.subject}, member_index: {self.member_index} }}"


class Index(Expr):
    """Index expression: `subject[value]`."""

    def __init__(self, subject: Node[Expr], value: Node[Expr]) -> None:
        super().__init__()
        self.subject = subject
        self.value = value

    def __str__(self) -> str:
        return f"Index {{ subject: {self.subject}, value: {self.value} }}"


class Call(Expr):
    """Call expression: `subject(arguments...)`."""

    def __init__(self, subject: Node[Expr], arguments: List[Node[Expr]]) -> None:
        super().__init__()
        self.subject = subject
        self.arguments = arguments

    def __str__(self) -> str:
        # Prints its own name (it used to print "Index") and brackets the
        # argument list the same way Block brackets its statements.
        arguments = ", ".join(str(node) for node in self.arguments)
        return f"Call {{ subject: {self.subject}, arguments: [{arguments}] }}"


class UnaryType(Enum):
    """Operator of a `Unary` node."""

    Not = auto()
    Negate = auto()
    Reference = auto()
    ReferenceMut = auto()
    Dereference = auto()


class Unary(Expr):
    """Unary operator applied to `subject`."""

    def __init__(self, unary_type: UnaryType, subject: Node[Expr]) -> None:
        super().__init__()
        self.unary_type = unary_type
        self.subject = subject

    def __str__(self) -> str:
        return f"Unary {{ unary_type: {self.unary_type}, subject: {self.subject} }}"


class BinaryType(Enum):
    """Operator of a `Binary` node."""

    And = auto()
    Or = auto()
    Add = auto()
    Subtract = auto()
    Multiply = auto()
    Divide = auto()
    Modulo = auto()
    Exponent = auto()
    Equal = auto()
    Inequal = auto()
    LT = auto()
    GT = auto()
    LTEqual = auto()
    GTEqual = auto()
    In = auto()


class Binary(Expr):
    """Binary operator applied to `left` and `right`."""

    def __init__(self, binary_type: BinaryType, left: Node[Expr], right: Node[Expr]) -> None:
        super().__init__()
        self.binary_type = binary_type
        self.left = left
        self.right = right

    def __str__(self) -> str:
        return f"Binary {{ binary_type: {self.binary_type}, left: {self.left}, right: {self.right} }}"


class AssignType(Enum):
    """Kind of an `Assign` node: plain or compound assignment."""

    Assign = auto()
    Add = auto()
    Subtract = auto()
    Multiply = auto()
    Divide = auto()
    Modulo = auto()


class Assign(Expr):
    """Assignment of `value` to `subject`."""

    def __init__(self, assign_type: AssignType, subject: Node[Expr], value: Node[Expr]) -> None:
        super().__init__()
        self.assign_type = assign_type
        self.subject = subject
        self.value = value

    def __str__(self) -> str:
        return f"Assign {{ assign_type: {self.assign_type}, subject: {self.subject}, value: {self.value} }}"
class Parser:
    """Recursive-descent parser turning a token stream into `Node[Expr]` trees.

    Syntax errors never raise: they are returned in place as `ExprError`
    nodes. Binary operators are parsed by one method per precedence level,
    from loosest (`or`) down to the operands.
    """

    def __init__(self, text: str, tokens: TokenIterator) -> None:
        """Store the source text and token source, and load the first token.

        `text` must be the text the tokens were produced from; it is used to
        slice out identifier and literal lexemes.
        """
        self.text = text
        self.tokens = tokens
        self.current_token = tokens.next()

    def parse(self) -> List[Node[Expr]]:
        """Parse statements until the end of input."""
        statements: List[Node[Expr]] = []
        while not self.done():
            statements.append(self.parse_statement())
        return statements

    def parse_statement(self) -> Node[Expr]:
        """Parse an `if` statement, or otherwise an assignment/expression."""
        if self.current_is(TokenType.KwIf):
            return self.parse_if()
        else:
            return self.parse_assign()

    def parse_assign(self) -> Node[Expr]:
        """Parse `subject = value`, or a bare expression if no `=` follows."""
        subject = self.parse_expr()
        if self.current_is(TokenType.Equal):
            self.step()
            value = self.parse_expr()
            return Node(Assign(AssignType.Assign, subject, value), subject.span.to(value.span))
        else:
            return subject

    def parse_expr(self) -> Node[Expr]:
        """Parse an expression, starting at the loosest-binding operator."""
        return self.parse_or()

    def parse_or(self) -> Node[Expr]:
        """Parse a left-associative chain of `or`."""
        left = self.parse_and()
        while self.current_is(TokenType.KwOr):
            self.step()
            right = self.parse_and()
            left = Node(Binary(BinaryType.Or, left, right), left.span.to(right.span))
        return left

    def parse_and(self) -> Node[Expr]:
        """Parse a left-associative chain of `and`."""
        left = self.parse_equal()
        # Loop on `and`. Looping on `or` here (as before) made `and` unparsable
        # and turned every `or` into a BinaryType.And node.
        while self.current_is(TokenType.KwAnd):
            self.step()
            right = self.parse_equal()
            left = Node(Binary(BinaryType.And, left, right), left.span.to(right.span))
        return left

    def parse_equal(self) -> Node[Expr]:
        """Parse a left-associative chain of `==` / `!=`."""
        left = self.parse_compare()
        while not self.done():
            if self.current_is(TokenType.EqualEqual):
                self.step()
                right = self.parse_compare()
                left = Node(Binary(BinaryType.Equal, left, right), left.span.to(right.span))
            elif self.current_is(TokenType.ExclamationEqual):
                self.step()
                right = self.parse_compare()
                left = Node(Binary(BinaryType.Inequal, left, right), left.span.to(right.span))
            else:
                break
        return left

    def parse_compare(self) -> Node[Expr]:
        """Parse a left-associative chain of `<`, `>`, `<=`, `>=`."""
        left = self.parse_add_subtract()
        while not self.done():
            if self.current_is(TokenType.LT):
                self.step()
                right = self.parse_add_subtract()
                left = Node(Binary(BinaryType.LT, left, right), left.span.to(right.span))
            elif self.current_is(TokenType.GT):
                self.step()
                right = self.parse_add_subtract()
                # `>` builds GT (it used to build LT).
                left = Node(Binary(BinaryType.GT, left, right), left.span.to(right.span))
            elif self.current_is(TokenType.LTEqual):
                self.step()
                right = self.parse_add_subtract()
                left = Node(Binary(BinaryType.LTEqual, left, right), left.span.to(right.span))
            elif self.current_is(TokenType.GTEqual):
                self.step()
                right = self.parse_add_subtract()
                left = Node(Binary(BinaryType.GTEqual, left, right), left.span.to(right.span))
            else:
                break
        return left

    def parse_add_subtract(self) -> Node[Expr]:
        """Parse a left-associative chain of `+` / `-`."""
        left = self.parse_multiply_divide_modulo()
        while not self.done():
            if self.current_is(TokenType.Plus):
                self.step()
                right = self.parse_multiply_divide_modulo()
                left = Node(Binary(BinaryType.Add, left, right), left.span.to(right.span))
            elif self.current_is(TokenType.Minus):
                self.step()
                right = self.parse_multiply_divide_modulo()
                left = Node(Binary(BinaryType.Subtract, left, right), left.span.to(right.span))
            else:
                break
        return left

    def parse_multiply_divide_modulo(self) -> Node[Expr]:
        """Parse a left-associative chain of `*`, `/`, `%`."""
        left = self.parse_negate()
        while not self.done():
            if self.current_is(TokenType.Asterisk):
                self.step()
                right = self.parse_negate()
                left = Node(Binary(BinaryType.Multiply, left, right), left.span.to(right.span))
            elif self.current_is(TokenType.Slash):
                self.step()
                right = self.parse_negate()
                left = Node(Binary(BinaryType.Divide, left, right), left.span.to(right.span))
            elif self.current_is(TokenType.Percent):
                self.step()
                right = self.parse_negate()
                left = Node(Binary(BinaryType.Modulo, left, right), left.span.to(right.span))
            else:
                break
        return left

    def parse_negate(self) -> Node[Expr]:
        """Parse an optional prefix `-`; it binds looser than `**`."""
        if self.current_is(TokenType.Minus):
            token_span = self.current().span
            self.step()
            subject = self.parse_exponent()
            return Node(Unary(UnaryType.Negate, subject), token_span.to(subject.span))
        else:
            return self.parse_exponent()

    def parse_exponent(self) -> Node[Expr]:
        """Parse `left ** right`; recursing on the right makes it right-associative."""
        left = self.parse_unary()
        if self.current_is(TokenType.AsteriskAsterisk):
            self.step()
            right = self.parse_exponent()
            return Node(Binary(BinaryType.Exponent, left, right), left.span.to(right.span))
        else:
            return left

    def parse_unary(self) -> Node[Expr]:
        """Parse the prefix operators `not`, `*`, `&` and `&mut`."""
        if self.current_is(TokenType.KwNot):
            token_span = self.current().span
            self.step()
            subject = self.parse_unary()
            return Node(Unary(UnaryType.Not, subject), token_span.to(subject.span))
        elif self.current_is(TokenType.Asterisk):
            token_span = self.current().span
            self.step()
            subject = self.parse_unary()
            return Node(Unary(UnaryType.Dereference, subject), token_span.to(subject.span))
        elif self.current_is(TokenType.Ampersand):
            token_span = self.current().span
            self.step()
            if self.current_is(TokenType.KwMut):
                self.step()
                subject = self.parse_unary()
                return Node(Unary(UnaryType.ReferenceMut, subject), token_span.to(subject.span))
            else:
                subject = self.parse_unary()
                # Plain `&` is an immutable reference (it used to build
                # ReferenceMut as well).
                return Node(Unary(UnaryType.Reference, subject), token_span.to(subject.span))
        else:
            return self.parse_member_index_call()

    def parse_member_index_call(self) -> Node[Expr]:
        """Parse an operand followed by any number of postfix operations.

        Handles `.name`, `.0`, `[index]` and `(arguments)`. The first
        malformed postfix ends parsing with an `ExprError` node.
        """
        subject: Node[Expr] = self.parse_operand()
        while not self.done():
            if self.current_is(TokenType.Dot):
                self.step()
                if self.current_is(TokenType.Id):
                    id_token = self.current()
                    self.step()
                    member_id = id_token.text_slice(self.text)
                    subject = Node(StructMember(subject, member_id), subject.span.to(id_token.span))
                elif self.current_is(TokenType.Int):
                    int_token = self.current()
                    self.step()
                    member_index = int(int_token.text_slice(self.text))
                    subject = Node(TupleMember(subject, member_index), subject.span.to(int_token.span))
                else:
                    return Node(ExprError("expected Int or Id"), subject.span)
            elif self.current_is(TokenType.LBracket):
                self.step()
                index_value = self.parse_expr()
                if not self.current_is(TokenType.RBracket):
                    return Node(ExprError("expected ']'"), subject.span.to(index_value.span))
                rbracket_token_span = self.current().span
                self.step()
                subject = Node(Index(subject, index_value), subject.span.to(rbracket_token_span))
            elif self.current_is(TokenType.LParen):
                self.step()
                arguments: List[Node[Expr]] = []
                # Use current_is() for every check below: comparing the Token
                # itself to a TokenType is never equal, which used to make
                # both `f()` and `f(a, b)` fail with "expected ')'".
                if not self.done() and not self.current_is(TokenType.RParen):
                    arguments.append(self.parse_expr())
                    while self.current_is(TokenType.Comma):
                        self.step()
                        # Allow a trailing comma before the closing paren.
                        if self.done() or self.current_is(TokenType.RParen):
                            break
                        arguments.append(self.parse_expr())
                if not self.current_is(TokenType.RParen):
                    if len(arguments) > 0:
                        end = arguments[-1].span
                    else:
                        end = subject.span
                    return Node(ExprError("expected ')'"), subject.span.to(end))
                end = self.current().span
                self.step()
                subject = Node(Call(subject, arguments), subject.span.to(end))
            else:
                break
        return subject

    def parse_operand(self) -> Node[Expr]:
        """Parse a literal, identifier, block, `if`, `loop`, `while` or `for`.

        Any other token is consumed and reported as an "expected value"
        error, so callers always make progress.
        """
        if self.current_is(TokenType.Id):
            token = self.current()
            id_text = token.text_slice(self.text)
            self.step()
            return Node(Id(id_text), token.span)
        elif self.current_is(TokenType.Int):
            token = self.current()
            int_value = int(token.text_slice(self.text))
            self.step()
            return Node(Int(int_value), token.span)
        elif self.current_is(TokenType.Char):
            token = self.current()
            # The raw lexeme is stored as-is (no unquoting or unescaping).
            char_text = token.text_slice(self.text)
            self.step()
            return Node(Char(char_text), token.span)
        elif self.current_is(TokenType.String):
            token = self.current()
            # The raw lexeme is stored as-is (no unquoting or unescaping).
            string_text = token.text_slice(self.text)
            self.step()
            return Node(String(string_text), token.span)
        elif self.current_is(TokenType.LBrace):
            return self.parse_block()
        elif self.current_is(TokenType.KwIf):
            return self.parse_if()
        elif self.current_is(TokenType.KwLoop):
            return self.parse_loop()
        elif self.current_is(TokenType.KwWhile):
            return self.parse_while()
        elif self.current_is(TokenType.KwFor):
            return self.parse_for()
        else:
            token = self.current()
            self.step()
            return Node(ExprError("expected value"), token.span)

    def parse_block(self) -> Node[Expr]:
        """Parse `{ statement* }`; the current token must be the `{`.

        The last statement becomes the block's value; earlier ones become its
        statements.
        """
        begin = self.current().span
        self.step()
        statements: List[Node[Expr]] = []
        value: Optional[Node[Expr]] = None
        while not self.done() and self.current().token_type != TokenType.RBrace:
            if value is not None:
                statements.append(value)
            value = self.parse_statement()
        if not self.current_is(TokenType.RBrace):
            if value is not None:
                end = value.span
            else:
                end = begin
            return Node(ExprError("expected '}'"), begin.to(end))
        end = self.current().span
        self.step()
        return Node(Block(statements, value), begin.to(end))

    def parse_if(self) -> Node[Expr]:
        """Parse `if condition { ... }` with an optional `else { ... }`."""
        begin = self.current().span
        self.step()
        condition = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(condition.span))
        truthy = self.parse_block()
        if self.current_is(TokenType.KwElse):
            else_token_span = self.current().span
            self.step()
            if not self.current_is(TokenType.LBrace):
                return Node(ExprError("expected '{'"), begin.to(else_token_span))
            falsy = self.parse_block()
            return Node(If(condition, truthy, falsy), begin.to(falsy.span))
        else:
            return Node(If(condition, truthy, None), begin.to(truthy.span))

    def parse_match(self) -> Node[Expr]:
        """Consume the `match` keyword and report that match is unsupported.

        The previous body stopped in the middle of an `if` statement, which
        made the whole module a syntax error. Arms need pattern parsing, which
        is itself a stub (see `parse_pattern`), so this returns an error node
        instead of guessing at the intended grammar.
        """
        begin = self.current().span
        self.step()
        return Node(ExprError("match expressions are not implemented"), begin)

    def parse_match_arm(self) -> Node[MatchArm]:
        """Not implemented; raises instead of silently returning None."""
        raise NotImplementedError("match arms are not implemented")

    def parse_match_arm_statement(self) -> Node[Expr]:
        """Not implemented; raises instead of silently returning None."""
        raise NotImplementedError("match arm statements are not implemented")

    def parse_loop(self) -> Node[Expr]:
        """Parse `loop { ... }`."""
        begin = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin)
        body = self.parse_block()
        return Node(Loop(body), begin.to(body.span))

    def parse_while(self) -> Node[Expr]:
        """Parse `while condition { ... }`."""
        begin = self.current().span
        self.step()
        condition = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(condition.span))
        # Do not step over `{` here: parse_block consumes it itself, as in
        # parse_if and parse_loop. Stepping twice dropped the first body token.
        body = self.parse_block()
        return Node(While(condition, body), begin.to(body.span))

    def parse_for(self) -> Node[Expr]:
        """Parse `for pattern in value { ... }`.

        NOTE(review): `parse_pattern` is a stub that consumes no tokens, so
        any real pattern currently ends in the "expected 'in'" error.
        """
        begin = self.current().span
        self.step()
        subject = self.parse_pattern()
        if not self.current_is(TokenType.KwIn):
            return Node(ExprError("expected 'in'"), begin.to(subject.span))
        self.step()
        value = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(value.span))
        # parse_block consumes the `{` itself (see parse_while).
        body = self.parse_block()
        return Node(For(subject, value, body), begin.to(body.span))

    def parse_pattern(self) -> Node[Pattern]:
        """Stub: return a "not implemented" pattern error without consuming input."""
        return Node(PatternError("not implemented"), self.current().span)

    def step(self) -> None:
        """Advance to the next token."""
        self.current_token = self.tokens.next()

    def current_is(self, token_type: TokenType) -> bool:
        """Return True if input remains and the current token has `token_type`."""
        return not self.done() and self.current().token_type == token_type

    def done(self) -> bool:
        """Return True once the current token is `Eof`."""
        return self.current_token.token_type == TokenType.Eof

    def current(self) -> Token:
        """Return the current (not yet consumed) token."""
        return self.current_token
class TokenType(Enum):
    """Every kind of token the lexer can produce.

    Values come from `auto()`, so they follow declaration order.
    """

    # End of input and lexing errors.
    Eof = auto()
    InvalidChar = auto()
    MalformedComment = auto()
    MalformedChar = auto()
    MalformedString = auto()
    # Identifiers and literals.
    Id = auto()
    Int = auto()
    Char = auto()
    String = auto()
    # Brackets.
    LParen = auto()
    RParen = auto()
    LBrace = auto()
    RBrace = auto()
    LBracket = auto()
    RBracket = auto()
    # Punctuation.
    Underscore = auto()
    Dot = auto()
    DotDot = auto()
    DotDotDot = auto()
    DotDotEqual = auto()
    Comma = auto()
    Colon = auto()
    ColonColon = auto()
    ColonColonLT = auto()
    Semicolon = auto()
    Ampersand = auto()
    # Operators.
    Plus = auto()
    PlusEqual = auto()
    Minus = auto()
    MinusEqual = auto()
    # NOTE: despite the "LT" in the name, the lexer emits this for `->`.
    MinusLT = auto()
    Asterisk = auto()
    AsteriskEqual = auto()
    AsteriskAsterisk = auto()
    Slash = auto()
    SlashEqual = auto()
    Percent = auto()
    PercentEqual = auto()
    Equal = auto()
    EqualEqual = auto()
    # NOTE: despite the "LT" in the name, the lexer emits this for `=>`.
    EqualLT = auto()
    Exclamation = auto()
    ExclamationEqual = auto()
    LT = auto()
    LTEqual = auto()
    GT = auto()
    GTEqual = auto()
    # Keywords.
    KwFalse = auto()
    KwTrue = auto()
    KwNot = auto()
    KwIn = auto()
    KwAnd = auto()
    KwOr = auto()
    KwXor = auto()
    KwLet = auto()
    KwMut = auto()
    KwIf = auto()
    KwMatch = auto()
    KwElse = auto()
    KwLoop = auto()
    KwWhile = auto()
    KwFor = auto()
    KwBreak = auto()
    KwContinue = auto()
    KwFn = auto()
    KwReturn = auto()


class Token(NamedTuple):
    """A lexed token: its type plus where it sits in the source text."""

    token_type: TokenType
    # Offset of the token's first character in the source text.
    index: int
    # Number of characters the token covers, starting at `index`.
    length: int
    # Positions of the token's first and last character.
    span: Span

    def text_slice(self, text: str) -> str:
        """Return this token's lexeme, cut out of `text`.

        `text` must be the source text the token was lexed from.
        """
        return text[self.index:self.index + self.length]


class TokenIterator:
    """Interface for token sources; subclasses must override `next`."""

    def next(self) -> Token:
        """Return the next token. The base implementation always raises."""
        raise NotImplementedError()