from tokens import Token, TokenType, TokenIterator
from position import Span, Node
from parsed import (
    Assign,
    AssignType,
    Binary,
    BinaryType,
    Block,
    Call,
    MatchArm,
    PatternError,
    Expr,
    For,
    Id,
    If,
    Index,
    Int,
    Char,
    Loop,
    Pattern,
    String,
    ExprError,
    StructMember,
    TupleMember,
    Unary,
    UnaryType,
    While,
)
from typing import Callable, Dict, List, Optional


class Parser:
    """Recursive-descent parser that turns a token stream into expression nodes.

    Syntax problems are reported as ``ExprError`` / ``PatternError`` nodes in
    the returned tree rather than by raising.

    Precedence, loosest to tightest, follows the call chain of the methods:
    assignment, ``or``, ``and``, equality, comparison, additive,
    multiplicative, negation, exponent, prefix unary, postfix
    (member / index / call), operand.
    """

    def __init__(self, text: str, tokens: TokenIterator) -> None:
        """Store the source text and prime the parser with the first token.

        Args:
            text: Source text; tokens slice their lexeme out of it.
            tokens: Iterator producing the tokens to parse.
        """
        self.text = text
        self.tokens = tokens
        self.current_token = tokens.next()

    def parse(self) -> List[Node[Expr]]:
        """Parse statements until the end-of-file token and return them."""
        statements: List[Node[Expr]] = []
        while not self.done():
            statements.append(self.parse_statement())
        return statements

    def parse_statement(self) -> Node[Expr]:
        """Parse one statement.

        An ``if`` at statement start is parsed on its own, so it is not
        extended by trailing operators; everything else goes through
        ``parse_assign``.
        """
        if self.current_is(TokenType.KwIf):
            return self.parse_if()
        return self.parse_assign()

    def parse_assign(self) -> Node[Expr]:
        """Parse ``expr`` or ``expr = expr`` (a single, non-chained assignment)."""
        subject = self.parse_expr()
        if not self.current_is(TokenType.Equal):
            return subject
        self.step()
        value = self.parse_expr()
        return Node(
            Assign(AssignType.Assign, subject, value),
            subject.span.to(value.span),
        )

    def parse_expr(self) -> Node[Expr]:
        """Parse an expression, starting at the loosest-binding operator."""
        return self.parse_or()

    def _parse_left_assoc(
        self,
        parse_operand: Callable[[], Node[Expr]],
        operators: Dict[TokenType, BinaryType],
    ) -> Node[Expr]:
        """Parse one left-associative binary precedence level.

        Args:
            parse_operand: Parser for the next-tighter precedence level.
            operators: Maps each operator token accepted at this level to the
                ``BinaryType`` of the node it builds.

        Returns:
            The operand itself when no operator follows, otherwise a
            left-leaning chain of ``Binary`` nodes.
        """
        left = parse_operand()
        while not self.done():
            binary_type = operators.get(self.current().token_type)
            if binary_type is None:
                break
            self.step()
            right = parse_operand()
            left = Node(
                Binary(binary_type, left, right),
                left.span.to(right.span),
            )
        return left

    def parse_or(self) -> Node[Expr]:
        """Parse a chain of ``or`` operations."""
        return self._parse_left_assoc(
            self.parse_and,
            {TokenType.KwOr: BinaryType.Or},
        )

    def parse_and(self) -> Node[Expr]:
        """Parse a chain of ``and`` operations."""
        # The original looped on KwOr here, so ``and`` was never recognised.
        # NOTE(review): assumes the lexer's token is named TokenType.KwAnd -
        # confirm against tokens.py.
        return self._parse_left_assoc(
            self.parse_equal,
            {TokenType.KwAnd: BinaryType.And},
        )

    def parse_equal(self) -> Node[Expr]:
        """Parse a chain of ``==`` / ``!=`` operations."""
        return self._parse_left_assoc(
            self.parse_compare,
            {
                TokenType.EqualEqual: BinaryType.Equal,
                TokenType.ExclamationEqual: BinaryType.Inequal,
            },
        )

    def parse_compare(self) -> Node[Expr]:
        """Parse a chain of ``<`` / ``>`` / ``<=`` / ``>=`` operations."""
        # The original built BinaryType.LT for the GT token.
        # NOTE(review): assumes BinaryType.GT exists alongside LT, LTEqual
        # and GTEqual - confirm against parsed.py.
        return self._parse_left_assoc(
            self.parse_add_subtract,
            {
                TokenType.LT: BinaryType.LT,
                TokenType.GT: BinaryType.GT,
                TokenType.LTEqual: BinaryType.LTEqual,
                TokenType.GTEqual: BinaryType.GTEqual,
            },
        )

    def parse_add_subtract(self) -> Node[Expr]:
        """Parse a chain of ``+`` / ``-`` operations."""
        return self._parse_left_assoc(
            self.parse_multiply_divide_modulo,
            {
                TokenType.Plus: BinaryType.Add,
                TokenType.Minus: BinaryType.Subtract,
            },
        )

    def parse_multiply_divide_modulo(self) -> Node[Expr]:
        """Parse a chain of ``*`` / ``/`` / ``%`` operations."""
        return self._parse_left_assoc(
            self.parse_negate,
            {
                TokenType.Asterisk: BinaryType.Multiply,
                TokenType.Slash: BinaryType.Divide,
                TokenType.Percent: BinaryType.Modulo,
            },
        )

    def parse_negate(self) -> Node[Expr]:
        """Parse an optional leading ``-`` applied to an exponent expression.

        Negation wraps the whole exponent expression, so ``-a ** b`` negates
        the result of ``a ** b``.
        """
        if not self.current_is(TokenType.Minus):
            return self.parse_exponent()
        token_span = self.current().span
        self.step()
        subject = self.parse_exponent()
        return Node(
            Unary(UnaryType.Negate, subject),
            token_span.to(subject.span),
        )

    def parse_exponent(self) -> Node[Expr]:
        """Parse ``**``; the recursion on the right makes it right-associative."""
        left = self.parse_unary()
        if not self.current_is(TokenType.AsteriskAsterisk):
            return left
        self.step()
        right = self.parse_exponent()
        return Node(
            Binary(BinaryType.Exponent, left, right),
            left.span.to(right.span),
        )

    def parse_unary(self) -> Node[Expr]:
        """Parse the prefix operators ``not``, ``*``, ``&`` and ``&mut``.

        Prefix operators nest, so ``not not x`` and ``**x`` style chains are
        handled by recursion.
        """
        if self.current_is(TokenType.KwNot):
            unary_type = UnaryType.Not
            token_span = self.current().span
            self.step()
        elif self.current_is(TokenType.Asterisk):
            unary_type = UnaryType.Dereference
            token_span = self.current().span
            self.step()
        elif self.current_is(TokenType.Ampersand):
            token_span = self.current().span
            self.step()
            if self.current_is(TokenType.KwMut):
                self.step()
                unary_type = UnaryType.ReferenceMut
            else:
                # The original produced ReferenceMut for a plain `&` as well.
                # NOTE(review): assumes the immutable variant is named
                # UnaryType.Reference - confirm against parsed.py.
                unary_type = UnaryType.Reference
        else:
            return self.parse_member_index_call()
        subject = self.parse_unary()
        return Node(Unary(unary_type, subject), token_span.to(subject.span))

    def parse_member_index_call(self) -> Node[Expr]:
        """Parse an operand followed by any number of postfix operations.

        Handles ``.name`` (struct member), ``.0`` (tuple member), ``[expr]``
        (index) and ``(args)`` (call). A malformed postfix returns an
        ``ExprError`` node immediately, discarding the partial subject.
        """
        subject: Node[Expr] = self.parse_operand()
        while not self.done():
            if self.current_is(TokenType.Dot):
                self.step()
                if self.current_is(TokenType.Id):
                    id_token = self.current()
                    self.step()
                    name = id_token.text_slice(self.text)
                    subject = Node(
                        StructMember(subject, name),
                        subject.span.to(id_token.span),
                    )
                elif self.current_is(TokenType.Int):
                    int_token = self.current()
                    self.step()
                    position = int(int_token.text_slice(self.text))
                    subject = Node(
                        TupleMember(subject, position),
                        subject.span.to(int_token.span),
                    )
                else:
                    return Node(ExprError("expected Int or Id"), subject.span)
            elif self.current_is(TokenType.LBracket):
                self.step()
                index = self.parse_expr()
                if not self.current_is(TokenType.RBracket):
                    return Node(
                        ExprError("expected ']'"),
                        subject.span.to(index.span),
                    )
                rbracket_span = self.current().span
                self.step()
                subject = Node(
                    Index(subject, index),
                    subject.span.to(rbracket_span),
                )
            elif self.current_is(TokenType.LParen):
                self.step()
                arguments = self._parse_call_arguments()
                if not self.current_is(TokenType.RParen):
                    end = arguments[-1].span if arguments else subject.span
                    return Node(
                        ExprError("expected ')'"),
                        subject.span.to(end),
                    )
                end = self.current().span
                self.step()
                subject = Node(
                    Call(subject, arguments),
                    subject.span.to(end),
                )
            else:
                break
        return subject

    def _parse_call_arguments(self) -> List[Node[Expr]]:
        """Parse comma-separated call arguments, stopping before ``)``.

        A trailing comma is accepted. The closing parenthesis is left for the
        caller to check and consume.
        """
        # The original compared the Token object itself to a TokenType, so
        # the `)` and `,` checks never matched; compare token types instead.
        arguments: List[Node[Expr]] = []
        if self.done() or self.current_is(TokenType.RParen):
            return arguments
        arguments.append(self.parse_expr())
        while self.current_is(TokenType.Comma):
            self.step()
            if self.done() or self.current_is(TokenType.RParen):
                break
            arguments.append(self.parse_expr())
        return arguments

    def parse_operand(self) -> Node[Expr]:
        """Parse a literal, identifier, block or control-flow expression.

        Any other token is consumed and reported as an ``ExprError`` so the
        parser always makes progress.
        """
        if self.current_is(TokenType.Id):
            token = self.current()
            self.step()
            return Node(Id(token.text_slice(self.text)), token.span)
        if self.current_is(TokenType.Int):
            token = self.current()
            number = int(token.text_slice(self.text))
            self.step()
            return Node(Int(number), token.span)
        if self.current_is(TokenType.Char):
            token = self.current()
            self.step()
            return Node(Char(token.text_slice(self.text)), token.span)
        if self.current_is(TokenType.String):
            token = self.current()
            self.step()
            return Node(String(token.text_slice(self.text)), token.span)
        if self.current_is(TokenType.LBrace):
            return self.parse_block()
        if self.current_is(TokenType.KwIf):
            return self.parse_if()
        if self.current_is(TokenType.KwLoop):
            return self.parse_loop()
        if self.current_is(TokenType.KwWhile):
            return self.parse_while()
        if self.current_is(TokenType.KwFor):
            return self.parse_for()
        token = self.current()
        self.step()
        return Node(ExprError("expected value"), token.span)

    def parse_block(self) -> Node[Expr]:
        """Parse ``{ statement* }``; the current token must be the ``{``.

        The last statement parsed becomes the block's value and all earlier
        ones become its statements; an empty block has a value of ``None``.
        """
        begin = self.current().span
        self.step()
        statements: List[Node[Expr]] = []
        value: Optional[Node[Expr]] = None
        while not self.done() and self.current().token_type != TokenType.RBrace:
            # The previously parsed statement turned out not to be the last.
            if value is not None:
                statements.append(value)
            value = self.parse_statement()
        if not self.current_is(TokenType.RBrace):
            end = value.span if value is not None else begin
            return Node(ExprError("expected '}'"), begin.to(end))
        end = self.current().span
        self.step()
        return Node(Block(statements, value), begin.to(end))

    def parse_if(self) -> Node[Expr]:
        """Parse ``if cond { ... }`` with an optional ``else { ... }``."""
        begin = self.current().span
        self.step()
        condition = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(condition.span))
        truthy = self.parse_block()
        if not self.current_is(TokenType.KwElse):
            return Node(If(condition, truthy, None), begin.to(truthy.span))
        else_token_span = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(else_token_span))
        falsy = self.parse_block()
        return Node(If(condition, truthy, falsy), begin.to(falsy.span))

    def parse_match(self) -> Node[Expr]:
        """Placeholder for match expressions; arm parsing is not implemented.

        Consumes the current (keyword) token, checks that a ``{`` follows and
        returns an ``ExprError``. The ``{`` is left unconsumed so the caller
        can still parse the following braces as an ordinary block.
        """
        begin = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin)
        # TODO: parse the arms once parse_match_arm is implemented.
        return Node(ExprError("not implemented"), begin)

    def parse_match_arm(self) -> Node[MatchArm]:
        """Parse one match arm. Not implemented yet."""
        raise NotImplementedError("match arms are not implemented")

    def parse_match_arm_statement(self) -> Node[Expr]:
        """Parse the body of a match arm. Not implemented yet."""
        raise NotImplementedError("match arm statements are not implemented")

    def parse_loop(self) -> Node[Expr]:
        """Parse ``loop { ... }``."""
        begin = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin)
        body = self.parse_block()
        return Node(Loop(body), begin.to(body.span))

    def parse_while(self) -> Node[Expr]:
        """Parse ``while cond { ... }``."""
        begin = self.current().span
        self.step()
        condition = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(condition.span))
        # parse_block consumes the `{` itself; stepping here as well (as the
        # original did) would drop the first token of the body.
        body = self.parse_block()
        return Node(While(condition, body), begin.to(body.span))

    def parse_for(self) -> Node[Expr]:
        """Parse ``for pattern in expr { ... }``."""
        begin = self.current().span
        self.step()
        subject = self.parse_pattern()
        if not self.current_is(TokenType.KwIn):
            return Node(ExprError("expected 'in'"), begin.to(subject.span))
        self.step()
        value = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(value.span))
        # parse_block consumes the `{` itself; see parse_while.
        body = self.parse_block()
        return Node(For(subject, value, body), begin.to(body.span))

    def parse_pattern(self) -> Node[Pattern]:
        """Return a ``PatternError`` placeholder without consuming any token."""
        return Node(PatternError("not implemented"), self.current().span)

    def step(self) -> None:
        """Advance to the next token from the iterator."""
        self.current_token = self.tokens.next()

    def current_is(self, token_type: TokenType) -> bool:
        """Return True if input remains and the current token has this type."""
        return not self.done() and self.current().token_type == token_type

    def done(self) -> bool:
        """Return True once the current token is the end-of-file token."""
        return self.current_token.token_type == TokenType.Eof

    def current(self) -> Token:
        """Return the token currently under the cursor."""
        return self.current_token