parrot/parser.py

349 lines
14 KiB
Python
Raw Normal View History

2023-04-06 03:17:57 +01:00
from typing import Callable, List, Optional, Sequence, Tuple

from parsed import (
    Assign, AssignType, Binary, BinaryType, Block, Call, MatchArm,
    PatternError, Expr, For, Id, If, Index, Int, Char, Loop, Pattern,
    String, ExprError, StructMember, TupleMember, Unary, UnaryType, While,
)
from position import Span, Node
from tokens import Token, TokenType, TokenIterator
class Parser:
    """Recursive-descent parser that turns a token stream into expression nodes.

    Precedence, loosest to tightest: assignment, ``or``, ``and``, equality,
    comparison, additive, multiplicative, unary minus, exponent, prefix
    operators (``not``, ``*``, ``&``), postfix member/index/call, operands.

    Syntax errors are reported in-band: the offending construct is replaced
    by a ``Node`` wrapping ``ExprError`` (or ``PatternError``) instead of an
    exception being raised.
    """

    def __init__(self, text: str, tokens: TokenIterator) -> None:
        """Store the source text and token stream and load the first token.

        Args:
            text: Source text; token text is sliced out of it on demand.
            tokens: Token source; ``next()`` is called once per consumed token.
        """
        self.text = text
        self.tokens = tokens
        self.current_token = tokens.next()

    def parse(self) -> List[Node[Expr]]:
        """Parse statements until the end-of-file token and return them."""
        statements: List[Node[Expr]] = []
        while not self.done():
            statements.append(self.parse_statement())
        return statements

    def parse_statement(self) -> Node[Expr]:
        """Parse one statement: an ``if`` or an (optionally assigning) expression."""
        if self.current_is(TokenType.KwIf):
            return self.parse_if()
        return self.parse_assign()

    def parse_assign(self) -> Node[Expr]:
        """Parse ``expr`` or ``expr = expr``."""
        subject = self.parse_expr()
        if not self.current_is(TokenType.Equal):
            return subject
        self.step()
        value = self.parse_expr()
        return Node(
            Assign(AssignType.Assign, subject, value),
            subject.span.to(value.span),
        )

    def parse_expr(self) -> Node[Expr]:
        """Parse an expression, starting at the loosest binary operator."""
        return self.parse_or()

    def _parse_left_assoc(
        self,
        parse_next: Callable[[], Node[Expr]],
        operators: Sequence[Tuple[TokenType, BinaryType]],
    ) -> Node[Expr]:
        """Parse a left-associative chain of binary operators.

        Args:
            parse_next: Parser for the next-tighter precedence level; it is
                used for every operand of the chain.
            operators: ``(token type, binary type)`` pairs accepted at this
                precedence level.

        Returns:
            The single operand when no operator follows it, otherwise
            ``Binary`` nodes nested to the left.
        """
        left = parse_next()
        while True:
            # current_is() is False at end of input, so the chain stops there.
            binary_type = next(
                (
                    candidate
                    for token_type, candidate in operators
                    if self.current_is(token_type)
                ),
                None,
            )
            if binary_type is None:
                return left
            self.step()
            right = parse_next()
            left = Node(
                Binary(binary_type, left, right),
                left.span.to(right.span),
            )

    def parse_or(self) -> Node[Expr]:
        """Parse a left-associative chain of ``or`` operators."""
        return self._parse_left_assoc(
            self.parse_and,
            ((TokenType.KwOr, BinaryType.Or),),
        )

    def parse_and(self) -> Node[Expr]:
        """Parse a left-associative chain of ``and`` operators."""
        # The original tested KwOr here, which turned `a or b` into an And
        # node before parse_or could ever see the operator.
        # NOTE(review): assumes the `and` keyword token is TokenType.KwAnd,
        # by analogy with KwOr/KwNot - confirm against tokens.py.
        return self._parse_left_assoc(
            self.parse_equal,
            ((TokenType.KwAnd, BinaryType.And),),
        )

    def parse_equal(self) -> Node[Expr]:
        """Parse a left-associative chain of ``==`` / ``!=`` operators."""
        return self._parse_left_assoc(
            self.parse_compare,
            (
                (TokenType.EqualEqual, BinaryType.Equal),
                (TokenType.ExclamationEqual, BinaryType.Inequal),
            ),
        )

    def parse_compare(self) -> Node[Expr]:
        """Parse a left-associative chain of ``<``, ``>``, ``<=``, ``>=``."""
        # The original mapped the GT token to BinaryType.LT.
        # NOTE(review): assumes BinaryType.GT exists alongside LT, LTEqual
        # and GTEqual - confirm against parsed.py.
        return self._parse_left_assoc(
            self.parse_add_subtract,
            (
                (TokenType.LT, BinaryType.LT),
                (TokenType.GT, BinaryType.GT),
                (TokenType.LTEqual, BinaryType.LTEqual),
                (TokenType.GTEqual, BinaryType.GTEqual),
            ),
        )

    def parse_add_subtract(self) -> Node[Expr]:
        """Parse a left-associative chain of ``+`` / ``-`` operators."""
        return self._parse_left_assoc(
            self.parse_multiply_divide_modulo,
            (
                (TokenType.Plus, BinaryType.Add),
                (TokenType.Minus, BinaryType.Subtract),
            ),
        )

    def parse_multiply_divide_modulo(self) -> Node[Expr]:
        """Parse a left-associative chain of ``*``, ``/`` and ``%`` operators."""
        return self._parse_left_assoc(
            self.parse_negate,
            (
                (TokenType.Asterisk, BinaryType.Multiply),
                (TokenType.Slash, BinaryType.Divide),
                (TokenType.Percent, BinaryType.Modulo),
            ),
        )

    def parse_negate(self) -> Node[Expr]:
        """Parse an optional unary minus applied to an exponent expression.

        The minus wraps the whole exponent expression, so ``-a ** b`` is
        parsed as ``-(a ** b)``.
        """
        if not self.current_is(TokenType.Minus):
            return self.parse_exponent()
        token_span = self.current().span
        self.step()
        subject = self.parse_exponent()
        return Node(
            Unary(UnaryType.Negate, subject),
            token_span.to(subject.span),
        )

    def parse_exponent(self) -> Node[Expr]:
        """Parse ``**``; it recurses on the right side, so it is right-associative."""
        left = self.parse_unary()
        if not self.current_is(TokenType.AsteriskAsterisk):
            return left
        self.step()
        right = self.parse_exponent()
        return Node(
            Binary(BinaryType.Exponent, left, right),
            left.span.to(right.span),
        )

    def parse_unary(self) -> Node[Expr]:
        """Parse the prefix operators ``not``, ``*``, ``&`` and ``&mut``."""
        if self.current_is(TokenType.KwNot):
            unary_type = UnaryType.Not
            token_span = self.current().span
            self.step()
        elif self.current_is(TokenType.Asterisk):
            unary_type = UnaryType.Dereference
            token_span = self.current().span
            self.step()
        elif self.current_is(TokenType.Ampersand):
            token_span = self.current().span
            self.step()
            if self.current_is(TokenType.KwMut):
                self.step()
                unary_type = UnaryType.ReferenceMut
            else:
                # The original produced ReferenceMut for a plain `&` too.
                # NOTE(review): assumes the immutable variant is named
                # UnaryType.Reference - confirm against parsed.py.
                unary_type = UnaryType.Reference
        else:
            return self.parse_member_index_call()
        subject = self.parse_unary()
        return Node(Unary(unary_type, subject), token_span.to(subject.span))

    def parse_member_index_call(self) -> Node[Expr]:
        """Parse an operand followed by any number of postfix operations.

        Handles ``.name`` (struct member), ``.0`` (tuple member), ``[expr]``
        (index) and ``(args)`` (call, trailing comma allowed). A malformed
        postfix returns an ``ExprError`` node immediately.
        """
        subject: Node[Expr] = self.parse_operand()
        while not self.done():
            if self.current_is(TokenType.Dot):
                self.step()
                if self.current_is(TokenType.Id):
                    id_token = self.current()
                    self.step()
                    member_name = id_token.text_slice(self.text)
                    subject = Node(
                        StructMember(subject, member_name),
                        subject.span.to(id_token.span),
                    )
                elif self.current_is(TokenType.Int):
                    int_token = self.current()
                    self.step()
                    member_index = int(int_token.text_slice(self.text))
                    subject = Node(
                        TupleMember(subject, member_index),
                        subject.span.to(int_token.span),
                    )
                else:
                    return Node(ExprError("expected Int or Id"), subject.span)
            elif self.current_is(TokenType.LBracket):
                self.step()
                index = self.parse_expr()
                if not self.current_is(TokenType.RBracket):
                    return Node(
                        ExprError("expected ']'"),
                        subject.span.to(index.span),
                    )
                rbracket_span = self.current().span
                self.step()
                subject = Node(
                    Index(subject, index),
                    subject.span.to(rbracket_span),
                )
            elif self.current_is(TokenType.LParen):
                self.step()
                # The original compared the Token object itself to a
                # TokenType, so the comma test never matched and calls with
                # more than one argument failed with "expected ')'".
                arguments: List[Node[Expr]] = []
                if not self.done() and not self.current_is(TokenType.RParen):
                    arguments.append(self.parse_expr())
                    while self.current_is(TokenType.Comma):
                        self.step()
                        # A trailing comma before ')' is accepted.
                        if self.done() or self.current_is(TokenType.RParen):
                            break
                        arguments.append(self.parse_expr())
                if not self.current_is(TokenType.RParen):
                    error_end = arguments[-1].span if arguments else subject.span
                    return Node(
                        ExprError("expected ')'"),
                        subject.span.to(error_end),
                    )
                rparen_span = self.current().span
                self.step()
                subject = Node(
                    Call(subject, arguments),
                    subject.span.to(rparen_span),
                )
            else:
                break
        return subject

    def parse_operand(self) -> Node[Expr]:
        """Parse a literal, identifier, block or control-flow expression.

        Any other token is consumed and reported as ``ExprError``.
        """
        if self.current_is(TokenType.Id):
            token = self.current()
            name = token.text_slice(self.text)
            self.step()
            return Node(Id(name), token.span)
        if self.current_is(TokenType.Int):
            token = self.current()
            number = int(token.text_slice(self.text))
            self.step()
            return Node(Int(number), token.span)
        if self.current_is(TokenType.Char):
            token = self.current()
            char_text = token.text_slice(self.text)
            self.step()
            return Node(Char(char_text), token.span)
        if self.current_is(TokenType.String):
            token = self.current()
            string_text = token.text_slice(self.text)
            self.step()
            return Node(String(string_text), token.span)
        if self.current_is(TokenType.LBrace):
            return self.parse_block()
        if self.current_is(TokenType.KwIf):
            return self.parse_if()
        if self.current_is(TokenType.KwLoop):
            return self.parse_loop()
        if self.current_is(TokenType.KwWhile):
            return self.parse_while()
        if self.current_is(TokenType.KwFor):
            return self.parse_for()
        token = self.current()
        # Consume the unexpected token so the caller's loop makes progress.
        self.step()
        return Node(ExprError("expected value"), token.span)

    def parse_block(self) -> Node[Expr]:
        """Parse ``{ statement* }``; the current token must be the ``{``.

        The last statement becomes the block's value and the preceding ones
        its statement list; an empty block has value ``None``.
        """
        begin = self.current().span
        self.step()
        statements: List[Node[Expr]] = []
        value: Optional[Node[Expr]] = None
        while not self.done() and self.current().token_type != TokenType.RBrace:
            # The previous "last statement" turned out not to be last.
            if value is not None:
                statements.append(value)
            value = self.parse_statement()
        if not self.current_is(TokenType.RBrace):
            error_end = value.span if value is not None else begin
            return Node(ExprError("expected '}'"), begin.to(error_end))
        end = self.current().span
        self.step()
        return Node(Block(statements, value), begin.to(end))

    def parse_if(self) -> Node[Expr]:
        """Parse ``if cond { ... }`` with an optional ``else { ... }``."""
        begin = self.current().span
        self.step()
        condition = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(condition.span))
        truthy = self.parse_block()
        if not self.current_is(TokenType.KwElse):
            return Node(If(condition, truthy, None), begin.to(truthy.span))
        else_token_span = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(else_token_span))
        falsy = self.parse_block()
        return Node(If(condition, truthy, falsy), begin.to(falsy.span))

    def parse_match(self) -> Node[Expr]:
        """Placeholder for ``match`` parsing; always yields an ``ExprError``.

        The original body stopped in the middle of an ``if`` statement, which
        made the whole module a syntax error. Only the keyword is consumed
        and the opening brace checked.
        """
        begin = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin)
        # TODO: parse the arms via parse_match_arm() and build the match node.
        return Node(ExprError("not implemented"), begin)

    def parse_match_arm(self) -> Node[MatchArm]:
        """Placeholder for parsing one ``match`` arm; not implemented yet."""
        raise NotImplementedError("parse_match_arm is not implemented")

    def parse_match_arm_statement(self) -> Node[Expr]:
        """Placeholder for parsing the body of a ``match`` arm; not implemented yet."""
        raise NotImplementedError("parse_match_arm_statement is not implemented")

    def parse_loop(self) -> Node[Expr]:
        """Parse ``loop { ... }``."""
        begin = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin)
        body = self.parse_block()
        return Node(Loop(body), begin.to(body.span))

    def parse_while(self) -> Node[Expr]:
        """Parse ``while cond { ... }``."""
        begin = self.current().span
        self.step()
        condition = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(condition.span))
        # parse_block() consumes the '{' itself. The original stepped past it
        # first, so the block parser swallowed the first body token instead.
        body = self.parse_block()
        return Node(While(condition, body), begin.to(body.span))

    def parse_for(self) -> Node[Expr]:
        """Parse ``for pattern in expr { ... }``."""
        begin = self.current().span
        self.step()
        subject = self.parse_pattern()
        if not self.current_is(TokenType.KwIn):
            return Node(ExprError("expected 'in'"), begin.to(subject.span))
        self.step()
        value = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(value.span))
        # As in parse_while(): leave the '{' for parse_block() to consume.
        body = self.parse_block()
        return Node(For(subject, value, body), begin.to(body.span))

    def parse_pattern(self) -> Node[Pattern]:
        """Placeholder: returns a ``PatternError`` and consumes no tokens."""
        return Node(PatternError("not implemented"), self.current().span)

    def step(self) -> None:
        """Advance to the next token of the stream."""
        self.current_token = self.tokens.next()

    def current_is(self, token_type: TokenType) -> bool:
        """Return True when input remains and the current token has ``token_type``."""
        return not self.done() and self.current().token_type == token_type

    def done(self) -> bool:
        """Return True when the current token is the end-of-file token."""
        return self.current_token.token_type == TokenType.Eof

    def current(self) -> Token:
        """Return the current token without consuming it."""
        return self.current_token