This commit is contained in:
SimonFJ20 2023-04-06 04:17:57 +02:00
commit b8960f4548
7 changed files with 999 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
__pycache__
.mypy_cache

278
lexer.py Normal file
View File

@ -0,0 +1,278 @@
from tokens import TokenIterator, Token, TokenType
from typing import Optional
from position import Position, Span
class Lexer(TokenIterator):
    """Hand-written lexer that produces tokens from source text on demand.

    Every call to ``next()`` skips whitespace and comments and returns the
    following token. Once the text is exhausted each call returns an ``Eof``
    token. Bad input never raises: it is reported through the ``InvalidChar``
    and ``Malformed*`` token types so the parser can decide what to do.
    """

    _WHITESPACE = " \t\r\n"
    _NONZERO_DIGITS = "123456789"
    _ID_START = "abcdefghijklmnopqrstuvwxyz_"
    _ID_CONTINUE = "1234567890abcdefghijklmnopqrstuvwxyz_"

    # Characters that always form a one-character token on their own.
    _SINGLE_CHAR_TOKENS = {
        "(": TokenType.LParen,
        ")": TokenType.RParen,
        "{": TokenType.LBrace,
        "}": TokenType.RBrace,
        "[": TokenType.LBracket,
        "]": TokenType.RBracket,
        ",": TokenType.Comma,
        ";": TokenType.Semicolon,
        "&": TokenType.Ampersand,
    }

    # Characters whose only two-character form is "<char>=".
    _OPTIONAL_EQUAL_TOKENS = {
        "+": (TokenType.Plus, TokenType.PlusEqual),
        "%": (TokenType.Percent, TokenType.PercentEqual),
        "!": (TokenType.Exclamation, TokenType.ExclamationEqual),
        "<": (TokenType.LT, TokenType.LTEqual),
        ">": (TokenType.GT, TokenType.GTEqual),
    }

    # Reserved words; any other identifier-shaped text is a plain Id.
    _KEYWORDS = {
        "_": TokenType.Underscore,
        "false": TokenType.KwFalse,
        "true": TokenType.KwTrue,
        "not": TokenType.KwNot,
        "in": TokenType.KwIn,
        "and": TokenType.KwAnd,
        "or": TokenType.KwOr,
        "xor": TokenType.KwXor,
        "let": TokenType.KwLet,
        "mut": TokenType.KwMut,
        "if": TokenType.KwIf,
        "else": TokenType.KwElse,
        "while": TokenType.KwWhile,
        "for": TokenType.KwFor,
        "loop": TokenType.KwLoop,
        "break": TokenType.KwBreak,
        "continue": TokenType.KwContinue,
        "fn": TokenType.KwFn,
        "return": TokenType.KwReturn,
        "match": TokenType.KwMatch,
    }

    def __init__(self, text: str) -> None:
        """Start lexing ``text`` at index 0, line 1, column 1."""
        self.text = text
        self.index = 0
        self.line = 1
        self.col = 1

    def next(self) -> Token:
        """Return the next token, or an ``Eof`` token at end of input."""
        if self.done():
            return self.token(TokenType.Eof, self.pos(), self.pos())
        char = self.current()
        if char in self._WHITESPACE:
            while not self.done() and self.current() in self._WHITESPACE:
                self.step()
            return self.next()
        if char in self._SINGLE_CHAR_TOKENS:
            return self.single(self._SINGLE_CHAR_TOKENS[char])
        if char in self._OPTIONAL_EQUAL_TOKENS:
            plain, with_equal = self._OPTIONAL_EQUAL_TOKENS[char]
            return self.single_or_double(plain, "=", with_equal)
        if char == ".":
            return self._lex_dot()
        if char == ":":
            return self._lex_colon()
        if char == "-":
            return self._lex_minus()
        if char == "*":
            return self._lex_asterisk()
        if char == "/":
            return self._lex_slash()
        if char == "=":
            return self._lex_equal()
        if char == "'":
            return self._lex_char()
        if char == "\"":
            return self._lex_string()
        if char == "0":
            # A leading zero is always its own Int token ("012" is 0 then 12).
            return self.single(TokenType.Int)
        if char in self._NONZERO_DIGITS:
            return self._lex_int()
        # Lower-casing lets identifiers start with an upper-case letter, the
        # same way the continuation check already accepts them.
        if char.lower() in self._ID_START:
            return self._lex_id_or_keyword()
        return self.single(TokenType.InvalidChar)

    def _lex_dot(self) -> Token:
        """Lex ``.``, ``..``, ``...`` or ``..=``."""
        start = self.pos()
        self.step()
        if not self.current_is("."):
            return self.token(TokenType.Dot, start, start)
        end = self.pos()
        self.step()
        if self.current_is("."):
            end = self.pos()
            self.step()
            return self.token(TokenType.DotDotDot, start, end)
        if self.current_is("="):
            end = self.pos()
            self.step()
            return self.token(TokenType.DotDotEqual, start, end)
        return self.token(TokenType.DotDot, start, end)

    def _lex_colon(self) -> Token:
        """Lex ``:``, ``::`` or ``::<``."""
        start = self.pos()
        self.step()
        if not self.current_is(":"):
            return self.token(TokenType.Colon, start, start)
        end = self.pos()
        self.step()
        if self.current_is("<"):
            end = self.pos()
            self.step()
            return self.token(TokenType.ColonColonLT, start, end)
        return self.token(TokenType.ColonColon, start, end)

    def _lex_minus(self) -> Token:
        """Lex ``-``, ``-=`` or ``->`` (the latter is named ``MinusLT``)."""
        start = self.pos()
        self.step()
        if self.current_is("="):
            end = self.pos()
            self.step()
            return self.token(TokenType.MinusEqual, start, end)
        if self.current_is(">"):
            end = self.pos()
            self.step()
            return self.token(TokenType.MinusLT, start, end)
        return self.token(TokenType.Minus, start, start)

    def _lex_asterisk(self) -> Token:
        """Lex ``*``, ``*=`` or ``**``."""
        start = self.pos()
        self.step()
        if self.current_is("="):
            end = self.pos()
            self.step()
            return self.token(TokenType.AsteriskEqual, start, end)
        if self.current_is("*"):
            end = self.pos()
            self.step()
            return self.token(TokenType.AsteriskAsterisk, start, end)
        return self.token(TokenType.Asterisk, start, start)

    def _lex_slash(self) -> Token:
        """Lex ``/`` or ``/=``, or skip a ``//`` or ``/* */`` comment."""
        start = self.pos()
        self.step()
        if self.current_is("="):
            end = self.pos()
            self.step()
            return self.token(TokenType.SlashEqual, start, end)
        if self.current_is("/"):
            # Line comment: discard up to (not including) the newline.
            while not self.done() and self.current() != "\n":
                self.step()
            return self.next()
        if self.current_is("*"):
            return self._skip_block_comment(start)
        return self.token(TokenType.Slash, start, start)

    def _skip_block_comment(self, start: Position) -> Token:
        """Skip a nestable block comment whose ``/`` began at ``start``.

        Expects the current character to be the opening ``*``. Returns the
        token following the comment, or ``MalformedComment`` when the input
        ends before every nesting level is closed.
        """
        end = self.pos()
        self.step()
        depth = 1
        last_char: Optional[str] = None
        while not self.done():
            if last_char == "/" and self.current() == "*":
                depth += 1
            elif last_char == "*" and self.current() == "/":
                depth -= 1
                if depth == 0:
                    self.step()
                    break
            last_char = self.current()
            end = self.pos()
            self.step()
        if depth != 0:
            return self.token(TokenType.MalformedComment, start, end)
        return self.next()

    def _lex_equal(self) -> Token:
        """Lex ``=``, ``==`` or ``=>`` (the latter is named ``EqualLT``)."""
        start = self.pos()
        self.step()
        if self.current_is("="):
            end = self.pos()
            self.step()
            return self.token(TokenType.EqualEqual, start, end)
        if self.current_is(">"):
            end = self.pos()
            self.step()
            return self.token(TokenType.EqualLT, start, end)
        return self.token(TokenType.Equal, start, start)

    def _lex_char(self) -> Token:
        """Lex a character literal; a backslash escapes the next character."""
        start = self.pos()
        self.step()
        if self.done():
            # A lone quote at end of input has no character to read.
            return self.token(TokenType.MalformedChar, start, start)
        end = self.pos()
        first = self.current()
        self.step()
        if not self.done() and first == "\\":
            end = self.pos()
            self.step()
        if not self.current_is("'"):
            return self.token(TokenType.MalformedChar, start, end)
        end = self.pos()
        self.step()
        return self.token(TokenType.Char, start, end)

    def _lex_string(self) -> Token:
        """Lex a string literal; a backslash escapes the next character."""
        start = self.pos()
        end = self.pos()
        self.step()
        while not self.done() and self.current() != "\"":
            end = self.pos()
            first = self.current()
            self.step()
            if not self.done() and first == "\\":
                # Consume the escaped character so an escaped quote does not
                # terminate the literal.
                end = self.pos()
                self.step()
        if not self.current_is("\""):
            return self.token(TokenType.MalformedString, start, end)
        end = self.pos()
        self.step()
        return self.token(TokenType.String, start, end)

    def _lex_int(self) -> Token:
        """Lex a decimal integer that starts with a non-zero digit."""
        start = self.pos()
        end = self.pos()
        self.step()
        while not self.done() and self.current() in "1234567890":
            end = self.pos()
            self.step()
        return self.token(TokenType.Int, start, end)

    def _lex_id_or_keyword(self) -> Token:
        """Lex an identifier and classify it as keyword, ``_`` or ``Id``."""
        start = self.pos()
        end = self.pos()
        self.step()
        while (not self.done()
                and self.current().lower() in self._ID_CONTINUE):
            end = self.pos()
            self.step()
        value = self.text[start.index:self.index]
        return self.token(self._KEYWORDS.get(value, TokenType.Id), start, end)

    def single(self, token_type: TokenType) -> Token:
        """Consume one character and return it as a ``token_type`` token."""
        start = self.pos()
        self.step()
        return self.token(token_type, start, start)

    def single_or_double(self, type1: TokenType, char2: str, type2: TokenType) -> Token:
        """Consume one character, plus ``char2`` if it follows.

        Returns a ``type2`` token when ``char2`` was consumed, otherwise a
        ``type1`` token.
        """
        start = self.pos()
        self.step()
        if not self.done() and self.current() == char2:
            end = self.pos()
            self.step()
            return self.token(type2, start, end)
        return self.token(type1, start, start)

    def token(self, token_type: TokenType, start: Position, end: Position) -> Token:
        """Build a token from ``start`` up to the current index.

        The length is measured to the current index, so this must be called
        after the token's last character has been stepped over. ``end`` is the
        position of that last character and is used for the span only.
        """
        return Token(token_type, start.index, self.index - start.index, Span(start, end))

    def pos(self) -> Position:
        """Return the position of the current character."""
        return Position(self.index, self.line, self.col)

    def step(self) -> None:
        """Advance one character, keeping line and column in sync.

        The line/column update is driven by the character being left, so a
        newline belongs to the line it ends and the character after it is at
        column 1 of the next line.
        """
        if not self.done():
            if self.current() == "\n":
                self.line += 1
                self.col = 1
            else:
                self.col += 1
        self.index += 1

    def current_is(self, value: str) -> bool:
        """Return True when input remains and the current character is ``value``."""
        return not self.done() and self.current() == value

    def done(self) -> bool:
        """Return True once every character has been consumed."""
        return self.index >= len(self.text)

    def current(self) -> str:
        """Return the current character; callers must check ``done()`` first."""
        return self.text[self.index]

14
main.py Normal file
View File

@ -0,0 +1,14 @@
from lexer import Lexer
from parser import Parser
def main() -> None:
    """Parse a hard-coded sample string literal and print the result."""
    source = "\"\\\"hello\\\\\""
    parser = Parser(source, Lexer(source))
    print(parser.parse_expr())


if __name__ == "__main__":
    main()

240
parsed.py Normal file
View File

@ -0,0 +1,240 @@
from enum import Enum, auto
from position import Node
from typing import Optional, List
class Pattern:
    """Base class for pattern nodes."""

    def __str__(self) -> str:
        # No generic rendering exists; this raises unless a subclass overrides it.
        raise NotImplementedError
class PatternError(Pattern):
    """Placeholder pattern recording why a pattern could not be parsed."""

    def __init__(self, message: str) -> None:
        super().__init__()
        self.message = message

    def __str__(self) -> str:
        # Without this override the base class raises, which makes any node
        # that embeds a pattern error impossible to print.
        return f"PatternError({self.message})"
class Expr:
    """Base class for expression nodes."""

    def __str__(self) -> str:
        # No generic rendering exists; this raises unless a subclass overrides it.
        raise NotImplementedError
class ExprError(Expr):
    """Placeholder expression recording why parsing failed at this point."""

    def __init__(self, message: str) -> None:
        super().__init__()
        self.message = message

    def __str__(self) -> str:
        return "ErrorExpr({})".format(self.message)
class Id(Expr):
    """Identifier expression holding the identifier's text."""

    def __init__(self, value: str) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return "Id({})".format(self.value)
class Int(Expr):
    """Integer literal expression."""

    def __init__(self, value: int) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return "Int({})".format(self.value)
class Char(Expr):
    """Character literal expression."""

    def __init__(self, value: str) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return "Char('{}')".format(self.value)
class String(Expr):
    """String literal expression."""

    def __init__(self, value: str) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return 'String("{}")'.format(self.value)
class Bool(Expr):
    """Boolean literal expression."""

    def __init__(self, value: bool) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        # Rendered with lower-case keywords rather than Python's True/False.
        if self.value:
            keyword = "true"
        else:
            keyword = "false"
        return "Bool({})".format(keyword)
class Unit(Expr):
    """Expression node that carries no data and renders as ``Unit``."""

    # The original constructor only delegated to the base class, so the
    # inherited one behaves the same.

    def __str__(self) -> str:
        return "Unit"
class Block(Expr):
    """Block expression: a list of statements plus an optional final value."""

    def __init__(self, statements: List[Node[Expr]], value: Optional[Node[Expr]]) -> None:
        super().__init__()
        self.statements = statements
        self.value = value

    def __str__(self) -> str:
        rendered = ", ".join(str(statement) for statement in self.statements)
        return "Block {{ statements: [{}], value: {} }}".format(rendered, self.value)
class If(Expr):
    """Conditional expression with a mandatory truthy and optional falsy branch."""

    def __init__(self, condition: Node[Expr], truthy: Node[Expr], falsy: Optional[Node[Expr]]) -> None:
        super().__init__()
        self.condition = condition
        self.truthy = truthy
        self.falsy = falsy

    def __str__(self) -> str:
        return "If {{ condition: {}, truthy: {}, falsy: {} }}".format(
            self.condition, self.truthy, self.falsy)
class MatchArm:
    """One arm of a match expression: a pattern and the expression it selects."""

    def __init__(self, pattern: Node[Pattern], expr: Node[Expr]) -> None:
        self.pattern = pattern
        self.expr = expr

    def __str__(self) -> str:
        # Gives arms a readable form like the other node types.
        return f"MatchArm {{ pattern: {self.pattern}, expr: {self.expr} }}"
class Match(Expr):
    """Match expression: the matched value and its list of arms."""

    def __init__(self, value: Node[Expr], arms: List[Node[MatchArm]]) -> None:
        super().__init__()
        self.value = value
        self.arms = arms

    def __str__(self) -> str:
        # Without this override the base class raises when the node is printed.
        arms = ", ".join(str(arm) for arm in self.arms)
        return f"Match {{ value: {self.value}, arms: [{arms}] }}"
class Loop(Expr):
    """Loop expression with a body and no condition."""

    def __init__(self, body: Node[Expr]) -> None:
        super().__init__()
        self.body = body

    def __str__(self) -> str:
        return "Loop {{ body: {} }}".format(self.body)
class While(Expr):
    """While expression: a condition and a body."""

    def __init__(self, condition: Node[Expr], body: Node[Expr]) -> None:
        super().__init__()
        self.condition = condition
        self.body = body

    def __str__(self) -> str:
        return "While {{ condition: {}, body: {} }}".format(self.condition, self.body)
class For(Expr):
    """For expression: a subject pattern, the value iterated over, and a body."""

    def __init__(self, subject: Node[Pattern], value: Node[Expr], body: Node[Expr]) -> None:
        super().__init__()
        self.subject = subject
        self.value = value
        self.body = body

    def __str__(self) -> str:
        return "For {{ subject: {}, value: {}, body: {} }}".format(
            self.subject, self.value, self.body)
class StructMember(Expr):
    """Member access by name on a subject expression (``subject.member_id``)."""

    def __init__(self, subject: Node[Expr], member_id: str) -> None:
        super().__init__()
        self.subject = subject
        self.member_id = member_id

    def __str__(self) -> str:
        return "StructMember {{ subject: {}, member_id: {} }}".format(
            self.subject, self.member_id)
class TupleMember(Expr):
    """Member access by integer index on a subject expression (``subject.0``)."""

    def __init__(self, subject: Node[Expr], member_index: int) -> None:
        super().__init__()
        self.subject = subject
        self.member_index = member_index

    def __str__(self) -> str:
        # The label previously read "StructMember", making the two member
        # kinds indistinguishable in printed output.
        return f"TupleMember {{ subject: {self.subject}, member_index: {self.member_index} }}"
class Index(Expr):
    """Index expression (``subject[value]``)."""

    def __init__(self, subject: Node[Expr], value: Node[Expr]) -> None:
        super().__init__()
        self.subject = subject
        self.value = value

    def __str__(self) -> str:
        return "Index {{ subject: {}, value: {} }}".format(self.subject, self.value)
class Call(Expr):
    """Call expression: the called subject and its argument list."""

    def __init__(self, subject: Node[Expr], arguments: List[Node[Expr]]) -> None:
        super().__init__()
        self.subject = subject
        self.arguments = arguments

    def __str__(self) -> str:
        # The label previously read "Index". The argument list is bracketed,
        # as Block does for its statements, so it stays readable when empty.
        arguments = ", ".join(str(argument) for argument in self.arguments)
        return f"Call {{ subject: {self.subject}, arguments: [{arguments}] }}"
class UnaryType(Enum):
    """Kinds of prefix operation a Unary expression can represent."""

    Not = auto()
    Negate = auto()
    Reference = auto()
    ReferenceMut = auto()
    Dereference = auto()
class Unary(Expr):
    """Prefix operation applied to a single subject expression."""

    def __init__(self, unary_type: UnaryType, subject: Node[Expr]) -> None:
        super().__init__()
        self.unary_type = unary_type
        self.subject = subject

    def __str__(self) -> str:
        return "Unary {{ unary_type: {}, subject: {} }}".format(
            self.unary_type, self.subject)
class BinaryType(Enum):
    """Kinds of infix operation a Binary expression can represent."""

    # Logical
    And = auto()
    Or = auto()
    # Arithmetic
    Add = auto()
    Subtract = auto()
    Multiply = auto()
    Divide = auto()
    Modulo = auto()
    Exponent = auto()
    # Equality and ordering
    Equal = auto()
    Inequal = auto()
    LT = auto()
    GT = auto()
    LTEqual = auto()
    GTEqual = auto()
    # Membership
    In = auto()
class Binary(Expr):
    """Infix operation applied to a left and a right operand."""

    def __init__(self, binary_type: BinaryType, left: Node[Expr], right: Node[Expr]) -> None:
        super().__init__()
        self.binary_type = binary_type
        self.left = left
        self.right = right

    def __str__(self) -> str:
        return "Binary {{ binary_type: {}, left: {}, right: {} }}".format(
            self.binary_type, self.left, self.right)
class AssignType(Enum):
    """Kinds of assignment an Assign expression can represent."""

    Assign = auto()
    Add = auto()
    Subtract = auto()
    Multiply = auto()
    Divide = auto()
    Modulo = auto()
class Assign(Expr):
    """Assignment of a value expression to a subject expression."""

    def __init__(self, assign_type: AssignType, subject: Node[Expr], value: Node[Expr]) -> None:
        super().__init__()
        self.assign_type = assign_type
        self.subject = subject
        self.value = value

    def __str__(self) -> str:
        return "Assign {{ assign_type: {}, subject: {}, value: {} }}".format(
            self.assign_type, self.subject, self.value)

348
parser.py Normal file
View File

@ -0,0 +1,348 @@
from tokens import Token, TokenType, TokenIterator
from position import Span, Node
from parsed import Assign, AssignType, Binary, BinaryType, Block, Call, MatchArm, PatternError, Expr, For, Id, If, Index, Int, Char, Loop, Pattern, String, ExprError, StructMember, TupleMember, Unary, UnaryType, While
from typing import List, Optional
class Parser:
    """Recursive-descent parser that builds expression nodes from tokens.

    Binary operators are parsed by one method per precedence level, from
    loosest to tightest: ``or``, ``and``, ``==``/``!=``, comparisons,
    ``+``/``-``, ``*``/``/``/``%``, unary minus, ``**``, prefix
    ``not``/``*``/``&``, then postfix member/index/call, then operands.

    Syntax errors are not raised. They are returned in the tree as
    ``ExprError`` nodes whose span covers what was parsed so far.
    """

    def __init__(self, text: str, tokens: TokenIterator) -> None:
        """Create a parser over ``tokens``, which must have been lexed from ``text``.

        ``text`` is needed because tokens store only offsets; identifier and
        literal contents are sliced out of it. The first token is fetched
        immediately.
        """
        self.text = text
        self.tokens = tokens
        self.current_token = tokens.next()

    def parse(self) -> List[Node[Expr]]:
        """Parse statements until end of input and return them in order."""
        statements: List[Node[Expr]] = []
        while not self.done():
            statements.append(self.parse_statement())
        return statements

    def parse_statement(self) -> Node[Expr]:
        """Parse an ``if`` or an (optionally assigning) expression."""
        if self.current_is(TokenType.KwIf):
            return self.parse_if()
        return self.parse_assign()

    def parse_assign(self) -> Node[Expr]:
        """Parse an expression, optionally followed by ``= value``."""
        subject = self.parse_expr()
        if not self.current_is(TokenType.Equal):
            return subject
        self.step()
        value = self.parse_expr()
        return Node(Assign(AssignType.Assign, subject, value), subject.span.to(value.span))

    def parse_expr(self) -> Node[Expr]:
        """Parse an expression starting at the loosest precedence level."""
        return self.parse_or()

    def parse_or(self) -> Node[Expr]:
        """Parse a left-associative chain of ``or`` operations."""
        left = self.parse_and()
        while self.current_is(TokenType.KwOr):
            self.step()
            right = self.parse_and()
            left = Node(Binary(BinaryType.Or, left, right), left.span.to(right.span))
        return left

    def parse_and(self) -> Node[Expr]:
        """Parse a left-associative chain of ``and`` operations."""
        left = self.parse_equal()
        # This must test for ``and``; testing for ``or`` here would consume the
        # ``or`` chain and label it as And.
        while self.current_is(TokenType.KwAnd):
            self.step()
            right = self.parse_equal()
            left = Node(Binary(BinaryType.And, left, right), left.span.to(right.span))
        return left

    def parse_equal(self) -> Node[Expr]:
        """Parse a left-associative chain of ``==`` / ``!=`` operations."""
        left = self.parse_compare()
        while not self.done():
            if self.current_is(TokenType.EqualEqual):
                binary_type = BinaryType.Equal
            elif self.current_is(TokenType.ExclamationEqual):
                binary_type = BinaryType.Inequal
            else:
                break
            self.step()
            right = self.parse_compare()
            left = Node(Binary(binary_type, left, right), left.span.to(right.span))
        return left

    def parse_compare(self) -> Node[Expr]:
        """Parse a left-associative chain of ``<``, ``>``, ``<=``, ``>=``."""
        left = self.parse_add_subtract()
        while not self.done():
            if self.current_is(TokenType.LT):
                binary_type = BinaryType.LT
            elif self.current_is(TokenType.GT):
                binary_type = BinaryType.GT
            elif self.current_is(TokenType.LTEqual):
                binary_type = BinaryType.LTEqual
            elif self.current_is(TokenType.GTEqual):
                binary_type = BinaryType.GTEqual
            else:
                break
            self.step()
            right = self.parse_add_subtract()
            left = Node(Binary(binary_type, left, right), left.span.to(right.span))
        return left

    def parse_add_subtract(self) -> Node[Expr]:
        """Parse a left-associative chain of ``+`` / ``-`` operations."""
        left = self.parse_multiply_divide_modulo()
        while not self.done():
            if self.current_is(TokenType.Plus):
                binary_type = BinaryType.Add
            elif self.current_is(TokenType.Minus):
                binary_type = BinaryType.Subtract
            else:
                break
            self.step()
            right = self.parse_multiply_divide_modulo()
            left = Node(Binary(binary_type, left, right), left.span.to(right.span))
        return left

    def parse_multiply_divide_modulo(self) -> Node[Expr]:
        """Parse a left-associative chain of ``*``, ``/``, ``%`` operations."""
        left = self.parse_negate()
        while not self.done():
            if self.current_is(TokenType.Asterisk):
                binary_type = BinaryType.Multiply
            elif self.current_is(TokenType.Slash):
                binary_type = BinaryType.Divide
            elif self.current_is(TokenType.Percent):
                binary_type = BinaryType.Modulo
            else:
                break
            self.step()
            right = self.parse_negate()
            left = Node(Binary(binary_type, left, right), left.span.to(right.span))
        return left

    def parse_negate(self) -> Node[Expr]:
        """Parse an optional unary minus applied to an exponent expression."""
        if not self.current_is(TokenType.Minus):
            return self.parse_exponent()
        token_span = self.current().span
        self.step()
        subject = self.parse_exponent()
        return Node(Unary(UnaryType.Negate, subject), token_span.to(subject.span))

    def parse_exponent(self) -> Node[Expr]:
        """Parse ``**``, which is right-associative via recursion."""
        left = self.parse_unary()
        if not self.current_is(TokenType.AsteriskAsterisk):
            return left
        self.step()
        right = self.parse_exponent()
        return Node(Binary(BinaryType.Exponent, left, right), left.span.to(right.span))

    def parse_unary(self) -> Node[Expr]:
        """Parse prefix ``not``, ``*`` (dereference), ``&`` and ``&mut``."""
        if self.current_is(TokenType.KwNot):
            unary_type = UnaryType.Not
        elif self.current_is(TokenType.Asterisk):
            unary_type = UnaryType.Dereference
        elif self.current_is(TokenType.Ampersand):
            unary_type = UnaryType.Reference
        else:
            return self.parse_member_index_call()
        token_span = self.current().span
        self.step()
        if unary_type == UnaryType.Reference and self.current_is(TokenType.KwMut):
            # ``&mut x``: only this form is a mutable reference.
            self.step()
            unary_type = UnaryType.ReferenceMut
        subject = self.parse_unary()
        return Node(Unary(unary_type, subject), token_span.to(subject.span))

    def parse_member_index_call(self) -> Node[Expr]:
        """Parse an operand followed by any number of postfix operations.

        Postfix operations are ``.name``, ``.0``, ``[expr]`` and ``(args)``;
        each wraps the subject parsed so far.
        """
        subject: Node[Expr] = self.parse_operand()
        while not self.done():
            if self.current_is(TokenType.Dot):
                self.step()
                if self.current_is(TokenType.Id):
                    id_token = self.current()
                    self.step()
                    member_id = id_token.text_slice(self.text)
                    subject = Node(StructMember(subject, member_id), subject.span.to(id_token.span))
                elif self.current_is(TokenType.Int):
                    int_token = self.current()
                    self.step()
                    member_index = int(int_token.text_slice(self.text))
                    subject = Node(TupleMember(subject, member_index), subject.span.to(int_token.span))
                else:
                    return Node(ExprError("expected Int or Id"), subject.span)
            elif self.current_is(TokenType.LBracket):
                self.step()
                index_value = self.parse_expr()
                if not self.current_is(TokenType.RBracket):
                    return Node(ExprError("expected ']'"), subject.span.to(index_value.span))
                rbracket_token_span = self.current().span
                self.step()
                subject = Node(Index(subject, index_value), subject.span.to(rbracket_token_span))
            elif self.current_is(TokenType.LParen):
                self.step()
                arguments: List[Node[Expr]] = []
                # Compare token *types*: a Token never equals a TokenType, so
                # comparing the token itself would treat ")" as an argument
                # and never see a comma.
                if not self.done() and not self.current_is(TokenType.RParen):
                    arguments.append(self.parse_expr())
                    while self.current_is(TokenType.Comma):
                        self.step()
                        # Allow a trailing comma before the closing paren.
                        if self.done() or self.current_is(TokenType.RParen):
                            break
                        arguments.append(self.parse_expr())
                if not self.current_is(TokenType.RParen):
                    if arguments:
                        end = arguments[-1].span
                    else:
                        end = subject.span
                    return Node(ExprError("expected ')'"), subject.span.to(end))
                end = self.current().span
                self.step()
                subject = Node(Call(subject, arguments), subject.span.to(end))
            else:
                break
        return subject

    def parse_operand(self) -> Node[Expr]:
        """Parse a literal, identifier, block or control-flow expression.

        Any other token is consumed and reported as an "expected value" error
        so the parser always makes progress.
        """
        if self.current_is(TokenType.Id):
            token = self.current()
            self.step()
            return Node(Id(token.text_slice(self.text)), token.span)
        if self.current_is(TokenType.Int):
            token = self.current()
            self.step()
            return Node(Int(int(token.text_slice(self.text))), token.span)
        if self.current_is(TokenType.Char):
            token = self.current()
            self.step()
            # The slice is the raw token text, including its quotes.
            return Node(Char(token.text_slice(self.text)), token.span)
        if self.current_is(TokenType.String):
            token = self.current()
            self.step()
            # The slice is the raw token text, including its quotes.
            return Node(String(token.text_slice(self.text)), token.span)
        if self.current_is(TokenType.LBrace):
            return self.parse_block()
        if self.current_is(TokenType.KwIf):
            return self.parse_if()
        if self.current_is(TokenType.KwLoop):
            return self.parse_loop()
        if self.current_is(TokenType.KwWhile):
            return self.parse_while()
        if self.current_is(TokenType.KwFor):
            return self.parse_for()
        token = self.current()
        self.step()
        return Node(ExprError("expected value"), token.span)

    def parse_block(self) -> Node[Expr]:
        """Parse ``{ ... }``; the current token must be the opening brace.

        This method consumes the opening brace itself, so callers must not
        step over it first. The last statement parsed becomes the block's
        value; all earlier ones become its statements.
        """
        begin = self.current().span
        self.step()
        statements: List[Node[Expr]] = []
        value: Optional[Node[Expr]] = None
        while not self.done() and not self.current_is(TokenType.RBrace):
            if value is not None:
                statements.append(value)
            value = self.parse_statement()
        if not self.current_is(TokenType.RBrace):
            end = value.span if value is not None else begin
            return Node(ExprError("expected '}'"), begin.to(end))
        end = self.current().span
        self.step()
        return Node(Block(statements, value), begin.to(end))

    def parse_if(self) -> Node[Expr]:
        """Parse ``if cond { ... }`` with an optional ``else { ... }``."""
        begin = self.current().span
        self.step()
        condition = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(condition.span))
        truthy = self.parse_block()
        if not self.current_is(TokenType.KwElse):
            return Node(If(condition, truthy, None), begin.to(truthy.span))
        else_token_span = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(else_token_span))
        falsy = self.parse_block()
        return Node(If(condition, truthy, falsy), begin.to(falsy.span))

    def parse_match(self) -> Node[Expr]:
        """Parse a ``match`` expression (not implemented yet).

        Consumes the keyword and the opening brace, then reports an error
        node, because arm parsing does not exist yet.
        """
        begin = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin)
        self.step()
        # NOTE(review): the original body stopped mid-statement here. Replace
        # this with real arm parsing once parse_match_arm exists.
        return Node(ExprError("match expressions are not implemented"), begin)

    def parse_match_arm(self) -> Node[MatchArm]:
        """Parse one match arm (not implemented yet)."""
        raise NotImplementedError("match arms are not implemented")

    def parse_match_arm_statement(self) -> Node[Expr]:
        """Parse the body of a match arm (not implemented yet)."""
        raise NotImplementedError("match arm statements are not implemented")

    def parse_loop(self) -> Node[Expr]:
        """Parse ``loop { ... }``."""
        begin = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin)
        body = self.parse_block()
        return Node(Loop(body), begin.to(body.span))

    def parse_while(self) -> Node[Expr]:
        """Parse ``while cond { ... }``."""
        begin = self.current().span
        self.step()
        condition = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(condition.span))
        # parse_block consumes the "{" itself; stepping here would skip the
        # first token of the body.
        body = self.parse_block()
        return Node(While(condition, body), begin.to(body.span))

    def parse_for(self) -> Node[Expr]:
        """Parse ``for pattern in value { ... }``."""
        begin = self.current().span
        self.step()
        subject = self.parse_pattern()
        if not self.current_is(TokenType.KwIn):
            return Node(ExprError("expected 'in'"), begin.to(subject.span))
        self.step()
        value = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(value.span))
        # parse_block consumes the "{" itself; stepping here would skip the
        # first token of the body.
        body = self.parse_block()
        return Node(For(subject, value, body), begin.to(body.span))

    def parse_pattern(self) -> Node[Pattern]:
        """Return a "not implemented" pattern error without consuming input."""
        return Node(PatternError("not implemented"), self.current().span)

    def step(self) -> None:
        """Advance to the next token."""
        self.current_token = self.tokens.next()

    def current_is(self, token_type: TokenType) -> bool:
        """Return True when input remains and the current token has ``token_type``."""
        return not self.done() and self.current().token_type == token_type

    def done(self) -> bool:
        """Return True once the current token is ``Eof``."""
        return self.current_token.token_type == TokenType.Eof

    def current(self) -> Token:
        """Return the current token without consuming it."""
        return self.current_token

28
position.py Normal file
View File

@ -0,0 +1,28 @@
from __future__ import annotations
from typing import NamedTuple, TypeVar, Generic
class Position(NamedTuple):
    """A location in the source text."""

    # Offset into the text; the lexer starts counting at 0.
    index: int
    # Line number; the lexer starts counting at 1.
    line: int
    # Column number; the lexer starts counting at 1.
    col: int
class Span(NamedTuple):
    """A region of source text delimited by two positions."""

    begin: Position
    end: Position

    def to(self, end: Span) -> Span:
        """Return a span from this span's begin to the end of ``end``."""
        return self._replace(end=end.end)
T = TypeVar("T")


class Node(Generic[T]):
    """Pairs a parsed value with the source span it was parsed from."""

    def __init__(self, value: T, span: Span) -> None:
        super().__init__()
        self.span = span
        self.value = value

    def __str__(self) -> str:
        # A node renders as its value; the span is not shown.
        return str(self.value)

87
tokens.py Normal file
View File

@ -0,0 +1,87 @@
from enum import Enum, auto
from typing import NamedTuple
from position import Span
class TokenType(Enum):
    """Every kind of token the lexer can hand to the parser."""

    # End of input and error tokens
    Eof = auto()
    InvalidChar = auto()
    MalformedComment = auto()
    MalformedChar = auto()
    MalformedString = auto()
    # Identifiers and literals
    Id = auto()
    Int = auto()
    Char = auto()
    String = auto()
    # Brackets
    LParen = auto()
    RParen = auto()
    LBrace = auto()
    RBrace = auto()
    LBracket = auto()
    RBracket = auto()
    # Punctuation
    Underscore = auto()
    Dot = auto()
    DotDot = auto()
    DotDotDot = auto()
    DotDotEqual = auto()
    Comma = auto()
    Colon = auto()
    ColonColon = auto()
    ColonColonLT = auto()
    Semicolon = auto()
    Ampersand = auto()
    # Arithmetic operators and their assigning forms
    Plus = auto()
    PlusEqual = auto()
    Minus = auto()
    MinusEqual = auto()
    MinusLT = auto()  # the lexer emits this for "->"
    Asterisk = auto()
    AsteriskEqual = auto()
    AsteriskAsterisk = auto()
    Slash = auto()
    SlashEqual = auto()
    Percent = auto()
    PercentEqual = auto()
    # Assignment, equality and comparison
    Equal = auto()
    EqualEqual = auto()
    EqualLT = auto()  # the lexer emits this for "=>"
    Exclamation = auto()
    ExclamationEqual = auto()
    LT = auto()
    LTEqual = auto()
    GT = auto()
    GTEqual = auto()
    # Keywords
    KwFalse = auto()
    KwTrue = auto()
    KwNot = auto()
    KwIn = auto()
    KwAnd = auto()
    KwOr = auto()
    KwXor = auto()
    KwLet = auto()
    KwMut = auto()
    KwIf = auto()
    KwMatch = auto()
    KwElse = auto()
    KwLoop = auto()
    KwWhile = auto()
    KwFor = auto()
    KwBreak = auto()
    KwContinue = auto()
    KwFn = auto()
    KwReturn = auto()
class Token(NamedTuple):
    """A lexed token: its type, where its text sits, and its source span."""

    token_type: TokenType
    index: int
    length: int
    span: Span

    def text_slice(self, text: str) -> str:
        """Return this token's characters, sliced out of ``text``."""
        first = self.index
        past_last = first + self.length
        return text[first:past_last]
class TokenIterator:
    """Interface for anything that yields tokens one at a time."""

    def next(self) -> Token:
        """Return the next token; implementations must override this."""
        raise NotImplementedError