init
This commit is contained in:
commit
b8960f4548
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
__pycache__
|
||||
.mypy_cache
|
||||
|
278
lexer.py
Normal file
278
lexer.py
Normal file
@ -0,0 +1,278 @@
|
||||
from tokens import TokenIterator, Token, TokenType
|
||||
from typing import Optional
|
||||
from position import Position, Span
|
||||
|
||||
|
||||
class Lexer(TokenIterator):
    """Scanner that hands out one `Token` per `next()` call from a source string.

    Whitespace, `//` line comments and nestable `/* ... */` block comments are
    skipped.  Lexical problems are reported as `InvalidChar`,
    `MalformedComment`, `MalformedChar` or `MalformedString` tokens rather
    than exceptions, and once the text is exhausted every call yields `Eof`.

    Token spans are inclusive: `span.end` is the position of the token's last
    character.
    """

    _WHITESPACE = " \t\r\n"
    _DIGITS = "0123456789"
    # Spelled out (instead of using str.lower()) so only ASCII letters qualify.
    _ID_START = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_"
    _ID_CONTINUE = _ID_START + _DIGITS

    # Characters that always form a complete token on their own.
    _SINGLE_CHAR_TOKENS = {
        "(": TokenType.LParen,
        ")": TokenType.RParen,
        "{": TokenType.LBrace,
        "}": TokenType.RBrace,
        "[": TokenType.LBracket,
        "]": TokenType.RBracket,
        ",": TokenType.Comma,
        ";": TokenType.Semicolon,
        "&": TokenType.Ampersand,
    }

    # Characters whose only two-character form is "<char>=".
    _OPTIONAL_EQUAL_TOKENS = {
        "+": (TokenType.Plus, TokenType.PlusEqual),
        "%": (TokenType.Percent, TokenType.PercentEqual),
        "!": (TokenType.Exclamation, TokenType.ExclamationEqual),
        "<": (TokenType.LT, TokenType.LTEqual),
        ">": (TokenType.GT, TokenType.GTEqual),
    }

    # Words that are not plain identifiers.
    _KEYWORDS = {
        "_": TokenType.Underscore,
        "false": TokenType.KwFalse,
        "true": TokenType.KwTrue,
        "not": TokenType.KwNot,
        "in": TokenType.KwIn,
        "and": TokenType.KwAnd,
        "or": TokenType.KwOr,
        "xor": TokenType.KwXor,
        "let": TokenType.KwLet,
        "mut": TokenType.KwMut,
        "if": TokenType.KwIf,
        "else": TokenType.KwElse,
        "while": TokenType.KwWhile,
        "for": TokenType.KwFor,
        "loop": TokenType.KwLoop,
        "break": TokenType.KwBreak,
        "continue": TokenType.KwContinue,
        "fn": TokenType.KwFn,
        "return": TokenType.KwReturn,
        "match": TokenType.KwMatch,
    }

    def __init__(self, text: str) -> None:
        """Start scanning `text` at index 0, line 1, column 1."""
        self.text = text
        self.index = 0
        self.line = 1
        self.col = 1

    def next(self) -> Token:
        """Return the next significant token, or `Eof` when the text is used up.

        Written as a loop (not recursion) so that long runs of comments cannot
        exhaust Python's recursion limit.
        """
        while True:
            while not self.done() and self.current() in self._WHITESPACE:
                self.step()
            if self.done():
                return self.token(TokenType.Eof, self.pos(), self.pos())
            char = self.current()
            if char != "/":
                return self._lex_token(char)
            lexed = self._lex_slash()
            if lexed is not None:
                return lexed
            # A comment was skipped: look for the next token.

    def _lex_token(self, char: str) -> Token:
        """Lex the token starting at `char` (the current, non-`/` character)."""
        single_type = self._SINGLE_CHAR_TOKENS.get(char)
        if single_type is not None:
            return self.single(single_type)
        pair = self._OPTIONAL_EQUAL_TOKENS.get(char)
        if pair is not None:
            return self.single_or_double(pair[0], "=", pair[1])
        if char == ".":
            return self._lex_dot()
        if char == ":":
            return self._lex_colon()
        if char == "-":
            return self._lex_minus()
        if char == "*":
            return self._lex_asterisk()
        if char == "=":
            return self._lex_equal()
        if char == "'":
            return self._lex_char()
        if char == "\"":
            return self._lex_string()
        if char == "0":
            # A leading zero is always an Int token of its own.
            return self.single(TokenType.Int)
        if char in self._DIGITS:
            return self._lex_int()
        if char in self._ID_START:
            return self._lex_word()
        return self.single(TokenType.InvalidChar)

    def _finish(self, token_type: TokenType, start: Position) -> Token:
        """Consume the current character as the last one of a token begun at `start`."""
        end = self.pos()
        self.step()
        return self.token(token_type, start, end)

    def _lex_dot(self) -> Token:
        """Lex `.`, `..`, `...` or `..=`."""
        start = self.pos()
        self.step()
        if not self.current_is("."):
            return self.token(TokenType.Dot, start, start)
        end = self.pos()
        self.step()
        if self.current_is("."):
            return self._finish(TokenType.DotDotDot, start)
        if self.current_is("="):
            return self._finish(TokenType.DotDotEqual, start)
        return self.token(TokenType.DotDot, start, end)

    def _lex_colon(self) -> Token:
        """Lex `:`, `::` or `::<`."""
        start = self.pos()
        self.step()
        if not self.current_is(":"):
            # Fixed: a lone ':' used to be reported as Comma.
            return self.token(TokenType.Colon, start, start)
        end = self.pos()
        self.step()
        if self.current_is("<"):
            return self._finish(TokenType.ColonColonLT, start)
        return self.token(TokenType.ColonColon, start, end)

    def _lex_minus(self) -> Token:
        """Lex `-`, `-=` or `->` (the latter is named `MinusLT` in `TokenType`)."""
        start = self.pos()
        self.step()
        if self.current_is("="):
            return self._finish(TokenType.MinusEqual, start)
        if self.current_is(">"):
            return self._finish(TokenType.MinusLT, start)
        return self.token(TokenType.Minus, start, start)

    def _lex_asterisk(self) -> Token:
        """Lex `*`, `*=` or `**`."""
        start = self.pos()
        self.step()
        if self.current_is("="):
            return self._finish(TokenType.AsteriskEqual, start)
        if self.current_is("*"):
            # Previously never produced, which left the exponent operator unreachable.
            return self._finish(TokenType.AsteriskAsterisk, start)
        return self.token(TokenType.Asterisk, start, start)

    def _lex_equal(self) -> Token:
        """Lex `=`, `==` or `=>` (the latter is named `EqualLT` in `TokenType`)."""
        start = self.pos()
        self.step()
        if self.current_is("="):
            return self._finish(TokenType.EqualEqual, start)
        if self.current_is(">"):
            return self._finish(TokenType.EqualLT, start)
        return self.token(TokenType.Equal, start, start)

    def _lex_slash(self) -> Optional[Token]:
        """Lex `/` or `/=`, or skip a comment.

        Returns None when a comment was skipped, and a `MalformedComment`
        token when a block comment is still open at the end of the text.
        """
        start = self.pos()
        self.step()
        if self.current_is("="):
            # Fixed: '/=' used to be reported as AsteriskEqual.
            return self._finish(TokenType.SlashEqual, start)
        if self.current_is("/"):
            # Line comment: runs up to (not including) the next newline.
            while not self.done() and self.current() != "\n":
                self.step()
            return None
        if self.current_is("*"):
            return self._skip_block_comment(start)
        return self.token(TokenType.Slash, start, start)

    def _skip_block_comment(self, start: Position) -> Optional[Token]:
        """Skip a block comment whose `/` was at `start`; the current char is its `*`."""
        end = self.pos()
        self.step()
        depth = 1
        last_char: Optional[str] = None
        while not self.done():
            char = self.current()
            if last_char == "/" and char == "*":
                depth += 1
                # Forget the delimiter so its '*' cannot also start a '*/'.
                last_char = None
            elif last_char == "*" and char == "/":
                depth -= 1
                if depth == 0:
                    self.step()
                    return None
                last_char = None
            else:
                last_char = char
            end = self.pos()
            self.step()
        return self.token(TokenType.MalformedComment, start, end)

    def _lex_char(self) -> Token:
        """Lex a character literal: a quote, one (optionally escaped) char, a quote."""
        start = self.pos()
        self.step()
        if self.done():
            # A lone quote at the end of the text used to raise IndexError.
            return self.token(TokenType.MalformedChar, start, start)
        end = self.pos()
        first = self.current()
        self.step()
        if first == "\\" and not self.done():
            # Backslash escape: the following character belongs to the literal.
            end = self.pos()
            self.step()
        if not self.current_is("'"):
            return self.token(TokenType.MalformedChar, start, end)
        return self._finish(TokenType.Char, start)

    def _lex_string(self) -> Token:
        """Lex a double-quoted string; a backslash escapes the next character."""
        start = self.pos()
        end = start
        self.step()
        while not self.done() and self.current() != "\"":
            end = self.pos()
            first = self.current()
            self.step()
            if first == "\\" and not self.done():
                end = self.pos()
                self.step()
        if not self.current_is("\""):
            return self.token(TokenType.MalformedString, start, end)
        return self._finish(TokenType.String, start)

    def _lex_int(self) -> Token:
        """Lex a run of decimal digits that starts with a non-zero digit."""
        start = self.pos()
        end = start
        self.step()
        while not self.done() and self.current() in self._DIGITS:
            end = self.pos()
            self.step()
        return self.token(TokenType.Int, start, end)

    def _lex_word(self) -> Token:
        """Lex an identifier, keyword or the `_` token."""
        start = self.pos()
        end = start
        self.step()
        while not self.done() and self.current() in self._ID_CONTINUE:
            end = self.pos()
            self.step()
        word = self.text[start.index:self.index]
        return self.token(self._KEYWORDS.get(word, TokenType.Id), start, end)

    def single(self, token_type: TokenType) -> Token:
        """Consume exactly one character and return it as a `token_type` token."""
        start = self.pos()
        self.step()
        return self.token(token_type, start, start)

    def single_or_double(self, type1: TokenType, char2: str, type2: TokenType) -> Token:
        """Consume one character, plus `char2` if it follows.

        Returns a `type2` token when `char2` was consumed, else a `type1` token.
        """
        start = self.pos()
        self.step()
        if self.current_is(char2):
            return self._finish(type2, start)
        return self.token(type1, start, start)

    def token(self, token_type: TokenType, start: Position, end: Position) -> Token:
        """Build a token covering the text from `start` up to the current index."""
        return Token(token_type, start.index, self.index - start.index, Span(start, end))

    def pos(self) -> Position:
        """Return the current index/line/column as a `Position`."""
        return Position(self.index, self.line, self.col)

    def step(self) -> None:
        """Advance one character, updating line and column.

        The line counter moves when a newline is consumed (not when it is
        reached), so a newline sits at the end of its own line and the first
        character after it is at column 1.
        """
        if not self.done():
            if self.current() == "\n":
                self.line += 1
                self.col = 1
            else:
                self.col += 1
        self.index += 1

    def current_is(self, value: str) -> bool:
        """Return True when there is a current character and it equals `value`."""
        return not self.done() and self.current() == value

    def done(self) -> bool:
        """Return True once the index has moved past the end of the text."""
        return self.index >= len(self.text)

    def current(self) -> str:
        """Return the current character; raises IndexError when `done()`."""
        return self.text[self.index]
|
14
main.py
Normal file
14
main.py
Normal file
@ -0,0 +1,14 @@
|
||||
from lexer import Lexer
|
||||
from parser import Parser
|
||||
|
||||
|
||||
def main() -> None:
    """Parse a hard-coded string-literal expression and print the result."""
    source = "\"\\\"hello\\\\\""
    parser = Parser(source, Lexer(source))
    print(parser.parse_expr())


if __name__ == "__main__":
    main()
|
240
parsed.py
Normal file
240
parsed.py
Normal file
@ -0,0 +1,240 @@
|
||||
from enum import Enum, auto
|
||||
from position import Node
|
||||
from typing import Optional, List
|
||||
|
||||
class Pattern:
    """Base class for pattern nodes; subclasses are expected to override `__str__`."""

    def __str__(self) -> str:
        raise NotImplementedError
|
||||
|
||||
class PatternError(Pattern):
    """Placeholder pattern recording why a pattern could not be parsed."""

    def __init__(self, message: str) -> None:
        super().__init__()
        self.message = message

    def __str__(self) -> str:
        # Without this override the base class raises NotImplementedError,
        # which made any tree containing a PatternError unprintable.
        return f"ErrorPattern({self.message})"
|
||||
|
||||
class Expr:
    """Base class for expression nodes; subclasses are expected to override `__str__`."""

    def __str__(self) -> str:
        raise NotImplementedError
|
||||
|
||||
|
||||
class ExprError(Expr):
    """Placeholder expression recording why an expression could not be parsed."""

    def __init__(self, message: str) -> None:
        super().__init__()
        self.message = message

    def __str__(self) -> str:
        return "ErrorExpr({})".format(self.message)
|
||||
|
||||
class Id(Expr):
    """Identifier expression; `value` is the identifier's text."""

    def __init__(self, value: str) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return "Id({})".format(self.value)
|
||||
|
||||
class Int(Expr):
    """Integer literal expression."""

    def __init__(self, value: int) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return "Int({})".format(self.value)
|
||||
|
||||
|
||||
class Char(Expr):
    """Character literal expression; `value` is stored as given by the caller."""

    def __init__(self, value: str) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return "Char('{}')".format(self.value)
|
||||
|
||||
|
||||
class String(Expr):
    """String literal expression; `value` is stored as given by the caller."""

    def __init__(self, value: str) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        return 'String("{}")'.format(self.value)
|
||||
|
||||
|
||||
class Bool(Expr):
    """Boolean literal expression."""

    def __init__(self, value: bool) -> None:
        super().__init__()
        self.value = value

    def __str__(self) -> str:
        # Printed with the lowercase spellings rather than Python's True/False.
        if self.value:
            return "Bool(true)"
        return "Bool(false)"
|
||||
|
||||
|
||||
class Unit(Expr):
    """Unit expression; it carries no data."""

    def __init__(self) -> None:
        super().__init__()

    def __str__(self) -> str:
        return "Unit"
|
||||
|
||||
|
||||
class Block(Expr):
    """Block expression: a list of statements plus an optional final value."""

    def __init__(self, statements: List[Node[Expr]], value: Optional[Node[Expr]]) -> None:
        super().__init__()
        self.statements = statements
        self.value = value

    def __str__(self) -> str:
        rendered = ", ".join(map(str, self.statements))
        return "Block {{ statements: [{}], value: {} }}".format(rendered, self.value)
|
||||
|
||||
|
||||
class If(Expr):
    """Conditional expression; `falsy` is None when there is no else branch."""

    def __init__(self, condition: Node[Expr], truthy: Node[Expr], falsy: Optional[Node[Expr]]) -> None:
        super().__init__()
        self.condition = condition
        self.truthy = truthy
        self.falsy = falsy

    def __str__(self) -> str:
        return "If {{ condition: {}, truthy: {}, falsy: {} }}".format(
            self.condition, self.truthy, self.falsy
        )
|
||||
|
||||
class MatchArm:
    """One arm of a match expression: a pattern and the expression it selects."""

    def __init__(self, pattern: Node[Pattern], expr: Node[Expr]) -> None:
        self.pattern = pattern
        self.expr = expr

    def __str__(self) -> str:
        # Added so arms print in the same "Name { field: value }" form as the
        # expression nodes instead of the default object repr.
        return f"MatchArm {{ pattern: {self.pattern}, expr: {self.expr} }}"
|
||||
|
||||
class Match(Expr):
    """Match expression: the matched value and its list of arms."""

    def __init__(self, value: Node[Expr], arms: List[Node[MatchArm]]) -> None:
        super().__init__()
        self.value = value
        self.arms = arms

    def __str__(self) -> str:
        # Without this override the base class raises NotImplementedError,
        # which made any tree containing a Match unprintable.
        arms = ", ".join(str(arm) for arm in self.arms)
        return f"Match {{ value: {self.value}, arms: [{arms}] }}"
|
||||
|
||||
class Loop(Expr):
    """Unconditional loop expression wrapping a body."""

    def __init__(self, body: Node[Expr]) -> None:
        super().__init__()
        self.body = body

    def __str__(self) -> str:
        return "Loop {{ body: {} }}".format(self.body)
|
||||
|
||||
class While(Expr):
    """While-loop expression: a condition and a body."""

    def __init__(self, condition: Node[Expr], body: Node[Expr]) -> None:
        super().__init__()
        self.condition = condition
        self.body = body

    def __str__(self) -> str:
        return "While {{ condition: {}, body: {} }}".format(self.condition, self.body)
|
||||
|
||||
class For(Expr):
    """For-loop expression: a subject pattern, the iterated value and a body."""

    def __init__(self, subject: Node[Pattern], value: Node[Expr], body: Node[Expr]) -> None:
        super().__init__()
        self.subject = subject
        self.value = value
        self.body = body

    def __str__(self) -> str:
        return "For {{ subject: {}, value: {}, body: {} }}".format(
            self.subject, self.value, self.body
        )
|
||||
|
||||
class StructMember(Expr):
    """Member access by name (`subject.member_id`)."""

    def __init__(self, subject: Node[Expr], member_id: str) -> None:
        super().__init__()
        self.subject = subject
        self.member_id = member_id

    def __str__(self) -> str:
        return "StructMember {{ subject: {}, member_id: {} }}".format(
            self.subject, self.member_id
        )
|
||||
|
||||
class TupleMember(Expr):
    """Member access by position (`subject.member_index`)."""

    def __init__(self, subject: Node[Expr], member_index: int) -> None:
        super().__init__()
        self.subject = subject
        self.member_index = member_index

    def __str__(self) -> str:
        # Fixed: the label used to read "StructMember", which made tuple and
        # struct accesses hard to tell apart in printed trees.
        return f"TupleMember {{ subject: {self.subject}, member_index: {self.member_index} }}"
|
||||
|
||||
class Index(Expr):
    """Indexing expression (`subject[value]`)."""

    def __init__(self, subject: Node[Expr], value: Node[Expr]) -> None:
        super().__init__()
        self.subject = subject
        self.value = value

    def __str__(self) -> str:
        return "Index {{ subject: {}, value: {} }}".format(self.subject, self.value)
|
||||
|
||||
class Call(Expr):
    """Call expression: the called subject and its argument list."""

    def __init__(self, subject: Node[Expr], arguments: List[Node[Expr]]) -> None:
        super().__init__()
        self.subject = subject
        self.arguments = arguments

    def __str__(self) -> str:
        arguments = ", ".join(str(node) for node in self.arguments)
        # Fixed: the label used to read "Index", which made calls print
        # like indexing expressions.
        return f"Call {{ subject: {self.subject}, arguments: {arguments} }}"
|
||||
|
||||
class UnaryType(Enum):
    """Operator kinds a `Unary` expression can carry."""

    Not = auto()
    Negate = auto()
    Reference = auto()
    ReferenceMut = auto()
    Dereference = auto()
|
||||
|
||||
|
||||
class Unary(Expr):
    """Prefix-operator expression: an operator kind applied to one subject."""

    def __init__(self, unary_type: UnaryType, subject: Node[Expr]) -> None:
        super().__init__()
        self.unary_type = unary_type
        self.subject = subject

    def __str__(self) -> str:
        return "Unary {{ unary_type: {}, subject: {} }}".format(self.unary_type, self.subject)
|
||||
|
||||
|
||||
class BinaryType(Enum):
    """Operator kinds a `Binary` expression can carry."""

    # Logical
    And = auto()
    Or = auto()

    # Arithmetic
    Add = auto()
    Subtract = auto()
    Multiply = auto()
    Divide = auto()
    Modulo = auto()
    Exponent = auto()

    # Equality and ordering
    Equal = auto()
    Inequal = auto()
    LT = auto()
    GT = auto()
    LTEqual = auto()
    GTEqual = auto()

    # Membership
    In = auto()
|
||||
|
||||
|
||||
class Binary(Expr):
    """Infix-operator expression: an operator kind with left and right operands."""

    def __init__(self, binary_type: BinaryType, left: Node[Expr], right: Node[Expr]) -> None:
        super().__init__()
        self.binary_type = binary_type
        self.left = left
        self.right = right

    def __str__(self) -> str:
        return "Binary {{ binary_type: {}, left: {}, right: {} }}".format(
            self.binary_type, self.left, self.right
        )
|
||||
|
||||
|
||||
class AssignType(Enum):
    """Assignment kinds an `Assign` expression can carry."""

    Assign = auto()
    Add = auto()
    Subtract = auto()
    Multiply = auto()
    Divide = auto()
    Modulo = auto()
|
||||
|
||||
|
||||
class Assign(Expr):
    """Assignment expression: an assignment kind, the target subject and the value."""

    def __init__(self, assign_type: AssignType, subject: Node[Expr], value: Node[Expr]) -> None:
        super().__init__()
        self.assign_type = assign_type
        self.subject = subject
        self.value = value

    def __str__(self) -> str:
        return "Assign {{ assign_type: {}, subject: {}, value: {} }}".format(
            self.assign_type, self.subject, self.value
        )
|
348
parser.py
Normal file
348
parser.py
Normal file
@ -0,0 +1,348 @@
|
||||
from tokens import Token, TokenType, TokenIterator
|
||||
from position import Span, Node
|
||||
from parsed import Assign, AssignType, Binary, BinaryType, Block, Call, MatchArm, PatternError, Expr, For, Id, If, Index, Int, Char, Loop, Pattern, String, ExprError, StructMember, TupleMember, Unary, UnaryType, While
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class Parser:
    """Recursive-descent parser that turns a token stream into `Node[Expr]` trees.

    Syntax errors do not raise; they are returned as `ExprError` nodes.

    Binary/unary precedence, loosest first (each level calls the next):
    `or`, `and`, `==`/`!=`, `<`/`>`/`<=`/`>=`, `+`/`-`, `*`/`/`/`%`,
    prefix `-`, `**` (right-associative), prefix `not`/`*`/`&`/`&mut`,
    then postfix member access, indexing and calls.
    """

    def __init__(self, text: str, tokens: TokenIterator) -> None:
        """Keep the source `text` (for token slices) and prime the first token."""
        self.text = text
        self.tokens = tokens
        self.current_token = tokens.next()

    def parse(self) -> List[Node[Expr]]:
        """Parse statements until the end of input and return them in order."""
        statements: List[Node[Expr]] = []
        while not self.done():
            statements.append(self.parse_statement())
        return statements

    def parse_statement(self) -> Node[Expr]:
        """Parse an `if` expression or an assignment/expression statement."""
        if self.current_is(TokenType.KwIf):
            return self.parse_if()
        return self.parse_assign()

    def parse_assign(self) -> Node[Expr]:
        """Parse `expr` or `expr = expr`."""
        subject = self.parse_expr()
        if not self.current_is(TokenType.Equal):
            return subject
        self.step()
        value = self.parse_expr()
        return Node(Assign(AssignType.Assign, subject, value), subject.span.to(value.span))

    def parse_expr(self) -> Node[Expr]:
        """Parse a full expression (entry point of the precedence chain)."""
        return self.parse_or()

    def _binary(self, binary_type: BinaryType, left: Node[Expr], right: Node[Expr]) -> Node[Expr]:
        """Wrap two operands in a `Binary` node spanning from `left` to `right`."""
        return Node(Binary(binary_type, left, right), left.span.to(right.span))

    def _unary(self, unary_type: UnaryType, operator_span: Span, subject: Node[Expr]) -> Node[Expr]:
        """Wrap `subject` in a `Unary` node spanning from the operator to the subject."""
        return Node(Unary(unary_type, subject), operator_span.to(subject.span))

    def parse_or(self) -> Node[Expr]:
        """Parse a left-associative chain of `or`."""
        left = self.parse_and()
        while self.current_is(TokenType.KwOr):
            self.step()
            left = self._binary(BinaryType.Or, left, self.parse_and())
        return left

    def parse_and(self) -> Node[Expr]:
        """Parse a left-associative chain of `and`."""
        left = self.parse_equal()
        # Fixed: this loop used to test KwOr, so `and` was never parsed.
        while self.current_is(TokenType.KwAnd):
            self.step()
            left = self._binary(BinaryType.And, left, self.parse_equal())
        return left

    def parse_equal(self) -> Node[Expr]:
        """Parse a left-associative chain of `==` / `!=`."""
        left = self.parse_compare()
        while True:
            if self.current_is(TokenType.EqualEqual):
                binary_type = BinaryType.Equal
            elif self.current_is(TokenType.ExclamationEqual):
                binary_type = BinaryType.Inequal
            else:
                return left
            self.step()
            left = self._binary(binary_type, left, self.parse_compare())

    def parse_compare(self) -> Node[Expr]:
        """Parse a left-associative chain of `<`, `>`, `<=`, `>=`."""
        left = self.parse_add_subtract()
        while True:
            if self.current_is(TokenType.LT):
                binary_type = BinaryType.LT
            elif self.current_is(TokenType.GT):
                # Fixed: `>` used to build a BinaryType.LT node.
                binary_type = BinaryType.GT
            elif self.current_is(TokenType.LTEqual):
                binary_type = BinaryType.LTEqual
            elif self.current_is(TokenType.GTEqual):
                binary_type = BinaryType.GTEqual
            else:
                return left
            self.step()
            left = self._binary(binary_type, left, self.parse_add_subtract())

    def parse_add_subtract(self) -> Node[Expr]:
        """Parse a left-associative chain of `+` / `-`."""
        left = self.parse_multiply_divide_modulo()
        while True:
            if self.current_is(TokenType.Plus):
                binary_type = BinaryType.Add
            elif self.current_is(TokenType.Minus):
                binary_type = BinaryType.Subtract
            else:
                return left
            self.step()
            left = self._binary(binary_type, left, self.parse_multiply_divide_modulo())

    def parse_multiply_divide_modulo(self) -> Node[Expr]:
        """Parse a left-associative chain of `*`, `/`, `%`."""
        left = self.parse_negate()
        while True:
            if self.current_is(TokenType.Asterisk):
                binary_type = BinaryType.Multiply
            elif self.current_is(TokenType.Slash):
                binary_type = BinaryType.Divide
            elif self.current_is(TokenType.Percent):
                binary_type = BinaryType.Modulo
            else:
                return left
            self.step()
            left = self._binary(binary_type, left, self.parse_negate())

    def parse_negate(self) -> Node[Expr]:
        """Parse an optional prefix `-` applied to an exponent expression."""
        if not self.current_is(TokenType.Minus):
            return self.parse_exponent()
        operator_span = self.current().span
        self.step()
        return self._unary(UnaryType.Negate, operator_span, self.parse_exponent())

    def parse_exponent(self) -> Node[Expr]:
        """Parse `a ** b`; recursing on the right makes it right-associative."""
        left = self.parse_unary()
        if not self.current_is(TokenType.AsteriskAsterisk):
            return left
        self.step()
        return self._binary(BinaryType.Exponent, left, self.parse_exponent())

    def parse_unary(self) -> Node[Expr]:
        """Parse prefix `not`, `*`, `&` and `&mut`, which may be stacked."""
        if self.current_is(TokenType.KwNot):
            unary_type = UnaryType.Not
        elif self.current_is(TokenType.Asterisk):
            unary_type = UnaryType.Dereference
        elif self.current_is(TokenType.Ampersand):
            unary_type = UnaryType.Reference
        else:
            return self.parse_member_index_call()
        operator_span = self.current().span
        self.step()
        if unary_type == UnaryType.Reference and self.current_is(TokenType.KwMut):
            # Fixed: only `&mut` is a mutable reference; plain `&` used to
            # produce ReferenceMut as well.
            self.step()
            unary_type = UnaryType.ReferenceMut
        return self._unary(unary_type, operator_span, self.parse_unary())

    def parse_member_index_call(self) -> Node[Expr]:
        """Parse an operand followed by any number of `.member`, `[index]`, `(args)`."""
        subject: Node[Expr] = self.parse_operand()
        while True:
            if self.current_is(TokenType.Dot):
                self.step()
                member_token = self.current()
                if self.current_is(TokenType.Id):
                    self.step()
                    member_id = member_token.text_slice(self.text)
                    subject = Node(StructMember(subject, member_id), subject.span.to(member_token.span))
                elif self.current_is(TokenType.Int):
                    self.step()
                    member_index = int(member_token.text_slice(self.text))
                    subject = Node(TupleMember(subject, member_index), subject.span.to(member_token.span))
                else:
                    return Node(ExprError("expected Int or Id"), subject.span)
            elif self.current_is(TokenType.LBracket):
                self.step()
                index_value = self.parse_expr()
                if not self.current_is(TokenType.RBracket):
                    return Node(ExprError("expected ']'"), subject.span.to(index_value.span))
                closing_span = self.current().span
                self.step()
                subject = Node(Index(subject, index_value), subject.span.to(closing_span))
            elif self.current_is(TokenType.LParen):
                self.step()
                arguments = self._parse_call_arguments()
                if not self.current_is(TokenType.RParen):
                    last_span = arguments[-1].span if arguments else subject.span
                    return Node(ExprError("expected ')'"), subject.span.to(last_span))
                closing_span = self.current().span
                self.step()
                subject = Node(Call(subject, arguments), subject.span.to(closing_span))
            else:
                return subject

    def _parse_call_arguments(self) -> List[Node[Expr]]:
        """Parse comma-separated arguments up to (not including) the closing `)`.

        A trailing comma is accepted.  The token type is checked through
        `current_is`; the earlier code compared the `Token` itself with a
        `TokenType`, which is never equal, so `f()` and multi-argument calls
        could not be parsed.
        """
        arguments: List[Node[Expr]] = []
        if self.done() or self.current_is(TokenType.RParen):
            return arguments
        arguments.append(self.parse_expr())
        while self.current_is(TokenType.Comma):
            self.step()
            if self.done() or self.current_is(TokenType.RParen):
                break
            arguments.append(self.parse_expr())
        return arguments

    def parse_operand(self) -> Node[Expr]:
        """Parse a literal, identifier, block, or `if`/`loop`/`while`/`for` expression.

        Any other token is consumed and reported as an `ExprError`.
        """
        token = self.current()
        kind = token.token_type
        if kind == TokenType.Id:
            self.step()
            return Node(Id(token.text_slice(self.text)), token.span)
        if kind == TokenType.Int:
            self.step()
            return Node(Int(int(token.text_slice(self.text))), token.span)
        if kind == TokenType.Char:
            self.step()
            return Node(Char(token.text_slice(self.text)), token.span)
        if kind == TokenType.String:
            self.step()
            return Node(String(token.text_slice(self.text)), token.span)
        if kind == TokenType.LBrace:
            return self.parse_block()
        if kind == TokenType.KwIf:
            return self.parse_if()
        if kind == TokenType.KwLoop:
            return self.parse_loop()
        if kind == TokenType.KwWhile:
            return self.parse_while()
        if kind == TokenType.KwFor:
            return self.parse_for()
        self.step()
        return Node(ExprError("expected value"), token.span)

    def parse_block(self) -> Node[Expr]:
        """Parse `{ statement* }`; the current token must be the opening `{`.

        The last statement parsed becomes the block's `value`; the ones
        before it become `statements`.
        """
        begin = self.current().span
        self.step()
        statements: List[Node[Expr]] = []
        value: Optional[Node[Expr]] = None
        while not self.done() and not self.current_is(TokenType.RBrace):
            if value is not None:
                statements.append(value)
            value = self.parse_statement()
        if not self.current_is(TokenType.RBrace):
            last_span = value.span if value is not None else begin
            return Node(ExprError("expected '}'"), begin.to(last_span))
        closing_span = self.current().span
        self.step()
        return Node(Block(statements, value), begin.to(closing_span))

    def parse_if(self) -> Node[Expr]:
        """Parse `if cond { ... }` with an optional `else { ... }`."""
        begin = self.current().span
        self.step()
        condition = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(condition.span))
        truthy = self.parse_block()
        if not self.current_is(TokenType.KwElse):
            return Node(If(condition, truthy, None), begin.to(truthy.span))
        else_span = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(else_span))
        falsy = self.parse_block()
        return Node(If(condition, truthy, falsy), begin.to(falsy.span))

    def parse_match(self) -> Node[Expr]:
        """Parse the start of a `match` expression.

        Arm parsing is not implemented: the original body stopped in the
        middle of an `if` statement (a syntax error).  After the opening `{`
        this now returns an `ExprError`, in the same way `parse_pattern` does.
        """
        begin = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin)
        self.step()
        return Node(ExprError("not implemented"), begin)

    def parse_match_arm(self) -> Node[MatchArm]:
        """Not implemented yet; raises instead of silently returning None."""
        raise NotImplementedError()

    def parse_match_arm_statement(self) -> Node[Expr]:
        """Not implemented yet; raises instead of silently returning None."""
        raise NotImplementedError()

    def parse_loop(self) -> Node[Expr]:
        """Parse `loop { ... }`."""
        begin = self.current().span
        self.step()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin)
        body = self.parse_block()
        return Node(Loop(body), begin.to(body.span))

    def parse_while(self) -> Node[Expr]:
        """Parse `while cond { ... }`."""
        begin = self.current().span
        self.step()
        condition = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(condition.span))
        # parse_block consumes the `{` itself; the extra step() that used to
        # sit here swallowed it and made the block lose its first token.
        body = self.parse_block()
        return Node(While(condition, body), begin.to(body.span))

    def parse_for(self) -> Node[Expr]:
        """Parse `for pattern in expr { ... }`."""
        begin = self.current().span
        self.step()
        subject = self.parse_pattern()
        if not self.current_is(TokenType.KwIn):
            return Node(ExprError("expected 'in'"), begin.to(subject.span))
        self.step()
        value = self.parse_expr()
        if not self.current_is(TokenType.LBrace):
            return Node(ExprError("expected '{'"), begin.to(value.span))
        # As in parse_while: parse_block consumes the `{` itself.
        body = self.parse_block()
        return Node(For(subject, value, body), begin.to(body.span))

    def parse_pattern(self) -> Node[Pattern]:
        """Return a `PatternError` placeholder without consuming any token."""
        return Node(PatternError("not implemented"), self.current().span)

    def step(self) -> None:
        """Advance to the next token from the iterator."""
        self.current_token = self.tokens.next()

    def current_is(self, token_type: TokenType) -> bool:
        """Return True when not at end of input and the current token has `token_type`."""
        return not self.done() and self.current().token_type == token_type

    def done(self) -> bool:
        """Return True when the current token is `Eof`."""
        return self.current_token.token_type == TokenType.Eof

    def current(self) -> Token:
        """Return the current (not yet consumed) token."""
        return self.current_token
|
28
position.py
Normal file
28
position.py
Normal file
@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
from typing import NamedTuple, TypeVar, Generic
|
||||
|
||||
|
||||
class Position(NamedTuple):
    """A location in the source text."""

    index: int  # offset into the text
    line: int  # line number
    col: int  # column number
|
||||
|
||||
class Span(NamedTuple):
    """A range of source text delimited by two positions."""

    begin: Position
    end: Position

    def to(self, end: Span) -> Span:
        """Return a span from this span's beginning to the end of `end`."""
        return Span(begin=self.begin, end=end.end)
|
||||
|
||||
|
||||
T = TypeVar("T")


class Node(Generic[T]):
    """Pairs a value with the span of source text it belongs to."""

    def __init__(self, value: T, span: Span) -> None:
        super().__init__()
        self.value = value
        self.span = span

    def __str__(self) -> str:
        # Printing a node shows only the wrapped value; the span is omitted.
        return str(self.value)
|
87
tokens.py
Normal file
87
tokens.py
Normal file
@ -0,0 +1,87 @@
|
||||
from enum import Enum, auto
|
||||
from typing import NamedTuple
|
||||
from position import Span
|
||||
|
||||
|
||||
class TokenType(Enum):
    """Every kind of token; members are numbered by `auto()` in declaration order."""

    # End of input and error tokens
    Eof = auto()
    InvalidChar = auto()
    MalformedComment = auto()
    MalformedChar = auto()
    MalformedString = auto()

    # Identifiers and literals
    Id = auto()
    Int = auto()
    Char = auto()
    String = auto()

    # Brackets
    LParen = auto()
    RParen = auto()
    LBrace = auto()
    RBrace = auto()
    LBracket = auto()
    RBracket = auto()

    # Punctuation
    Underscore = auto()
    Dot = auto()
    DotDot = auto()
    DotDotDot = auto()
    DotDotEqual = auto()
    Comma = auto()
    Colon = auto()
    ColonColon = auto()
    ColonColonLT = auto()
    Semicolon = auto()

    # Operators
    Ampersand = auto()
    Plus = auto()
    PlusEqual = auto()
    Minus = auto()
    MinusEqual = auto()
    MinusLT = auto()
    Asterisk = auto()
    AsteriskEqual = auto()
    AsteriskAsterisk = auto()
    Slash = auto()
    SlashEqual = auto()
    Percent = auto()
    PercentEqual = auto()
    Equal = auto()
    EqualEqual = auto()
    EqualLT = auto()
    Exclamation = auto()
    ExclamationEqual = auto()
    LT = auto()
    LTEqual = auto()
    GT = auto()
    GTEqual = auto()

    # Keywords
    KwFalse = auto()
    KwTrue = auto()
    KwNot = auto()
    KwIn = auto()
    KwAnd = auto()
    KwOr = auto()
    KwXor = auto()
    KwLet = auto()
    KwMut = auto()
    KwIf = auto()
    KwMatch = auto()
    KwElse = auto()
    KwLoop = auto()
    KwWhile = auto()
    KwFor = auto()
    KwBreak = auto()
    KwContinue = auto()
    KwFn = auto()
    KwReturn = auto()
|
||||
|
||||
|
||||
class Token(NamedTuple):
    """A lexed token: its kind, where its text sits in the source, and its span."""

    token_type: TokenType
    index: int  # offset of the token's first character in the source text
    length: int  # number of characters the token covers
    span: Span

    def text_slice(self, text: str) -> str:
        """Return the `length` characters of `text` starting at `index`."""
        first = self.index
        return text[first:first + self.length]
|
||||
|
||||
|
||||
class TokenIterator:
    """Interface for token sources; subclasses are expected to override `next`."""

    def next(self) -> Token:
        """Return the next token."""
        raise NotImplementedError
|
Loading…
Reference in New Issue
Block a user