chapter 3 done

This commit is contained in:
SimonFJ20 2024-09-18 03:38:06 +02:00
parent fb831e277c
commit 56a4af28f5

View File

@ -17,6 +17,7 @@ This is how I would define an AST data type.
type Stmt = { type Stmt = {
kind: StmtKind, kind: StmtKind,
pos: Pos, pos: Pos,
id: number,
}; };
type StmtKind = type StmtKind =
@ -29,6 +30,7 @@ type StmtKind =
type Expr = { type Expr = {
kind: ExprKind, kind: ExprKind,
pos: Pos, pos: Pos,
id: number,
}; };
type ExprKind = type ExprKind =
@ -43,7 +45,11 @@ Both `Stmt` (statement) and `Expr` (expression) are polymorphic types, meaning a
For both `Stmt` and `Expr` there's an error-kind. This makes the parser simpler, as we won't need to manage parsing failures differently than successful parses. For both `Stmt` and `Expr` there's an error-kind. This makes the parser simpler, as we won't need to manage parsing failures differently than successful parses.
## 3.2 Consumer of lexer Both AST node types contain an `id` field. This field will be a unique value for each instance of a node.
## 3.2 The parser class
### 3.2.1 Consumer of lexer
To start, we'll implement a `Parser` class, which for now is simply a consumer of a token iterator, meaning the lexer. In simple terms, whereas the lexer is a transformation from text to tokens, the parser is a transformation from tokens to an AST, except that the parser is not an iterator. To start, we'll implement a `Parser` class, which for now is simply a consumer of a token iterator, meaning the lexer. In simple terms, whereas the lexer is a transformation from text to tokens, the parser is a transformation from tokens to an AST, except that the parser is not an iterator.
@ -105,7 +111,9 @@ class Parser {
When testing, we first check that we have not reached the end. Either we have to do that here, or the caller will have to write something like `!this.done() && this.test(...)`, and it's easy to do it here. When testing, we first check that we have not reached the end. Either we have to do that here, or the caller will have to write something like `!this.done() && this.test(...)`, and it's easy to do it here.
We'll also want a method for reporting errors. ### 3.2.2 Reporting errors
We'll want a method for reporting errors.
```ts ```ts
class Parser { class Parser {
@ -117,6 +125,30 @@ class Parser {
} }
``` ```
### 3.2.3 Constructing AST nodes
We also want methods for constructing statements and expressions with auto incrementing ids.
```ts
class Parser {
    // ...
    /** Id that the next constructed AST node will receive; starts at 0. */
    private nextNodeId = 0;
    // ...
    /**
     * Builds a statement node from its kind and position, tagging it with
     * the next free node id. Statements and expressions share one counter.
     */
    private stmt(kind: StmtKind, pos: Pos): Stmt {
        return { kind, pos, id: this.nextNodeId++ };
    }
    /**
     * Builds an expression node from its kind and position, tagging it with
     * the next free node id. Statements and expressions share one counter.
     */
    private expr(kind: ExprKind, pos: Pos): Expr {
        return { kind, pos, id: this.nextNodeId++ };
    }
    // ...
}
```
## 3.3 Operands ## 3.3 Operands
Operands are the individual parts of an operation. For example, in the math expression `a + b`, (would be `+ a b` in the input language), `a` and `b` are the *operands*, while `+` is the *operator*. In the expression `a + b * c`, the operands are `a`, `b` and `c`. But in the expression `a * (b + c)`, the operands of the multiply operation are `a` and `(b + c)`. `(b + c)` is an operand, because it is enclosed on both sides. This is how we'll define operands. Operands are the individual parts of an operation. For example, in the math expression `a + b`, (would be `+ a b` in the input language), `a` and `b` are the *operands*, while `+` is the *operator*. In the expression `a + b * c`, the operands are `a`, `b` and `c`. But in the expression `a * (b + c)`, the operands of the multiply operation are `a` and `(b + c)`. `(b + c)` is an operand, because it is enclosed on both sides. This is how we'll define operands.
@ -131,7 +163,7 @@ class Parser {
// ... // ...
this.report("expected expr", pos); this.report("expected expr", pos);
this.step(); this.step();
return { kind: { type: "error" }, pos }; return this.expr({ type: "error" }, pos);
} }
// ... // ...
} }
@ -159,17 +191,17 @@ class Parser {
if (this.test("ident")) { if (this.test("ident")) {
const value = this.current().identValue; const value = this.current().identValue;
this.step(); this.step();
return { kind: { type: "ident", value }, pos }; return this.expr({ type: "ident", value }, pos);
} }
if (this.test("int")) { if (this.test("int")) {
const value = this.current().intValue; const value = this.current().intValue;
this.step(); this.step();
return { kind: { type: "int", value }, pos }; return this.expr({ type: "int", value }, pos);
} }
if (this.test("string")) { if (this.test("string")) {
const value = this.current().stringValue; const value = this.current().stringValue;
this.step(); this.step();
return { kind: { type: "string", value }, pos }; return this.expr({ type: "string", value }, pos);
} }
// ... // ...
} }
@ -201,10 +233,10 @@ class Parser {
const expr = this.parseExpr(); const expr = this.parseExpr();
if (!this.test(")")) { if (!this.test(")")) {
this.report("expected ')'"); this.report("expected ')'");
return { kind: { type: "error" }, pos }; return this.expr({ type: "error" }, pos);
} }
this.step(); this.step();
return { kind: { type: "group", expr }, pos }; return this.expr({ type: "group", expr }, pos);
} }
// ... // ...
} }
@ -293,11 +325,11 @@ class Parser {
this.step(); this.step();
if (!this.test("ident")) { if (!this.test("ident")) {
this.report("expected ident"); this.report("expected ident");
return { kind: { type: "error" }, pos }; return this.expr({ type: "error" }, pos);
} }
const value = this.current().identValue; const value = this.current().identValue;
this.step(); this.step();
subject = { kind: { type: "field", subject, value }, pos }; subject = this.expr({ type: "field", subject, value }, pos);
continue; continue;
} }
// ... // ...
@ -335,10 +367,10 @@ class Parser {
const value = this.parseExpr(); const value = this.parseExpr();
if (!this.test("]") { if (!this.test("]") {
this.report("expected ']'"); this.report("expected ']'");
return { kind: { type: "error" }, pos }; return this.expr({ type: "error" }, pos);
} }
this.step(); this.step();
subject = { kind: { type: "index", subject, value }, pos }; subject = this.expr({ type: "index", subject, value }, pos);
continue; continue;
} }
// ... // ...
@ -385,10 +417,10 @@ class Parser {
const value = this.parseExpr(); const value = this.parseExpr();
if (!this.test(")") { if (!this.test(")") {
this.report("expected ')'"); this.report("expected ')'");
return { kind: { type: "error" }, pos }; return this.expr({ type: "error" }, pos);
} }
this.step(); this.step();
subject = { kind: { type: "call", subject, args }, pos }; subject = this.expr({ type: "call", subject, args }, pos);
continue; continue;
} }
// ... // ...
@ -457,7 +489,7 @@ class Parser {
if (this.test("not")) { if (this.test("not")) {
this.step(); this.step();
const subject = this.parsePrefix(); const subject = this.parsePrefix();
return { kind: { type: "unary", unaryType: "not", subject }, pos }; return this.expr({ type: "unary", unaryType: "not", subject }, pos);
} }
// ... // ...
} }
@ -478,7 +510,7 @@ class Parser {
this.step(); this.step();
const left = this.parsePrefix(); const left = this.parsePrefix();
const right = this.parsePrefix(); const right = this.parsePrefix();
return { kind: { type: "binary", binaryType: "+", left, right }, pos }; return this.expr({ type: "binary", binaryType: "+", left, right }, pos);
} }
// ... // ...
} }
@ -525,19 +557,19 @@ class Parser {
const cond = this.parseExpr(); const cond = this.parseExpr();
if (!this.test("{")) { if (!this.test("{")) {
this.report("expected block"); this.report("expected block");
return { kind: { type: "error" }, pos }; return this.expr({ type: "error" }, pos);
} }
const truthy = this.parseBlock(); const truthy = this.parseBlock();
if (!this.test("else")) { if (!this.test("else")) {
return { kind: { type: "if", cond, truthy }, pos }; return this.expr({ type: "if", cond, truthy }, pos);
} }
this.step(); this.step();
if (!this.test("{")) { if (!this.test("{")) {
this.report("expected block"); this.report("expected block");
return { kind: { type: "error" }, pos }; return this.expr({ type: "error" }, pos);
} }
const falsy = this.parseBlock(); const falsy = this.parseBlock();
return { kind: { type: "if", cond, truthy, falsy }, pos }; return this.expr({ type: "if", cond, truthy, falsy }, pos);
} }
// ... // ...
} }
@ -567,10 +599,10 @@ class Parser {
this.step(); this.step();
if (!this.test("{")) { if (!this.test("{")) {
this.report("expected '}'"); this.report("expected '}'");
return { kind: { type: "error" }, pos }; return this.expr({ type: "error" }, pos);
} }
const body = this.parseExpr(); const body = this.parseExpr();
return { kind: { type: "loop", body }, pos }; return this.expr({ type: "loop", body }, pos);
} }
// ... // ...
} }
@ -607,10 +639,10 @@ class Parser {
const pos = this.pos(); const pos = this.pos();
this.step(); this.step();
if (!this.test(";")) { if (!this.test(";")) {
return { kind: { type: "break" }, pos }; return this.stmt({ type: "break" }, pos);
} }
const expr = this.parseExpr(); const expr = this.parseExpr();
return { kind: { type: "break", expr }, pos }; return this.stmt({ type: "break", expr }, pos);
} }
// ... // ...
} }
@ -637,10 +669,10 @@ class Parser {
const pos = this.pos(); const pos = this.pos();
this.step(); this.step();
if (!this.test(";")) { if (!this.test(";")) {
return { kind: { type: "return" }, pos }; return this.stmt({ type: "return" }, pos);
} }
const expr = this.parseExpr(); const expr = this.parseExpr();
return { kind: { type: "return", expr }, pos }; return this.stmt({ type: "return", expr }, pos);
} }
// ... // ...
} }
@ -677,23 +709,23 @@ class Parser {
this.step(); this.step();
if (!this.test("ident")) { if (!this.test("ident")) {
this.report("expected ident"); this.report("expected ident");
return { kind: { type: "error" }, pos }; return this.stmt({ type: "error" }, pos);
} }
const ident = this.current().identValue; const ident = this.current().identValue;
this.step(); this.step();
if (!this.test("(")) { if (!this.test("(")) {
this.report("expected '('"); this.report("expected '('");
return { kind: { type: "error" }, pos }; return this.stmt({ type: "error" }, pos);
} }
const params = this.parseFnParams(); const params = this.parseFnParams();
if (!params.ok) if (!params.ok)
return { kind: { type: "error" }, pos }; return this.stmt({ type: "error" }, pos);
if (!this.test("{")) { if (!this.test("{")) {
this.report("expected block"); this.report("expected block");
return { kind: { type: "error" }, pos }; return this.stmt({ type: "error" }, pos);
} }
const body = this.parseBlock(); const body = this.parseBlock();
return { kind: { type: "fn", ident, params: params.value, body }, pos }; return this.stmt({ type: "fn", ident, params: params.value, body }, pos);
} }
// ... // ...
} }
@ -780,15 +812,15 @@ class Parser {
this.step(); this.step();
const paramResult = this.parseParam(); const paramResult = this.parseParam();
if (!paramResult.ok) if (!paramResult.ok)
return { kind: { type: "error" }, pos }; return this.stmt({ type: "error" }, pos);
const param = paramResult.value; const param = paramResult.value;
if (!this.test("=")) { if (!this.test("=")) {
this.report("expected '='"); this.report("expected '='");
return { kind: { type: "error" }, pos }; return this.stmt({ type: "error" }, pos);
} }
this.step(); this.step();
const value = this.parseExpr(); const value = this.parseExpr();
return { kind: { type: "let", param, value }, pos }; return this.stmt({ type: "let", param, value }, pos);
} }
// ... // ...
} }
@ -818,11 +850,11 @@ class Parser {
const pos = this.pos(); const pos = this.pos();
const subject = this.parseExpr(); const subject = this.parseExpr();
if (!this.test("=")) { if (!this.test("=")) {
return { kind: { type: "expr", expr: subject }, pos }; return this.stmt({ type: "expr", expr: subject }, pos);
} }
this.step(); this.step();
const value = this.parseExpr(); const value = this.parseExpr();
return { kind: { type: "assign", subject, value }, pos }; return this.stmt({ type: "assign", subject, value }, pos);
} }
// ... // ...
} }
@ -860,11 +892,274 @@ class Parser {
const pos = this.pos(); const pos = this.pos();
this.step(); this.step();
let stmts: Stmt[] = []; let stmts: Stmt[] = [];
while (!this.test("}")) { while (!this.done()) {
// TODO // ...
} }
this.report("expected '}'");
return this.expr({ type: "error" }, pos);
} }
// ... // ...
} }
``` ```
We step over the `{` and begin looping. We expect to return from inside the loop, so if the loop runs to completion without returning, we report an error.
```ts
class Parser {
    // ...
    public parseBlock(): Expr {
        // ...
        while (!this.done()) {
            if (this.test("}")) {
                // Consume the closing '}' before returning; otherwise the
                // caller would resume parsing on the block's own '}' token.
                this.step();
                return this.expr({ type: "block", stmts }, pos);
            // ...
            }
        }
        // ...
    }
    // ...
}
```
If we reach a `}`, return a block with the statements.
```ts
class Parser {
// ...
public parseBlock(): Expr {
// ...
while (!this.done()) {
if (this.test("}")) {
// ...
} else if (this.test("fn")) {
// A fn statement is collected into the block's statement list;
// no ';' is checked for after it.
stmts.push(this.parseFn());
// ...
}
}
// ...
}
// ...
}
```
If we reach a `fn`-token, we parse a fn statement and continue parsing statements.
```ts
class Parser {
    // ...
    public parseBlock(): Expr {
        // ...
        while (!this.done()) {
            if (this.test("}")) {
                // ...
            } else if (this.test("let") || this.test("return") || this.test("break")) {
                // Single line statements must be terminated by a ';'.
                stmts.push(this.parseSingleLineBlockStmt());
                this.eatSemicolon();
            // ...
            }
        }
        // ...
    }
    // ...
    /**
     * Parses a statement that is expected to be followed by a ';':
     * let, return or break. Reports an error and returns an error
     * statement if the current token starts none of these.
     */
    private parseSingleLineBlockStmt(): Stmt {
        // Capture the position up front, so the error statement below has
        // a position to refer to.
        const pos = this.pos();
        if (this.test("let"))
            return this.parseLet();
        if (this.test("return"))
            return this.parseReturn();
        if (this.test("break"))
            return this.parseBreak();
        this.report("expected stmt");
        return this.stmt({ type: "error" }, pos);
    }
    // ...
    /**
     * Steps over a ';' if the current token is one; otherwise reports
     * an error and leaves the current token untouched.
     */
    private eatSemicolon() {
        if (!this.test(";")) {
            this.report("expected ';'");
            return;
        }
        this.step();
    }
    // ...
}
```
If we reach a token designating the start of a single line statement, such as `let` in a let statement, `return`, `break`, parse a single line block statement, then check for a `;`-token. Then continue parsing statements.
```ts
class Parser {
    // ...
    public parseBlock(): Expr {
        // ...
        while (!this.done()) {
            if (this.test("}")) {
                // ...
            } else if (this.test("{") || this.test("if") || this.test("loop")) {
                const expr = this.parseMultiLineBlockExpr();
                if (this.test("}")) {
                    // The expression is the last thing in the block, so it
                    // becomes the block's resulting value.
                    this.step();
                    return this.expr({ type: "block", stmts, expr }, pos);
                }
                // Otherwise it is an expression statement; no ';' is
                // required after an expression ending in '}'.
                stmts.push(this.stmt({ type: "expr", expr }, expr.pos));
            // ...
            }
        }
        // ...
    }
    // ...
    /**
     * Parses an expression that ends with a '}': a block, an if or a
     * loop. Reports an error and returns an error expression if the
     * current token starts none of these.
     */
    private parseMultiLineBlockExpr(): Expr {
        // Capture the position up front, so the error expression below has
        // a position to refer to.
        const pos = this.pos();
        if (this.test("{"))
            return this.parseBlock();
        if (this.test("if"))
            return this.parseIf();
        if (this.test("loop"))
            return this.parseLoop();
        this.report("expected expr");
        return this.expr({ type: "error" }, pos);
    }
    // ...
}
```
If we reach a token designating an expression or statement ending with a `}`, such as `if`, `loop` and `{` as in a block expression, parse a multi line expression. If we've hit the end of the block, then return a block expression with the parsed multi line expression as the resulting value. Otherwise, push the expression as an expression statement.
```ts
class Parser {
    // ...
    public parseBlock(): Expr {
        // ...
        while (!this.done()) {
            if (this.test("}")) {
                // ...
            } else {
                // Anything unrecognized is assumed to start an expression.
                const expr = this.parseExpr();
                if (this.test("=")) {
                    // Assignment statement: same steps as `.parseAssign()`,
                    // except the statement is pushed instead of returned.
                    this.step();
                    const value = this.parseExpr();
                    this.eatSemicolon();
                    // Positioned at the subject, like the expression
                    // statement below, rather than at the block's '{'.
                    stmts.push(this.stmt({ type: "assign", subject: expr, value }, expr.pos));
                } else if (this.test(";")) {
                    // Expression statement. Step over the ';', otherwise the
                    // next iteration would try to parse it as an expression.
                    this.step();
                    stmts.push(this.stmt({ type: "expr", expr }, expr.pos));
                } else if (this.test("}")) {
                    // End of block: the expression is the block's resulting
                    // value. Consume the '}' before returning.
                    this.step();
                    return this.expr({ type: "block", stmts, expr }, pos);
                } else {
                    this.report("expected ';' or '}'");
                    return this.expr({ type: "error" }, pos);
                }
            }
        }
        // ...
    }
    // ...
}
```
If we don't recognize the token we've reached, we assume it's an expression. If we reach `=` after parsing the initial expression, we try to parse an assignment statement. Since we cannot use the `.parseAssign()` method here, we do the same here as in that method, then check that we hit a `;`, and then push the assignment statement instead of returning. If instead we hit a `;`, we push the expression as an expression statement. Else, if we hit a `}`, we've reached the end of the block, and we return a block expression with the parsed expression as the resulting value. Otherwise, we report an error.
## 3.14 Statements
Lastly, we'll define a method `.parseStmts()` for parsing top level statements.
```ts
class Parser {
// ...
// Parses top level statements until the parser is done, and returns
// them in the order they were parsed.
public parseStmts(): Stmt[] {
let stmts: Stmt[] = [];
while (!this.done()) {
// ...
}
return stmts;
}
// ...
}
```
We want to parse every statement in the file, so we loop until we've reached the end.
```ts
class Parser {
// ...
public parseStmts(): Stmt[] {
let stmts: Stmt[] = [];
while (!this.done()) {
if (this.test("fn")) {
// A fn statement is collected as is; no ';' is checked for after it.
stmts.push(this.parseFn());
// ...
}
}
return stmts;
}
// ...
}
```
We first test whether we've reached a multi line statement ending in a `}`, such as a fn statement.
```ts
class Parser {
// ...
public parseStmts(): Stmt[] {
let stmts: Stmt[] = [];
while (!this.done()) {
if (this.test("fn")) {
// ...
} else if (this.test("let") || this.test("return") || this.test("break")) {
// Reuses the helper from block parsing, then expects the statement
// to be followed by a ';'.
stmts.push(this.parseSingleLineBlockStmt());
this.eatSemicolon();
// ...
}
}
return stmts;
}
// ...
}
```
Then we test whether we've reached a single line statement, meaning it should end with a `;`, such as let, return and break.
```ts
class Parser {
// ...
public parseStmts(): Stmt[] {
let stmts: Stmt[] = [];
while (!this.done()) {
if (this.test("fn")) {
// ...
} else if (this.test("{") || this.test("if") || this.test("loop")) {
// The expression is wrapped in an expression statement that takes the
// expression's own position; no ';' is checked for after it.
let expr = this.parseMultiLineBlockExpr();
stmts.push(this.stmt({ type: "expr", expr }, expr.pos));
// ...
}
}
return stmts;
}
// ...
}
```
Then we test, if we've reached a multi line expression ending in `}`, such as if, loop and a block expression.
```ts
class Parser {
    // ...
    /**
     * Parses top level statements until the parser is done, and returns
     * them in the order they were parsed.
     */
    public parseStmts(): Stmt[] {
        let stmts: Stmt[] = [];
        while (!this.done()) {
            if (this.test("fn")) {
                // ...
            } else {
                // `.parseAssign()` yields either an assignment statement or
                // an expression statement. It does not consume the ending
                // ';' itself, so we check for it and step over it here.
                stmts.push(this.parseAssign());
                this.eatSemicolon();
            }
        }
        return stmts;
    }
    // ...
}
```
If none of the above apply, we call `.parseAssign()`, which parses either an assignment statement or an expression statement.
## 3.15 Exercises
1. Implement the binary operators: `-`, `*`, `/`, `!=`, `<`, `>`, `<=`, `>=`, `or` and `and`.
2. \*\* Implement infix notation, eg. `a + b` compared to `+ a b`.