From 71d45535231c54d7db25cbf41fe26dda2e90f913 Mon Sep 17 00:00:00 2001 From: Simon From Jakobsen Date: Wed, 11 Sep 2024 14:11:43 +0000 Subject: [PATCH] chapter 3, operands, postfix expressions --- compiler/chapter_2.md | 2 +- compiler/chapter_3.md | 276 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 264 insertions(+), 14 deletions(-) diff --git a/compiler/chapter_2.md b/compiler/chapter_2.md index 3b0a74d..85b4d3e 100644 --- a/compiler/chapter_2.md +++ b/compiler/chapter_2.md @@ -417,7 +417,7 @@ while (token !== null) { ## 2.10 Exercises -1. Implement the operators: `-`, `*`, `/`, `(`, `)`, `[`, `]`, `!=`, `<`, `>`, `<=` and `>=`. +1. Implement the operators: `-`, `*`, `/`, `(`, `)`, `.`, `,`, `[`, `]`, `!=`, `<`, `>`, `<=` and `>=`. 2. Implement the keywords: `true`, `false`, `null`, `or`, `and`, `not`, `loop`, `break`, `let`, `fn` and `return`. 3. \* Implement single line comments using `//` and multiline comments using `\*` and `*\` (\*\* extra points if multiline comments can be nested, eg. `/* ... /* ... */ ... */`). 4. \* Reimplement integers such that integers are either `0` or start with `[1-9]`. diff --git a/compiler/chapter_3.md b/compiler/chapter_3.md index cf78579..da4bd3b 100644 --- a/compiler/chapter_3.md +++ b/compiler/chapter_3.md @@ -1,11 +1,11 @@ -# Parser +# 3 Parser In this chaper I'll show how I would make a parser. A parser, in addition to our lexer, transforms the input program as text, meaning an unstructured sequence of characters, into a structered representation. Structured meaning the representation tells us about the different constructs such as if statements and expressions. -## Abstract Syntax Tree AST +## 3.1 Abstract Syntax Tree AST The result of parsing is a tree structure representing the input program. @@ -43,7 +43,7 @@ Both `Stmt` (statement) and `Expr` (expression) are polymorphic types, meaning a For both `Stmt` and `Expr` there's an error-kind. 
This makes the parser simpler, as we won't need to manage parsing failures differently than successful parslings. -## Consumer of lexer +## 3.2 Consumer of lexer To start, we'll implement a `Parser` class, which for now is simply a consumer of a token iterater, meaning the lexer. In simple terms, whereas the lexer is a transformation from text to tokens, the parser is a transformation from token to an AST, except that the parser is not an iterator. @@ -110,14 +110,14 @@ We'll also want a method for reporting errors. ```ts class Parser { // ... - private report(pos: Pos, msg: string) { + private report(msg: string, pos = this.pos()) { console.log(`Parser: ${msg} at ${pos.line}:${pos.col}`); } // ... } ``` -## Operands +## 3.3 Operands Operands are the individual parts of an operation. For example, in the math expression `a + b`, (would be `+ a b` in the input language), `a` and `b` are the *operands*, while `+` is the *operator*. In the expression `a + b * c`, the operands are `a`, `b` and `c`. But in the expression `a * (b + c)`, the operands of the multiply operation are `a` and `(b + c)`. `(b + c)` is an operands, because it is enclosed on both sides. This is how we'll define operands. @@ -128,12 +128,8 @@ class Parser { // ... public parseOperand(): Expr { const pos = this.pos(); - if (this.test("int")) { - const value = this.current().intValue; - this.step(); - return { kind: { type: "int", value }, pos }; - } - this.report(pos "expected expr"); + // ... + this.report("expected expr", pos); this.step(); return { kind: { type: "error" }, pos }; } @@ -141,14 +137,16 @@ class Parser { } ``` -### Integer +### 3.3.1 Identifiers and literals -Parsing an integer is a 1:1 translation between the integer token and an integer expression. +Identifiers and literals (integers, strings) are single token constructs, meaning the parsing consists of translating a token into an ast-node with the value. ```ts type ExprKind = // ... 
+ | { type: "ident", value: string } | { type: "int", value: number } + | { type: "string", value: string } // ... ; ``` @@ -158,14 +156,266 @@ class Parser { // ... public parseOperand(): Expr { // ... + if (this.test("ident")) { + const value = this.current().identValue; + this.step(); + return { kind: { type: "ident", value }, pos }; + } if (this.test("int")) { const value = this.current().intValue; this.step(); return { kind: { type: "int", value }, pos }; } + if (this.test("string")) { + const value = this.current().stringValue; + this.step(); + return { kind: { type: "string", value }, pos }; + } // ... } // ... } ``` +### 3.3.2 Group expressions + +A group expression is an expression enclosed in parentheses, eg `(1 + 2)`. Because the expression is enclosed, meaning it starts with a `(`-token and ends with a `)`-token, we will treat it like an operand. + +```ts +type ExprKind = + // ... + | { type: "group", expr: Expr } + // ... + ; +``` + +If we find a `(`-token in `.parseOperand()`, we know that we should parse a group expression. We do this by ignoring the `(`-token, parsing an expression using `.parseExpr()` and checking that we find a `)`-token afterwards. + +```ts +class Parser { + // ... + public parseOperand(): Expr { + // ... + if (this.test("(")) { + this.step(); + const expr = this.parseExpr(); + if (!this.test(")")) { + this.report("expected ')'"); + return { kind: { type: "error" }, pos }; + } + this.step(); + return { kind: { type: "group", expr }, pos }; + } + // ... + } + // ... +} +``` + +If we do not find the closing `)`-token, we report an error and return an error expression. + +### 3.3.3 Block, if and loop operands + +We want to be able to use blocks, if and loop constructs as expressions. + +Example: +```rs +let temperature_feeling = if > temperature 20 { "hot" } else { "cold" }; +``` + +Each construct will have its own `.parse...()`-method, so we'll just look for the first `{`-, `if`-, or `loop`-token and call the relevant method. 
+ ```ts +class Parser { + // ... + public parseOperand(): Expr { + // ... + if (this.test("{")) + return this.parseBlock(); + if (this.test("if")) + return this.parseIf(); + if (this.test("loop")) + return this.parseLoop(); + // ... + } + // ... +} +``` + +## 3.4 Postfix operators + +Postfix operations are expressions where the operators come after the subject expression. This includes field expressions (eg. `subject.field`), index expressions (eg. `subject[index]`) and call expressions (eg. `subject(...args)`). + +A notable detail is that postfix operations are chainable, eg. `subject[index].field` is valid, likewise with `subject.method(arg)` and `matrix[y][x]`. + +We'll make a method `.parsePostfix()` to parse postfix operators. + +```ts +class Parser { + // ... + public parsePostfix(): Expr { + let subject = this.parseOperand(); + while (true) { + const pos = this.pos(); + // ... + break; + } + return subject; + } + // ... +} +``` + +We start by parsing an operand. Then we enter a loop, which runs until we no longer find any relevant operator tokens. When we parse a postfix expression, the `subject` will be replaced with the new parsed expression. + +Notice we don't define `pos` at the start, but after we've parsed the subject. That's because we want `pos` to reflect the start of the postfix operator, not the start of the subject. + +### 3.4.1 Field expression + +A field expression is for accessing fields on an object, and consists of a `.`-token and an identifier, eg. `.field`. + +```ts +type ExprKind = + // ... + | { type: "field", subject: Expr, value: string } + // ... + ; +``` + +```ts +class Parser { + // ... + public parsePostfix(): Expr { + // ... + while (true) { + // ... 
+ if (this.test(".")) { + this.step(); + if (!this.test("ident")) { + this.report("expected ident"); + return { kind: { type: "error" }, pos }; + } + const value = this.current().identValue; + this.step(); + subject = { kind: { type: "field", subject, value }, pos }; + continue; + } + // ... + } + // ... + } + // ... +} +``` + +If we find a `.`-token, we step over it, and make sure that we've hit an identifier. We save the identifier value and step over the identifier. Then we replace `subject` with a new field expression containing the previous `subject` value. Then we continue to look for the next postfix operator. + +### 3.4.2 Index expression + +An index operation consists of the subject and an index. The index is an expression, and it is contained in `[`- and `]`-tokens, eg. `subject[value]`. + + +```ts +type ExprKind = + // ... + | { type: "index", subject: Expr, value: Expr } + // ... + ; +``` + +```ts +class Parser { + // ... + public parsePostfix(): Expr { + // ... + while (true) { + // ... + if (this.test("[")) { + this.step(); + const value = this.parseExpr(); + if (!this.test("]")) { + this.report("expected ']'"); + return { kind: { type: "error" }, pos }; + } + this.step(); + subject = { kind: { type: "index", subject, value }, pos }; + continue; + } + // ... + } + // ... + } + // ... +} +``` + +If we find a `[`-token, we parse the index part exactly the same way we parse a group expression. + +### 3.4.3 Call expression + +A call expression is like an index expression, except that it uses `(` and `)` instead of `[` and `]` and that there can be 0 or more expressions (arguments or args) inside the `(` and `)`. The arguments are separated by `,`. + +```ts +type ExprKind = + // ... + | { type: "call", subject: Expr, args: Expr[] } + // ... + ; +``` + +```ts +class Parser { + // ... + public parsePostfix(): Expr { + // ... + while (true) { + // ... 
+ if (this.test("(")) { + this.step(); + let args: Expr[] = []; + if (!this.test(")")) { + args.push(this.parseExpr()); + while (this.test(",")) { + this.step(); + if (this.test(")")) + break; + args.push(this.parseExpr()); + } + } + + if (!this.test(")")) { + this.report("expected ')'"); + return { kind: { type: "error" }, pos }; + } + this.step(); + subject = { kind: { type: "call", subject, args }, pos }; + continue; + } + // ... + } + // ... + } + // ... +} +``` + +Similarly to index expressions, if we find a `(`-token, we step over it, parse the arguments, check for a `)` and replace `subject` with a call expression containing the previous `subject`. + +When parsing the arguments, we start by testing if we've reached a `)` to check if there are any arguments. If we haven't, we parse the first argument. + +The consecutive arguments are all preceded by a `,`-token. Therefore we test for `,`, to check if we should keep parsing arguments. + +After checking for a separating `,`, we check if we've reached a `)` and break if so. This is to allow for a trailing comma, eg. +```ts +func( + a, + b, // trailing comma +) +``` + +## 3.5 Prefix expressions + + + + + 