// nandgame-compiler/lexer.ts

import { Position, StaticTokenType, Token, TokenIter } from "./token.ts";

/**
 * Hand-written lexer that turns source text into tokens, one per call to
 * {@link Lexer.next}.
 *
 * Whitespace (space, tab, CR, LF) separates tokens and is never emitted.
 * Every token carries the position (offset, 1-based line and column) of its
 * first character.
 */
export class Lexer implements TokenIter {
    /** Offset of the current character in `text`. */
    private index = 0;
    /** 1-based line number of the current character. */
    private line = 1;
    /** 1-based column number of the current character. */
    private col = 1;

    /**
     * @param text The complete source text to tokenize.
     */
    public constructor(private readonly text: string) {}

    /**
     * Consumes and returns the next token.
     *
     * @returns The next token. Once the text is exhausted every call returns
     * an `"eof"` token. A character that cannot start any token is consumed
     * and reported as an `"invalid"` token.
     */
    public next(): Token {
        this.skipWhitespace();
        const pos = this.position();
        if (this.done()) {
            return { pos, tokenType: "eof" };
        }

        const char = this.current();
        if (Lexer.idStartChars.includes(char)) {
            return this.lexIdOrKeyword(pos);
        }
        if (Lexer.intChars.includes(char)) {
            return this.lexInt(pos);
        }

        // Maximal munch: prefer a two-character operator ("+=", "->", ...)
        // over its one-character prefix.
        const doubleType = Lexer.doubleStaticTokenTypes.get(
            this.text.slice(this.index, this.index + 2),
        );
        if (doubleType !== undefined) {
            this.step();
            this.step();
            return { pos, tokenType: doubleType };
        }

        const singleType = Lexer.singleStaticTokenTypes.get(char);
        this.step();
        if (singleType !== undefined) {
            return { pos, tokenType: singleType };
        }
        return { pos, tokenType: "invalid" };
    }

    private static whitespaceChars = " \t\r\n";
    private static intChars = "1234567890";
    private static idStartChars =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
    private static idChars = Lexer.idStartChars + Lexer.intChars;

    /** Tokens that consist of exactly one character. */
    private static singleStaticTokenTypes = new Map<string, StaticTokenType>([
        ["(", "lparen"],
        [")", "rparen"],
        ["{", "lbrace"],
        ["}", "rbrace"],
        ["[", "lbracket"],
        ["]", "rbracket"],
        [".", "dot"],
        [",", "comma"],
        [":", "colon"],
        [";", "semicolon"],
        ["~", "tilde"],
        ["*", "asterisk"],
        ["+", "plus"],
        ["-", "minus"],
        ["&", "ampersand"],
        ["|", "pipe"],
        ["^", "hat"],
        ["=", "equal"],
        ["!", "exclamation"],
        ["<", "lt"],
        [">", "gt"],
    ]);

    /** Tokens that consist of exactly two characters. */
    private static doubleStaticTokenTypes = new Map<string, StaticTokenType>([
        ["++", "plusplus"],
        ["+=", "plusequal"],
        ["--", "minusminus"],
        ["->", "minusgt"],
        ["-=", "minusequal"],
        ["&=", "ampersandequal"],
        ["|=", "pipeequal"],
        ["^=", "hatequal"],
        ["==", "equalequal"],
        ["!=", "exclamationequal"],
        ["<=", "ltequal"],
        [">=", "gtequal"],
    ]);

    /**
     * Reserved words. A Map is used rather than a plain object so that
     * identifiers such as `constructor` or `toString` are not mistaken for
     * keywords through `Object.prototype`.
     */
    private static keywordTokenTypes = new Map<string, StaticTokenType>([
        ["let", "let"],
        ["mut", "mut"],
        ["not", "not"],
        ["and", "and"],
        ["or", "or"],
        ["fn", "fn"],
        ["return", "return"],
        ["if", "if"],
        ["else", "else"],
        ["loop", "loop"],
        ["while", "while"],
        ["break", "break"],
        ["continue", "continue"],
    ]);

    /** Advances past any run of whitespace characters. */
    private skipWhitespace(): void {
        while (
            !this.done() && Lexer.whitespaceChars.includes(this.current())
        ) {
            this.step();
        }
    }

    /**
     * Lexes an identifier or keyword starting at the current character.
     *
     * @param pos Position of the first character of the token.
     */
    private lexIdOrKeyword(pos: Position): Token {
        let valueString = this.current();
        this.step();
        while (!this.done() && Lexer.idChars.includes(this.current())) {
            valueString += this.current();
            this.step();
        }
        const keywordType = Lexer.keywordTokenTypes.get(valueString);
        if (keywordType !== undefined) {
            return { pos, tokenType: keywordType };
        }
        return { pos, tokenType: "id", value: valueString };
    }

    /**
     * Lexes a decimal integer literal starting at the current character.
     *
     * A leading `0` always forms the literal `0` on its own; digits that
     * follow it start a separate token, so `012` is not silently read as 12.
     *
     * @param pos Position of the first character of the token.
     */
    private lexInt(pos: Position): Token {
        let valueString = this.current();
        this.step();
        if (valueString !== "0") {
            while (!this.done() && Lexer.intChars.includes(this.current())) {
                valueString += this.current();
                this.step();
            }
        }
        return { pos, tokenType: "int", value: parseInt(valueString, 10) };
    }

    /**
     * Moves to the next character, updating line and column based on the
     * character being left behind: leaving a newline starts a new line at
     * column 1, anything else advances the column. Does nothing at the end
     * of the text.
     */
    private step(): void {
        if (this.done()) {
            return;
        }
        if (this.current() === "\n") {
            this.line++;
            this.col = 1;
        } else {
            this.col++;
        }
        this.index++;
    }

    /** Snapshot of the current offset, line and column. */
    private position(): Position {
        return {
            index: this.index,
            line: this.line,
            col: this.col,
        };
    }

    /** True when not at the end of the text and the current character equals `char`. */
    private currentIs(char: string): boolean {
        return !this.done() && this.current() === char;
    }

    /** The character at the current offset; only meaningful while `!done()`. */
    private current(): string {
        return this.text[this.index];
    }

    /** True once every character of the text has been consumed. */
    private done(): boolean {
        return this.index >= this.text.length;
    }
}