// nandgame-compiler/lexer.ts
// Code-viewer metadata captured with this file: 219 lines, 7.0 KiB, TypeScript.
// Last change shown in history: 2023-04-27 01:32:06 +01:00
import { Position, StaticTokenType, Token, TokenIter } from "./token.ts";
/**
 * Hand-written lexer that turns source text into tokens, one per call to
 * {@link Lexer.next}.
 *
 * Whitespace (space, tab, CR, LF) separates tokens and is never emitted.
 * Once the input is exhausted every further call returns an `eof` token.
 * A character that does not start any known token is consumed and reported
 * as a single `invalid` token, so lexing always makes progress.
 *
 * Positions are reported as a 0-based `index` into the text plus a 1-based
 * `line` and `col` of the token's first character.
 */
export class Lexer implements TokenIter {
    /** Characters skipped between tokens. */
    private static readonly whitespaceChars = " \t\r\n";
    /** Digits allowed to begin a multi-digit integer literal (no leading zero). */
    private static readonly intStartChars = "123456789";
    /** Digits allowed after the first digit of an integer literal. */
    private static readonly intChars = "1234567890";
    /** Characters allowed to begin an identifier or keyword. */
    private static readonly idStartChars =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
    /** Characters allowed after the first character of an identifier. */
    private static readonly idChars = Lexer.idStartChars + Lexer.intChars;
    /** Tokens that consist of exactly one character and never combine. */
    private static readonly singleStaticTokenTypes: {
        [key: string]: StaticTokenType;
    } = {
        "(": "lparen",
        ")": "rparen",
        "{": "lbrace",
        "}": "rbrace",
        "[": "lbracket",
        "]": "rbracket",
        ".": "dot",
        ",": "comma",
        ":": "colon",
        ";": "semicolon",
        "~": "tilde",
        "*": "asterisk",
    };
    /** Reserved words; any other identifier-shaped text is an `id` token. */
    private static readonly keywordTokenTypes: {
        [key: string]: StaticTokenType;
    } = {
        "let": "let",
        "mut": "mut",
        "not": "not",
        "and": "and",
        "or": "or",
        "fn": "fn",
        "return": "return",
        "if": "if",
        "else": "else",
        "loop": "loop",
        "while": "while",
        "break": "break",
        "continue": "continue",
    };

    private index = 0;
    private line = 1;
    private col = 1;

    /**
     * @param text The complete source text to tokenize.
     */
    public constructor(private readonly text: string) {}

    /**
     * Consumes and returns the next token.
     *
     * @returns The next token; an `eof` token once the input is exhausted
     *          (repeated calls keep returning `eof`).
     */
    public next(): Token {
        this.skipWhitespace();
        const pos = this.position();
        if (this.done()) {
            return { pos, tokenType: "eof" };
        }
        const first = this.current();
        if (Lexer.idStartChars.includes(first)) {
            return this.lexWord(pos);
        }
        if (first === "0") {
            // A lone zero is a complete literal. Following digits are not
            // absorbed, so literals with leading zeros are still not accepted
            // as a single integer.
            this.step();
            return { pos, tokenType: "int", value: 0 };
        }
        if (Lexer.intStartChars.includes(first)) {
            return this.lexInt(pos);
        }
        this.step();
        if (Lexer.hasOwn(Lexer.singleStaticTokenTypes, first)) {
            return { pos, tokenType: Lexer.singleStaticTokenTypes[first] };
        }
        // Operators that may be extended by a second character.
        switch (first) {
            case "+":
                if (this.accept("+")) return { pos, tokenType: "plusplus" };
                if (this.accept("=")) return { pos, tokenType: "plusequal" };
                return { pos, tokenType: "plus" };
            case "-":
                if (this.accept("-")) return { pos, tokenType: "minusminus" };
                if (this.accept(">")) return { pos, tokenType: "minusgt" };
                if (this.accept("=")) return { pos, tokenType: "minusequal" };
                return { pos, tokenType: "minus" };
            case "&":
                if (this.accept("=")) {
                    return { pos, tokenType: "ampersandequal" };
                }
                return { pos, tokenType: "ampersand" };
            case "|":
                if (this.accept("=")) return { pos, tokenType: "pipeequal" };
                return { pos, tokenType: "pipe" };
            case "^":
                if (this.accept("=")) return { pos, tokenType: "hatequal" };
                return { pos, tokenType: "hat" };
            case "=":
                if (this.accept("=")) return { pos, tokenType: "equalequal" };
                return { pos, tokenType: "equal" };
            case "!":
                if (this.accept("=")) {
                    return { pos, tokenType: "exclamationequal" };
                }
                return { pos, tokenType: "exclamation" };
            case "<":
                if (this.accept("=")) return { pos, tokenType: "ltequal" };
                return { pos, tokenType: "lt" };
            case ">":
                if (this.accept("=")) return { pos, tokenType: "gtequal" };
                return { pos, tokenType: "gt" };
            default:
                return { pos, tokenType: "invalid" };
        }
    }

    /**
     * Own-property lookup test. The `in` operator would also match names
     * inherited from `Object.prototype` (e.g. `toString`, `constructor`),
     * which must lex as ordinary identifiers.
     */
    private static hasOwn(table: object, key: string): boolean {
        return Object.prototype.hasOwnProperty.call(table, key);
    }

    /** Advances past any run of whitespace characters. */
    private skipWhitespace(): void {
        while (
            !this.done() && Lexer.whitespaceChars.includes(this.current())
        ) {
            this.step();
        }
    }

    /** Lexes an identifier or keyword starting at the current character. */
    private lexWord(pos: Position): Token {
        const start = this.index;
        while (!this.done() && Lexer.idChars.includes(this.current())) {
            this.step();
        }
        const word = this.text.slice(start, this.index);
        if (Lexer.hasOwn(Lexer.keywordTokenTypes, word)) {
            return { pos, tokenType: Lexer.keywordTokenTypes[word] };
        }
        return { pos, tokenType: "id", value: word };
    }

    /** Lexes a decimal integer literal starting at a non-zero digit. */
    private lexInt(pos: Position): Token {
        const start = this.index;
        while (!this.done() && Lexer.intChars.includes(this.current())) {
            this.step();
        }
        const digits = this.text.slice(start, this.index);
        return { pos, tokenType: "int", value: parseInt(digits, 10) };
    }

    /**
     * Consumes the current character if it equals `char`.
     *
     * @returns `true` when the character was consumed.
     */
    private accept(char: string): boolean {
        if (!this.currentIs(char)) {
            return false;
        }
        this.step();
        return true;
    }

    /**
     * Moves one character forward and updates `line`/`col`.
     *
     * The character being left decides the update: leaving a newline starts
     * the next line at column 1, anything else advances the column. This
     * keeps the first character of every line at column 1 and places the
     * end-of-input position one column past the last character.
     */
    private step(): void {
        if (this.done()) {
            return;
        }
        if (this.current() === "\n") {
            this.line++;
            this.col = 1;
        } else {
            this.col++;
        }
        this.index++;
    }

    /** Snapshot of the current location in the text. */
    private position(): Position {
        return {
            index: this.index,
            line: this.line,
            col: this.col,
        };
    }

    /** True when input remains and the current character equals `char`. */
    private currentIs(char: string): boolean {
        return !this.done() && this.current() === char;
    }

    /** The character at the cursor; only meaningful while `done()` is false. */
    private current(): string {
        return this.text[this.index];
    }

    /** True once every character of the text has been consumed. */
    private done(): boolean {
        return this.index >= this.text.length;
    }
}