219 lines
7.0 KiB
TypeScript
219 lines
7.0 KiB
TypeScript
import { Position, StaticTokenType, Token, TokenIter } from "./token.ts";
|
|
|
|
/**
 * Hand-written lexer that turns source text into a stream of tokens.
 *
 * Call {@link Lexer.next} repeatedly; once the input is exhausted every
 * further call returns an `"eof"` token. Whitespace (space, tab, CR, LF) is
 * skipped between tokens. Characters that do not start any known token are
 * consumed one at a time and reported as `"invalid"` tokens, so the lexer
 * always makes progress.
 *
 * Positions are reported with a 0-based `index` and 1-based `line`/`col`,
 * and always refer to the first character of the token.
 */
export class Lexer implements TokenIter {
  /** Characters skipped between tokens. */
  private static readonly whitespaceChars = " \t\r\n";

  /** Digits that may start a multi-digit integer (no leading zeros). */
  private static readonly intStartChars = "123456789";

  /** Digits that may continue an integer literal. */
  private static readonly intChars = "1234567890";

  /** Characters that may start an identifier or keyword. */
  private static readonly idStartChars =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";

  /** Characters that may continue an identifier or keyword. */
  private static readonly idChars = Lexer.idStartChars + Lexer.intChars;

  /**
   * Punctuation that is always a complete token on its own.
   *
   * A `Map` is used instead of a plain object so lookups only ever see the
   * keys listed here and never properties inherited from `Object.prototype`.
   */
  private static readonly singleStaticTokenTypes: ReadonlyMap<
    string,
    StaticTokenType
  > = new Map<string, StaticTokenType>([
    ["(", "lparen"],
    [")", "rparen"],
    ["{", "lbrace"],
    ["}", "rbrace"],
    ["[", "lbracket"],
    ["]", "rbracket"],
    [".", "dot"],
    [",", "comma"],
    [":", "colon"],
    [";", "semicolon"],
    ["~", "tilde"],
    ["*", "asterisk"],
  ]);

  /**
   * Reserved words and the token type each one produces.
   *
   * A `Map` is required here: with a plain object and the `in` operator,
   * identifiers such as `toString` or `constructor` would be mistaken for
   * keywords because they exist on `Object.prototype`.
   */
  private static readonly keywordTokenTypes: ReadonlyMap<
    string,
    StaticTokenType
  > = new Map<string, StaticTokenType>([
    ["let", "let"],
    ["mut", "mut"],
    ["not", "not"],
    ["and", "and"],
    ["or", "or"],
    ["fn", "fn"],
    ["return", "return"],
    ["if", "if"],
    ["else", "else"],
    ["loop", "loop"],
    ["while", "while"],
    ["break", "break"],
    ["continue", "continue"],
  ]);

  /** 0-based offset of the current character in `text`. */
  private index = 0;

  /** 1-based line number of the current character. */
  private line = 1;

  /** 1-based column number of the current character. */
  private col = 1;

  /**
   * @param text The complete source text to tokenize.
   */
  public constructor(private text: string) {}

  /**
   * Consumes and returns the next token.
   *
   * @returns The next token; an `"eof"` token once the input is exhausted.
   */
  public next(): Token {
    while (this.currentIsOneOf(Lexer.whitespaceChars)) this.step();

    const pos = this.position();
    if (this.done()) {
      return { pos, tokenType: "eof" };
    }
    if (this.currentIsOneOf(Lexer.idStartChars)) {
      return this.lexIdOrKeyword(pos);
    }
    if (this.currentIs("0")) {
      // Integer grammar is `0 | [1-9][0-9]*`: a zero is a complete literal
      // by itself and never absorbs the digits that follow it.
      this.step();
      return { pos, tokenType: "int", value: 0 };
    }
    if (this.currentIsOneOf(Lexer.intStartChars)) {
      return this.lexInt(pos);
    }

    const single = Lexer.singleStaticTokenTypes.get(this.current());
    if (single !== undefined) {
      this.step();
      return { pos, tokenType: single };
    }
    return this.lexOperator(pos);
  }

  /** Lexes an identifier, or a keyword if the word is reserved. */
  private lexIdOrKeyword(pos: Position): Token {
    const word = this.takeWhile(Lexer.idChars);
    const keyword = Lexer.keywordTokenTypes.get(word);
    if (keyword !== undefined) {
      return { pos, tokenType: keyword };
    }
    return { pos, tokenType: "id", value: word };
  }

  /** Lexes a non-zero decimal integer literal. */
  private lexInt(pos: Position): Token {
    const digits = this.takeWhile(Lexer.intChars);
    return { pos, tokenType: "int", value: parseInt(digits, 10) };
  }

  /**
   * Lexes a one- or two-character operator, preferring the longer match.
   * Any character that starts no operator is consumed and reported as
   * `"invalid"`.
   */
  private lexOperator(pos: Position): Token {
    const first = this.current();
    this.step();
    switch (first) {
      case "+":
        if (this.accept("+")) return { pos, tokenType: "plusplus" };
        if (this.accept("=")) return { pos, tokenType: "plusequal" };
        return { pos, tokenType: "plus" };
      case "-":
        if (this.accept("-")) return { pos, tokenType: "minusminus" };
        if (this.accept(">")) return { pos, tokenType: "minusgt" };
        if (this.accept("=")) return { pos, tokenType: "minusequal" };
        return { pos, tokenType: "minus" };
      case "&":
        if (this.accept("=")) return { pos, tokenType: "ampersandequal" };
        return { pos, tokenType: "ampersand" };
      case "|":
        if (this.accept("=")) return { pos, tokenType: "pipeequal" };
        return { pos, tokenType: "pipe" };
      case "^":
        if (this.accept("=")) return { pos, tokenType: "hatequal" };
        return { pos, tokenType: "hat" };
      case "=":
        if (this.accept("=")) return { pos, tokenType: "equalequal" };
        return { pos, tokenType: "equal" };
      case "!":
        if (this.accept("=")) return { pos, tokenType: "exclamationequal" };
        return { pos, tokenType: "exclamation" };
      case "<":
        if (this.accept("=")) return { pos, tokenType: "ltequal" };
        return { pos, tokenType: "lt" };
      case ">":
        if (this.accept("=")) return { pos, tokenType: "gtequal" };
        return { pos, tokenType: "gt" };
      default:
        return { pos, tokenType: "invalid" };
    }
  }

  /**
   * Consumes the longest run of characters drawn from `chars`, starting at
   * the current character, and returns the consumed text.
   */
  private takeWhile(chars: string): string {
    const start = this.index;
    while (this.currentIsOneOf(chars)) this.step();
    return this.text.slice(start, this.index);
  }

  /** Consumes the current character if it equals `char`; reports whether it did. */
  private accept(char: string): boolean {
    if (!this.currentIs(char)) return false;
    this.step();
    return true;
  }

  /**
   * Advances past the current character and updates line/column tracking.
   *
   * The bookkeeping is driven by the character being consumed, not the one
   * being landed on: leaving a newline starts a new line at column 1, and
   * leaving anything else moves one column to the right. Does nothing at
   * end of input.
   */
  private step() {
    if (this.done()) return;
    if (this.current() === "\n") {
      this.line++;
      this.col = 1;
    } else {
      this.col++;
    }
    this.index++;
  }

  /** Snapshot of the current location in the source text. */
  private position(): Position {
    return {
      index: this.index,
      line: this.line,
      col: this.col,
    };
  }

  /** Whether there is a current character and it equals `char`. */
  private currentIs(char: string): boolean {
    return !this.done() && this.current() === char;
  }

  /** Whether there is a current character and it occurs in `chars`. */
  private currentIsOneOf(chars: string): boolean {
    // The `done()` guard matters: at end of input `current()` is "" and
    // `includes("")` is true for every string.
    return !this.done() && chars.includes(this.current());
  }

  /** The current character, or the empty string at end of input. */
  private current(): string {
    return this.text.charAt(this.index);
  }

  /** Whether the whole input has been consumed. */
  private done(): boolean {
    return this.index >= this.text.length;
  }
}
|