slige/compiler/Lexer.ts

230 lines
6.8 KiB
TypeScript
Raw Normal View History

2024-11-01 11:21:40 +00:00
import { Pos, Token } from "./Token.ts";
2024-11-04 13:54:55 +00:00
/**
 * Hand-written lexer that turns source text into a stream of tokens.
 *
 * Call {@link Lexer.next} repeatedly; it returns `null` once the input is
 * exhausted. Whitespace and `//` line comments are skipped. Lexical errors
 * are reported through `console.error`; malformed numbers and unterminated
 * strings additionally yield an `"error"` token, while illegal characters
 * are skipped.
 */
export class Lexer {
    /** Words that are emitted with their own token type instead of `ident`. */
    private static readonly keywords: ReadonlySet<string> = new Set([
        "break",
        "return",
        "let",
        "fn",
        "loop",
        "if",
        "else",
        "false",
        "true",
        "null",
        "or",
        "and",
        "not",
    ]);

    /** Two-character operators; every one starts with a punctuation character. */
    private static readonly compoundOperators: ReadonlySet<string> = new Set([
        "==",
        "<=",
        ">=",
        "->",
        "-=",
        "!=",
        "+=",
    ]);

    /** Escape sequences inside string literals; unknown escapes map to themselves. */
    private static readonly escapes: ReadonlyMap<string, string> = new Map([
        ["n", "\n"],
        ["t", "\t"],
        ["0", "\0"],
    ]);

    // Character classes are hoisted so no RegExp is re-created inside loops.
    private static readonly whitespace = /[ \t\n\r]/;
    private static readonly identStart = /[a-zA-Z_]/;
    private static readonly identPart = /[a-zA-Z0-9_]/;
    private static readonly nonZeroDigit = /[1-9]/;
    private static readonly digit = /[0-9]/;
    // "/" is deliberately absent: it is handled separately because of comments.
    private static readonly punctuation = /[+{};=\-*().,:\[\]><!]/;

    private index = 0;
    private line = 1;
    private col = 1;

    /** @param text The complete source text to tokenize. */
    public constructor(private text: string) {}

    /**
     * Produces the next token.
     *
     * @returns The next token, or `null` when the end of input is reached.
     */
    public next(): Token | null {
        // A loop (rather than recursion) keeps stack usage constant no matter
        // how many comments, blank runs or illegal characters are skipped.
        while (!this.done()) {
            const pos = this.pos();
            if (this.test(Lexer.whitespace)) {
                this.takeWhile(Lexer.whitespace);
                continue;
            }
            if (this.test(Lexer.identStart)) {
                return this.lexWord(pos);
            }
            if (this.test(Lexer.nonZeroDigit)) {
                return this.lexInt(pos);
            }
            if (this.test("0")) {
                return this.lexZero(pos);
            }
            if (this.test("\"")) {
                return this.lexString(pos);
            }
            if (this.test(Lexer.punctuation)) {
                return this.lexPunctuation(pos);
            }
            if (this.test("/")) {
                this.step();
                if (this.test("/")) {
                    // Line comment: discard up to (not including) the newline.
                    while (!this.done() && !this.test("\n")) {
                        this.step();
                    }
                    continue;
                }
                return this.token("/", pos);
            }
            console.error(
                `Lexer: illegal character '${this.current()}' at ${pos.line}:${pos.col}`,
            );
            this.step();
        }
        return null;
    }

    /** Returns the position of the character that will be lexed next. */
    public currentPos(): Pos {
        return this.pos();
    }

    /** Lexes a keyword or an identifier starting at `pos`. */
    private lexWord(pos: Pos): Token {
        const value = this.takeWhile(Lexer.identPart);
        if (Lexer.keywords.has(value)) {
            return this.token(value, pos);
        }
        return { ...this.token("ident", pos), identValue: value };
    }

    /** Lexes a decimal integer literal whose first digit is non-zero. */
    private lexInt(pos: Pos): Token {
        const textValue = this.takeWhile(Lexer.digit);
        return { ...this.token("int", pos), intValue: parseInt(textValue, 10) };
    }

    /** Lexes the literal `0`; a leading zero followed by a digit is an error. */
    private lexZero(pos: Pos): Token {
        this.step();
        if (this.test(Lexer.digit)) {
            console.error(`Lexer: invalid number at ${pos.line}:${pos.col}`);
            return this.token("error", pos);
        }
        return { ...this.token("int", pos), intValue: 0 };
    }

    /** Lexes a double-quoted string literal, resolving backslash escapes. */
    private lexString(pos: Pos): Token {
        this.step(); // opening quote
        let value = "";
        while (!this.done() && !this.test("\"")) {
            if (this.test("\\")) {
                this.step();
                if (this.done()) {
                    break;
                }
                const escaped = this.current();
                value += Lexer.escapes.get(escaped) ?? escaped;
            } else {
                value += this.current();
            }
            this.step();
        }
        // The loop only stops on a closing quote or on end of input.
        if (this.done()) {
            console.error(
                `Lexer: unclosed/malformed string at ${pos.line}:${pos.col}`,
            );
            return this.token("error", pos);
        }
        this.step(); // closing quote
        return { ...this.token("string", pos), stringValue: value };
    }

    /** Lexes a one- or two-character punctuation/operator token. */
    private lexPunctuation(pos: Pos): Token {
        const first = this.current();
        this.step();
        if (!this.done()) {
            const compound = first + this.current();
            if (Lexer.compoundOperators.has(compound)) {
                this.step();
                return this.token(compound, pos);
            }
        }
        return this.token(first, pos);
    }

    /** Consumes characters while they match `pattern` and returns them. */
    private takeWhile(pattern: RegExp): string {
        const start = this.index;
        while (this.test(pattern)) {
            this.step();
        }
        return this.text.slice(start, this.index);
    }

    /** True once every character of the input has been consumed. */
    private done(): boolean {
        return this.index >= this.text.length;
    }

    /** The character at the cursor; only meaningful while `done()` is false. */
    private current(): string {
        return this.text[this.index];
    }

    /** Advances the cursor by one character, keeping line/column in sync. */
    private step() {
        if (this.done()) {
            return;
        }
        if (this.current() === "\n") {
            this.line += 1;
            this.col = 1;
        } else {
            this.col += 1;
        }
        this.index += 1;
    }

    /** Snapshot of the cursor as a `Pos`. */
    private pos(): Pos {
        return {
            index: this.index,
            line: this.line,
            col: this.col,
        };
    }

    /** Builds a token of `type` spanning from `pos` to the current cursor. */
    private token(type: string, pos: Pos): Token {
        const length = this.index - pos.index;
        return { type, pos, length };
    }

    /**
     * Checks the current character against a single-character string or a
     * character-class pattern. Always false at end of input, so a RegExp is
     * never run against a coerced `undefined`.
     */
    private test(pattern: RegExp | string): boolean {
        if (this.done()) {
            return false;
        }
        const ch = this.current();
        return typeof pattern === "string" ? ch === pattern : pattern.test(ch);
    }
}