slige/compiler/lexer.ts

244 lines
7.1 KiB
TypeScript
Raw Normal View History

2024-12-11 02:11:00 +00:00
import { Reporter } from "./info.ts";
2024-12-10 20:42:15 +00:00
import { Pos, Token } from "./token.ts";
2024-11-01 11:21:40 +00:00
2024-11-04 13:54:55 +00:00
/**
 * Hand-written lexer that turns source text into a stream of `Token`s.
 *
 * Call `next()` repeatedly; it returns `null` once the input is exhausted.
 * Lexical errors are sent to the supplied `Reporter`. Malformed literals
 * additionally yield a token of type `"error"`, while illegal characters are
 * reported and skipped.
 */
export class Lexer {
    /** Words that are emitted as their own token type instead of `"ident"`. */
    private static readonly keywords: ReadonlySet<string> = new Set([
        "false",
        "true",
        "null",
        "int",
        "bool",
        "string",
        "break",
        "return",
        "let",
        "fn",
        "loop",
        "if",
        "else",
        "struct",
        "or",
        "and",
        "not",
        "while",
        "for",
        "in",
        "mod",
        "pub",
    ]);

    /**
     * Escape sequences shared by character and string literals. Any other
     * character following a backslash stands for itself.
     */
    private static readonly escapes: ReadonlyMap<string, string> = new Map([
        ["n", "\n"],
        ["t", "\t"],
        ["0", "\0"],
    ]);

    // Patterns are hoisted so they are not re-created for every character.
    // None of them use the `g`/`y` flags, so `RegExp.test` stays stateless.
    private static readonly whitespace = /[ \t\n\r]/;
    private static readonly identStart = /[a-zA-Z_]/;
    private static readonly identPart = /[a-zA-Z0-9_]/;
    private static readonly digit = /[0-9]/;
    private static readonly nonZeroDigit = /[1-9]/;
    private static readonly punctuation = /[+{};=\-*().,:\[\]><!#]/;

    private index = 0;
    private line = 1;
    private col = 1;

    /**
     * @param text Source text to tokenize.
     * @param reporter Sink that receives lexical errors.
     */
    public constructor(
        private readonly text: string,
        private readonly reporter: Reporter,
    ) {}

    /**
     * Produces the next token.
     *
     * Whitespace, `//` line comments and illegal characters are skipped in a
     * loop rather than by recursion, so long runs of them cannot overflow
     * the call stack.
     *
     * @returns The next token, or `null` at end of input.
     */
    public next(): Token | null {
        while (!this.done()) {
            const pos = this.pos();
            if (this.test(Lexer.whitespace)) {
                while (this.test(Lexer.whitespace)) {
                    this.step();
                }
                continue;
            }
            if (this.test(Lexer.identStart)) {
                return this.lexWord(pos);
            }
            if (this.test(Lexer.nonZeroDigit)) {
                return this.lexInt(pos);
            }
            if (this.test("0")) {
                return this.lexZero(pos);
            }
            if (this.test("'")) {
                return this.lexChar(pos);
            }
            if (this.test('"')) {
                return this.lexString(pos);
            }
            if (this.test(Lexer.punctuation)) {
                return this.lexPunctuation(pos);
            }
            if (this.test("/")) {
                this.step();
                if (this.test("/")) {
                    // Line comment: skip up to (not including) the newline,
                    // which the whitespace branch consumes next iteration.
                    while (!this.done() && !this.test("\n")) {
                        this.step();
                    }
                    continue;
                }
                return this.token("/", pos);
            }
            this.report(`illegal character '${this.current()}'`, pos);
            this.step();
        }
        return null;
    }

    /** Returns the position of the next unread character. */
    public currentPos(): Pos {
        return this.pos();
    }

    /** Lexes a keyword or an identifier (carrying `identValue`). */
    private lexWord(pos: Pos): Token {
        let value = "";
        while (this.test(Lexer.identPart)) {
            value += this.current();
            this.step();
        }
        if (Lexer.keywords.has(value)) {
            return this.token(value, pos);
        }
        return { ...this.token("ident", pos), identValue: value };
    }

    /** Lexes a decimal integer literal that starts with a non-zero digit. */
    private lexInt(pos: Pos): Token {
        let textValue = "";
        while (this.test(Lexer.digit)) {
            textValue += this.current();
            this.step();
        }
        return { ...this.token("int", pos), intValue: parseInt(textValue, 10) };
    }

    /** Lexes the literal `0`; a `0` directly followed by a digit is an error. */
    private lexZero(pos: Pos): Token {
        this.step();
        if (this.test(Lexer.digit)) {
            this.report("invalid number", pos);
            return this.token("error", pos);
        }
        return { ...this.token("int", pos), intValue: 0 };
    }

    /**
     * Lexes a character literal such as `'a'` or `'\n'` into an `"int"` token
     * whose `intValue` is the UTF-16 code unit of the character.
     */
    private lexChar(pos: Pos): Token {
        this.step(); // opening quote
        let value: string;
        if (this.test("\\")) {
            this.step();
            if (this.done()) {
                this.report("malformed character literal", pos);
                return this.token("error", pos);
            }
            value = Lexer.escapes.get(this.current()) ?? this.current();
        } else {
            // At end of input `current()` is "", caught by the check below.
            value = this.current();
        }
        this.step();
        if (!this.test("'") || value.length === 0) {
            this.report("malformed character literal", pos);
            return this.token("error", pos);
        }
        this.step(); // closing quote
        return { ...this.token("int", pos), intValue: value.charCodeAt(0) };
    }

    /** Lexes a double-quoted string literal, resolving backslash escapes. */
    private lexString(pos: Pos): Token {
        this.step(); // opening quote
        let value = "";
        while (!this.done() && !this.test('"')) {
            if (this.test("\\")) {
                this.step();
                if (this.done()) {
                    break;
                }
                value += Lexer.escapes.get(this.current()) ?? this.current();
            } else {
                value += this.current();
            }
            this.step();
        }
        if (!this.test('"')) {
            this.report("unclosed/malformed string", pos);
            return this.token("error", pos);
        }
        this.step(); // closing quote
        return { ...this.token("string", pos), stringValue: value };
    }

    /**
     * Lexes an operator or punctuation token, preferring the longest match
     * (`==`, `<=`, `>=`, `!=`, `+=`, `-=`, `->`, `::`, `::<`) over the single
     * character.
     */
    private lexPunctuation(pos: Pos): Token {
        const first = this.current();
        this.step();
        switch (first) {
            case "=":
            case "<":
            case ">":
            case "!":
            case "+":
                if (this.test("=")) {
                    this.step();
                    return this.token(first + "=", pos);
                }
                break;
            case "-":
                if (this.test(">")) {
                    this.step();
                    return this.token("->", pos);
                }
                if (this.test("=")) {
                    this.step();
                    return this.token("-=", pos);
                }
                break;
            case ":":
                if (this.test(":")) {
                    this.step();
                    if (this.test("<")) {
                        this.step();
                        return this.token("::<", pos);
                    }
                    return this.token("::", pos);
                }
                break;
        }
        return this.token(first, pos);
    }

    /** True once every character of the input has been consumed. */
    private done(): boolean {
        return this.index >= this.text.length;
    }

    /** The next unread character, or `""` at end of input. */
    private current(): string {
        return this.text.charAt(this.index);
    }

    /** Consumes one character, keeping `line`/`col` in sync; no-op at end. */
    private step(): void {
        if (this.done()) {
            return;
        }
        if (this.current() === "\n") {
            this.line += 1;
            this.col = 1;
        } else {
            this.col += 1;
        }
        this.index += 1;
    }

    /** Snapshot of the current offset, line and column. */
    private pos(): Pos {
        return {
            index: this.index,
            line: this.line,
            col: this.col,
        };
    }

    /** Builds a token of `type` spanning from `pos` to the current offset. */
    private token(type: string, pos: Pos): Token {
        const length = this.index - pos.index;
        return { type, pos, length };
    }

    /**
     * Checks the next unread character against an exact string or a pattern.
     *
     * Always false at end of input, so callers need no separate `done()`
     * guard to avoid matching against a missing character.
     */
    private test(pattern: RegExp | string): boolean {
        if (this.done()) {
            return false;
        }
        const ch = this.current();
        return typeof pattern === "string" ? ch === pattern : pattern.test(ch);
    }

    /** Forwards a lexical error at `pos` to the reporter. */
    private report(msg: string, pos: Pos): void {
        this.reporter.reportError({
            msg,
            pos,
            reporter: "Lexer",
        });
    }
}