mirror of
https://git.sfja.dk/Mikkel/slige.git
synced 2025-01-18 10:36:31 +00:00
247 lines
7.1 KiB
TypeScript
247 lines
7.1 KiB
TypeScript
import { Reporter } from "./info.ts";
|
|
import { Pos, Token } from "./token.ts";
|
|
|
|
/**
 * Hand-written lexer that turns source text into `Token`s one at a time.
 *
 * Tokens are pulled with {@link Lexer.next}; lexical errors are sent to the
 * `Reporter` given to the constructor (with `reporter: "Lexer"`), and the
 * offending input is either skipped or surfaced as a token of type `"error"`.
 */
export class Lexer {
    /**
     * Reserved words. An identifier spelled like one of these is emitted with
     * the word itself as the token type instead of `"ident"`.
     */
    private static readonly keywords: ReadonlySet<string> = new Set([
        "false",
        "true",
        "null",
        "int",
        "bool",
        "string",
        "break",
        "return",
        "let",
        "mut",
        "fn",
        "loop",
        "if",
        "else",
        "struct",
        "or",
        "and",
        "not",
        "while",
        "for",
        "in",
        "mod",
        "pub",
        "use",
        "type_alias",
    ]);

    /**
     * Characters with a special meaning after a backslash in char and string
     * literals. Any other escaped character stands for itself.
     */
    private static readonly escapes: ReadonlyMap<string, string> = new Map([
        ["n", "\n"],
        ["t", "\t"],
        ["0", "\0"],
    ]);

    // Hoisted so the patterns are not re-created on every call to `next()`.
    private static readonly whitespace = /[ \t\n\r]/;
    private static readonly identStart = /[a-zA-Z_]/;
    private static readonly identPart = /[a-zA-Z0-9_]/;
    private static readonly nonZeroDigit = /[1-9]/;
    private static readonly digit = /[0-9]/;
    /** First character of every punctuation/operator token except `/`. */
    private static readonly punctuation = /[+{};=\-*().,:\[\]><!#&]/;

    private index = 0;
    private line = 1;
    private col = 1;

    /**
     * @param text The complete source text to tokenize.
     * @param reporter Receives every lexical error found while scanning.
     */
    public constructor(private text: string, private reporter: Reporter) {}

    /**
     * Scans and returns the next token, or `null` once the input is exhausted.
     *
     * Whitespace, `//` line comments and illegal characters (which are
     * reported) produce no token. They are skipped in a loop rather than by
     * recursion, so arbitrarily long runs of them cannot overflow the stack.
     *
     * @returns The next token, or `null` at end of input.
     */
    public next(): Token | null {
        while (!this.done()) {
            const pos = this.pos();

            if (this.test(Lexer.whitespace)) {
                this.takeWhile(Lexer.whitespace);
                continue;
            }
            if (this.test(Lexer.identStart)) {
                return this.lexIdentOrKeyword(pos);
            }
            if (this.test(Lexer.nonZeroDigit)) {
                const digits = this.takeWhile(Lexer.digit);
                return {
                    ...this.token("int", pos),
                    intValue: parseInt(digits, 10),
                };
            }
            if (this.test("0")) {
                return this.lexZero(pos);
            }
            if (this.test("'")) {
                return this.lexChar(pos);
            }
            if (this.test('"')) {
                return this.lexString(pos);
            }
            if (this.test(Lexer.punctuation)) {
                return this.lexPunctuation(pos);
            }
            if (this.test("/")) {
                this.step();
                if (this.test("/")) {
                    // Line comment: drop everything up to (not including)
                    // the newline, then look for the next real token.
                    while (!this.done() && !this.test("\n")) {
                        this.step();
                    }
                    continue;
                }
                return this.token("/", pos);
            }

            this.report(`illegal character '${this.current()}'`, pos);
            this.step();
        }
        return null;
    }

    /** Returns the position of the character the lexer is currently at. */
    public currentPos(): Pos {
        return this.pos();
    }

    /** Lexes an identifier or keyword starting at the current character. */
    private lexIdentOrKeyword(pos: Pos): Token {
        const value = this.takeWhile(Lexer.identPart);
        if (Lexer.keywords.has(value)) {
            return this.token(value, pos);
        }
        return { ...this.token("ident", pos), identValue: value };
    }

    /**
     * Lexes a literal `0`. A `0` directly followed by another digit is
     * reported as "invalid number"; only the `0` itself is consumed.
     */
    private lexZero(pos: Pos): Token {
        this.step();
        if (this.test(Lexer.digit)) {
            this.report("invalid number", pos);
            return this.token("error", pos);
        }
        return { ...this.token("int", pos), intValue: 0 };
    }

    /**
     * Lexes a character literal such as `'a'` or `'\n'`. The result is an
     * `"int"` token whose `intValue` is the UTF-16 code unit of the character.
     */
    private lexChar(pos: Pos): Token {
        this.step(); // opening quote
        if (this.done()) {
            return this.malformedChar(pos);
        }
        let value: string;
        if (this.test("\\")) {
            this.step();
            if (this.done()) {
                return this.malformedChar(pos);
            }
            value = this.unescape(this.current());
        } else {
            value = this.current();
        }
        this.step();
        if (!this.test("'")) {
            return this.malformedChar(pos);
        }
        this.step(); // closing quote
        return { ...this.token("int", pos), intValue: value.charCodeAt(0) };
    }

    /** Reports a malformed character literal and builds its error token. */
    private malformedChar(pos: Pos): Token {
        this.report("malformed character literal", pos);
        return this.token("error", pos);
    }

    /**
     * Lexes a double-quoted string literal, resolving backslash escapes into
     * `stringValue`. Reaching end of input before the closing quote is
     * reported and yields an `"error"` token.
     */
    private lexString(pos: Pos): Token {
        this.step(); // opening quote
        let value = "";
        while (!this.done() && !this.test('"')) {
            if (this.test("\\")) {
                this.step();
                if (this.done()) {
                    break;
                }
                value += this.unescape(this.current());
            } else {
                value += this.current();
            }
            this.step();
        }
        if (!this.test('"')) {
            this.report("unclosed/malformed string", pos);
            return this.token("error", pos);
        }
        this.step(); // closing quote
        return { ...this.token("string", pos), stringValue: value };
    }

    /**
     * Lexes a punctuation or operator token, preferring the longest match:
     * `==`, `<=`, `>=`, `!=`, `+=`, `-=`, `->`, `::` and `::<` win over their
     * single-character prefixes.
     */
    private lexPunctuation(pos: Pos): Token {
        const first = this.current();
        this.step();
        switch (first) {
            case "=":
            case "<":
            case ">":
            case "!":
            case "+":
                if (this.test("=")) {
                    this.step();
                    return this.token(first + "=", pos);
                }
                break;
            case "-":
                if (this.test(">")) {
                    this.step();
                    return this.token("->", pos);
                }
                if (this.test("=")) {
                    this.step();
                    return this.token("-=", pos);
                }
                break;
            case ":":
                if (this.test(":")) {
                    this.step();
                    if (this.test("<")) {
                        this.step();
                        return this.token("::<", pos);
                    }
                    return this.token("::", pos);
                }
                break;
        }
        return this.token(first, pos);
    }

    /** Maps the character following a backslash to the character it denotes. */
    private unescape(ch: string): string {
        return Lexer.escapes.get(ch) ?? ch;
    }

    /**
     * Consumes characters for as long as they match `pattern` and returns the
     * consumed text (empty if the current character does not match).
     */
    private takeWhile(pattern: RegExp): string {
        const start = this.index;
        while (this.test(pattern)) {
            this.step();
        }
        return this.text.slice(start, this.index);
    }

    /** True once every character of the input has been consumed. */
    private done(): boolean {
        return this.index >= this.text.length;
    }

    /** The character at the current index; only meaningful when not `done()`. */
    private current(): string {
        return this.text[this.index];
    }

    /**
     * Advances by one character, keeping `line`/`col` in sync: a newline
     * starts a new line at column 1. Does nothing at end of input.
     */
    private step(): void {
        if (this.done()) {
            return;
        }
        if (this.current() === "\n") {
            this.line += 1;
            this.col = 1;
        } else {
            this.col += 1;
        }
        this.index += 1;
    }

    /** Snapshot of the current index, line and column. */
    private pos(): Pos {
        return {
            index: this.index,
            line: this.line,
            col: this.col,
        };
    }

    /**
     * Builds a token of the given type that starts at `pos` and ends at the
     * current index.
     */
    private token(type: string, pos: Pos): Token {
        const length = this.index - pos.index;
        return { type, pos, length };
    }

    /**
     * Checks whether the current character equals `pattern` (string) or
     * matches it (regex). Always false at end of input, so an out-of-range
     * read is never coerced to the text "undefined" and matched by a regex.
     */
    private test(pattern: RegExp | string): boolean {
        if (this.done()) {
            return false;
        }
        const ch = this.current();
        return typeof pattern === "string" ? ch === pattern : pattern.test(ch);
    }

    /** Forwards a lexical error at `pos` to the reporter. */
    private report(msg: string, pos: Pos): void {
        this.reporter.reportError({
            msg,
            pos,
            reporter: "Lexer",
        });
    }
}
|