From ca043f7b576e0f54474dc0a2480c900a5e44e623 Mon Sep 17 00:00:00 2001
From: SimonFJ20 <simonfromjakobsen@gmail.com>
Date: Wed, 12 Apr 2023 01:59:42 +0200
Subject: [PATCH] add utils and scirpt

---
 .gitignore                |   4 +-
 Makefile                  |  35 ++++++++
 compile_flags.txt         |   7 ++
 include/scirpt/lexer.h    |  12 +++
 include/scirpt/parser.h   |   4 +
 include/scirpt/position.h |  11 +++
 include/scirpt/token.h    |  46 ++++++++++
 include/utils/stringmap.h |  48 +++++++++++
 main.c                    |   1 -
 parser.c                  |   1 +
 scirpt/lexer.c            | 171 ++++++++++++++++++++++++++++++++++++++
 scirpt/lexer.h            |  31 +++++++
 scirpt/main.c             |  20 +++++
 scirpt/parser.c           |   0
 scirpt/parser.h           |   6 ++
 utils/stringmap.c         | 118 ++++++++++++++++++++++++++
 utils/stringmap.h         |  21 +++++
 17 files changed, 534 insertions(+), 2 deletions(-)
 create mode 100644 Makefile
 create mode 100644 compile_flags.txt
 create mode 100644 include/scirpt/lexer.h
 create mode 100644 include/scirpt/parser.h
 create mode 100644 include/scirpt/position.h
 create mode 100644 include/scirpt/token.h
 create mode 100644 include/utils/stringmap.h
 delete mode 100644 main.c
 create mode 100644 parser.c
 create mode 100644 scirpt/lexer.c
 create mode 100644 scirpt/lexer.h
 create mode 100644 scirpt/main.c
 create mode 100644 scirpt/parser.c
 create mode 100644 scirpt/parser.h
 create mode 100644 utils/stringmap.c
 create mode 100644 utils/stringmap.h

diff --git a/.gitignore b/.gitignore
index 3e0f233..08e593b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
-a.out
+
+build/
+bin/
 
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8befa33
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,35 @@
+
+CC = gcc
+
+CFLAGS = \
+	-std=c17 \
+	-Wall \
+	-Wextra \
+	-Wpedantic \
+	-Wconversion \
+	-Iinclude
+
+all: compile_flags.txt dirs scirpt
+
+UTILS_SRC = stringmap.c
+UTILS_OBJ = $(patsubst %.c, build/utils/%.o, $(UTILS_SRC))
+
+SCIRPT_SRC = main.c lexer.c parser.c
+SCIRPT_OBJ = $(patsubst %.c, build/scirpt/%.o, $(SCIRPT_SRC))
+
+scirpt: $(SCIRPT_OBJ) $(UTILS_OBJ)
+	$(CC) -o bin/$@ $(CFLAGS) $^ -lm
+
+# Objects depend on every header in the tree, so any header edit rebuilds all.
+build/%.o: %.c $(shell find . -name '*.h')
+	mkdir -p $(@D)
+	$(CC) -c -o $@ $(CFLAGS) $<
+dirs:
+	mkdir -p bin
+
+compile_flags.txt:
+	echo -xc $(CFLAGS) | sed 's/\s\+/\n/g' > compile_flags.txt
+
+clean:
+	rm -rf build/ bin/
+
diff --git a/compile_flags.txt b/compile_flags.txt
new file mode 100644
index 0000000..3c63893
--- /dev/null
+++ b/compile_flags.txt
@@ -0,0 +1,7 @@
+-xc
+-std=c17
+-Wall
+-Wextra
+-Wpedantic
+-Wconversion
+-Iinclude
diff --git a/include/scirpt/lexer.h b/include/scirpt/lexer.h
new file mode 100644
index 0000000..a2d3e1e
--- /dev/null
+++ b/include/scirpt/lexer.h
@@ -0,0 +1,12 @@
+#ifndef SCIRPT_LEXER_H
+#define SCIRPT_LEXER_H
+
+#include "scirpt/token.h"
+
+typedef struct ScirptLexer ScirptLexer;
+
+ScirptLexer* scirpt_lexer_new(const char* text, size_t text_length);
+void scirpt_lexer_delete(ScirptLexer* lexer);
+ScirptToken scirpt_lexer_next(ScirptLexer* lexer);
+
+#endif
diff --git a/include/scirpt/parser.h b/include/scirpt/parser.h
new file mode 100644
index 0000000..788cbd8
--- /dev/null
+++ b/include/scirpt/parser.h
@@ -0,0 +1,4 @@
+#ifndef SCIRPT_PARSER_H
+#define SCIRPT_PARSER_H
+
+#endif
diff --git a/include/scirpt/position.h b/include/scirpt/position.h
new file mode 100644
index 0000000..4312790
--- /dev/null
+++ b/include/scirpt/position.h
@@ -0,0 +1,11 @@
+#ifndef SCIRPT_POSITIONS_H
+#define SCIRPT_POSITIONS_H
+
+#include <stddef.h>
+
+typedef struct ScirptPosition {
+	size_t index;
+	int line, col;
+} ScirptPosition;
+
+#endif
diff --git a/include/scirpt/token.h b/include/scirpt/token.h
new file mode 100644
index 0000000..b4556d7
--- /dev/null
+++ b/include/scirpt/token.h
@@ -0,0 +1,46 @@
+#ifndef SCIRPT_TOKENS_H
+#define SCIRPT_TOKENS_H
+
+#include "scirpt/position.h"
+#include <stddef.h>
+
+typedef enum {
+	ScirptTokenTypeEof,
+	ScirptTokenTypeInvalidChar,
+	ScirptTokenTypeId,
+	ScirptTokenTypeInt,
+	ScirptTokenTypeString,
+	ScirptTokenTypeLParen,
+	ScirptTokenTypeRParen,
+	ScirptTokenTypeLBrace,
+	ScirptTokenTypeRBrace,
+	ScirptTokenTypeLBracket,
+	ScirptTokenTypeRBracket,
+	ScirptTokenTypeDot,
+	ScirptTokenTypeComma,
+	ScirptTokenTypeColon,
+	ScirptTokenTypeSemicolon,
+	ScirptTokenTypePlus,
+	ScirptTokenTypeMinus,
+	ScirptTokenTypeAsterisk,
+	ScirptTokenTypeNull,
+	ScirptTokenTypeFalse,
+	ScirptTokenTypeTrue,
+	ScirptTokenTypeLet,
+	ScirptTokenTypeIf,
+	ScirptTokenTypeElse,
+	ScirptTokenTypeWhile,
+	ScirptTokenTypeFor,
+	ScirptTokenTypeIn,
+	ScirptTokenTypeBreak,
+	ScirptTokenTypeFn,
+	ScirptTokenTypeReturn,
+} ScirptTokenType;
+
+typedef struct {
+	ScirptTokenType type;
+	ScirptPosition pos;
+	size_t length;
+} ScirptToken;
+
+#endif
diff --git a/include/utils/stringmap.h b/include/utils/stringmap.h
new file mode 100644
index 0000000..bac36ff
--- /dev/null
+++ b/include/utils/stringmap.h
@@ -0,0 +1,48 @@
+#ifndef UTILS_STRINGMAP_H
+#define UTILS_STRINGMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+// https://stackoverflow.com/questions/466204/rounding-up-to-next-power-of-2
+// https://stackoverflow.com/questions/1322510/given-an-integer-how-do-i-find-the-next-largest-power-of-two-using-bit-twiddlin/1322548#1322548
+static inline uint64_t utils_nearest_bigger_power_of_2_u64(uint64_t value)
+{
+	value--;
+	value |= value >> 1;
+	value |= value >> 2;
+	value |= value >> 4;
+	value |= value >> 8;
+	value |= value >> 16;
+	value |= value >> 32;
+	value++;
+	return value;
+}
+
+// https://stackoverflow.com/questions/7666509/hash-function-for-string
+// http://www.cse.yorku.ca/~oz/hash.html
+static inline size_t string_hash_djb2(const unsigned char* value, size_t length)
+{
+	size_t hash = 5381;
+	for (size_t i = 0; i < length && value[i] != '\0'; ++i) // also stops at a NUL byte
+		hash = ((hash << 5) + hash) + value[i]; // hash * 33 + byte
+	return hash;
+}
+
+typedef struct StringMap StringMap;
+
+StringMap* stringmap_new(void);
+void stringmap_delete(StringMap* map);
+size_t* stringmap_get(const StringMap* map, const char* key, size_t key_length);
+bool stringmap_has(const StringMap* map, const char* key, size_t key_length);
+void stringmap_set(
+	StringMap* map, const char* key, size_t key_length, size_t value
+);
+void stringmap_reserve(StringMap* map, size_t minimum_size);
+void stringmap_remove(StringMap* map, const char* key, size_t key_length);
+void stringmap_clean(StringMap* map);
+void stringmap_shrink(StringMap* map);
+void stringmap_clean_and_shrink(StringMap* map);
+
+#endif
diff --git a/main.c b/main.c
deleted file mode 100644
index 8416927..0000000
--- a/main.c
+++ /dev/null
@@ -1 +0,0 @@
-int main(void) { *(int volatile*)0 = 0; }
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..c1dfa96
--- /dev/null
+++ b/parser.c
@@ -0,0 +1 @@
+#include "parser.h"
diff --git a/scirpt/lexer.c b/scirpt/lexer.c
new file mode 100644
index 0000000..2c47a22
--- /dev/null
+++ b/scirpt/lexer.c
@@ -0,0 +1,171 @@
+#include "lexer.h"
+#include "scirpt/lexer.h"
+#include "scirpt/position.h"
+#include "scirpt/token.h"
+#include "utils/stringmap.h"
+#include <stdlib.h>
+#include <string.h>
+
+static inline void step(ScirptLexer* lexer) { scirpt_lexer_step(lexer); }
+static inline ScirptToken
+token(const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start)
+{
+	return scirpt_lexer_token(lexer, type, start);
+}
+static inline ScirptPosition pos(const ScirptLexer* lexer)
+{
+	return scirpt_lexer_pos(lexer);
+}
+static inline bool current_is(const ScirptLexer* lexer, char value)
+{
+	return scirpt_lexer_current_is(lexer, value);
+}
+static inline bool done(const ScirptLexer* lexer)
+{
+	return scirpt_lexer_done(lexer);
+}
+static inline char current(const ScirptLexer* lexer)
+{
+	return scirpt_lexer_current(lexer);
+}
+
+ScirptLexer* scirpt_lexer_new(const char* text, size_t text_length)
+{
+	ScirptLexer* lexer = malloc(sizeof(ScirptLexer));
+	scirpt_lexer_create(lexer, text, text_length);
+	return lexer;
+}
+
+// Destroys the lexer's keyword map, then frees the lexer; NULL is a no-op.
+void scirpt_lexer_delete(ScirptLexer* lexer) { if (lexer) scirpt_lexer_destroy(lexer); free(lexer); }
+static inline void
+add_keyword(StringMap* keywords, const char* key, ScirptTokenType value)
+{
+	stringmap_set(keywords, key, strlen(key), value);
+}
+
+void scirpt_lexer_create(
+	ScirptLexer* lexer, const char* text, size_t text_length
+)
+{
+	StringMap* keywords = stringmap_new();
+	add_keyword(keywords, "null", ScirptTokenTypeNull);
+	add_keyword(keywords, "false", ScirptTokenTypeFalse);
+	add_keyword(keywords, "true", ScirptTokenTypeTrue);
+	add_keyword(keywords, "let", ScirptTokenTypeLet);
+	add_keyword(keywords, "if", ScirptTokenTypeIf);
+	add_keyword(keywords, "else", ScirptTokenTypeElse);
+	add_keyword(keywords, "while", ScirptTokenTypeWhile);
+	add_keyword(keywords, "for", ScirptTokenTypeFor);
+	add_keyword(keywords, "in", ScirptTokenTypeIn);
+	add_keyword(keywords, "break", ScirptTokenTypeBreak);
+	add_keyword(keywords, "fn", ScirptTokenTypeFn);
+	add_keyword(keywords, "return", ScirptTokenTypeReturn);
+	*lexer = (ScirptLexer) {
+		.text = text,
+		.text_length = text_length,
+		.index = 0,
+		.line = 1,
+		.col = 1,
+		.keywords = keywords,
+	};
+}
+
+void scirpt_lexer_destroy(ScirptLexer* lexer)
+{
+	stringmap_delete(lexer->keywords);
+}
+
+static inline bool is_whitespace(char value)
+{
+	return value == ' ' || value == '\t' || value == '\r' || value == '\n';
+}
+
+static inline bool is_id_char_excluding_numbers(char value)
+{
+	return (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z')
+		|| value == '_';
+}
+
+static inline bool is_int_char(char value)
+{
+	return value >= '0' && value <= '9';
+}
+
+static inline bool is_id_char(char value)
+{
+	return is_id_char_excluding_numbers(value) || is_int_char(value);
+}
+
+ScirptToken scirpt_lexer_next(ScirptLexer* lexer)
+{
+	if (done(lexer)) {
+		return token(lexer, ScirptTokenTypeEof, pos(lexer));
+	} else if (is_whitespace(current(lexer))) {
+		step(lexer);
+		while (!done(lexer) && is_whitespace(current(lexer)))
+			step(lexer);
+		return scirpt_lexer_next(lexer);
+	} else if (is_id_char_excluding_numbers(current(lexer))) {
+		ScirptPosition start = pos(lexer);
+		step(lexer);
+		while (!done(lexer) && is_id_char(current(lexer)))
+			step(lexer);
+		return token(lexer, ScirptTokenTypeId, start);
+	} else {
+		switch (current(lexer)) {
+			default: {
+				ScirptPosition start = pos(lexer);
+				step(lexer);
+				return token(lexer, ScirptTokenTypeInvalidChar, start);
+			}
+		}
+	}
+}
+
+// Moves to the next character. line/col are updated from the character being
+// left, so the first character after a '\n' is reported at column 1.
+void scirpt_lexer_step(ScirptLexer* lexer)
+{
+	if (!done(lexer) && current(lexer) == '\n') {
+		lexer->line++;
+		lexer->col = 1;
+	} else if (!done(lexer)) {
+		lexer->col++;
+	}
+	lexer->index++; // the index still advances when already at end of input
+}
+ScirptPosition scirpt_lexer_pos(const ScirptLexer* lexer)
+{
+	return (ScirptPosition) {
+		.index = lexer->index,
+		.line = lexer->line,
+		.col = lexer->col,
+	};
+}
+
+ScirptToken scirpt_lexer_token(
+	const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start
+)
+{
+	return (ScirptToken) {
+		.type = type,
+		.pos = start,
+		.length = lexer->index - start.index,
+	};
+}
+
+bool scirpt_lexer_current_is(const ScirptLexer* lexer, char value)
+{
+	return !done(lexer) && current(lexer) == value;
+}
+
+bool scirpt_lexer_done(const ScirptLexer* lexer)
+{
+	return lexer->index >= lexer->text_length;
+}
+
+char scirpt_lexer_current(const ScirptLexer* lexer)
+{
+	return lexer->text[lexer->index];
+}
diff --git a/scirpt/lexer.h b/scirpt/lexer.h
new file mode 100644
index 0000000..3ebbf01
--- /dev/null
+++ b/scirpt/lexer.h
@@ -0,0 +1,31 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include "scirpt/lexer.h"
+#include "scirpt/token.h"
+#include "utils/stringmap.h"
+#include <stdbool.h>
+#include <stddef.h>
+
+struct ScirptLexer {
+	const char* text;
+	size_t text_length;
+	size_t index;
+	int line, col;
+	StringMap* keywords;
+};
+
+void scirpt_lexer_create(
+	ScirptLexer* lexer, const char* text, size_t text_length
+);
+void scirpt_lexer_destroy(ScirptLexer* lexer);
+void scirpt_lexer_step(ScirptLexer* lexer);
+ScirptPosition scirpt_lexer_pos(const ScirptLexer* lexer);
+ScirptToken scirpt_lexer_token(
+	const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start
+);
+bool scirpt_lexer_current_is(const ScirptLexer* lexer, char value);
+bool scirpt_lexer_done(const ScirptLexer* lexer);
+char scirpt_lexer_current(const ScirptLexer* lexer);
+
+#endif
diff --git a/scirpt/main.c b/scirpt/main.c
new file mode 100644
index 0000000..1e2af83
--- /dev/null
+++ b/scirpt/main.c
@@ -0,0 +1,20 @@
+#include "scirpt/lexer.h"
+#include "scirpt/token.h"
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+// Demo driver: lexes a fixed string and prints each token type's number.
+int main(void)
+{
+	printf("hello world\n");
+	const char* text = "123 if +";
+	ScirptLexer* lexer = scirpt_lexer_new(text, strlen(text));
+	while (true) {
+		ScirptToken token = scirpt_lexer_next(lexer);
+		if (token.type == ScirptTokenTypeEof)
+			break;
+		printf("%d\n", token.type);
+	}
+	scirpt_lexer_delete(lexer); // the lexer from scirpt_lexer_new is heap-allocated
+}
diff --git a/scirpt/parser.c b/scirpt/parser.c
new file mode 100644
index 0000000..e69de29
diff --git a/scirpt/parser.h b/scirpt/parser.h
new file mode 100644
index 0000000..c349953
--- /dev/null
+++ b/scirpt/parser.h
@@ -0,0 +1,6 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "scirpt/parser.h"
+
+#endif
diff --git a/utils/stringmap.c b/utils/stringmap.c
new file mode 100644
index 0000000..ed70837
--- /dev/null
+++ b/utils/stringmap.c
@@ -0,0 +1,118 @@
+#include "stringmap.h"
+#include "utils/stringmap.h"
+#include <stdint.h>
+#include <stdlib.h>
+
+StringMap* stringmap_new(void)
+{
+	StringMap* map = malloc(sizeof(StringMap));
+	stringmap_create(map);
+	return map;
+}
+
+// Frees the entry array and then the map itself; a NULL map is a no-op.
+void stringmap_delete(StringMap* map) { if (map) stringmap_destroy(map); free(map); }
+size_t* stringmap_get(const StringMap* map, const char* key, size_t key_length)
+{
+	size_t key_hash = string_hash_djb2((const unsigned char*)key, key_length);
+	for (size_t i = 0; i < map->size; ++i) // linear scan over every stored entry
+		if (map->entries[i].key_hash == key_hash && !map->entries[i].deleted)
+			return &map->entries[i].value; // matched on hash only: keys are not stored
+	return NULL; // no live entry carries this hash
+}
+
+bool stringmap_has(const StringMap* map, const char* key, size_t key_length)
+{
+	size_t key_hash = string_hash_djb2((const unsigned char*)key, key_length);
+	for (size_t i = 0; i < map->size; ++i)
+		if (map->entries[i].key_hash == key_hash && !map->entries[i].deleted)
+			return true;
+	return false;
+}
+
+void stringmap_set(
+	StringMap* map, const char* key, size_t key_length, size_t value
+)
+{
+	size_t key_hash = string_hash_djb2((const unsigned char*)key, key_length);
+	for (size_t i = 0; i < map->size; ++i) {
+		if (map->entries[i].key_hash == key_hash && !map->entries[i].deleted) {
+			map->entries[i].value = value; // overwrite the first live entry with this hash
+			return;
+		}
+	}
+	if (map->entries == NULL) { // nothing allocated yet: start with room for 8
+		map->capacity = 8;
+		map->entries = malloc(sizeof(StringMapEntry) * map->capacity);
+	} else if (map->size == map->capacity) { // full: double the capacity
+		map->capacity *= 2;
+		map->entries
+			= realloc(map->entries, sizeof(StringMapEntry) * map->capacity);
+	} // NOTE(review): neither malloc nor realloc result is checked here
+	map->entries[map->size] = (StringMapEntry) { // append after the last used slot
+		.deleted = false,
+		.key_hash = key_hash,
+		.value = value,
+	};
+	map->size++;
+}
+
+void stringmap_reserve(StringMap* map, size_t minimum_size)
+{
+	if (map->capacity >= minimum_size)
+		return;
+	map->capacity = utils_nearest_bigger_power_of_2_u64(minimum_size);
+	map->entries
+		= realloc(map->entries, sizeof(StringMapEntry) * map->capacity);
+}
+
+void stringmap_remove(StringMap* map, const char* key, size_t key_length)
+{
+	size_t key_hash = string_hash_djb2((const unsigned char*)key, key_length);
+	for (size_t i = 0; i < map->size; ++i) {
+		if (map->entries[i].key_hash == key_hash && !map->entries[i].deleted) {
+			map->entries[i].deleted = true;
+		}
+	}
+}
+
+void stringmap_clean(StringMap* map) // compacts away entries marked deleted
+{
+	size_t kept = 0;
+	for (size_t i = 0; i < map->size; ++i) {
+		if (!map->entries[i].deleted)
+			map->entries[kept++] = map->entries[i];
+	}
+	map->size = kept; // live entries now occupy [0, kept)
+}
+
+void stringmap_shrink(StringMap* map)
+{
+	// Target capacity: size rounded up to a power of two (0 stays 0).
+	size_t new_size = utils_nearest_bigger_power_of_2_u64(map->size);
+	if (new_size == 0 || new_size >= map->capacity)
+		return; // realloc(ptr, 0) is not portable, so an empty map is kept
+	map->capacity = new_size;
+	map->entries = realloc(map->entries, sizeof(StringMapEntry) * new_size);
+}
+
+void stringmap_clean_and_shrink(StringMap* map)
+{
+	stringmap_clean(map);
+	stringmap_shrink(map);
+}
+
+void stringmap_create(StringMap* map)
+{
+	*map = (StringMap) {
+		.entries = NULL,
+		.size = 0,
+		.capacity = 0,
+	};
+}
+
+void stringmap_destroy(StringMap* map)
+{
+	if (map->entries)
+		free(map->entries);
+}
diff --git a/utils/stringmap.h b/utils/stringmap.h
new file mode 100644
index 0000000..d9c3c64
--- /dev/null
+++ b/utils/stringmap.h
@@ -0,0 +1,21 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include "utils/stringmap.h"
+#include <stdbool.h>
+#include <stddef.h>
+
+typedef struct StringMapEntry {
+	bool deleted;
+	size_t key_hash, value;
+} StringMapEntry;
+
+struct StringMap {
+	StringMapEntry* entries;
+	size_t size, capacity;
+};
+
+void stringmap_create(StringMap* map);
+void stringmap_destroy(StringMap* map);
+
+#endif