diff --git a/.gitignore b/.gitignore
index 3e0f233..08e593b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
-a.out
+
+build/
+bin/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8befa33
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,43 @@
+
+CC = gcc
+
+CFLAGS = \
+	-std=c17 \
+	-Wall \
+	-Wextra \
+	-Wpedantic \
+	-Wconversion \
+	-Iinclude
+
+# These targets are commands, not files; "scirpt" would otherwise be mistaken
+# for the scirpt/ source directory and could be considered up to date.
+.PHONY: all dirs scirpt clean
+
+all: compile_flags.txt dirs scirpt
+
+UTILS_SRC = stringmap.c
+UTILS_OBJ = $(patsubst %.c, build/utils/%.o, $(UTILS_SRC))
+
+SCIRPT_SRC = main.c lexer.c parser.c
+SCIRPT_OBJ = $(patsubst %.c, build/scirpt/%.o, $(SCIRPT_SRC))
+
+# Link every object into bin/scirpt.
+scirpt: $(SCIRPT_OBJ) $(UTILS_OBJ)
+	$(CC) -o bin/$@ $(CFLAGS) $^ -lm
+
+# Every object depends on every header, so any header change rebuilds all.
+# The pattern is quoted so the shell passes it to find instead of expanding it.
+build/%.o: %.c $(shell find . -name '*.h')
+	mkdir -p $(@D)
+	$(CC) -c -o $@ $(CFLAGS) $<
+
+dirs:
+	mkdir -p bin
+
+# Writes one compiler flag per line.
+compile_flags.txt:
+	echo -xc $(CFLAGS) | sed 's/\s\+/\n/g' > compile_flags.txt
+
+clean:
+	rm -rf build/ bin/
+
diff --git a/compile_flags.txt b/compile_flags.txt
new file mode 100644
index 0000000..3c63893
--- /dev/null
+++ b/compile_flags.txt
@@ -0,0 +1,7 @@
+-xc
+-std=c17
+-Wall
+-Wextra
+-Wpedantic
+-Wconversion
+-Iinclude
diff --git a/include/scirpt/lexer.h b/include/scirpt/lexer.h
new file mode 100644
index 0000000..a2d3e1e
--- /dev/null
+++ b/include/scirpt/lexer.h
@@ -0,0 +1,15 @@
+#ifndef SCIRPT_LEXER_H
+#define SCIRPT_LEXER_H
+
+#include "scirpt/token.h"
+
+typedef struct ScirptLexer ScirptLexer;
+
+// Allocates a lexer over `text`; the text is borrowed, not copied.
+ScirptLexer* scirpt_lexer_new(const char* text, size_t text_length);
+// Releases a lexer obtained from scirpt_lexer_new.
+void scirpt_lexer_delete(ScirptLexer* lexer);
+// Returns the next token, or a ScirptTokenTypeEof token at end of text.
+ScirptToken scirpt_lexer_next(ScirptLexer* lexer);
+
+#endif
diff --git a/include/scirpt/parser.h b/include/scirpt/parser.h
new file mode 100644
index 0000000..788cbd8
--- /dev/null
+++ b/include/scirpt/parser.h
@@ -0,0 +1,4 @@
+#ifndef SCIRPT_PARSER_H
+#define SCIRPT_PARSER_H
+
+#endif
diff --git a/include/scirpt/position.h b/include/scirpt/position.h
new file mode 100644
index 0000000..4312790
--- /dev/null
+++ b/include/scirpt/position.h
@@ -0,0 +1,11 @@
+#ifndef
SCIRPT_POSITIONS_H
+#define SCIRPT_POSITIONS_H
+
+#include <stddef.h>
+
+typedef struct ScirptPosition {
+    size_t index;
+    int line, col;
+} ScirptPosition;
+
+#endif
diff --git a/include/scirpt/token.h b/include/scirpt/token.h
new file mode 100644
index 0000000..b4556d7
--- /dev/null
+++ b/include/scirpt/token.h
@@ -0,0 +1,47 @@
+#ifndef SCIRPT_TOKENS_H
+#define SCIRPT_TOKENS_H
+
+#include "scirpt/position.h"
+#include <stddef.h>
+
+typedef enum {
+    ScirptTokenTypeEof,
+    ScirptTokenTypeInvalidChar,
+    ScirptTokenTypeId,
+    ScirptTokenTypeInt,
+    ScirptTokenTypeString,
+    ScirptTokenTypeLParen,
+    ScirptTokenTypeRParen,
+    ScirptTokenTypeLBrace,
+    ScirptTokenTypeRBrace,
+    ScirptTokenTypeLBracket,
+    ScirptTokenTypeRBracket,
+    ScirptTokenTypeDot,
+    ScirptTokenTypeComma,
+    ScirptTokenTypeColon,
+    ScirptTokenTypeSemicolon,
+    ScirptTokenTypePlus,
+    ScirptTokenTypeMinus,
+    ScirptTokenTypeAsterisk,
+    ScirptTokenTypeNull,
+    ScirptTokenTypeFalse,
+    ScirptTokenTypeTrue,
+    ScirptTokenTypeLet,
+    ScirptTokenTypeIf,
+    ScirptTokenTypeElse,
+    ScirptTokenTypeWhile,
+    ScirptTokenTypeFor,
+    ScirptTokenTypeIn,
+    ScirptTokenTypeBreak,
+    ScirptTokenTypeFn,
+    ScirptTokenTypeReturn,
+} ScirptTokenType;
+
+// A token covers `length` bytes of the source text starting at `pos.index`.
+typedef struct {
+    ScirptTokenType type;
+    ScirptPosition pos;
+    size_t length;
+} ScirptToken;
+
+#endif
diff --git a/include/utils/stringmap.h b/include/utils/stringmap.h
new file mode 100644
index 0000000..bac36ff
--- /dev/null
+++ b/include/utils/stringmap.h
@@ -0,0 +1,54 @@
+#ifndef UTILS_STRINGMAP_H
+#define UTILS_STRINGMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+// Rounds `value` up to a power of two; a power of two is returned unchanged.
+// Wraps to 0 when `value` is 0 or greater than 2^63.
+// https://stackoverflow.com/questions/466204/rounding-up-to-next-power-of-2
+// https://stackoverflow.com/questions/1322510/given-an-integer-how-do-i-find-the-next-largest-power-of-two-using-bit-twiddlin/1322548#1322548
+static inline uint64_t utils_nearest_bigger_power_of_2_u64(uint64_t value)
+{
+    value--;
+    value |= value >> 1;
+    value |= value >> 2;
+    value |= value >> 4;
+    value |= value >> 8;
+    value |= value >> 16;
+    value |= value >> 32;
+    value++;
+    return value;
+}
+
+// djb2 hash of at most `length` bytes of `value`, stopping at the first NUL.
+// https://stackoverflow.com/questions/7666509/hash-function-for-string
+// http://www.cse.yorku.ca/~oz/hash.html
+static inline size_t string_hash_djb2(const unsigned char* value, size_t length)
+{
+    size_t hash = 5381;
+    for (size_t i = 0; i < length && value[i] != '\0'; ++i)
+        hash = ((hash << 5) + hash) + value[i];
+    return hash;
+}
+
+typedef struct StringMap StringMap;
+
+// A key is the bytes up to `key_length` or the first NUL, whichever is first.
+StringMap* stringmap_new(void);
+void stringmap_delete(StringMap* map);
+// Returns a pointer to the stored value, or NULL when the key is absent.
+size_t* stringmap_get(const StringMap* map, const char* key, size_t key_length);
+bool stringmap_has(const StringMap* map, const char* key, size_t key_length);
+void stringmap_set(
+    StringMap* map, const char* key, size_t key_length, size_t value
+);
+void stringmap_reserve(StringMap* map, size_t minimum_size);
+void stringmap_remove(StringMap* map, const char* key, size_t key_length);
+// Drops removed entries from the storage.
+void stringmap_clean(StringMap* map);
+void stringmap_shrink(StringMap* map);
+void stringmap_clean_and_shrink(StringMap* map);
+
+#endif
diff --git a/main.c b/main.c
deleted file mode 100644
index 8416927..0000000
--- a/main.c
+++ /dev/null
@@ -1 +0,0 @@
-int main(void) { *(int volatile*)0 = 0; }
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..c1dfa96
--- /dev/null
+++ b/parser.c
@@ -0,0 +1 @@
+#include "parser.h"
diff --git a/scirpt/lexer.c b/scirpt/lexer.c
new file mode 100644
index 0000000..2c47a22
--- /dev/null
+++ b/scirpt/lexer.c
@@ -0,0 +1,206 @@
+#include "lexer.h"
+#include "scirpt/lexer.h"
+#include "scirpt/position.h"
+#include "scirpt/token.h"
+#include "utils/stringmap.h"
+#include <stdlib.h>
+#include <string.h>
+
+// Short file-local aliases for the scirpt_lexer_* helpers.
+static inline void step(ScirptLexer* lexer) { scirpt_lexer_step(lexer); }
+static inline ScirptToken
+token(const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start)
+{
+    return scirpt_lexer_token(lexer, type, start);
+}
+static inline ScirptPosition pos(const ScirptLexer* lexer)
+{
+    return scirpt_lexer_pos(lexer);
+}
+static inline bool current_is(const ScirptLexer* lexer, char value)
+{
+    return scirpt_lexer_current_is(lexer, value);
+}
+static inline bool done(const ScirptLexer* lexer)
+{
+    return scirpt_lexer_done(lexer);
+}
+static inline char current(const ScirptLexer* lexer)
+{
+    return scirpt_lexer_current(lexer);
+}
+
+// Heap-allocates and initialises a lexer; returns NULL if allocation fails.
+ScirptLexer* scirpt_lexer_new(const char* text, size_t text_length)
+{
+    ScirptLexer* lexer = malloc(sizeof *lexer);
+    if (lexer == NULL)
+        return NULL;
+    scirpt_lexer_create(lexer, text, text_length);
+    return lexer;
+}
+
+// Releases the keyword table as well as the lexer itself; NULL is a no-op.
+void scirpt_lexer_delete(ScirptLexer* lexer)
+{
+    if (lexer == NULL)
+        return;
+    scirpt_lexer_destroy(lexer);
+    free(lexer);
+}
+
+static inline void
+add_keyword(StringMap* keywords, const char* key, ScirptTokenType value)
+{
+    stringmap_set(keywords, key, strlen(key), (size_t)value);
+}
+
+// Initialises `lexer` at the start of `text` and builds its keyword table.
+// `text` is borrowed. If the table cannot be allocated, `keywords` stays NULL
+// and keywords are lexed as plain identifiers.
+void scirpt_lexer_create(
+    ScirptLexer* lexer, const char* text, size_t text_length
+)
+{
+    StringMap* keywords = stringmap_new();
+    if (keywords != NULL) {
+        add_keyword(keywords, "null", ScirptTokenTypeNull);
+        add_keyword(keywords, "false", ScirptTokenTypeFalse);
+        add_keyword(keywords, "true", ScirptTokenTypeTrue);
+        add_keyword(keywords, "let", ScirptTokenTypeLet);
+        add_keyword(keywords, "if", ScirptTokenTypeIf);
+        add_keyword(keywords, "else", ScirptTokenTypeElse);
+        add_keyword(keywords, "while", ScirptTokenTypeWhile);
+        add_keyword(keywords, "for", ScirptTokenTypeFor);
+        add_keyword(keywords, "in", ScirptTokenTypeIn);
+        add_keyword(keywords, "break", ScirptTokenTypeBreak);
+        add_keyword(keywords, "fn", ScirptTokenTypeFn);
+        add_keyword(keywords, "return", ScirptTokenTypeReturn);
+    }
+    *lexer = (ScirptLexer) {
+        .text = text,
+        .text_length = text_length,
+        .index = 0,
+        .line = 1,
+        .col = 1,
+        .keywords = keywords,
+    };
+}
+
+// Releases the keyword table; the lexer struct itself is not freed.
+void scirpt_lexer_destroy(ScirptLexer* lexer)
+{
+    stringmap_delete(lexer->keywords);
+    lexer->keywords = NULL;
+}
+
+static inline bool is_whitespace(char value)
+{
+    return value == ' ' || value == '\t' || value == '\r' || value == '\n';
+}
+
+static inline bool is_id_char_excluding_numbers(char value)
+{
+    return (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z')
+        || value == '_';
+}
+
+static inline bool is_int_char(char value)
+{
+    return value >= '0' && value <= '9';
+}
+
+static inline bool is_id_char(char value)
+{
+    return is_id_char_excluding_numbers(value) || is_int_char(value);
+}
+
+// Skips whitespace and returns the next token. An identifier whose spelling
+// is in the keyword table is returned with that keyword's token type.
+ScirptToken scirpt_lexer_next(ScirptLexer* lexer)
+{
+    if (done(lexer)) {
+        return token(lexer, ScirptTokenTypeEof, pos(lexer));
+    } else if (is_whitespace(current(lexer))) {
+        step(lexer);
+        while (!done(lexer) && is_whitespace(current(lexer)))
+            step(lexer);
+        return scirpt_lexer_next(lexer);
+    } else if (is_id_char_excluding_numbers(current(lexer))) {
+        ScirptPosition start = pos(lexer);
+        step(lexer);
+        while (!done(lexer) && is_id_char(current(lexer)))
+            step(lexer);
+        const size_t* keyword = NULL;
+        if (lexer->keywords != NULL) {
+            keyword = stringmap_get(
+                lexer->keywords,
+                lexer->text + start.index,
+                lexer->index - start.index
+            );
+        }
+        if (keyword != NULL)
+            return token(lexer, (ScirptTokenType)*keyword, start);
+        return token(lexer, ScirptTokenTypeId, start);
+    } else {
+        switch (current(lexer)) {
+            default: {
+                ScirptPosition start = pos(lexer);
+                step(lexer);
+                return token(lexer, ScirptTokenTypeInvalidChar, start);
+            }
+        }
+    }
+}
+
+// Advances one byte; does nothing at end of text. Line and column are updated
+// from the byte being left, so the byte after '\n' is column 1 of a new line.
+void scirpt_lexer_step(ScirptLexer* lexer)
+{
+    if (done(lexer))
+        return;
+    if (current(lexer) == '\n') {
+        lexer->line++;
+        lexer->col = 1;
+    } else {
+        lexer->col++;
+    }
+    lexer->index++;
+}
+
+ScirptPosition scirpt_lexer_pos(const ScirptLexer* lexer)
+{
+    return (ScirptPosition) {
+        .index = lexer->index,
+        .line = lexer->line,
+        .col = lexer->col,
+    };
+}
+
+// Builds a token of `type` spanning from `start` to the current index.
+ScirptToken scirpt_lexer_token(
+    const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start
+)
+{
+    return (ScirptToken) {
+        .type = type,
+        .pos = start,
+        .length = lexer->index - start.index,
+    };
+}
+
+bool scirpt_lexer_current_is(const ScirptLexer* lexer, char value)
+{
+    return !done(lexer) && current(lexer) == value;
+}
+
+bool scirpt_lexer_done(const ScirptLexer* lexer)
+{
+    return lexer->index >= lexer->text_length;
+}
+
+// Unchecked read of the current byte; callers test scirpt_lexer_done first.
+char scirpt_lexer_current(const ScirptLexer* lexer)
+{
+    return lexer->text[lexer->index];
+}
diff --git a/scirpt/lexer.h b/scirpt/lexer.h
new file mode 100644
index 0000000..3ebbf01
--- /dev/null
+++ b/scirpt/lexer.h
@@ -0,0 +1,31 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include "scirpt/lexer.h"
+#include "scirpt/token.h"
+#include "utils/stringmap.h"
+#include <stdbool.h>
+#include <stddef.h>
+
+struct ScirptLexer {
+    const char* text;
+    size_t text_length;
+    size_t index;
+    int line, col;
+    StringMap* keywords;
+};
+
+void scirpt_lexer_create(
+    ScirptLexer* lexer, const char* text, size_t text_length
+);
+void scirpt_lexer_destroy(ScirptLexer* lexer);
+void scirpt_lexer_step(ScirptLexer* lexer);
+ScirptPosition scirpt_lexer_pos(const ScirptLexer* lexer);
+ScirptToken scirpt_lexer_token(
+    const ScirptLexer* lexer, ScirptTokenType type, ScirptPosition start
+);
+bool scirpt_lexer_current_is(const ScirptLexer* lexer, char value);
+bool scirpt_lexer_done(const ScirptLexer* lexer);
+char scirpt_lexer_current(const ScirptLexer* lexer);
+
+#endif
diff --git a/scirpt/main.c b/scirpt/main.c
new file mode 100644
index 0000000..1e2af83
--- /dev/null
+++ b/scirpt/main.c
@@ -0,0 +1,25 @@
+#include "scirpt/lexer.h"
+#include "scirpt/token.h"
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+// Lexes a fixed sample string and prints each token's numeric type.
+int main(void)
+{
+    printf("hello world\n");
+
+    const char* text = "123 if +";
+
+    ScirptLexer* lexer = scirpt_lexer_new(text, strlen(text));
+    if (lexer == NULL)
+        return 1;
+    while (true) {
+        ScirptToken token = scirpt_lexer_next(lexer);
+        if (token.type == ScirptTokenTypeEof)
+            break;
+        printf("%d\n", (int)token.type);
+    }
+    scirpt_lexer_delete(lexer);
+    return 0;
+}
diff --git a/scirpt/parser.c b/scirpt/parser.c
new file mode 100644
index 0000000..e69de29
diff --git a/scirpt/parser.h b/scirpt/parser.h
new file mode 100644
index 0000000..c349953
--- /dev/null
+++ b/scirpt/parser.h
@@ -0,0 +1,6 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "scirpt/parser.h"
+
+#endif
diff --git a/utils/stringmap.c b/utils/stringmap.c
new file mode 100644
index 0000000..ed70837
--- /dev/null
+++ b/utils/stringmap.c
@@ -0,0 +1,193 @@
+#include "stringmap.h"
+#include "utils/stringmap.h"
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Number of key bytes that take part in hashing: string_hash_djb2 stops at
+// the first NUL byte, so stored keys do too.
+static size_t key_span(const char* key, size_t key_length)
+{
+    size_t span = 0;
+    while (span < key_length && key[span] != '\0')
+        span++;
+    return span;
+}
+
+// Returns the live entry for the key, or NULL. The hash is compared first and
+// the key bytes second, so equal hashes of different keys are kept apart.
+static StringMapEntry*
+find_entry(const StringMap* map, const char* key, size_t key_length)
+{
+    size_t span = key_span(key, key_length);
+    size_t key_hash = string_hash_djb2((const unsigned char*)key, span);
+    for (size_t i = 0; i < map->size; ++i) {
+        StringMapEntry* entry = &map->entries[i];
+        if (entry->deleted || entry->key_hash != key_hash)
+            continue;
+        if (entry->key_length != span)
+            continue;
+        if (span == 0 || memcmp(entry->key, key, span) == 0)
+            return entry;
+    }
+    return NULL;
+}
+
+// Allocates an empty map; returns NULL if allocation fails.
+StringMap* stringmap_new(void)
+{
+    StringMap* map = malloc(sizeof *map);
+    if (map == NULL)
+        return NULL;
+    stringmap_create(map);
+    return map;
+}
+
+// Frees the entry storage and the map itself; NULL is a no-op.
+void stringmap_delete(StringMap* map)
+{
+    if (map == NULL)
+        return;
+    stringmap_destroy(map);
+    free(map);
+}
+
+size_t* stringmap_get(const StringMap* map, const char* key, size_t key_length)
+{
+    StringMapEntry* entry = find_entry(map, key, key_length);
+    return entry != NULL ? &entry->value : NULL;
+}
+
+bool stringmap_has(const StringMap* map, const char* key, size_t key_length)
+{
+    return find_entry(map, key, key_length) != NULL;
+}
+
+// Inserts the key or overwrites its value. If memory cannot be obtained the
+// map is left unchanged and the value is not stored.
+void stringmap_set(
+    StringMap* map, const char* key, size_t key_length, size_t value
+)
+{
+    StringMapEntry* existing = find_entry(map, key, key_length);
+    if (existing != NULL) {
+        existing->value = value;
+        return;
+    }
+    if (map->size == map->capacity) {
+        // Starts at 8 slots, then doubles.
+        stringmap_reserve(map, map->capacity == 0 ? 8 : map->capacity * 2);
+        if (map->size == map->capacity)
+            return;
+    }
+    size_t span = key_span(key, key_length);
+    // malloc(0) may return NULL, so an empty key still gets one byte.
+    char* key_copy = malloc(span == 0 ? 1 : span);
+    if (key_copy == NULL)
+        return;
+    if (span > 0)
+        memcpy(key_copy, key, span);
+    map->entries[map->size] = (StringMapEntry) {
+        .deleted = false,
+        .key_hash = string_hash_djb2((const unsigned char*)key, span),
+        .value = value,
+        .key = key_copy,
+        .key_length = span,
+    };
+    map->size++;
+}
+
+// Grows the storage to hold at least `minimum_size` entries. On allocation
+// failure the map keeps its previous storage and capacity.
+void stringmap_reserve(StringMap* map, size_t minimum_size)
+{
+    if (map->capacity >= minimum_size)
+        return;
+    size_t new_capacity
+        = (size_t)utils_nearest_bigger_power_of_2_u64(minimum_size);
+    // The rounding wraps for huge requests; fall back to the exact size.
+    if (new_capacity < minimum_size)
+        new_capacity = minimum_size;
+    if (new_capacity > SIZE_MAX / sizeof(StringMapEntry))
+        return;
+    StringMapEntry* grown
+        = realloc(map->entries, sizeof(StringMapEntry) * new_capacity);
+    if (grown == NULL)
+        return;
+    map->entries = grown;
+    map->capacity = new_capacity;
+}
+
+// Marks the key's entry as deleted and frees its key copy; the slot itself
+// stays in the array until stringmap_clean is called.
+void stringmap_remove(StringMap* map, const char* key, size_t key_length)
+{
+    StringMapEntry* entry = find_entry(map, key, key_length);
+    if (entry == NULL)
+        return;
+    free(entry->key);
+    entry->key = NULL;
+    entry->deleted = true;
+}
+
+// Compacts the array by dropping deleted entries, keeping the order of the
+// rest, and lowers `size` to the number of entries kept.
+void stringmap_clean(StringMap* map)
+{
+    size_t kept = 0;
+    for (size_t i = 0; i < map->size; ++i) {
+        if (map->entries[i].deleted)
+            continue;
+        map->entries[kept] = map->entries[i];
+        kept++;
+    }
+    map->size = kept;
+}
+
+// Reduces the capacity to the smallest power of two that holds `size` entries.
+void stringmap_shrink(StringMap* map)
+{
+    if (map->size == 0) {
+        // The rounding helper yields 0 here; release the storage outright
+        // rather than calling realloc with a size of zero.
+        free(map->entries);
+        map->entries = NULL;
+        map->capacity = 0;
+        return;
+    }
+    size_t new_capacity
+        = (size_t)utils_nearest_bigger_power_of_2_u64(map->size);
+    if (new_capacity < map->size || new_capacity >= map->capacity)
+        return;
+    StringMapEntry* shrunk
+        = realloc(map->entries, sizeof(StringMapEntry) * new_capacity);
+    if (shrunk == NULL)
+        return;
+    map->entries = shrunk;
+    map->capacity = new_capacity;
+}
+
+void stringmap_clean_and_shrink(StringMap* map)
+{
+    stringmap_clean(map);
+    stringmap_shrink(map);
+}
+
+void stringmap_create(StringMap* map)
+{
+    *map = (StringMap) {
+        .entries = NULL,
+        .size = 0,
+        .capacity = 0,
+    };
+}
+
+// Frees every key copy and the entry array, leaving the map empty.
+void stringmap_destroy(StringMap* map)
+{
+    for (size_t i = 0; i < map->size; ++i)
+        free(map->entries[i].key);
+    free(map->entries);
+    stringmap_create(map);
+}
diff --git a/utils/stringmap.h b/utils/stringmap.h
new file mode 100644
index 0000000..d9c3c64
--- /dev/null
+++ b/utils/stringmap.h
@@ -0,0 +1,26 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include "utils/stringmap.h"
+#include <stdbool.h>
+#include <stddef.h>
+
+// One slot of the map. `key` is an owned copy of the key bytes; it is NULL
+// once the entry has been marked deleted.
+typedef struct StringMapEntry {
+    bool deleted;
+    size_t key_hash, value;
+    char* key;
+    size_t key_length;
+} StringMapEntry;
+
+// Entries are stored in insertion order in a growable array.
+struct StringMap {
+    StringMapEntry* entries;
+    size_t size, capacity;
+};
+
+void stringmap_create(StringMap* map);
+void stringmap_destroy(StringMap* map);
+
+#endif