some parsing

This commit is contained in:
SimonFJ20 2023-04-14 02:17:45 +02:00
parent dd73b32372
commit b478e45198
19 changed files with 391 additions and 69 deletions

View File

@ -11,13 +11,13 @@ CFLAGS = \
all: compile_flags.txt dirs scirpt
UTILS_SRC = stringmap.c
UTILS_OBJ = $(patsubst %.c, build/utils/%.o, $(UTILS_SRC))
COMMON_SRC = string.c stringmap.c
COMMON_OBJ = $(patsubst %.c, build/common/%.o, $(COMMON_SRC))
SCIRPT_SRC = main.c lexer.c ast.c parser.c
SCIRPT_OBJ = $(patsubst %.c, build/scirpt/%.o, $(SCIRPT_SRC))
scirpt: $(SCIRPT_OBJ) $(UTILS_OBJ)
scirpt: $(SCIRPT_OBJ) $(COMMON_OBJ)
$(CC) -o bin/$@ $(CFLAGS) $^ -lm
build/%.o: %.c $(shell find -name *.h)

102
common/string.c Normal file
View File

@ -0,0 +1,102 @@
#include "common/string.h"
#include "common/panic.h"
#include "common/result.h"
#include <stddef.h>
#include <stdlib.h>
// Turns the characters collected in `builder` into a HeapString.
//
// A '\0' is appended to the builder first unless its last character already
// is one, so the data handed to heapstring_from() always ends in a
// terminator. An empty builder is allowed and yields just the terminator;
// previously it tripped an assertion, which made building from an empty
// source string abort the process.
//
// The builder is not destroyed here; the callers in this file call
// stringbuilder_destroy() themselves after building.
HeapString stringbuilder_build(StringBuilder* builder)
{
    // Check the length first so that `length - 1` can never wrap around.
    if (builder->length == 0 || builder->data[builder->length - 1] != '\0')
        stringbuilder_append(builder, '\0');
    return heapstring_from(builder->data, builder->length);
}
// Generates the file-local constructors unescaped_string_result_ok() and
// unescaped_string_result_error() for UnescapeStringResult. Both the value
// and the error payload are HeapString.
RESULT_CTORS(
HeapString, HeapString, UnescapeStringResult, unescaped_string_result
)
// Replaces backslash escape sequences in `source` with the characters they
// denote.
//
// Recognised sequences: \0, \t, \r and \n. Any other character following a
// backslash is copied as-is (so `\\` becomes `\` and `\"` becomes `"`).
//
// Returns an ok result holding the unescaped text, or an error result
// holding a heap-allocated message when `source` ends in a lone backslash.
// Either payload is a HeapString that the caller is responsible for
// destroying.
UnescapeStringResult common_unescape_string(StringView source)
{
    StringBuilder builder;
    stringbuilder_construct(&builder);
    size_t i = 0;
    while (i < source.length) {
        if (source.data[i] == '\\') {
            // Move on to the character that selects the escape.
            i++;
            if (i >= source.length) {
                stringbuilder_destroy(&builder);
                // This must be reported as an error: returning it through
                // the ok constructor would hand the message back as if it
                // were the unescaped text.
                return unescaped_string_result_error(
                    heapstring_from_cstring("malformed escape sequence")
                );
            }
            switch (source.data[i]) {
                case '0':
                    stringbuilder_append(&builder, '\0');
                    break;
                case 't':
                    stringbuilder_append(&builder, '\t');
                    break;
                case 'r':
                    stringbuilder_append(&builder, '\r');
                    break;
                case 'n':
                    stringbuilder_append(&builder, '\n');
                    break;
                default:
                    stringbuilder_append(&builder, source.data[i]);
                    break;
            }
        } else {
            stringbuilder_append(&builder, source.data[i]);
        }
        i++;
    }
    HeapString string = stringbuilder_build(&builder);
    stringbuilder_destroy(&builder);
    return unescaped_string_result_ok(string);
}
// Produces a copy of `source` in which NUL, tab, carriage return, newline,
// backslash, double quote and single quote are written as two-character
// backslash sequences. All other characters are copied unchanged.
// The returned HeapString comes from stringbuilder_build().
HeapString common_escape_string(StringView source)
{
    StringBuilder escaped;
    stringbuilder_construct(&escaped);
    for (size_t index = 0; index < source.length; ++index) {
        const char current = source.data[index];
        // Character to emit after the backslash; 0 means "no escape needed".
        char code = 0;
        switch (current) {
            case '\0':
                code = '0';
                break;
            case '\t':
                code = 't';
                break;
            case '\r':
                code = 'r';
                break;
            case '\n':
                code = 'n';
                break;
            case '\\':
            case '\"':
            case '\'':
                // These escape to themselves.
                code = current;
                break;
            default:
                break;
        }
        if (code != 0)
            stringbuilder_append(&escaped, '\\');
        stringbuilder_append(&escaped, code != 0 ? code : current);
    }
    HeapString result = stringbuilder_build(&escaped);
    stringbuilder_destroy(&escaped);
    return result;
}

View File

@ -1,6 +1,6 @@
#include "stringmap.h"
#include "utils/math.h"
#include "utils/stringmap.h"
#include "common/math.h"
#include "common/stringmap.h"
#include <stdint.h>
#include <stdlib.h>
@ -16,7 +16,7 @@ void stringmap_delete(StringMap* map) { free(map); }
size_t* stringmap_get(const StringMap* map, const char* key, size_t key_length)
{
size_t key_hash
= utils_string_hash_djb2((const unsigned char*)key, key_length);
= common_string_hash_djb2((const unsigned char*)key, key_length);
for (size_t i = 0; i < map->length; ++i)
if (map->data[i].key_hash == key_hash && !map->data[i].deleted)
return &map->data[i].value;
@ -26,7 +26,7 @@ size_t* stringmap_get(const StringMap* map, const char* key, size_t key_length)
bool stringmap_has(const StringMap* map, const char* key, size_t key_length)
{
size_t key_hash
= utils_string_hash_djb2((const unsigned char*)key, key_length);
= common_string_hash_djb2((const unsigned char*)key, key_length);
for (size_t i = 0; i < map->length; ++i)
if (map->data[i].key_hash == key_hash && !map->data[i].deleted)
return true;
@ -38,7 +38,7 @@ void stringmap_set(
)
{
size_t key_hash
= utils_string_hash_djb2((const unsigned char*)key, key_length);
= common_string_hash_djb2((const unsigned char*)key, key_length);
for (size_t i = 0; i < map->length; ++i) {
if (map->data[i].key_hash == key_hash && !map->data[i].deleted) {
map->data[i].value = value;
@ -64,14 +64,14 @@ void stringmap_reserve(StringMap* map, size_t minimum_size)
{
if (map->capacity >= minimum_size)
return;
map->capacity = utils_nearest_bigger_power_of_2_u64(minimum_size);
map->capacity = common_nearest_bigger_power_of_2_u64(minimum_size);
map->data = realloc(map->data, sizeof(StringMapEntry) * map->capacity);
}
void stringmap_remove(StringMap* map, const char* key, size_t key_length)
{
size_t key_hash
= utils_string_hash_djb2((const unsigned char*)key, key_length);
= common_string_hash_djb2((const unsigned char*)key, key_length);
for (size_t i = 0; i < map->length; ++i) {
if (map->data[i].key_hash == key_hash && !map->data[i].deleted) {
map->data[i].deleted = true;
@ -91,7 +91,7 @@ void stringmap_clean(StringMap* map)
void stringmap_shrink(StringMap* map)
{
size_t new_size = utils_nearest_bigger_power_of_2_u64(map->length);
size_t new_size = common_nearest_bigger_power_of_2_u64(map->length);
if (new_size >= map->capacity)
return;
map->capacity = new_size;

View File

@ -1,7 +1,7 @@
#ifndef UTILS_H
#define UTILS_H
#ifndef STRINGMAP_H
#define STRINGMAP_H
#include "utils/stringmap.h"
#include "common/stringmap.h"
#include <stdbool.h>
#include <stddef.h>

View File

@ -1,39 +1,37 @@
#ifndef UTILS_GENERIC_ARRAY_H
#define UTILS_GENERIC_ARRAY_H
#ifndef COMMON_GENERIC_ARRAY_H
#define COMMON_GENERIC_ARRAY_H
#include <stddef.h>
#include <stdlib.h>
#define GENERIC_ARRAY(Type, struct_name, function_prefix) \
struct struct_name { \
typedef struct { \
Type* data; \
size_t length, capacity; \
}; \
} struct_name; \
\
static inline void function_prefix##_construct(struct struct_name* array) \
static inline void function_prefix##_construct(struct_name* array) \
{ \
*array = (struct struct_name) { \
*array = (struct_name) { \
.data = NULL, \
.length = 0, \
.capacity = 0, \
}; \
} \
\
static inline void function_prefix##_destroy(struct struct_name* array) \
static inline void function_prefix##_destroy(struct_name* array) \
{ \
if (array->data) \
free(array->data); \
} \
\
static inline size_t function_prefix##_length( \
const struct struct_name* array \
) \
static inline size_t function_prefix##_length(const struct_name* array) \
{ \
return array->length; \
} \
\
static inline Type* function_prefix##_get( \
const struct struct_name* array, size_t index \
const struct_name* array, size_t index \
) \
{ \
if (index >= array->length) \
@ -43,7 +41,7 @@
} \
\
static inline void function_prefix##_append( \
struct struct_name* array, Type value \
struct_name* array, Type value \
) \
{ \
if (array->data == NULL) { \

View File

@ -1,11 +1,11 @@
#ifndef UTILS_MATH_H
#define UTILS_MATH_H
#ifndef COMMON_MATH_H
#define COMMON_MATH_H
#include <stdint.h>
// https://stackoverflow.com/questions/466204/rounding-up-to-next-power-of-2
// https://stackoverflow.com/questions/1322510/given-an-integer-how-do-i-find-the-next-largest-power-of-two-using-bit-twiddlin/1322548#1322548
static inline uint64_t utils_nearest_bigger_power_of_2_u64(uint64_t value)
static inline uint64_t common_nearest_bigger_power_of_2_u64(uint64_t value)
{
value--;
value |= value >> 1;

24
include/common/panic.h Normal file
View File

@ -0,0 +1,24 @@
#ifndef COMMON_PANIC_H
#define COMMON_PANIC_H
#include <stdio.h>
#include <stdlib.h>
// Writes "panic: ", the printf-style message given as arguments and the
// source location (file, line, function) to stderr, then terminates the
// process with exit status 1.
// The expansion is a single parenthesised comma expression.
#define PANIC(...)                                                             \
    ((void)fputs("panic: ", stderr),                                           \
     (void)fprintf(stderr, __VA_ARGS__),                                       \
     (void)fprintf(                                                            \
         stderr, "\n\tat ./%s:%d in %s()\n", __FILE__, __LINE__, __func__      \
     ),                                                                        \
     exit(1))
// Checks `condition`; when it is false, writes "assertion failed: ", the
// printf-style message given after the condition and the source location
// (file, line, function) to stderr, then terminates the process with exit
// status 1.
//
// The body is wrapped in do { } while (0) so the macro behaves like a single
// statement. With a bare `if`, an `else` written after `ASSERT(...);` would
// silently attach to the macro's hidden `if`.
#define ASSERT(condition, ...)                                                 \
    do {                                                                       \
        if (!(condition)) {                                                    \
            fputs("assertion failed: ", stderr);                               \
            fprintf(stderr, __VA_ARGS__);                                      \
            fprintf(                                                           \
                stderr,                                                        \
                "\n\tat ./%s:%d in %s()\n",                                    \
                __FILE__,                                                      \
                __LINE__,                                                      \
                __func__                                                       \
            );                                                                 \
            exit(1);                                                           \
        }                                                                      \
    } while (0)
#endif

36
include/common/result.h Normal file
View File

@ -0,0 +1,36 @@
#ifndef COMMON_RESULT_H
#define COMMON_RESULT_H
#include <stdbool.h>
// RESULT declares `struct_name` as a struct with a boolean `ok` flag and an
// anonymous union that holds either `value` (of type Value) or `error` (of
// type Error). Which member is meaningful is indicated by `ok`.
#define RESULT(Value, Error, struct_name)                                      \
    typedef struct {                                                           \
        bool ok;                                                               \
        union {                                                                \
            Value value;                                                       \
            Error error;                                                       \
        };                                                                     \
    } struct_name;

// RESULT_CTORS defines two static inline constructors for a type declared
// with RESULT:
//   <function_prefix>_ok(value)    -> ok = true,  value stored
//   <function_prefix>_error(error) -> ok = false, error stored
#define RESULT_CTORS(Value, Error, struct_name, function_prefix)               \
    static inline struct_name function_prefix##_ok(Value value)                \
    {                                                                          \
        struct_name result;                                                    \
        result.ok = true;                                                      \
        result.value = value;                                                  \
        return result;                                                         \
    }                                                                          \
                                                                               \
    static inline struct_name function_prefix##_error(Error error)             \
    {                                                                          \
        struct_name result;                                                    \
        result.ok = false;                                                     \
        result.error = error;                                                  \
        return result;                                                         \
    }

// RESULT_WITH_CTORS declares the type and its constructors in one go.
#define RESULT_WITH_CTORS(Value, Error, struct_name, function_prefix)          \
    RESULT(Value, Error, struct_name)                                          \
    RESULT_CTORS(Value, Error, struct_name, function_prefix)
#endif

View File

@ -1,6 +1,8 @@
#ifndef UTILS_STRING_H
#define UTILS_STRING_H
#ifndef COMMON_STRING_H
#define COMMON_STRING_H
#include "common/generic_array.h"
#include "common/result.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
@ -25,13 +27,23 @@ static inline HeapString heapstring_from(const char* text, size_t length)
};
return string;
}
// Builds a HeapString from a NUL-terminated C string by measuring it with
// strlen() and delegating to heapstring_from().
static inline HeapString heapstring_from_cstring(const char* text)
{
    const size_t text_length = strlen(text);
    return heapstring_from(text, text_length);
}
// Releases the character buffer owned by `string`.
//
// free() accepts a null pointer and does nothing in that case, so no guard
// is needed. The buffer must be released exactly once; the pointer is reset
// afterwards so that destroying the same HeapString again is harmless.
static inline void heapstring_destroy(HeapString* string)
{
    free(string->data);
    string->data = NULL;
}
GENERIC_ARRAY(char, StringBuilder, stringbuilder)
HeapString stringbuilder_build(StringBuilder* builder);
RESULT(HeapString, HeapString, UnescapeStringResult)
UnescapeStringResult common_unescape_string(StringView source);
HeapString common_escape_string(StringView source);
#endif

View File

@ -1,12 +1,11 @@
#ifndef UTILS_STRING_ARRAY_H
#define UTILS_STRING_ARRAY_H
#ifndef COMMON_STRING_ARRAY_H
#define COMMON_STRING_ARRAY_H
#include "utils/generic_array.h"
#include "utils/string.h"
#include "common/generic_array.h"
#include "common/string.h"
#include <stdlib.h>
GENERIC_ARRAY(StringView, StringViewArray, stringview_array)
typedef struct StringViewArray StringViewArray;
static inline StringViewArray* stringview_array_new(void)
{
@ -21,7 +20,6 @@ static inline void stringview_array_delete(StringViewArray* array)
}
GENERIC_ARRAY(HeapString, HeapStringArray, heapstring_array)
typedef struct HeapStringArray HeapStringArray;
static inline HeapStringArray* heapstring_array_new(void)
{

View File

@ -1,5 +1,5 @@
#ifndef UTILS_STRINGMAP_H
#define UTILS_STRINGMAP_H
#ifndef COMMON_STRINGMAP_H
#define COMMON_STRINGMAP_H
#include <stdbool.h>
#include <stddef.h>
@ -7,7 +7,7 @@
// https://stackoverflow.com/questions/7666509/hash-function-for-string
// http://www.cse.yorku.ca/~oz/hash.html
static inline size_t
utils_string_hash_djb2(const unsigned char* value, size_t length)
common_string_hash_djb2(const unsigned char* value, size_t length)
{
size_t hash = 5381;
for (size_t i = 0; i < length && value[i] != '\0'; ++i)

View File

@ -1,6 +1,9 @@
#ifndef SCIRPT_AST_H
#define SCIRPT_AST_H
#include "common/generic_array.h"
#include "common/string.h"
#include "scirpt/position.h"
#include <stdbool.h>
#include <stdint.h>
@ -10,18 +13,39 @@ typedef enum {
ScirptAstExprTypeId,
ScirptAstExprTypeInt,
ScirptAstExprTypeString,
ScirptAstExprTypeNull,
ScirptAstExprTypeBool,
ScirptAstExprTypeBlock,
ScirptAstExprTypeIf,
} ScirptAstExprType;
typedef struct ScirptAstExpr ScirptAstExpr;
GENERIC_ARRAY(ScirptAstExpr*, ScirptAstExprArray, scirpt_ast_expr_array)
// Payload of a ScirptAstExprTypeBlock expression.
typedef struct {
// Expressions contained in the block.
ScirptAstExprArray statements;
// Additional expression owned by the block.
// NOTE(review): presumably the block's resulting value - confirm.
ScirptAstExpr* value;
} ScirptAstExprBlock;
// Payload of a ScirptAstExprTypeIf expression.
typedef struct {
// Expression parsed directly after the `if` token.
ScirptAstExpr* condition;
// Expression parsed after the condition.
ScirptAstExpr* truthy;
// Expression parsed after `else`; the parser leaves this NULL when there is
// no else branch.
ScirptAstExpr* falsy;
} ScirptAstExprIf;
struct ScirptAstExpr {
ScirptAstExprType type;
ScirptPosition pos;
union {
char* id_value;
HeapString id_value;
int64_t int_value;
char* string_value;
HeapString string_value;
bool bool_value;
ScirptAstExprBlock block;
ScirptAstExprIf if_expr;
};
} ScirptAstExpr;
};
void scirpt_ast_expr_delete(ScirptAstExpr* expr);

View File

@ -1,10 +1,10 @@
#ifndef SCIRPT_PARSER_H
#define SCIRPT_PARSER_H
#include "common/generic_array.h"
#include "common/string.h"
#include "scirpt/ast.h"
#include "scirpt/lexer.h"
#include "utils/generic_array.h"
#include "utils/string.h"
#include <stdlib.h>
typedef struct {
@ -15,14 +15,13 @@ typedef struct {
GENERIC_ARRAY(
ScirptParserError, ScirptParserErrorArray, scirpt_parser_error_array
)
typedef struct ScirptParserErrorArray ScirptParserErrorArray;
typedef struct ScirptParser ScirptParser;
ScirptParser*
scirpt_parser_new(const char* text, size_t text_length, ScirptLexer* lexer);
void scirpt_parser_delete(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_next_expr(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_next(ScirptParser* parser);
bool scirpt_parser_ok(const ScirptParser* parser);
const ScirptParserErrorArray* scirpt_parser_errors(const ScirptParser* parser);

View File

@ -9,14 +9,25 @@ void scirpt_ast_expr_delete(ScirptAstExpr* expr)
case ScirptAstExprTypeError:
break;
case ScirptAstExprTypeId:
free(expr->id_value);
heapstring_destroy(&expr->id_value);
break;
case ScirptAstExprTypeInt:
break;
case ScirptAstExprTypeString:
free(expr->string_value);
heapstring_destroy(&expr->string_value);
break;
case ScirptAstExprTypeBool:
break;
case ScirptAstExprTypeNull:
break;
case ScirptAstExprTypeBlock:
scirpt_ast_expr_array_destroy(&expr->block.statements);
scirpt_ast_expr_delete(expr->block.value);
break;
case ScirptAstExprTypeIf:
scirpt_ast_expr_delete(expr->if_expr.condition);
scirpt_ast_expr_delete(expr->if_expr.truthy);
scirpt_ast_expr_delete(expr->if_expr.falsy);
break;
}
}

View File

@ -1,8 +1,8 @@
#include "lexer.h"
#include "common/stringmap.h"
#include "scirpt/lexer.h"
#include "scirpt/position.h"
#include "scirpt/token.h"
#include "utils/stringmap.h"
#include <stdlib.h>
#include <string.h>

View File

@ -1,9 +1,9 @@
#ifndef LEXER_H
#define LEXER_H
#include "common/stringmap.h"
#include "scirpt/lexer.h"
#include "scirpt/token.h"
#include "utils/stringmap.h"
#include <stdbool.h>
#include <stddef.h>

View File

@ -29,7 +29,7 @@ int main(void)
ScirptLexer* lexer = scirpt_lexer_new(text, strlen(text));
ScirptParser* parser = scirpt_parser_new(text, strlen(text), lexer);
while (true) {
ScirptAstExpr* expr = scirpt_parser_next_expr(parser);
ScirptAstExpr* expr = scirpt_parser_next(parser);
if (expr->type == ScirptAstExprTypeEof) {
break;
} else if (!scirpt_parser_ok(parser)) {

View File

@ -1,10 +1,10 @@
#include "scirpt/parser.h"
#include "common/string.h"
#include "parser.h"
#include "scirpt/ast.h"
#include "scirpt/lexer.h"
#include "scirpt/position.h"
#include "scirpt/token.h"
#include "utils/string.h"
#include <stdint.h>
#include <stdlib.h>
@ -21,11 +21,17 @@ error(ScirptParser* parser, HeapString message, ScirptPosition pos)
{
scirpt_parser_error(parser, message, pos);
}
static inline ScirptPosition pos(ScirptParser* parser)
static inline ScirptPosition position(ScirptParser* parser)
{
return parser->current.pos;
}
// File-local shorthand for scirpt_parser_step().
static inline void step(ScirptParser* parser)
{
    scirpt_parser_step(parser);
}
// Steps the parser past the current token and then allocates an expression
// node from `data` via alloc_expr(). The step happens before the allocation.
static inline ScirptAstExpr*
step_alloc_expr(ScirptParser* parser, ScirptAstExpr data)
{
    scirpt_parser_step(parser);
    ScirptAstExpr* node = alloc_expr(data);
    return node;
}
static inline bool current_is(const ScirptParser* parser, ScirptTokenType type)
{
return scirpt_parser_current_is(parser, type);
@ -45,14 +51,14 @@ scirpt_parser_new(const char* text, size_t text_length, ScirptLexer* lexer)
// Releases the memory of `parser` itself with free().
// NOTE(review): scirpt_parser_destroy() is not called here - confirm whether
// the parser's error array has to be released before the parser is freed.
void scirpt_parser_delete(ScirptParser* parser) { free(parser); }
ScirptAstExpr* scirpt_parser_next_expr(ScirptParser* parser)
ScirptAstExpr* scirpt_parser_next(ScirptParser* parser)
{
if (done(parser)) {
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeEof,
});
} else {
return scirpt_parser_parse_expr(parser);
return scirpt_parser_parse_statement(parser);
}
}
@ -88,6 +94,11 @@ void scirpt_parser_destroy(ScirptParser* parser)
scirpt_parser_error_array_construct(&parser->errors);
}
// Parses one statement. At present a statement is simply an expression, so
// this forwards to scirpt_parser_parse_expr().
ScirptAstExpr* scirpt_parser_parse_statement(ScirptParser* parser)
{
    ScirptAstExpr* statement = scirpt_parser_parse_expr(parser);
    return statement;
}
ScirptAstExpr* scirpt_parser_parse_expr(ScirptParser* parser)
{
return scirpt_parser_parse_operand(parser);
@ -95,40 +106,143 @@ ScirptAstExpr* scirpt_parser_parse_expr(ScirptParser* parser)
ScirptAstExpr* scirpt_parser_parse_operand(ScirptParser* parser)
{
ScirptPosition pos = position(parser);
switch (parser->current.type) {
case ScirptTokenTypeInt: {
HeapString value_string = heapstring_from(
&parser->text[pos(parser).index], parser->current.length
case ScirptTokenTypeInt:
return scirpt_parser_parse_int(parser);
case ScirptTokenTypeString:
return scirpt_parser_parse_string(parser);
case ScirptTokenTypeNull:
return step_alloc_expr(
parser,
(ScirptAstExpr) {
.type = ScirptAstExprTypeNull,
.pos = pos,
}
);
int64_t value = atoll(value_string.data);
step(parser);
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeInt,
.int_value = value,
});
}
case ScirptTokenTypeFalse:
return step_alloc_expr(
parser,
(ScirptAstExpr) {
.type = ScirptAstExprTypeBool,
.pos = pos,
.bool_value = false,
}
);
case ScirptTokenTypeTrue:
return step_alloc_expr(
parser,
(ScirptAstExpr) {
.type = ScirptAstExprTypeBool,
.pos = pos,
.bool_value = true,
}
);
case ScirptTokenTypeIf:
return scirpt_parser_parse_if(parser);
case ScirptTokenTypeEof: {
error(
parser,
heapstring_from_cstring("expected value, got Eof"),
pos(parser)
position(parser)
);
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeError,
.pos = pos,
});
}
default: {
error(
parser, heapstring_from_cstring("expected value"), pos(parser)
parser,
heapstring_from_cstring("expected value"),
position(parser)
);
step(parser);
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeError,
.pos = pos,
});
}
}
}
// Parses the current integer token into a ScirptAstExprTypeInt node and
// steps past it.
//
// The token text is copied into a temporary HeapString so it can be handed
// to strtoll(); this relies on heapstring_from() producing NUL-terminated
// data (assumption carried over from the original code - confirm). The
// temporary is destroyed once the number has been read; previously it was
// leaked on every integer literal.
ScirptAstExpr* scirpt_parser_parse_int(ScirptParser* parser)
{
    ScirptPosition pos = position(parser);
    HeapString value_string
        = heapstring_from(&parser->text[pos.index], parser->current.length);
    // strtoll() has defined behaviour for out-of-range input (it clamps),
    // unlike atoll().
    int64_t value = strtoll(value_string.data, NULL, 10);
    heapstring_destroy(&value_string);
    step(parser);
    return alloc_expr((ScirptAstExpr) {
        .type = ScirptAstExprTypeInt,
        .pos = pos,
        .int_value = value,
    });
}
// Parses the current string token into a ScirptAstExprTypeString node and
// steps past it.
//
// The token is assumed to span both surrounding quotes, as implied by the
// `length - 2` below. The view handed to common_unescape_string() therefore
// has to start one character after the token start; starting at the token
// start kept the opening quote and cut off the last character of the
// contents.
//
// On an unescape failure the error message is recorded on the parser and a
// ScirptAstExprTypeError node is returned instead.
ScirptAstExpr* scirpt_parser_parse_string(ScirptParser* parser)
{
    ScirptPosition pos = position(parser);
    UnescapeStringResult result = common_unescape_string((StringView) {
        .data = &parser->text[pos.index + 1],
        .length = parser->current.length - 2,
    });
    if (!result.ok) {
        // The error message is handed over to the parser's error list.
        error(parser, result.error, pos);
        step(parser);
        return alloc_expr((ScirptAstExpr) {
            .type = ScirptAstExprTypeError,
            .pos = pos,
        });
    }
    HeapString value = result.value;
    step(parser);
    return alloc_expr((ScirptAstExpr) {
        .type = ScirptAstExprTypeString,
        .pos = pos,
        .string_value = value,
    });
}
// Parses an `if` expression: the `if` token, a condition expression, a
// branch that must start with '{', and optionally `else` followed by another
// branch that must start with '{'.
//
// Returns a ScirptAstExprTypeIf node whose `falsy` is NULL when there is no
// else branch. When a '{' is missing, an error is recorded on the parser,
// the sub-expressions parsed so far are deleted and a
// ScirptAstExprTypeError node is returned.
ScirptAstExpr* scirpt_parser_parse_if(ScirptParser* parser)
{
    ScirptPosition pos = position(parser);
    // Skip the `if` token itself.
    step(parser);
    ScirptAstExpr* condition = scirpt_parser_parse_expr(parser);
    if (!current_is(parser, ScirptTokenTypeLBrace)) {
        error(
            parser, heapstring_from_cstring("expected '{'"), position(parser)
        );
        scirpt_ast_expr_delete(condition);
        return alloc_expr((ScirptAstExpr) {
            .type = ScirptAstExprTypeError,
            .pos = pos,
        });
    }
    ScirptAstExpr* truthy = scirpt_parser_parse_expr(parser);
    ScirptAstExpr* falsy = NULL;
    if (current_is(parser, ScirptTokenTypeElse)) {
        step(parser);
        if (!current_is(parser, ScirptTokenTypeLBrace)) {
            error(
                parser,
                heapstring_from_cstring("expected '{'"),
                position(parser)
            );
            scirpt_ast_expr_delete(condition);
            scirpt_ast_expr_delete(truthy);
            // Must return here: falling through would build an If node that
            // still points at the two expressions deleted above.
            return alloc_expr((ScirptAstExpr) {
                .type = ScirptAstExprTypeError,
                .pos = pos,
            });
        }
        falsy = scirpt_parser_parse_expr(parser);
    }
    return alloc_expr((ScirptAstExpr) {
        .type = ScirptAstExprTypeIf,
        .pos = pos,
        .if_expr = (ScirptAstExprIf) {
            .condition = condition,
            .truthy = truthy,
            .falsy = falsy,
        },
    });
}
void scirpt_parser_error(
ScirptParser* parser, HeapString message, ScirptPosition pos
)

View File

@ -25,8 +25,12 @@ void scirpt_parser_construct(
ScirptLexer* lexer
);
void scirpt_parser_destroy(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_statement(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_expr(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_operand(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_int(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_string(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_if(ScirptParser* parser);
void scirpt_parser_error(
ScirptParser* parser, HeapString message, ScirptPosition pos
);