more parsies

This commit is contained in:
SimonFJ20 2023-04-16 04:45:15 +02:00
parent b478e45198
commit 7fb9cef134
10 changed files with 408 additions and 60 deletions

View File

@ -9,6 +9,9 @@ CFLAGS = \
-Wconversion \
-Iinclude
# CFLAGS += -O3
CFLAGS += -g
all: compile_flags.txt dirs scirpt
COMMON_SRC = string.c stringmap.c

View File

@ -5,3 +5,4 @@
-Wpedantic
-Wconversion
-Iinclude
-g

View File

@ -12,11 +12,19 @@ typedef enum {
ScirptAstExprTypeError,
ScirptAstExprTypeId,
ScirptAstExprTypeInt,
ScirptAstExprTypeFloat,
ScirptAstExprTypeString,
ScirptAstExprTypeNull,
ScirptAstExprTypeBool,
ScirptAstExprTypeBlock,
ScirptAstExprTypeIf,
ScirptAstExprTypeMember,
ScirptAstExprTypeCall,
ScirptAstExprTypeIndex,
ScirptAstExprTypeUnary,
ScirptAstExprTypeBinary,
ScirptAstExprTypeAssign,
ScirptAstExprTypeLet,
} ScirptAstExprType;
typedef struct ScirptAstExpr ScirptAstExpr;
@ -25,7 +33,6 @@ GENERIC_ARRAY(ScirptAstExpr*, ScirptAstExprArray, scirpt_ast_expr_array)
typedef struct {
ScirptAstExprArray statements;
ScirptAstExpr* value;
} ScirptAstExprBlock;
typedef struct {
@ -34,16 +41,88 @@ typedef struct {
ScirptAstExpr* falsy;
} ScirptAstExprIf;
// Payload of a member-access expression (`subject.value`).
// The parser stores the text of the identifier token that follows '.' in
// `value`. scirpt_ast_expr_delete deletes `subject` and destroys `value`,
// so a Member node owns both.
typedef struct {
ScirptAstExpr* subject;
HeapString value;
} ScirptAstExprMember;
// Payload of a call expression (`subject(args...)`).
// `subject` is the expression being called and `args` holds the argument
// expressions in source order. scirpt_ast_expr_delete deletes `subject`,
// every element of `args`, and then destroys the array itself.
typedef struct {
ScirptAstExpr* subject;
ScirptAstExprArray args;
} ScirptAstExprCall;
// Payload of an index expression (`subject[value]`).
// `value` is the expression parsed between '[' and ']'.
// scirpt_ast_expr_delete deletes both `subject` and `value`.
typedef struct {
ScirptAstExpr* subject;
ScirptAstExpr* value;
} ScirptAstExprIndex;
// Operator kinds stored in ScirptAstExprUnary.type.
// NOTE(review): presumably logical not and arithmetic negation; no parser
// code visible here produces unary nodes yet, so confirm the token mapping.
typedef enum {
ScirptAstExprUnaryTypeNot,
ScirptAstExprUnaryTypeNegate,
} ScirptAstExprUnaryType;
// Payload of a unary expression: operator `type` applied to `subject`.
// NOTE(review): the Unary case of scirpt_ast_expr_delete is currently empty,
// so `subject` is not released when a Unary node is deleted.
typedef struct {
ScirptAstExprUnaryType type;
ScirptAstExpr* subject;
} ScirptAstExprUnary;
// Operator kinds stored in ScirptAstExprBinary.type: five arithmetic
// operators followed by six comparison operators.
// NOTE(review): the mapping from tokens to these kinds is not visible here.
typedef enum {
ScirptAstExprBinaryTypeAdd,
ScirptAstExprBinaryTypeSubtract,
ScirptAstExprBinaryTypeMultiply,
ScirptAstExprBinaryTypeDivide,
ScirptAstExprBinaryTypeModulo,
ScirptAstExprBinaryTypeEqual,
ScirptAstExprBinaryTypeInequal,
ScirptAstExprBinaryTypeLt,
ScirptAstExprBinaryTypeGt,
ScirptAstExprBinaryTypeLtEqual,
ScirptAstExprBinaryTypeGtEqual,
} ScirptAstExprBinaryType;
// Payload of a binary expression: operator `type` with operands `left` and
// `right`.
// NOTE(review): the Binary case of scirpt_ast_expr_delete is currently empty,
// so neither operand is released when a Binary node is deleted.
typedef struct {
ScirptAstExprBinaryType type;
ScirptAstExpr* left;
ScirptAstExpr* right;
} ScirptAstExprBinary;
// Kinds of assignment: plain `Assign` plus one compound variant per
// arithmetic operator (Add, Subtract, Multiply, Divide, Modulo).
// NOTE(review): presumably these correspond to `=`, `+=`, `-=`, `*=`, `/=`
// and `%=` (the lexer emits *Equal tokens) — confirm once assignment parsing
// exists.
typedef enum {
ScirptAstExprAssignTypeAssign,
ScirptAstExprAssignTypeAdd,
ScirptAstExprAssignTypeSubtract,
ScirptAstExprAssignTypeMultiply,
ScirptAstExprAssignTypeDivide,
ScirptAstExprAssignTypeModulo,
} ScirptAstExprAssignType;
typedef struct {
ScirptAstExpr* subject;
ScirptAstExpr* value;
} ScirptAstExprAssign;
// Payload of a `let` expression.
// NOTE(review): `subject` is presumably the name being bound and `value` its
// initializer — no parser code visible here builds Let nodes yet. The Let
// case of scirpt_ast_expr_delete is currently empty, so neither `subject`
// nor `value` is released when a Let node is deleted.
typedef struct {
HeapString subject;
ScirptAstExpr* value;
} ScirptAstExprLet;
struct ScirptAstExpr {
ScirptAstExprType type;
ScirptPosition pos;
union {
HeapString id_value;
int64_t int_value;
double float_value;
HeapString string_value;
bool bool_value;
ScirptAstExprBlock block;
ScirptAstExprIf if_expr;
ScirptAstExprMember member;
ScirptAstExprCall call;
ScirptAstExprIndex index;
ScirptAstExprUnary unary;
ScirptAstExprBinary binary;
// assign
// let
};
};

View File

@ -11,6 +11,7 @@ typedef enum {
ScirptTokenTypeMalformedString,
ScirptTokenTypeId,
ScirptTokenTypeInt,
ScirptTokenTypeDecimals,
ScirptTokenTypeString,
ScirptTokenTypeLParen,
ScirptTokenTypeRParen,

View File

@ -1,4 +1,5 @@
#include "scirpt/ast.h"
#include <stddef.h>
#include <stdlib.h>
void scirpt_ast_expr_delete(ScirptAstExpr* expr)
@ -13,6 +14,8 @@ void scirpt_ast_expr_delete(ScirptAstExpr* expr)
break;
case ScirptAstExprTypeInt:
break;
case ScirptAstExprTypeFloat:
break;
case ScirptAstExprTypeString:
heapstring_destroy(&expr->string_value);
break;
@ -21,13 +24,37 @@ void scirpt_ast_expr_delete(ScirptAstExpr* expr)
case ScirptAstExprTypeNull:
break;
case ScirptAstExprTypeBlock:
for (size_t i = 0; i < expr->block.statements.length; ++i)
scirpt_ast_expr_delete(expr->block.statements.data[i]);
scirpt_ast_expr_array_destroy(&expr->block.statements);
scirpt_ast_expr_delete(expr->block.value);
break;
case ScirptAstExprTypeIf:
scirpt_ast_expr_delete(expr->if_expr.condition);
scirpt_ast_expr_delete(expr->if_expr.truthy);
scirpt_ast_expr_delete(expr->if_expr.falsy);
if (expr->if_expr.falsy)
scirpt_ast_expr_delete(expr->if_expr.falsy);
break;
case ScirptAstExprTypeMember:
scirpt_ast_expr_delete(expr->member.subject);
heapstring_destroy(&expr->member.value);
break;
case ScirptAstExprTypeCall:
scirpt_ast_expr_delete(expr->call.subject);
for (size_t i = 0; i < expr->call.args.length; ++i)
scirpt_ast_expr_delete(expr->call.args.data[i]);
scirpt_ast_expr_array_destroy(&expr->call.args);
break;
case ScirptAstExprTypeIndex:
scirpt_ast_expr_delete(expr->index.subject);
scirpt_ast_expr_delete(expr->index.value);
break;
case ScirptAstExprTypeUnary:
break;
case ScirptAstExprTypeBinary:
break;
case ScirptAstExprTypeAssign:
break;
case ScirptAstExprTypeLet:
break;
}
}

View File

@ -163,13 +163,13 @@ ScirptToken scirpt_lexer_level_2(ScirptLexer* lexer)
case ']':
return single_token(lexer, TT(RBracket));
case '.':
return single_token(lexer, TT(RBracket));
return scirpt_lexer_dot_token(lexer);
case ',':
return single_token(lexer, TT(RBracket));
return single_token(lexer, TT(Comma));
case ':':
return single_token(lexer, TT(RBracket));
return single_token(lexer, TT(Colon));
case ';':
return single_token(lexer, TT(RBracket));
return single_token(lexer, TT(Semicolon));
case '+':
return single_or_double_token(lexer, TT(Plus), '=', TT(PlusEqual));
case '-':
@ -181,7 +181,7 @@ ScirptToken scirpt_lexer_level_2(ScirptLexer* lexer)
lexer, TT(Asterisk), '=', TT(AsteriskEqual)
);
case '/':
return single_token(lexer, TT(RBracket));
return single_token(lexer, TT(Slash));
case '%':
return single_or_double_token(
lexer, TT(Percent), '=', TT(PercentEqual)
@ -259,17 +259,31 @@ ScirptToken scirpt_lexer_string_token(ScirptLexer* lexer)
return token(lexer, TT(String), start);
}
// Lexes a token that starts with '.'.
//
// The current character is consumed unconditionally as the '.'. If an int
// character follows, that character and all further int characters are
// consumed and a Decimals token (the fractional part of a float literal,
// e.g. ".25") is returned; the parser joins it with the preceding Int token.
// Otherwise a plain Dot token is returned.
//
// The previous version tested for a second '.' instead of a digit, so ".25"
// lexed as Dot followed by Int and Decimals was only produced for "..".
ScirptToken scirpt_lexer_dot_token(ScirptLexer* lexer)
{
    ScirptPosition start = pos(lexer);
    step(lexer);
    if (done(lexer) || !is_int_char(current(lexer)))
        return token(lexer, TT(Dot), start);
    while (!done(lexer) && is_int_char(current(lexer)))
        step(lexer);
    return token(lexer, TT(Decimals), start);
}
ScirptToken scirpt_lexer_slash_token(ScirptLexer* lexer)
{
ScirptPosition start = pos(lexer);
step(lexer);
if (current_is(lexer, TT(Slash))) {
if (current_is(lexer, '/')) {
step(lexer);
while (!done(lexer) && current(lexer) != '\n')
step(lexer);
return scirpt_lexer_next(lexer);
} else if (current_is(lexer, TT(Asterisk))) {
} else if (current_is(lexer, '*')) {
step(lexer);
int depth = 0;
char last_char = '\0';
@ -289,7 +303,7 @@ ScirptToken scirpt_lexer_slash_token(ScirptLexer* lexer)
if (depth != 0)
return token(lexer, TT(MalformedComment), start);
return scirpt_lexer_next(lexer);
} else if (current_is(lexer, TT(Equal))) {
} else if (current_is(lexer, '=')) {
step(lexer);
return token(lexer, TT(SlashEqual), start);
} else {

View File

@ -26,6 +26,7 @@ ScirptToken scirpt_lexer_skip_whitespace(ScirptLexer* lexer);
ScirptToken scirpt_lexer_id_token(ScirptLexer* lexer);
ScirptToken scirpt_lexer_int_token(ScirptLexer* lexer);
ScirptToken scirpt_lexer_string_token(ScirptLexer* lexer);
ScirptToken scirpt_lexer_dot_token(ScirptLexer* lexer);
ScirptToken scirpt_lexer_slash_token(ScirptLexer* lexer);
void scirpt_lexer_step(ScirptLexer* lexer);
ScirptPosition scirpt_lexer_pos(const ScirptLexer* lexer);

View File

@ -9,8 +9,8 @@
int main(void)
{
const char* text = "123 if +";
printf("test program = \"\"\"\n%s\n\"\"\"\n", text);
const char* text = "if 123 { \"abc\"; abc()() }";
printf("= test program = \"\"\"\n%s\n\"\"\"\n", text);
{
printf("\n- test lexer\n");

View File

@ -6,9 +6,11 @@
#include "scirpt/position.h"
#include "scirpt/token.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#define TT(type) ScirptTokenType##type
#define AET(type) ScirptAstExprType##type
static inline ScirptAstExpr* alloc_expr(ScirptAstExpr data)
{
@ -21,10 +23,6 @@ error(ScirptParser* parser, HeapString message, ScirptPosition pos)
{
scirpt_parser_error(parser, message, pos);
}
static inline ScirptPosition position(ScirptParser* parser)
{
return parser->current.pos;
}
// File-local shorthand for scirpt_parser_step.
static inline void step(ScirptParser* parser)
{
    scirpt_parser_step(parser);
}
static inline ScirptAstExpr*
step_alloc_expr(ScirptParser* parser, ScirptAstExpr data)
@ -55,7 +53,7 @@ ScirptAstExpr* scirpt_parser_next(ScirptParser* parser)
{
if (done(parser)) {
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeEof,
.type = AET(Eof),
});
} else {
return scirpt_parser_parse_statement(parser);
@ -101,53 +99,198 @@ ScirptAstExpr* scirpt_parser_parse_statement(ScirptParser* parser)
ScirptAstExpr* scirpt_parser_parse_expr(ScirptParser* parser)
{
return scirpt_parser_parse_operand(parser);
return scirpt_parser_parse_member_call_index_expr(parser);
}
// Parses an operand followed by any number of postfix operations:
// member access (`.id`), call (`(arg, ...)`) and index (`[expr]`).
//
// Each postfix wraps the expression parsed so far as its `subject`, so
// `a.b(c)[d]` nests left to right. Every node produced here carries the
// position of the operand's first token. A trailing comma before ')' is
// accepted in argument lists.
//
// On a syntax error the error is reported through the parser, everything
// built so far (subject, arguments, index value) is deleted, and a node of
// type Error is returned. The caller owns the returned node.
ScirptAstExpr* scirpt_parser_parse_member_call_index_expr(ScirptParser* parser)
{
    ScirptPosition pos = parser->current.pos;
    ScirptAstExpr* subject = scirpt_parser_parse_operand(parser);
    while (true) {
        if (current_is(parser, TT(Dot))) {
            step(parser);
            if (!current_is(parser, TT(Id))) {
                error(
                    parser,
                    heapstring_from_cstring("expected id"),
                    parser->current.pos
                );
                scirpt_ast_expr_delete(subject);
                return alloc_expr((ScirptAstExpr) {
                    .type = AET(Error),
                    .pos = pos,
                });
            }
            HeapString value = heapstring_from(
                &parser->text[parser->current.pos.index], parser->current.length
            );
            // Consume the member name; without this the id token stayed
            // current and was handed to whatever parses next.
            step(parser);
            subject = alloc_expr((ScirptAstExpr) {
                .type = AET(Member),
                .pos = pos,
                .member = (ScirptAstExprMember) {
                    .subject = subject,
                    .value = value,
                },
            });
        } else if (current_is(parser, TT(LParen))) {
            step(parser);
            ScirptAstExprArray args;
            scirpt_ast_expr_array_construct(&args);
            if (!done(parser) && parser->current.type != TT(RParen)) {
                // Arguments are full expressions so that `f(a.b, g())` works,
                // not only bare operands.
                scirpt_ast_expr_array_append(
                    &args, scirpt_parser_parse_expr(parser)
                );
                while (current_is(parser, TT(Comma))) {
                    step(parser);
                    if (done(parser) || parser->current.type == TT(RParen))
                        break;
                    scirpt_ast_expr_array_append(
                        &args, scirpt_parser_parse_expr(parser)
                    );
                }
            }
            if (!current_is(parser, TT(RParen))) {
                error(
                    parser,
                    heapstring_from_cstring("expected ')'"),
                    parser->current.pos
                );
                // Release the arguments parsed so far along with the callee.
                for (size_t i = 0; i < args.length; ++i)
                    scirpt_ast_expr_delete(args.data[i]);
                scirpt_ast_expr_array_destroy(&args);
                scirpt_ast_expr_delete(subject);
                return alloc_expr((ScirptAstExpr) {
                    .type = AET(Error),
                    .pos = pos,
                });
            }
            step(parser);
            subject = alloc_expr((ScirptAstExpr) {
                .type = AET(Call),
                .pos = pos,
                .call = (ScirptAstExprCall) {
                    .subject = subject,
                    .args = args,
                },
            });
        } else if (current_is(parser, TT(LBracket))) {
            step(parser);
            ScirptAstExpr* value = scirpt_parser_parse_expr(parser);
            if (!current_is(parser, TT(RBracket))) {
                error(
                    parser,
                    heapstring_from_cstring("expected ']'"),
                    parser->current.pos
                );
                // The index expression is not attached to any node yet, so
                // it has to be released here as well.
                scirpt_ast_expr_delete(value);
                scirpt_ast_expr_delete(subject);
                return alloc_expr((ScirptAstExpr) {
                    .type = AET(Error),
                    .pos = pos,
                });
            }
            step(parser);
            subject = alloc_expr((ScirptAstExpr) {
                .type = AET(Index),
                .pos = pos,
                .index = (ScirptAstExprIndex) {
                    .subject = subject,
                    .value = value,
                },
            });
        } else {
            break;
        }
    }
    return subject;
}
ScirptAstExpr* scirpt_parser_parse_operand(ScirptParser* parser)
{
ScirptPosition pos = position(parser);
ScirptPosition pos = parser->current.pos;
switch (parser->current.type) {
case ScirptTokenTypeInt:
return scirpt_parser_parse_int(parser);
case ScirptTokenTypeString:
case TT(Id): {
HeapString value = heapstring_from(
&parser->text[parser->current.pos.index], parser->current.length
);
step(parser);
return alloc_expr((ScirptAstExpr) {
.type = AET(Id),
.pos = pos,
.id_value = value,
});
}
case TT(Int):
return scirpt_parser_parse_int_or_float(parser);
case TT(String):
return scirpt_parser_parse_string(parser);
case ScirptTokenTypeNull:
case TT(Null):
return step_alloc_expr(
parser,
(ScirptAstExpr) {
.type = ScirptAstExprTypeNull,
.type = AET(Null),
.pos = pos,
}
);
case ScirptTokenTypeFalse:
case TT(False):
return step_alloc_expr(
parser,
(ScirptAstExpr) {
.type = ScirptAstExprTypeBool,
.type = AET(Bool),
.pos = pos,
.bool_value = false,
}
);
case ScirptTokenTypeTrue:
case TT(True):
return step_alloc_expr(
parser,
(ScirptAstExpr) {
.type = ScirptAstExprTypeBool,
.type = AET(Bool),
.pos = pos,
.bool_value = true,
}
);
case ScirptTokenTypeIf:
case TT(LBrace):
return scirpt_parser_parse_block(parser);
case TT(If):
return scirpt_parser_parse_if(parser);
case ScirptTokenTypeEof: {
case TT(Eof): {
error(
parser,
heapstring_from_cstring("expected value, got Eof"),
position(parser)
parser->current.pos
);
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeError,
.type = AET(Error),
.pos = pos,
});
}
case TT(InvalidChar): {
error(
parser,
heapstring_from_cstring("invalid char"),
parser->current.pos
);
step(parser);
return alloc_expr((ScirptAstExpr) {
.type = AET(Error),
.pos = pos,
});
}
case TT(MalformedComment): {
error(
parser,
heapstring_from_cstring("malformed comment"),
parser->current.pos
);
return alloc_expr((ScirptAstExpr) {
.type = AET(Error),
.pos = pos,
});
}
case TT(MalformedString): {
error(
parser,
heapstring_from_cstring("malformed string"),
parser->current.pos
);
step(parser);
return alloc_expr((ScirptAstExpr) {
.type = AET(Error),
.pos = pos,
});
}
@ -155,77 +298,154 @@ ScirptAstExpr* scirpt_parser_parse_operand(ScirptParser* parser)
error(
parser,
heapstring_from_cstring("expected value"),
position(parser)
parser->current.pos
);
step(parser);
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeError,
.type = AET(Error),
.pos = pos,
});
}
}
}
ScirptAstExpr* scirpt_parser_parse_int(ScirptParser* parser)
ScirptAstExpr* scirpt_parser_parse_int_or_float(ScirptParser* parser)
{
ScirptPosition pos = position(parser);
HeapString value_string = heapstring_from(
&parser->text[position(parser).index], parser->current.length
ScirptPosition pos = parser->current.pos;
HeapString int_string = heapstring_from(
&parser->text[parser->current.pos.index], parser->current.length
);
int64_t value = atoll(value_string.data);
step(parser);
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeInt,
.pos = pos,
.int_value = value,
});
if (current_is(parser, TT(Decimals))) {
HeapString float_string = heapstring_from(
&parser->text[parser->current.pos.index], parser->current.length
);
step(parser);
StringBuilder builder;
stringbuilder_construct(&builder);
for (size_t i = 0; i < int_string.length; ++i)
stringbuilder_append(&builder, int_string.data[i]);
for (size_t i = 0; i < float_string.length; ++i)
stringbuilder_append(&builder, float_string.data[i]);
heapstring_destroy(&int_string);
heapstring_destroy(&float_string);
HeapString value_string = stringbuilder_build(&builder);
stringbuilder_destroy(&builder);
double value = atof(value_string.data);
heapstring_destroy(&value_string);
return alloc_expr((ScirptAstExpr) {
.type = AET(Float),
.pos = pos,
.float_value = value,
});
} else {
int64_t value = atoll(int_string.data);
heapstring_destroy(&int_string);
return alloc_expr((ScirptAstExpr) {
.type = AET(Int),
.pos = pos,
.int_value = value,
});
}
}
ScirptAstExpr* scirpt_parser_parse_string(ScirptParser* parser)
{
ScirptPosition pos = position(parser);
ScirptPosition pos = parser->current.pos;
UnescapeStringResult result = common_unescape_string((StringView
) { .data = &parser->text[position(parser).index],
) { .data = &parser->text[parser->current.pos.index],
.length = parser->current.length - 2 });
if (!result.ok) {
error(parser, result.error, position(parser));
error(parser, result.error, parser->current.pos);
step(parser);
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeError,
.type = AET(Error),
});
}
HeapString value = result.value;
step(parser);
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeString,
.type = AET(String),
.pos = pos,
.string_value = value,
});
}
// Reports whether a statement of the given expression type must be followed
// by ';' inside a block. No expression type requires one at the moment; the
// switch is kept as the place to list types that do.
static inline bool requires_semicolon(ScirptAstExprType type)
{
    bool required = false;
    switch (type) {
        default:
            break;
    }
    return required;
}
// Parses a `{ ... }` block. The current token is skipped unconditionally as
// the opening '{' (parse_operand only calls this on an LBrace token).
//
// Statements are parsed until '}' or Eof. After each statement any run of
// ';' is consumed; a statement that is not followed by ';' ends the loop, and
// "';' required" is reported first if requires_semicolon() says its type
// needs one.
//
// Returns a Block node owning the parsed statements. If the closing '}' is
// missing, "expected '}'" is reported, every statement parsed so far is
// deleted together with the array, and an Error node is returned.
ScirptAstExpr* scirpt_parser_parse_block(ScirptParser* parser)
{
    ScirptPosition pos = parser->current.pos;
    step(parser);
    ScirptAstExprArray statements;
    scirpt_ast_expr_array_construct(&statements);
    while (!done(parser) && parser->current.type != TT(RBrace)) {
        ScirptAstExpr* statement = scirpt_parser_parse_statement(parser);
        scirpt_ast_expr_array_append(&statements, statement);
        if (current_is(parser, TT(Semicolon))) {
            step(parser);
            while (current_is(parser, TT(Semicolon)))
                step(parser);
        } else {
            if (requires_semicolon(statement->type)) {
                error(
                    parser,
                    heapstring_from_cstring("';' required"),
                    parser->current.pos
                );
            }
            break;
        }
    }
    if (!current_is(parser, TT(RBrace))) {
        error(
            parser, heapstring_from_cstring("expected '}'"), parser->current.pos
        );
        // The statements are not attached to any node on this path, so they
        // must be released here or they leak.
        for (size_t i = 0; i < statements.length; ++i)
            scirpt_ast_expr_delete(statements.data[i]);
        scirpt_ast_expr_array_destroy(&statements);
        return alloc_expr((ScirptAstExpr) {
            .type = AET(Error),
            .pos = pos,
        });
    }
    step(parser);
    return alloc_expr((ScirptAstExpr) {
        .type = AET(Block),
        .pos = pos,
        .block = (ScirptAstExprBlock) {
            .statements = statements,
        },
    });
}
ScirptAstExpr* scirpt_parser_parse_if(ScirptParser* parser)
{
ScirptPosition pos = position(parser);
ScirptPosition pos = parser->current.pos;
step(parser);
ScirptAstExpr* condition = scirpt_parser_parse_expr(parser);
if (!current_is(parser, ScirptTokenTypeLBrace)) {
if (!current_is(parser, TT(LBrace))) {
error(
parser, heapstring_from_cstring("expected '{'"), position(parser)
parser, heapstring_from_cstring("expected '{'"), parser->current.pos
);
scirpt_ast_expr_delete(condition);
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeError,
.type = AET(Error),
});
}
ScirptAstExpr* truthy = scirpt_parser_parse_expr(parser);
ScirptAstExpr* falsy = NULL;
if (current_is(parser, ScirptTokenTypeElse)) {
if (current_is(parser, TT(Else))) {
step(parser);
if (!current_is(parser, ScirptTokenTypeLBrace)) {
if (!current_is(parser, TT(LBrace))) {
error(
parser,
heapstring_from_cstring("expected '{'"),
position(parser)
parser->current.pos
);
scirpt_ast_expr_delete(condition);
scirpt_ast_expr_delete(truthy);
@ -233,7 +453,7 @@ ScirptAstExpr* scirpt_parser_parse_if(ScirptParser* parser)
falsy = scirpt_parser_parse_expr(parser);
}
return alloc_expr((ScirptAstExpr) {
.type = ScirptAstExprTypeIf,
.type = AET(If),
.pos = pos,
.if_expr = (ScirptAstExprIf) {
.condition = condition,
@ -270,5 +490,5 @@ bool scirpt_parser_current_is(const ScirptParser* parser, ScirptTokenType type)
bool scirpt_parser_done(const ScirptParser* parser)
{
return parser->current.type == ScirptTokenTypeEof;
return parser->current.type == TT(Eof);
}

View File

@ -27,9 +27,11 @@ void scirpt_parser_construct(
void scirpt_parser_destroy(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_statement(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_expr(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_member_call_index_expr(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_operand(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_int(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_int_or_float(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_string(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_block(ScirptParser* parser);
ScirptAstExpr* scirpt_parser_parse_if(ScirptParser* parser);
void scirpt_parser_error(
ScirptParser* parser, HeapString message, ScirptPosition pos