add parser

SimonFJ20 2023-07-27 04:10:30 +02:00
parent 5ff3ac2c4d
commit 2de85bf57d
7 changed files with 1125 additions and 9 deletions

Makefile
View File

@@ -8,10 +8,12 @@ C_FLAGS = \
-Wextra \
-Wpedantic \
-Wconversion \
-Wno-gnu-case-range
-Wno-gnu-case-range \
-g
LINKER_FLAGS = \
-fsanitize=address,undefined
-fsanitize=address,undefined \
-g
SOURCE_FOLDER = src
BUILD_FOLDER = build

src/lexer.c
View File

@@ -1 +1,532 @@
#include "lexer.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
bool string_slice_equal(StringSlice slice, const char* data)
{
if (strlen(data) != slice.length) {
return false;
}
return strncmp(data, slice.data, slice.length) == 0;
}
void string_construct(String* string)
{
*string = (String) {
.data = NULL,
.length = 0,
.capacity = 0,
};
}
void string_destroy(String* string)
{
if (string->data) {
free(string->data);
}
}
const size_t string_starting_alloc_size = 8;
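// Append a single char, doubling the capacity when full so repeated appends
// stay amortized O(1); a '\0' is kept just past `length` so `data` can be
// handed to C string functions.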
void string_append_char(String* string, char value)
{
if (string->length + 1 >= string->capacity) {
if (string->capacity == 0) {
string->capacity = string_starting_alloc_size;
} else {
string->capacity *= 2;
}
if (string->data == NULL) {
string->data = malloc(string->capacity * sizeof(char));
ASSERT(string->data);
} else {
char* new_buffer
= realloc(string->data, string->capacity * sizeof(char));
ASSERT(new_buffer);
string->data = new_buffer;
}
}
string->data[string->length] = value;
string->length += 1;
string->data[string->length] = '\0';
}
void string_from_cstr(String* string, const char* value)
{
string_construct(string);
string_append_cstr(string, value);
}
void string_from_slice(String* string, StringSlice slice)
{
string_construct(string);
for (size_t i = 0; i < slice.length; ++i) {
string_append_char(string, slice.data[i]);
}
}
void file_char_reader_construct(FileCharReader* reader, FILE* file)
{
*reader = (FileCharReader) {
.next = file_char_reader_next,
.value = file_char_reader_value,
.file = file,
.buffer = { 0 },
};
string_construct(&reader->buffer);
}
void string_append_cstr(String* string, const char* value)
{
size_t value_length = strlen(value);
if (string->length + value_length + 1 > string->capacity) {
if (string->capacity == 0) {
string->capacity = string_starting_alloc_size;
} else {
string->capacity *= 2;
}
while (string->length + value_length + 1 > string->capacity) {
string->capacity *= 2;
}
char* new_buffer
= realloc(string->data, string->capacity * sizeof(char));
ASSERT(new_buffer);
string->data = new_buffer;
}
memcpy(&string->data[string->length], value, value_length);
string->length += value_length;
string->data[string->length] = '\0';
}
void string_append_formatted(String* string, const char* format, ...)
{
va_list varargs;
va_list varargs_copy;
va_start(varargs, format);
// vsnprintf consumes its va_list, so measure the length with a copy
va_copy(varargs_copy, varargs);
size_t format_length = (size_t)vsnprintf(NULL, 0, format, varargs_copy);
va_end(varargs_copy);
if (string->length + format_length + 1 > string->capacity) {
if (string->capacity == 0) {
string->capacity = string_starting_alloc_size;
} else {
string->capacity *= 2;
}
while (string->length + format_length + 1 > string->capacity) {
string->capacity *= 2;
}
char* new_buffer
= realloc(string->data, string->capacity * sizeof(char));
ASSERT(new_buffer);
string->data = new_buffer;
}
size_t written = (size_t)vsnprintf(
&string->data[string->length],
string->capacity - string->length,
format,
varargs
);
ASSERT(written == format_length);
string->length += written;
va_end(varargs);
}
bool string_equal(const String* string, const char* value)
{
return strncmp(value, string->data, string->length + 1) == 0;
}
void file_char_reader_destroy(FileCharReader* reader)
{
string_destroy(&reader->buffer);
}
char file_char_reader_next(FileCharReader* reader)
{
int read_maybe_char = fgetc(reader->file);
if (read_maybe_char == EOF) {
return '\0';
}
char read_char = (char)read_maybe_char;
string_append_char(&reader->buffer, read_char);
return (char)read_char;
}
StringSlice file_char_reader_value(
const FileCharReader* reader, size_t index, size_t length
)
{
ASSERT(index + length <= reader->buffer.length);
return (StringSlice) {
.data = &reader->buffer.data[index],
.length = length,
};
}
void error_construct(Error* error, Pos pos, String message)
{
*error = (Error) {
.pos = pos,
.message = message,
};
}
void error_destroy(Error* error) { string_destroy(&error->message); }
void error_collector_construct(ErrorCollector* collector)
{
const size_t errors_start_capacity = 64;
*collector = (ErrorCollector) {
.errors = malloc(errors_start_capacity * sizeof(Error)),
.errors_length = 0,
.errors_capacity = errors_start_capacity,
};
}
void error_collector_destroy(ErrorCollector* collector)
{
for (size_t i = 0; i < collector->errors_length; ++i) {
error_destroy(&collector->errors[i]);
}
free(collector->errors);
}
void error_collector_add(ErrorCollector* collector, Error error)
{
if (collector->errors_length >= collector->errors_capacity) {
collector->errors_capacity *= 2;
Error* new_buffer = realloc(
collector->errors, collector->errors_capacity * sizeof(Error)
);
ASSERT(new_buffer);
collector->errors = new_buffer;
}
collector->errors[collector->errors_length] = error;
collector->errors_length += 1;
}
const char* token_type_value(TokenType type)
{
switch (type) {
case TokenTypeEof:
return "Eof";
case TokenTypeError:
return "error";
case TokenTypeId:
return "Id";
case TokenTypeInt:
return "Int";
case TokenTypeChar:
return "Char";
case TokenTypeString:
return "String";
case TokenTypeLParen:
return "(";
case TokenTypeRParen:
return ")";
case TokenTypeLBrace:
return "{";
case TokenTypeRBrace:
return "}";
case TokenTypeLBracket:
return "[";
case TokenTypeRBracket:
return "]";
}
return "unknown";
}
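// Formats a heap-allocated error message and hands ownership of it to the
// lexer's ErrorCollector; errors are collected rather than aborting lexing.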
#define LEXER_ADD_ERROR(LEXER, POS, ...) \
{ \
String error_message; \
string_construct(&error_message); \
string_append_formatted(&error_message, __VA_ARGS__); \
Error error; \
error_construct(&error, (POS), error_message); \
error_collector_add((LEXER)->errors, error); \
}
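// The lexer keeps one character of lookahead in `current`; `pos.index` tracks
// where that character lives in the reader's buffer.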
Lexer lexer_create(CharReader* reader, ErrorCollector* errors)
{
return (Lexer) {
.reader = reader,
.errors = errors,
.current = reader->next(reader),
.pos = { 0, 1, 1 },
};
}
Token lexer_next(Lexer* lexer)
{
Pos pos = lexer->pos;
if (lexer_done(lexer)) {
return lexer_token(lexer, TokenTypeEof, pos);
}
switch (lexer->current) {
case ' ':
case '\t':
case '\r':
case '\n':
return lexer_skip_whitespace(lexer);
case '"':
return lexer_lex_string(lexer);
case '(':
return (
lexer_step(lexer), lexer_token(lexer, TokenTypeLParen, pos)
);
case ')':
return (
lexer_step(lexer), lexer_token(lexer, TokenTypeRParen, pos)
);
case '{':
return (
lexer_step(lexer), lexer_token(lexer, TokenTypeLBrace, pos)
);
case '}':
return (
lexer_step(lexer), lexer_token(lexer, TokenTypeRBrace, pos)
);
case '[':
return (
lexer_step(lexer), lexer_token(lexer, TokenTypeLBracket, pos)
);
case ']':
return (
lexer_step(lexer), lexer_token(lexer, TokenTypeRBracket, pos)
);
case '/':
return lexer_lex_slash(lexer);
case '#':
return lexer_lex_hashtag(lexer);
case '0' ... '9':
return lexer_lex_int(lexer);
case '+':
case '-':
case '*':
case '<':
case '>':
case '=':
case '!':
case 'a' ... 'z':
case 'A' ... 'Z':
return lexer_lex_id(lexer);
default:
LEXER_ADD_ERROR(
lexer, pos, "unsupported character `%c`", lexer->current
);
return (lexer_step(lexer), lexer_token(lexer, TokenTypeError, pos));
}
}
Token lexer_lex_id(Lexer* lexer)
{
Pos pos = lexer->pos;
lexer_step(lexer);
while (true) {
switch (lexer->current) {
case '+':
case '-':
case '*':
case '<':
case '>':
case '=':
case '!':
case '0' ... '9':
case 'a' ... 'z':
case 'A' ... 'Z':
lexer_step(lexer);
break;
default:
goto break_loop;
}
}
break_loop:
return lexer_token(lexer, TokenTypeId, pos);
}
Token lexer_lex_int(Lexer* lexer)
{
Pos pos = lexer->pos;
lexer_step(lexer);
while (true) {
switch (lexer->current) {
case '0' ... '9':
lexer_step(lexer);
break;
default:
goto break_loop;
}
}
break_loop:
return lexer_token(lexer, TokenTypeInt, pos);
}
Token lexer_lex_char(Lexer* lexer)
{
Pos pos = lexer->pos;
lexer_step(lexer);
if (lexer_done(lexer)) {
LEXER_ADD_ERROR(
lexer, pos, "malformed character literal, got unexpected Eof"
);
return lexer_token(lexer, TokenTypeError, pos);
}
if (lexer->current == '\'') {
LEXER_ADD_ERROR(
lexer, pos, "malformed character literal, got unexpected `'`"
);
return lexer_token(lexer, TokenTypeError, pos);
}
if (lexer->current == '\\') {
lexer_step(lexer);
if (lexer_done(lexer)) {
LEXER_ADD_ERROR(
lexer, pos, "malformed character literal, got unexpected Eof"
);
return lexer_token(lexer, TokenTypeError, pos);
}
lexer_step(lexer);
}
if (lexer_done(lexer)) {
LEXER_ADD_ERROR(
lexer, pos, "malformed character literal, expected `'`, got Eof"
);
return lexer_token(lexer, TokenTypeError, pos);
}
if (lexer->current != '\'') {
LEXER_ADD_ERROR(
lexer,
pos,
"malformed character literal, expected `'`, got `%c`",
lexer->current
);
return lexer_token(lexer, TokenTypeError, pos);
}
lexer_step(lexer);
return lexer_token(lexer, TokenTypeChar, pos);
}
Token lexer_lex_string(Lexer* lexer)
{
Pos pos = lexer->pos;
lexer_step(lexer);
while (!lexer_done(lexer) && lexer->current != '"') {
char maybe_escape_char = lexer->current;
lexer_step(lexer);
if (maybe_escape_char == '\\') {
if (lexer_done(lexer)) {
LEXER_ADD_ERROR(
lexer,
pos,
"malformed string literal escape sequence, got unexpected "
"Eof"
);
return lexer_token(lexer, TokenTypeError, pos);
}
lexer_step(lexer);
}
}
if (lexer_done(lexer)) {
LEXER_ADD_ERROR(
lexer, pos, "malformed string literal, expected `\"`, got Eof"
);
return lexer_token(lexer, TokenTypeError, pos);
}
if (lexer->current != '\"') {
LEXER_ADD_ERROR(
lexer,
pos,
"malformed string literal, expected `\"`, got `%c`",
lexer->current
);
return lexer_token(lexer, TokenTypeError, pos);
}
lexer_step(lexer);
return lexer_token(lexer, TokenTypeString, pos);
}
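// `/` either starts a block comment `/* ... */` (nesting tracked with a depth
// counter), a line comment `//`, or stands alone as the division identifier.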
Token lexer_lex_slash(Lexer* lexer)
{
Pos pos = lexer->pos;
lexer_step(lexer);
if (lexer->current == '*') {
lexer_step(lexer);
int depth = 1;
char last = '\0';
while (!lexer_done(lexer) && depth > 0) {
if (last == '*' && lexer->current == '/') {
depth -= 1;
} else if (last == '/' && lexer->current == '*') {
depth += 1;
}
last = lexer->current;
lexer_step(lexer);
}
if (depth > 0) {
String error_message;
string_from_cstr(
&error_message,
"malformed multiline comment, expected `*/` before end"
);
Error error;
error_construct(&error, pos, error_message);
error_collector_add(lexer->errors, error);
return lexer_token(lexer, TokenTypeError, pos);
}
return lexer_next(lexer);
}
if (lexer->current == '/') {
lexer_step(lexer);
while (!lexer_done(lexer) && lexer->current != '\n') {
lexer_step(lexer);
}
return lexer_next(lexer);
}
return lexer_token(lexer, TokenTypeId, pos);
}
Token lexer_lex_hashtag(Lexer* lexer)
{
lexer_step(lexer);
while (!lexer_done(lexer) && lexer->current != '\n') {
lexer_step(lexer);
}
return lexer_next(lexer);
}
Token lexer_skip_whitespace(Lexer* lexer)
{
lexer_step(lexer);
while (true) {
switch (lexer->current) {
case ' ':
case '\t':
case '\r':
case '\n':
lexer_step(lexer);
break;
default:
goto break_loop;
}
}
break_loop:
return lexer_next(lexer);
}
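// Tokens do not copy their text: length is derived from how far the position
// advanced since `pos`, and the text is sliced out of the reader later.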
Token lexer_token(const Lexer* lexer, TokenType type, Pos pos)
{
return (Token) { type, pos, .length = lexer->pos.index - pos.index };
}
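// Consume the current character: advance the buffer index and line/column
// bookkeeping, then pull the next lookahead character from the reader.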
void lexer_step(Lexer* lexer)
{
if (lexer_done(lexer)) {
return;
}
lexer->pos.index += 1;
if (lexer->current == '\n') {
lexer->pos.line += 1;
lexer->pos.col = 1;
} else {
lexer->pos.col += 1;
}
lexer->current = lexer->reader->next(lexer->reader);
}
bool lexer_done(const Lexer* lexer) { return lexer->current == '\0'; }

src/lexer.h
View File

@@ -1,4 +1,144 @@
#ifndef LEXER_H
#define LEXER_H
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#define PANIC(...) \
(fprintf(stderr, "panic: "), \
fprintf(stderr, __VA_ARGS__), \
fprintf(stderr, ", at %s:%d in %s()", __FILE__, __LINE__, __func__), \
exit(1));
#define ASSERT(CONDITION) \
{ \
if (!(CONDITION)) { \
(fprintf(stderr, "assert failed: "), \
fprintf( \
stderr, \
"(%s), at %s:%d in %s()", \
#CONDITION, \
__FILE__, \
__LINE__, \
__func__ \
), \
exit(1)); \
} \
};
typedef struct {
const char* data;
size_t length;
} StringSlice;
bool string_slice_equal(StringSlice slice, const char* data);
typedef struct {
char* data;
size_t length;
size_t capacity;
} String;
void string_construct(String* string);
void string_destroy(String* string);
void string_from_cstr(String* string, const char* value);
void string_from_slice(String* string, StringSlice slice);
void string_append_char(String* string, char value);
void string_append_cstr(String* string, const char* value);
void string_append_formatted(String* string, const char* format, ...);
bool string_equal(const String* string, const char* value);
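// Minimal reader "interface": concrete readers such as FileCharReader start
// with the same function-pointer layout, so a FileCharReader* is cast to a
// CharReader* where the lexer expects one (as done in main.c).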
typedef struct CharReader {
char (*next)(struct CharReader* reader);
StringSlice (*value)(
const struct CharReader* reader, size_t index, size_t length
);
} CharReader;
typedef struct FileCharReader {
char (*next)(struct FileCharReader* reader);
StringSlice (*value)(
const struct FileCharReader* reader, size_t index, size_t length
);
FILE* file;
String buffer;
} FileCharReader;
void file_char_reader_construct(FileCharReader* reader, FILE* file);
void file_char_reader_destroy(FileCharReader* reader);
char file_char_reader_next(FileCharReader* reader);
StringSlice file_char_reader_value(
const FileCharReader* reader, size_t index, size_t length
);
typedef struct {
size_t index;
size_t line;
size_t col;
} Pos;
typedef struct {
Pos pos;
String message;
} Error;
void error_construct(Error* error, Pos pos, String message);
void error_destroy(Error* error);
typedef struct {
Error* errors;
size_t errors_length;
size_t errors_capacity;
} ErrorCollector;
void error_collector_construct(ErrorCollector* collector);
void error_collector_destroy(ErrorCollector* collector);
void error_collector_add(ErrorCollector* collector, Error error);
typedef enum {
TokenTypeEof,
TokenTypeError,
TokenTypeId,
TokenTypeInt,
TokenTypeChar,
TokenTypeString,
TokenTypeLParen,
TokenTypeRParen,
TokenTypeLBrace,
TokenTypeRBrace,
TokenTypeLBracket,
TokenTypeRBracket,
} TokenType;
const char* token_type_value(TokenType type);
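// A token records its kind, where it starts, and how many characters it
// spans; the underlying text stays in the CharReader's buffer.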
typedef struct {
TokenType type;
Pos pos;
size_t length;
} Token;
typedef struct {
CharReader* reader;
ErrorCollector* errors;
char current;
Pos pos;
} Lexer;
Lexer lexer_create(CharReader* reader, ErrorCollector* errors);
Token lexer_next(Lexer* lexer);
Token lexer_lex_id(Lexer* lexer);
Token lexer_lex_int(Lexer* lexer);
Token lexer_lex_char(Lexer* lexer);
Token lexer_lex_string(Lexer* lexer);
Token lexer_lex_slash(Lexer* lexer);
Token lexer_lex_hashtag(Lexer* lexer);
Token lexer_skip_whitespace(Lexer* lexer);
Token lexer_token(const Lexer* lexer, TokenType type, Pos pos);
void lexer_step(Lexer* lexer);
bool lexer_done(const Lexer* lexer);
#endif

src/main.c
View File

@@ -1,3 +1,5 @@
#include "lexer.h"
#include "parser.h"
#include <stdio.h>
int main(int argc, const char** argv)
@@ -14,11 +16,24 @@ int main(int argc, const char** argv)
return 1;
}
printf("file:\n");
int read_char = fgetc(file);
while (read_char != EOF) {
fputc(read_char, stdout);
read_char = fgetc(file);
}
fputc('\n', stdout);
ErrorCollector errors;
error_collector_construct(&errors);
FileCharReader reader;
file_char_reader_construct(&reader, file);
Lexer lexer = lexer_create((CharReader*)&reader, &errors);
Parser parser = parser_create((CharReader*)&reader, lexer, &errors);
ExprVec ast = parser_parse(&parser);
String ast_string;
string_construct(&ast_string);
expr_vec_stringify(&ast, &ast_string);
printf("ast = %s\n", ast_string.data);
string_destroy(&ast_string);
expr_vec_destroy(&ast);
file_char_reader_destroy(&reader);
error_collector_destroy(&errors);
}

352
src/parser.c Normal file
View File

@@ -0,0 +1,352 @@
#include "parser.h"
#include "lexer.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
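// ExprVec is a growable array of expressions; pushes double the capacity when
// the buffer runs out.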
void expr_vec_construct(ExprVec* vec)
{
const size_t starting_capacity = 8;
*vec = (ExprVec) {
.exprs = malloc(starting_capacity * sizeof(Expr)),
.length = 0,
.capacity = starting_capacity,
};
ASSERT(vec->exprs);
}
void expr_vec_destroy(ExprVec* vec)
{
for (size_t i = 0; i < vec->length; ++i) {
expr_destroy(&vec->exprs[i]);
}
free(vec->exprs);
}
void expr_vec_push(ExprVec* vec, Expr expr)
{
if (vec->length >= vec->capacity) {
vec->capacity *= 2;
Expr* new_buffer = realloc(vec->exprs, vec->capacity * sizeof(Expr));
ASSERT(new_buffer);
vec->exprs = new_buffer;
}
vec->exprs[vec->length] = expr;
vec->length += 1;
}
void expr_vec_stringify(const ExprVec* vec, String* acc)
{
string_append_cstr(acc, "[");
printf("helo worl\n");
if (vec->length > 0) {
expr_stringify(&vec->exprs[0], acc);
for (size_t i = 1; i < vec->length; ++i) {
string_append_cstr(acc, ", ");
expr_stringify(&vec->exprs[i], acc);
}
}
string_append_cstr(acc, "]");
}
Expr error_expr_construct(Pos pos)
{
return (Expr) { ExprTypeError, pos, { 0 } };
}
Expr id_expr_construct(Pos pos, String value)
{
return (Expr) { ExprTypeId, pos, .id_value = value };
}
Expr int_expr_construct(Pos pos, int64_t value)
{
return (Expr) { ExprTypeInt, pos, .int_value = value };
}
Expr char_expr_construct(Pos pos, char value)
{
return (Expr) { ExprTypeChar, pos, .char_value = value };
}
Expr string_expr_construct(Pos pos, String value)
{
return (Expr) { ExprTypeString, pos, .string_value = value };
}
Expr list_expr_construct(Pos pos, ExprVec exprs)
{
return (Expr) { ExprTypeList, pos, .list = exprs };
}
Expr quote_expr_construct(Pos pos, ExprVec exprs)
{
return (Expr) { ExprTypeQuote, pos, .quote = exprs };
}
void expr_destroy(Expr* expr)
{
switch (expr->type) {
case ExprTypeError:
break;
case ExprTypeId:
string_destroy(&expr->id_value);
break;
case ExprTypeInt:
case ExprTypeChar:
break;
case ExprTypeString:
string_destroy(&expr->string_value);
break;
case ExprTypeList:
expr_vec_destroy(&expr->list);
break;
case ExprTypeQuote:
expr_vec_destroy(&expr->quote);
break;
}
}
void expr_stringify(const Expr* expr, String* acc)
{
switch (expr->type) {
case ExprTypeError:
string_append_formatted(acc, "Error");
break;
case ExprTypeId:
string_append_formatted(acc, "Id(%s)", expr->id_value.data);
break;
case ExprTypeInt:
string_append_formatted(acc, "Int(\'%ld\')", expr->int_value);
break;
case ExprTypeChar:
string_append_formatted(acc, "Char(\'%c\')", expr->char_value);
break;
case ExprTypeString:
string_append_formatted(
acc, "String(\"%s\")", expr->string_value.data
);
break;
case ExprTypeList:
string_append_cstr(acc, "List(");
if (expr->list.length > 0) {
expr_stringify(&expr->list.exprs[0], acc);
for (size_t i = 1; i < expr->list.length; ++i) {
string_append_cstr(acc, ", ");
expr_stringify(&expr->list.exprs[i], acc);
}
}
string_append_cstr(acc, ")");
break;
case ExprTypeQuote:
string_append_cstr(acc, "Quote(");
if (expr->quote.length > 0) {
expr_stringify(&expr->quote.exprs[0], acc);
for (size_t i = 1; i < expr->quote.length; ++i) {
string_append_cstr(acc, ", ");
expr_stringify(&expr->quote.exprs[i], acc);
}
}
string_append_cstr(acc, ")");
break;
}
}
#define PARSER_ADD_ERROR(PARSER, POS, ...) \
{ \
String error_message; \
string_construct(&error_message); \
string_append_formatted(&error_message, __VA_ARGS__); \
Error error; \
error_construct(&error, (POS), error_message); \
error_collector_add((PARSER)->errors, error); \
}
Parser
parser_create(const CharReader* reader, Lexer lexer, ErrorCollector* errors)
{
Token first = lexer_next(&lexer);
return (Parser) {
.reader = reader,
.lexer = lexer,
.errors = errors,
.current = first,
};
}
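// Parse expressions until EOF. Parse errors are recorded in the collector and
// produce Error expressions, so a (possibly partial) AST is always returned.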
ExprVec parser_parse(Parser* parser)
{
ExprVec exprs;
expr_vec_construct(&exprs);
while (parser->current.type != TokenTypeEof) {
expr_vec_push(&exprs, parser_parse_expr(parser));
}
return exprs;
}
Expr parser_parse_expr(Parser* parser)
{
Pos pos = parser->current.pos;
switch (parser->current.type) {
case TokenTypeId:
return parser_parse_id(parser);
case TokenTypeInt:
return parser_parse_int(parser);
case TokenTypeChar:
return parser_parse_char(parser);
case TokenTypeString:
return parser_parse_string(parser);
case TokenTypeLParen:
return parser_parse_list(parser);
case TokenTypeLBracket:
return parser_parse_quote(parser);
default:
PARSER_ADD_ERROR(
parser,
pos,
"expected value, got `%s`",
token_type_value(parser->current.type)
);
parser_step(parser);
return error_expr_construct(pos);
}
}
Expr parser_parse_id(Parser* parser)
{
Token token = parser->current;
String value;
string_from_slice(
&value,
parser->reader->value(parser->reader, token.pos.index, token.length)
);
parser_step(parser);
return id_expr_construct(token.pos, value);
}
Expr parser_parse_int(Parser* parser)
{
Token token = parser->current;
String text;
string_from_slice(
&text,
parser->reader->value(parser->reader, token.pos.index, token.length)
);
int64_t value = strtoll(text.data, NULL, 10);
string_destroy(&text);
parser_step(parser);
return int_expr_construct(token.pos, value);
}
Expr parser_parse_char(Parser* parser)
{
Token token = parser->current;
StringSlice text
= parser->reader->value(parser->reader, token.pos.index, token.length);
char value = text.data[1];
if (value == '\\') {
switch (text.data[2]) {
case '0':
value = '\0';
break;
case 't':
value = '\t';
break;
case 'r':
value = '\r';
break;
case 'n':
value = '\n';
break;
default:
value = text.data[2];
break;
}
}
parser_step(parser);
return char_expr_construct(token.pos, value);
}
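// Re-slice the string token, strip the surrounding quotes and decode the
// supported escape sequences (\0, \t, \r, \n, and a pass-through default).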
Expr parser_parse_string(Parser* parser)
{
Token token = parser->current;
StringSlice text
= parser->reader->value(parser->reader, token.pos.index, token.length);
String value;
string_construct(&value);
for (size_t i = 1; i + 1 < text.length; ++i) {
if (text.data[i] == '\\') {
i += 1;
switch (text.data[i]) {
case '0':
string_append_char(&value, '\0');
break;
case 't':
string_append_char(&value, '\t');
break;
case 'r':
string_append_char(&value, '\r');
break;
case 'n':
string_append_char(&value, '\n');
break;
default:
string_append_char(&value, text.data[i]);
break;
}
} else {
string_append_char(&value, text.data[i]);
}
}
parser_step(parser);
return string_expr_construct(token.pos, value);
}
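// `(` expr* `)` — collect expressions until the matching `)`; a missing `)`
// is reported but still yields the list parsed so far.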
Expr parser_parse_list(Parser* parser)
{
Pos pos = parser->current.pos;
parser_step(parser);
ExprVec exprs;
expr_vec_construct(&exprs);
while (parser->current.type != TokenTypeEof
&& parser->current.type != TokenTypeRParen) {
expr_vec_push(&exprs, parser_parse_expr(parser));
}
if (parser->current.type != TokenTypeRParen) {
PARSER_ADD_ERROR(
parser,
pos,
"expected `]`, got `%s`",
token_type_value(parser->current.type)
)
} else {
parser_step(parser);
}
return list_expr_construct(pos, exprs);
}
Expr parser_parse_quote(Parser* parser)
{
Pos pos = parser->current.pos;
parser_step(parser);
ExprVec exprs;
expr_vec_construct(&exprs);
while (parser->current.type != TokenTypeEof
&& parser->current.type != TokenTypeRBracket) {
expr_vec_push(&exprs, parser_parse_expr(parser));
}
if (parser->current.type != TokenTypeRBracket) {
PARSER_ADD_ERROR(
parser,
pos,
"expected `]`, got `%s`",
token_type_value(parser->current.type)
)
} else {
parser_step(parser);
}
return quote_expr_construct(pos, exprs);
}
void parser_step(Parser* parser)
{
parser->current = lexer_next(&parser->lexer);
}

75
src/parser.h Normal file
View File

@@ -0,0 +1,75 @@
#ifndef PARSER_H
#define PARSER_H
#include "lexer.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
typedef enum {
ExprTypeError,
ExprTypeId,
ExprTypeInt,
ExprTypeChar,
ExprTypeString,
ExprTypeList,
ExprTypeQuote,
} ExprType;
typedef struct Expr Expr;
typedef struct {
Expr* exprs;
size_t length;
size_t capacity;
} ExprVec;
void expr_vec_construct(ExprVec* vec);
void expr_vec_destroy(ExprVec* vec);
void expr_vec_push(ExprVec* vec, Expr expr);
void expr_vec_stringify(const ExprVec* vec, String* acc);
struct Expr {
ExprType type;
Pos pos;
union {
String id_value;
int64_t int_value;
char char_value;
String string_value;
ExprVec list;
ExprVec quote;
};
};
Expr error_expr_construct(Pos pos);
Expr id_expr_construct(Pos pos, String value);
Expr int_expr_construct(Pos pos, int64_t value);
Expr char_expr_construct(Pos pos, char value);
Expr string_expr_construct(Pos pos, String value);
Expr list_expr_construct(Pos pos, ExprVec exprs);
Expr quote_expr_construct(Pos pos, ExprVec exprs);
void expr_destroy(Expr* expr);
void expr_stringify(const Expr* expr, String* acc);
typedef struct {
const CharReader* reader;
Lexer lexer;
ErrorCollector* errors;
Token current;
} Parser;
Parser
parser_create(const CharReader* reader, Lexer lexer, ErrorCollector* errors);
ExprVec parser_parse(Parser* parser);
Expr parser_parse_expr(Parser* parser);
Expr parser_parse_id(Parser* parser);
Expr parser_parse_int(Parser* parser);
Expr parser_parse_char(Parser* parser);
Expr parser_parse_string(Parser* parser);
Expr parser_parse_list(Parser* parser);
Expr parser_parse_quote(Parser* parser);
void parser_step(Parser* parser);
#endif

1
test.ol Normal file
View File

@@ -0,0 +1 @@
(fn sum (a b) (+ a b))