This commit is contained in:
Simon 2023-02-11 18:59:19 +00:00
commit 85adad9831
12 changed files with 691 additions and 0 deletions

36
' Normal file
View File

@ -0,0 +1,36 @@
#ifndef LEXER_H
#define LEXER_H
#include <stddef.h>
/* Kinds of token. Enumerators are numbered consecutively starting at 0. */
typedef enum {
    /* Names and literals. */
    Id = 0,
    Int,
    Float,
    String,

    /* Keywords. */
    If,
    Else,
    While,
    Break,

    /* Brackets. */
    LParen,
    RParen,
    LBrace,
    RBrace,

    /* Operators. */
    Plus,
    Minus,
} TokenType;
/*
 * A single token.
 * NOTE(review): index/length presumably give the token's span in the source
 * text and line/column its position; nothing in this header confirms that.
 */
typedef struct {
    TokenType type;
    size_t index;
    size_t length;
    int line;
    int column;
} Token;
/* Lexer is only forward-declared here; this header does not expose its fields. */
typedef struct Lexer Lexer;
/* Initializes *lexer to read text_length characters starting at text. */
void lexer_create(Lexer* lexer, const char* text, size_t text_length);
#endif

187
:w Normal file
View File

@ -0,0 +1,187 @@
#include "lexer.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
/* Lexer state: the input text plus a cursor into it. */
struct Lexer {
    const char* text; /* input being lexed; the pointer is stored, not copied */
    size_t index;     /* offset of the current character in text */
    size_t length;    /* number of characters in text */
    int line;         /* line of the current character, starting at 1 */
    int column;       /* column of the current character, starting at 1 */
};
/*
 * Forward declarations of the helpers defined further down in this file, so
 * that they can call each other regardless of definition order.
 */
Token lexer_skip_whitespace(Lexer* lexer);
Token lexer_make_int(Lexer* lexer);
Token lexer_make_id(Lexer* lexer);
bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value);
Token lexer_make_static(Lexer* lexer);
Token make_single_char_token(Lexer* lexer, TokenType type);
Token make_slash_token(Lexer* lexer);
Token lexer_make_invalid_char(Lexer* lexer);
/* Cursor primitives: snapshot the position, build a token, test/read/advance. */
Position lexer_position(const Lexer* lexer);
Token lexer_token(const Lexer* lexer, TokenType type, Position begin);
bool lexer_done(const Lexer* lexer);
char lexer_current(const Lexer* lexer);
void lexer_step(Lexer* lexer);
/*
 * Initializes *lexer to read text_length characters of text, with the cursor
 * at offset 0 on line 1, column 1. The text pointer is stored, not copied.
 */
void lexer_create(Lexer* lexer, const char* text, size_t text_length)
{
    lexer->text = text;
    lexer->index = 0;
    lexer->length = text_length;
    lexer->line = 1;
    lexer->column = 1;
}
/*
 * Returns the next token of the input.
 *
 * At end of input a zero-length TokenTypeEof token is returned. Otherwise the
 * current character selects the lexing routine: whitespace is skipped, digits
 * start an integer, letters and '_' start an identifier or keyword, and
 * everything else goes to lexer_make_static.
 */
Token lexer_next(Lexer* lexer)
{
    // Test for end of input before reading: text[index] is out of bounds once
    // index has reached length.
    if (lexer_done(lexer))
        return lexer_token(lexer, TokenTypeEof, lexer_position(lexer));

    // <ctype.h> functions need a value representable as unsigned char; a plain
    // char may be negative for bytes >= 0x80.
    unsigned char c = (unsigned char)lexer_current(lexer);
    if (isspace(c))
        return lexer_skip_whitespace(lexer);
    if (isdigit(c))
        return lexer_make_int(lexer);
    if (isalpha(c) || c == '_')
        return lexer_make_id(lexer);
    return lexer_make_static(lexer);
}
/*
 * Consumes a run of whitespace and returns the token that follows it.
 * The first character is stepped over unconditionally; lexer_next only calls
 * this when the current character is whitespace.
 */
Token lexer_skip_whitespace(Lexer* lexer)
{
    lexer_step(lexer);
    // Cast before isspace: passing a negative plain char is undefined behavior.
    while (!lexer_done(lexer) && isspace((unsigned char)lexer_current(lexer)))
        lexer_step(lexer);
    return lexer_next(lexer);
}
/*
 * Lexes a run of decimal digits starting at the current character and returns
 * it as a TokenTypeInt token. The first character is consumed unconditionally.
 */
Token lexer_make_int(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    // Cast before isdigit: passing a negative plain char is undefined behavior.
    while (!lexer_done(lexer) && isdigit((unsigned char)lexer_current(lexer)))
        lexer_step(lexer);
    return lexer_token(lexer, TokenTypeInt, begin);
}
/*
 * Lexes an identifier: the current character followed by any run of letters,
 * digits and '_'. If the spelling is exactly "if", "else", "while" or "break"
 * the matching keyword token is returned, otherwise TokenTypeId.
 */
Token lexer_make_id(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    // isalnum is isalpha-or-isdigit; the cast avoids undefined behavior for
    // negative plain char values.
    while (!lexer_done(lexer)
        && (isalnum((unsigned char)lexer_current(lexer)) || lexer_current(lexer) == '_'))
        lexer_step(lexer);

    if (lexer_span_matches(lexer, begin, "if"))
        return lexer_token(lexer, TokenTypeIf, begin);
    if (lexer_span_matches(lexer, begin, "else"))
        return lexer_token(lexer, TokenTypeElse, begin);
    if (lexer_span_matches(lexer, begin, "while"))
        return lexer_token(lexer, TokenTypeWhile, begin);
    if (lexer_span_matches(lexer, begin, "break"))
        return lexer_token(lexer, TokenTypeBreak, begin);
    return lexer_token(lexer, TokenTypeId, begin);
}
/*
 * Returns true when the text between begin and the lexer's current index is
 * exactly the string value (same length, same characters).
 */
bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value)
{
    const size_t span_length = lexer->index - begin.index;
    const char* span = lexer->text + begin.index;
    return span_length == strlen(value) && strncmp(span, value, span_length) == 0;
}
/*
 * Lexes punctuation and operators, chosen by the current character.
 * '/' is handled by make_slash_token; a character with no case here yields an
 * invalid-character token.
 */
Token lexer_make_static(Lexer* lexer)
{
    TokenType type;

    switch (lexer_current(lexer)) {
    case '/':
        return make_slash_token(lexer);
    case '(':
        type = TokenTypeLParen;
        break;
    case ')':
        type = TokenTypeRParen;
        break;
    case '{':
        type = TokenTypeLBrace;
        break;
    case '}':
        type = TokenTypeRBrace;
        break;
    case ';':
        type = TokenTypeSemicolon;
        break;
    case '+':
        type = TokenTypePlus;
        break;
    case '-':
        type = TokenTypeMinus;
        break;
    case '*':
        type = TokenTypeAsterisk;
        break;
    case '%':
        type = TokenTypePercent;
        break;
    default:
        return lexer_make_invalid_char(lexer);
    }

    // Every remaining case is a token exactly one character long.
    return make_single_char_token(lexer, type);
}
/* Consumes one character and returns it as a token of the given type. */
Token make_single_char_token(Lexer* lexer, TokenType type)
{
    const Position start = lexer_position(lexer);
    lexer_step(lexer);
    return lexer_token(lexer, type, start);
}
/*
 * Forward declarations for the comment-skipping helpers.
 * NOTE(review): no definition of either function is visible in this file.
 * skip_singleline_comment is called by make_slash_token; skip_multiline_comment
 * is not called anywhere in the visible code.
 */
Token skip_singleline_comment(Lexer* lexer);
Token skip_multiline_comment(Lexer* lexer);
/*
 * Lexes a token starting with '/': "//" starts a single-line comment, which is
 * skipped; anything else yields a one-character TokenTypeSlash token.
 */
Token make_slash_token(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    // The '/' may have been the last character of the input, so check for the
    // end before looking at the next character.
    if (!lexer_done(lexer) && lexer_current(lexer) == '/')
        return skip_singleline_comment(lexer);
    return lexer_token(lexer, TokenTypeSlash, begin);
}
/*
 * Consumes one character that no other rule accepts and returns it as a
 * TokenTypeInvalidChar token.
 */
Token lexer_make_invalid_char(Lexer* lexer)
{
    // Same steps as any other one-character token: remember the position,
    // advance, build the token.
    return make_single_char_token(lexer, TokenTypeInvalidChar);
}
Position lexer_position(const Lexer* lexer)
{
return (Position) {
.index = lexer->index,
.line = lexer->line,
.column = lexer->column,
};
}
/*
 * Builds a token of the given type that starts at begin and ends just before
 * the lexer's current index.
 */
Token lexer_token(const Lexer* lexer, TokenType type, Position begin)
{
    const size_t consumed = lexer->index - begin.index;
    Token token = { .type = type, .position = begin, .length = consumed };
    return token;
}
/* Returns true once the cursor has reached or passed the end of the text. */
bool lexer_done(const Lexer* lexer)
{
    return !(lexer->index < lexer->length);
}
/*
 * Returns the character under the cursor. No bounds check is performed here,
 * so callers should test lexer_done first.
 */
char lexer_current(const Lexer* lexer)
{
    const char* cursor = lexer->text + lexer->index;
    return *cursor;
}
/*
 * Advances the cursor by one character, keeping line and column in sync.
 * Stepping over '\n' moves to column 1 of the next line. Does nothing at end
 * of input.
 */
void lexer_step(Lexer* lexer)
{
    if (lexer_done(lexer))
        return;

    const bool at_newline = lexer_current(lexer) == '\n';
    lexer->index += 1;
    if (at_newline) {
        lexer->line += 1;
        lexer->column = 1;
        return;
    }
    lexer->column += 1;
}

9
LICENSE Normal file
View File

@ -0,0 +1,9 @@
MIT License
Copyright (c) 2023 Simon
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

19
Makefile Normal file
View File

@ -0,0 +1,19 @@
# Compiler used for compiling and linking; override with e.g. `make CC=clang`.
CC = gcc
CFLAGS = -std=c17 -Wall -Wextra -Wpedantic -Wconversion
HEADERS = $(wildcard *.h)

# `all` and `clean` are commands, not files; without .PHONY a file with either
# name would stop the target from running.
.PHONY: all clean

all: compile_flags.txt wacc

wacc: main.o lexer.o
	$(CC) $^ -o $@

# Every object depends on every header, so any header change rebuilds all.
%.o: %.c $(HEADERS)
	$(CC) $< -c -o $@ $(CFLAGS)

clean:
	rm -rf *.o wacc

# One flag per line. Depends on the Makefile so that a CFLAGS change
# regenerates the file. printf avoids GNU-only sed escapes (\s, \+, \n).
compile_flags.txt: Makefile
	printf '%s\n' -xc $(CFLAGS) > $@

2
README.md Normal file
View File

@ -0,0 +1,2 @@
# wacc

6
compile_flags.txt Normal file
View File

@ -0,0 +1,6 @@
-xc
-std=c17
-Wall
-Wextra
-Wpedantic
-Wconversion

353
lexer.c Normal file
View File

@ -0,0 +1,353 @@
#include "lexer.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
/* Lexer state: the input text plus a cursor into it. */
struct Lexer {
    const char* text; /* input being lexed; the pointer is stored, not copied */
    size_t index;     /* offset of the current character in text */
    size_t length;    /* number of characters in text */
    int line;         /* line of the current character, starting at 1 */
    int column;       /* column of the current character, starting at 1 */
};
/*
 * Forward declarations of the helpers defined further down in this file, so
 * that they can call each other regardless of definition order.
 * NOTE(review): lexer.h declares only lexer_create and lexer_next, so these
 * helpers look like candidates for internal linkage (static) — confirm that no
 * other translation unit calls them.
 */
Token lexer_skip_whitespace(Lexer* lexer);
Token lexer_make_int_or_float(Lexer* lexer);
Token lexer_make_id(Lexer* lexer);
bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value);
Token lexer_make_static_token(Lexer* lexer);
Token lexer_make_int_hex_or_binary(Lexer* lexer);
Token lexer_make_char(Lexer* lexer);
Token lexer_make_string(Lexer* lexer);
void lexer_skip_literal_char(Lexer* lexer);
Token lexer_make_single_char_token(Lexer* lexer, TokenType type);
Token lexer_make_slash_token(Lexer* lexer);
Token lexer_skip_singleline_comment(Lexer* lexer);
Token lexer_make_single_or_double_char_token(
Lexer* lexer, TokenType single_type, char second_char, TokenType double_type);
Token lexer_skip_multiline_comment(Lexer* lexer);
Token lexer_make_invalid_char(Lexer* lexer);
/* Cursor primitives: snapshot the position, build a token, test/read/advance. */
Position lexer_position(const Lexer* lexer);
Token lexer_token(const Lexer* lexer, TokenType type, Position begin);
bool lexer_done(const Lexer* lexer);
char lexer_current(const Lexer* lexer);
void lexer_step(Lexer* lexer);
/*
 * Initializes *lexer to read text_length characters of text, with the cursor
 * at offset 0 on line 1, column 1. The text pointer is stored, not copied.
 */
void lexer_create(Lexer* lexer, const char* text, size_t text_length)
{
    lexer->text = text;
    lexer->index = 0;
    lexer->length = text_length;
    lexer->line = 1;
    lexer->column = 1;
}
/*
 * Returns the next token of the input.
 *
 * At end of input a zero-length TokenTypeEof token is returned. Otherwise the
 * current character selects the lexing routine: whitespace is skipped, '1'..'9'
 * start a decimal int or float, letters and '_' start an identifier or
 * keyword, and everything else (including '0', quotes and punctuation) goes to
 * lexer_make_static_token.
 */
Token lexer_next(Lexer* lexer)
{
    // Test for end of input before reading: text[index] is out of bounds once
    // index has reached length.
    if (lexer_done(lexer))
        return lexer_token(lexer, TokenTypeEof, lexer_position(lexer));

    // <ctype.h> functions need a value representable as unsigned char; a plain
    // char may be negative for bytes >= 0x80.
    unsigned char c = (unsigned char)lexer_current(lexer);
    if (isspace(c))
        return lexer_skip_whitespace(lexer);
    if (c >= '1' && c <= '9')
        return lexer_make_int_or_float(lexer);
    if (isalpha(c) || c == '_')
        return lexer_make_id(lexer);
    return lexer_make_static_token(lexer);
}
/*
 * Consumes a run of whitespace and returns the token that follows it.
 * The first character is stepped over unconditionally; lexer_next only calls
 * this when the current character is whitespace.
 */
Token lexer_skip_whitespace(Lexer* lexer)
{
    lexer_step(lexer);
    // Cast before isspace: passing a negative plain char is undefined behavior.
    while (!lexer_done(lexer) && isspace((unsigned char)lexer_current(lexer)))
        lexer_step(lexer);
    return lexer_next(lexer);
}
/*
 * Lexes a decimal number starting at the current character.
 *
 * A run of digits gives TokenTypeInt. If the digits are followed by '.', the
 * dot and any digits after it are consumed too and the result is
 * TokenTypeFloat (so "1." with no fraction digits is still a float).
 */
Token lexer_make_int_or_float(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    while (!lexer_done(lexer) && isdigit((unsigned char)lexer_current(lexer)))
        lexer_step(lexer);

    if (lexer_done(lexer) || lexer_current(lexer) != '.')
        return lexer_token(lexer, TokenTypeInt, begin);

    // Consume the '.' itself. Without this step the loop below stops at once
    // (a dot is not a digit) and the fraction is left out of the token.
    lexer_step(lexer);
    while (!lexer_done(lexer) && isdigit((unsigned char)lexer_current(lexer)))
        lexer_step(lexer);
    return lexer_token(lexer, TokenTypeFloat, begin);
}
/*
 * Lexes an identifier: the current character followed by any run of letters,
 * digits and '_'. If the spelling is exactly one of the keywords in the table
 * below the matching keyword token is returned, otherwise TokenTypeId.
 */
Token lexer_make_id(Lexer* lexer)
{
    static const struct {
        const char* spelling;
        TokenType type;
    } keywords[] = {
        { "if", TokenTypeIf },
        { "else", TokenTypeElse },
        { "while", TokenTypeWhile },
        { "break", TokenTypeBreak },
    };

    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    // isalnum is isalpha-or-isdigit; the cast avoids undefined behavior for
    // negative plain char values.
    while (!lexer_done(lexer)
        && (isalnum((unsigned char)lexer_current(lexer)) || lexer_current(lexer) == '_'))
        lexer_step(lexer);

    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; ++i) {
        if (lexer_span_matches(lexer, begin, keywords[i].spelling))
            return lexer_token(lexer, keywords[i].type, begin);
    }
    return lexer_token(lexer, TokenTypeId, begin);
}
/*
 * Returns true when the text between begin and the lexer's current index is
 * exactly the string value (same length, same characters).
 */
bool lexer_span_matches(const Lexer* lexer, Position begin, const char* value)
{
    const size_t span_length = lexer->index - begin.index;
    const char* span = lexer->text + begin.index;
    return span_length == strlen(value) && strncmp(span, value, span_length) == 0;
}
/*
 * Lexes everything that is not whitespace, a number starting with '1'..'9' or
 * an identifier, chosen by the current character. A character with no rule
 * here yields an invalid-character token.
 */
Token lexer_make_static_token(Lexer* lexer)
{
    const char first = lexer_current(lexer);

    // Characters that have their own lexing routine.
    if (first == '0')
        return lexer_make_int_hex_or_binary(lexer);
    if (first == '\'')
        return lexer_make_char(lexer);
    if (first == '"')
        return lexer_make_string(lexer);
    if (first == '/')
        return lexer_make_slash_token(lexer);

    // Punctuation that is always a one-character token.
    switch (first) {
    case '(':
        return lexer_make_single_char_token(lexer, TokenTypeLParen);
    case ')':
        return lexer_make_single_char_token(lexer, TokenTypeRParen);
    case '{':
        return lexer_make_single_char_token(lexer, TokenTypeLBrace);
    case '}':
        return lexer_make_single_char_token(lexer, TokenTypeRBrace);
    case '[':
        return lexer_make_single_char_token(lexer, TokenTypeLBracket);
    case ']':
        return lexer_make_single_char_token(lexer, TokenTypeRBracket);
    case '.':
        return lexer_make_single_char_token(lexer, TokenTypeDot);
    case ',':
        return lexer_make_single_char_token(lexer, TokenTypeComma);
    case ':':
        return lexer_make_single_char_token(lexer, TokenTypeColon);
    case ';':
        return lexer_make_single_char_token(lexer, TokenTypeSemicolon);
    default:
        break;
    }

    // Operators that come in a plain form and a form followed by '='.
    TokenType plain;
    TokenType with_equal;
    switch (first) {
    case '+':
        plain = TokenTypePlus;
        with_equal = TokenTypePlusEqual;
        break;
    case '-':
        plain = TokenTypeMinus;
        with_equal = TokenTypeMinusEqual;
        break;
    case '*':
        plain = TokenTypeAsterisk;
        with_equal = TokenTypeAsteriskEqual;
        break;
    case '%':
        plain = TokenTypePercent;
        with_equal = TokenTypePercentEqual;
        break;
    case '=':
        plain = TokenTypeEqual;
        with_equal = TokenTypeDoubleEqual;
        break;
    case '!':
        plain = TokenTypeExclamation;
        with_equal = TokenTypeExclamationEqual;
        break;
    case '<':
        plain = TokenTypeLt;
        with_equal = TokenTypeLtEqual;
        break;
    case '>':
        plain = TokenTypeGt;
        with_equal = TokenTypeGtEqual;
        break;
    default:
        return lexer_make_invalid_char(lexer);
    }
    return lexer_make_single_or_double_char_token(lexer, plain, '=', with_equal);
}
/*
 * Lexes a literal that starts with '0'.
 *
 * "0x"/"0X" followed by hex digits gives TokenTypeHex, "0b"/"0B" followed by
 * binary digits gives TokenTypeBinary, and a '0' followed by anything else
 * gives a one-character TokenTypeInt. A prefix with no digits after it (for
 * example "0x") is still returned as a Hex/Binary token of length 2.
 */
Token lexer_make_int_hex_or_binary(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer); // the leading '0'
    if (lexer_done(lexer))
        return lexer_token(lexer, TokenTypeInt, begin);

    const char prefix = lexer_current(lexer);
    if (prefix == 'x' || prefix == 'X') {
        // Step over the prefix letter, then take hex digits only. isxdigit
        // accepts exactly 0-9, a-f and A-F.
        lexer_step(lexer);
        while (!lexer_done(lexer) && isxdigit((unsigned char)lexer_current(lexer)))
            lexer_step(lexer);
        return lexer_token(lexer, TokenTypeHex, begin);
    }
    if (prefix == 'b' || prefix == 'B') {
        // Step over the prefix letter, then take binary digits only.
        lexer_step(lexer);
        while (!lexer_done(lexer) && (lexer_current(lexer) == '0' || lexer_current(lexer) == '1'))
            lexer_step(lexer);
        return lexer_token(lexer, TokenTypeBinary, begin);
    }
    return lexer_token(lexer, TokenTypeInt, begin);
}
/*
 * Lexes a character literal: an opening quote, one literal character (possibly
 * an escape sequence) and a closing quote. Returns TokenTypeChar on success
 * and TokenTypeMalformedChar when the input ends early or the closing quote is
 * missing.
 */
Token lexer_make_char(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer); // opening quote
    if (lexer_done(lexer))
        return lexer_token(lexer, TokenTypeMalformedChar, begin);

    lexer_skip_literal_char(lexer);

    // Malformed if the input ended OR the next character is not the closing
    // quote. Checking done first also keeps the read in bounds.
    if (lexer_done(lexer) || lexer_current(lexer) != '\'')
        return lexer_token(lexer, TokenTypeMalformedChar, begin);

    lexer_step(lexer); // closing quote
    return lexer_token(lexer, TokenTypeChar, begin);
}
/*
 * Lexes a string literal: an opening double quote, any number of literal
 * characters (escape sequences included) and a closing double quote. Returns
 * TokenTypeString on success and TokenTypeMalformedString when the input ends
 * before the closing quote.
 */
Token lexer_make_string(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer); // opening quote
    while (!lexer_done(lexer) && lexer_current(lexer) != '"')
        lexer_skip_literal_char(lexer);

    // The loop ends either at the closing quote or at end of input; only the
    // latter is an error, and it must be tested without reading a character.
    if (lexer_done(lexer))
        return lexer_token(lexer, TokenTypeMalformedString, begin);

    lexer_step(lexer); // closing quote
    return lexer_token(lexer, TokenTypeString, begin);
}
/*
 * Consumes one character of a char or string literal.
 *
 * An ordinary character is one step. A backslash starts an escape sequence:
 *   \0            - the backslash and the zero
 *   \1..\9 digits - the backslash and the whole run of decimal digits
 *   \x or \X hex  - the backslash, the letter and the whole run of hex digits
 *   \<other>      - the backslash and that one character
 * A backslash at the very end of the input is consumed on its own.
 */
void lexer_skip_literal_char(Lexer* lexer)
{
    if (lexer_done(lexer))
        return;
    if (lexer_current(lexer) != '\\') {
        lexer_step(lexer);
        return;
    }

    lexer_step(lexer); // the backslash
    if (lexer_done(lexer))
        return;

    const char kind = lexer_current(lexer);
    lexer_step(lexer);
    // The casts avoid undefined behavior when a literal contains bytes that
    // are negative as plain char.
    if (kind >= '1' && kind <= '9') {
        while (!lexer_done(lexer) && isdigit((unsigned char)lexer_current(lexer)))
            lexer_step(lexer);
    } else if (kind == 'x' || kind == 'X') {
        while (!lexer_done(lexer) && isxdigit((unsigned char)lexer_current(lexer)))
            lexer_step(lexer);
    }
}
/* Consumes one character and returns it as a token of the given type. */
Token lexer_make_single_char_token(Lexer* lexer, TokenType type)
{
    const Position start = lexer_position(lexer);
    lexer_step(lexer);
    return lexer_token(lexer, type, start);
}
/*
 * Lexes an operator that is either one character or that character followed
 * by second_char.
 *
 * The current character is always consumed. If the next character equals
 * second_char it is consumed too and a double_type token (length 2) is
 * returned; otherwise a single_type token (length 1) is returned.
 */
Token lexer_make_single_or_double_char_token(
    Lexer* lexer, TokenType single_type, char second_char, TokenType double_type)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    if (!lexer_done(lexer) && lexer_current(lexer) == second_char) {
        lexer_step(lexer);
        return lexer_token(lexer, double_type, begin);
    }
    return lexer_token(lexer, single_type, begin);
}
/*
 * Lexes a token starting with '/':
 *   "//"  - single-line comment, skipped
 *   "/*"  - multi-line comment, skipped
 *   "/="  - TokenTypeSlashEqual
 *   "/"   - TokenTypeSlash
 */
Token lexer_make_slash_token(Lexer* lexer)
{
    Position begin = lexer_position(lexer);
    lexer_step(lexer);
    // The '/' may have been the last character of the input, so check for the
    // end before looking at the next character.
    if (lexer_done(lexer))
        return lexer_token(lexer, TokenTypeSlash, begin);

    switch (lexer_current(lexer)) {
    case '/':
        return lexer_skip_singleline_comment(lexer);
    case '*':
        return lexer_skip_multiline_comment(lexer);
    case '=':
        lexer_step(lexer);
        return lexer_token(lexer, TokenTypeSlashEqual, begin);
    default:
        return lexer_token(lexer, TokenTypeSlash, begin);
    }
}
/*
 * Skips the rest of a single-line comment, including the newline that ends it
 * if there is one, and returns the token that follows.
 */
Token lexer_skip_singleline_comment(Lexer* lexer)
{
    lexer_step(lexer); // first character after the slash that led here
    for (;;) {
        if (lexer_done(lexer))
            break;
        const bool at_line_end = lexer_current(lexer) == '\n';
        lexer_step(lexer);
        if (at_line_end)
            break;
    }
    return lexer_next(lexer);
}
/*
 * Skips a multi-line comment and returns the token that follows it.
 *
 * Called with the cursor on the '*' of the opening slash-star. Comments nest:
 * every further slash-star increases the depth and every star-slash decreases
 * it, and the comment ends when the depth returns to zero. If the input ends
 * first, a zero-length TokenTypeMalformedMultilineComment token positioned at
 * the end of the input is returned.
 */
Token lexer_skip_multiline_comment(Lexer* lexer)
{
    lexer_step(lexer); // the '*' of the opening delimiter
    int depth = 1;
    // Stop as soon as the outermost comment closes, so that the code after
    // the comment is still lexed.
    while (!lexer_done(lexer) && depth > 0) {
        const char c = lexer_current(lexer);
        lexer_step(lexer);
        // Only consume the second character when it really completes a
        // delimiter; otherwise it must be looked at again as a possible first
        // character (for example the '*' in "/*/").
        if (c == '/' && !lexer_done(lexer) && lexer_current(lexer) == '*') {
            lexer_step(lexer);
            depth += 1;
        } else if (c == '*' && !lexer_done(lexer) && lexer_current(lexer) == '/') {
            lexer_step(lexer);
            depth -= 1;
        }
    }
    if (depth != 0)
        return lexer_token(lexer, TokenTypeMalformedMultilineComment, lexer_position(lexer));
    return lexer_next(lexer);
}
/*
 * Consumes one character that no other rule accepts and returns it as a
 * TokenTypeInvalidChar token.
 */
Token lexer_make_invalid_char(Lexer* lexer)
{
    // Same steps as any other one-character token: remember the position,
    // advance, build the token.
    return lexer_make_single_char_token(lexer, TokenTypeInvalidChar);
}
Position lexer_position(const Lexer* lexer)
{
return (Position) {
.index = lexer->index,
.line = lexer->line,
.column = lexer->column,
};
}
/*
 * Builds a token of the given type that starts at begin and ends just before
 * the lexer's current index.
 */
Token lexer_token(const Lexer* lexer, TokenType type, Position begin)
{
    const size_t consumed = lexer->index - begin.index;
    Token token = { .type = type, .position = begin, .length = consumed };
    return token;
}
/* Returns true once the cursor has reached or passed the end of the text. */
bool lexer_done(const Lexer* lexer)
{
    return !(lexer->index < lexer->length);
}
/*
 * Returns the character under the cursor. No bounds check is performed here,
 * so callers should test lexer_done first.
 */
char lexer_current(const Lexer* lexer)
{
    const char* cursor = lexer->text + lexer->index;
    return *cursor;
}
/*
 * Advances the cursor by one character, keeping line and column in sync.
 * Stepping over '\n' moves to column 1 of the next line. Does nothing at end
 * of input.
 */
void lexer_step(Lexer* lexer)
{
    if (lexer_done(lexer))
        return;

    const bool at_newline = lexer_current(lexer) == '\n';
    lexer->index += 1;
    if (at_newline) {
        lexer->line += 1;
        lexer->column = 1;
        return;
    }
    lexer->column += 1;
}

76
lexer.h Normal file
View File

@ -0,0 +1,76 @@
#ifndef LEXER_H
#define LEXER_H
#include <stddef.h>
/* Kinds of token. Enumerators are numbered consecutively starting at 0. */
typedef enum {
    /* End of input and error tokens. */
    TokenTypeEof = 0,
    TokenTypeInvalidChar,
    TokenTypeMalformedMultilineComment,
    TokenTypeMalformedChar,
    TokenTypeMalformedString,

    /* Identifiers and literals. */
    TokenTypeId,
    TokenTypeInt,
    TokenTypeHex,
    TokenTypeBinary,
    TokenTypeFloat,
    TokenTypeChar,
    TokenTypeString,

    /* Keywords. */
    TokenTypeIf,
    TokenTypeElse,
    TokenTypeWhile,
    TokenTypeBreak,

    /* Brackets and punctuation. */
    TokenTypeLParen,
    TokenTypeRParen,
    TokenTypeLBrace,
    TokenTypeRBrace,
    TokenTypeLBracket,
    TokenTypeRBracket,
    TokenTypeDot,
    TokenTypeComma,
    TokenTypeColon,
    TokenTypeSemicolon,

    /* Two-character operators ending in '='. */
    TokenTypePlusEqual,
    TokenTypeMinusEqual,
    TokenTypeAsteriskEqual,
    TokenTypeSlashEqual,
    TokenTypePercentEqual,
    TokenTypeDoubleEqual,
    TokenTypeExclamationEqual,
    TokenTypeLtEqual,
    TokenTypeGtEqual,

    /* One-character operators. */
    TokenTypePlus,
    TokenTypeMinus,
    TokenTypeAsterisk,
    TokenTypeSlash,
    TokenTypePercent,
    TokenTypeEqual,
    TokenTypeExclamation,
    TokenTypeLt,
    TokenTypeGt,
} TokenType;
/* A location in the lexer's input text. */
typedef struct {
    size_t index; /* offset into the text */
    int line;     /* line number; the lexer starts counting at 1 */
    int column;   /* column number; the lexer starts counting at 1 */
} Position;
/*
 * A token produced by the lexer: its kind, the position of its first
 * character, and its length in characters.
 */
typedef struct {
TokenType type;
Position position;
size_t length;
} Token;
/*
 * Lexer is only forward-declared here; its fields are defined in lexer.c.
 * NOTE(review): lexer_create takes a caller-provided Lexer*, but code that
 * includes only this header sees an incomplete type and cannot declare or
 * allocate a Lexer. Confirm how callers are meant to obtain the storage.
 */
typedef struct Lexer Lexer;
/*
 * Initializes *lexer to read text_length characters starting at text. The
 * text pointer is stored, not copied.
 */
void lexer_create(Lexer* lexer, const char* text, size_t text_length);
/* Returns the next token; a TokenTypeEof token is returned at end of input. */
Token lexer_next(Lexer* lexer);
#endif

BIN
lexer.o Normal file

Binary file not shown.

3
main.c Normal file
View File

@ -0,0 +1,3 @@
#include <stdio.h>
/* Program entry point: writes "hello world" and a newline to standard output. */
int main(void)
{
    fputs("hello world\n", stdout);
    return 0;
}

BIN
main.o Normal file

Binary file not shown.

BIN
wacc Executable file

Binary file not shown.