Compare commits

...

2 Commits

Author SHA1 Message Date
a307f3578a roll own parser 2024-07-19 01:46:30 +02:00
ff45df6bc1 remove flex/bison 2024-07-19 01:46:22 +02:00
13 changed files with 314 additions and 399 deletions

5
.clangd Normal file
View File

@ -0,0 +1,5 @@
CompileFlags:
Remove:
- '-fmodules-ts'
- '-fmodule-mapper=CMakeFiles/stela.dir/main.cpp.o.modmap'
- '-fdeps-format=p1689r5'

View File

@ -3,27 +3,20 @@ cmake_minimum_required(VERSION 3.29)
project(stela VERSION 1.0.0 LANGUAGES CXX) project(stela VERSION 1.0.0 LANGUAGES CXX)
find_package(BISON REQUIRED) set (CMAKE_CXX_STANDARD 20)
find_package(FLEX REQUIRED)
BISON_TARGET( set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
Parser set(CMAKE_BUILD_TYPE Debug)
parser.y
${CMAKE_CURRENT_BINARY_DIR}/parser.cpp if (MSVC)
) add_compile_options(/W4 /WX)
FLEX_TARGET( else()
Lexer add_compile_options(-Wall -Wextra -pedantic -Werror)
lexer.l endif()
${CMAKE_CURRENT_BINARY_DIR}/lexer.cpp
)
add_executable( add_executable(
stela stela
main.cpp main.cpp
interpreter.cpp parser.cpp
command.cpp
${BISON_Parser_OUTPUTS}
${FLEX_Lexer_OUTPUTS}
) )
include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -35,7 +35,7 @@ class DoorControl derives GenericDoorControl {
operation Unlock() { this.locked = false; } operation Unlock() { this.locked = false; }
state_machine { state_machine {
entry { initial {
transition DoorControl; transition DoorControl;
} }
DoorControl { DoorControl {

View File

@ -1,20 +0,0 @@
#include <iostream>
#include <sstream>
#include "command.hpp"
using namespace stela;
/// Renders the command as `name = [<name>], arguments = [<a>, <b>, ...]`.
///
/// @return The formatted text; the argument list is `[]` when there are no
///         arguments.
std::string Command::to_string() const
{
    std::stringstream out;
    out << "name = [" << this->m_name << "], ";
    out << "arguments = [";
    // Emit the separator before every element except the first. This avoids
    // the signed/unsigned index comparison and the `size() - 1` arithmetic on
    // an unsigned value.
    const char* separator = "";
    for (const auto argument : this->arguments) {
        out << separator << argument;
        separator = ", ";
    }
    out << "]";
    return out.str();
}

View File

@ -1,34 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include <cstdint>
#include <memory>
namespace stela {

/// A command name together with its list of unsigned integer arguments.
class Command {
public:
    /// Creates a command with an empty name and no arguments.
    Command() = default;

    /// Creates a command that has a name but no arguments.
    Command(std::string name)
        : m_name(std::move(name))
    {
    }

    /// Creates a command from a name and its argument list; both are taken by
    /// value and moved into the object.
    Command(std::string name, std::vector<uint64_t> arguments)
        : m_name(std::move(name)), arguments(std::move(arguments))
    {
    }

    /// Textual form of the command (defined out of line).
    std::string to_string() const;

    /// Returns a copy of the command name.
    std::string name() const { return m_name; }

private:
    std::string m_name;
    std::vector<uint64_t> arguments;
};

}

View File

@ -1,15 +0,0 @@
#include "interpreter.hpp"
#include "command.hpp"
#include <sstream>
using namespace stela;
std::string Interpreter::to_string() const
{
std::stringstream s;
s << "Interpreter: " << this->commands.size() << " commands received from command line.\n";
for(size_t i = 0; i < this->commands.size(); i++) {
s << " * " << this->commands[i].to_string() << '\n';
}
return s.str();
}

View File

@ -1,71 +0,0 @@
#pragma once
#include <vector>
#include "lexer.hpp"
#include "parser.hpp"
namespace stela {

class Command;

/// Driver object that owns the lexer, the parser and the list of commands
/// collected while parsing. It also keeps a running count of consumed input
/// characters (`m_location`).
class Interpreter {
public:
    /// Constructs the lexer and parser, both of which receive a reference to
    /// this interpreter.
    ///
    /// The initializer list follows the member declaration order
    /// (lexer, parser, commands, m_location). Members are always initialized
    /// in declaration order, so listing them differently is misleading and
    /// triggers -Wreorder.
    Interpreter()
        : lexer(*this)
        , parser(lexer, *this)
        , commands()
        , m_location(0)
    {}

    /// Resets the location counter and runs the parser.
    /// @return Whatever `Parser::parse()` returns.
    inline int parse()
    {
        this->m_location = 0;
        return this->parser.parse();
    }

    /// Resets the location counter and drops all stored commands.
    inline void clear()
    {
        this->m_location = 0;
        this->commands.clear();
    }

    /// Textual report of the stored commands (defined out of line).
    std::string to_string() const;

    /// Switch scanner input stream. Default is standard input (std::cin).
    /// It also discards the commands collected so far.
    inline void switch_input_stream(std::istream* is)
    {
        this->lexer.switch_streams(is, nullptr);
        this->commands.clear();
    }

    // Parser and Lexer call the private helpers below.
    friend class Parser;
    friend class Lexer;

private:
    /// Appends a copy of `command` to the stored command list.
    inline void add_command(const Command& command)
    {
        this->commands.push_back(command);
    }

    /// Advances the location counter by `location` characters and logs the
    /// new total to standard output.
    inline void increase_location(unsigned int location)
    {
        this->m_location += location;
        std::cout << "increase_location(): "
                  << location << ", total = " << this->m_location << '\n';
    }

    /// Current value of the location counter.
    inline unsigned int location() const
    {
        return this->m_location;
    }

private:
    // Declaration order matters: `parser` is constructed from `lexer`.
    Lexer lexer;
    Parser parser;
    std::vector<Command> commands;
    unsigned int m_location;
};

}

View File

@ -1,29 +0,0 @@
#pragma once
#if !defined(yyFlexLexerOnce)
#undef yyFlexLexer
#define yyFlexLexer stela_FlexLexer
#include <FlexLexer.h>
#endif
#undef YY_DECL
#define YY_DECL stela::Parser::symbol_type stela::Lexer::next_token()
#include "parser.hpp"
namespace stela {
class Interpreter;
/// Scanner class derived from `yyFlexLexer` (renamed to `stela_FlexLexer` by
/// the macro above). It holds a reference to the Interpreter it was created
/// with.
class Lexer : public yyFlexLexer {
public:
/// Stores a reference to `interpreter`; the referenced object must outlive
/// this lexer because only the reference is kept.
Lexer(Interpreter& interpreter) : interpreter(interpreter) {}
virtual ~Lexer() = default;
/// Returns the next token as a parser symbol. The signature matches the
/// YY_DECL macro defined above — presumably so the flex-generated scanner
/// body becomes the definition of this member; keep the two in sync.
virtual stela::Parser::symbol_type next_token();
private:
// Non-owning reference to the interpreter passed to the constructor.
Interpreter& interpreter;
};
}

75
lexer.l
View File

@ -1,75 +0,0 @@
%{
#include <iostream>
#include <cstdlib>
#include "lexer.hpp"
#include "interpreter.hpp"
#include "parser.hpp"
#include "location.hh"
// Original yyterminate() macro returns int. Since we're using Bison 3 variants
// as tokens, we must redefine it to change type from `int` to `Parser::semantic_type`
#define yyterminate() stela::Parser::make_END(stela::location());
// This will track current scanner location.
// Action is called when length of the token is known.
#define YY_USER_ACTION this->interpreter.increase_location(yyleng);
// !!!WARNING!!!
// Location API is used, but the location is not initialized, 'cause I'm lazy. When making
// a token with make_{something} method you can pass detailed token location. Current location
// is accessible with m_driver.location() method. All puzzle elements are there - just
// pass location value in every action code block below. I'm going to waste more time writing
// this excuse than putting this boilerplate below...
//
// Location class can be found in location.hh and position.hh files. It's just a bit too much
// boilerplate for this small example. Bummer.
%}
%option nodefault noyywrap noyylineno c++ yyclass="Lexer" prefix="stela_"
%%
[a-z]+ {
std::cout << "Scanner: identifier [" << yytext << "]\n";
return stela::Parser::make_STRING(yytext, stela::location());
}
\( {
std::cout << "Scanner: '('\n";
return stela::Parser::make_LEFTPAR(stela::location());
}
\) {
std::cout << "Scanner: ')'\n";
return stela::Parser::make_RIGHTPAR(stela::location());
}
; {
std::cout << "Scanner: ';'\n";
return stela::Parser::make_SEMICOLON(stela::location());
}
, {
std::cout << "Scanner: ','\n";
return stela::Parser::make_COMMA(stela::location());
}
[\n\t ] {
//cout << "Scanner: whitechar (ignored)" << endl;
}
[1-9][0-9]* {
std::cout << "Scanner: decimal number: " << yytext << '\n';
uint64_t number = strtoull(yytext, 0, 10);
return stela::Parser::make_NUMBER(number, stela::location());
}
. {
std::cout << "Scanner: unknown character [" << yytext << "]\n";
}
<<EOF>> { return yyterminate(); }
%%

View File

@ -1,12 +1,3 @@
#include "interpreter.hpp"
#include "lexer.hpp"
#include "parser.hpp"
#include <iostream> #include <iostream>
int main() int main() { }
{
stela::Interpreter interpreter;
int result = interpreter.parse();
std::cout << "Prase completed with result: " << result << '\n';
return result;
}

154
parser.cpp Normal file
View File

@ -0,0 +1,154 @@
#include "parser.hpp"
#include <cstdlib>
using namespace stela;
/// True when `ch` lies in the inclusive range [`begin`, `end`].
static inline auto in_range(char ch, char begin, char end) -> bool
{
    return ch >= begin && ch <= end;
}

/// True for the characters skipped between tokens: space, tab and newline.
static inline auto whitespace_char(char ch) -> bool
{
    return ch == ' ' or ch == '\t' or ch == '\n';
}

/// True when `ch` may begin an identifier: an ASCII letter or underscore.
static inline auto id_start_char(char ch) -> bool
{
    return in_range(ch, 'a', 'z') or in_range(ch, 'A', 'Z') or ch == '_';
}

/// True when `ch` may continue an identifier: an ASCII letter, a decimal
/// digit or underscore.
static inline auto id_char(char ch) -> bool
{
    // The digit range must be '0'..'9'. With an upper bound of '1', the
    // digits 2-9 would terminate an identifier such as `door2`.
    return id_start_char(ch) or in_range(ch, '0', '9');
}
/// Scans and returns the next token from the input.
///
/// Whitespace, `#` line comments and `//` line comments are skipped.
/// Identifiers found in `keyword_map` are returned as their keyword token.
/// Other identifiers are appended to `symbol_values` and returned as
/// `TokenType::Id`, with `id` holding the index of the text. Integer literals
/// are appended to `int_values` and returned as `TokenType::Int`, with `id`
/// holding the index of the value. Unrecognized input is recorded through
/// `error_token` and returned as `TokenType::Error`.
///
/// @return The next token; `TokenType::Eof` once `done()` reports end of input.
auto Lexer::next() -> Token
{
    // Skipping whitespace and comments loops instead of recursing, so a long
    // run of blank or comment lines cannot exhaust the stack.
    while (true) {
        auto pos = this->pos();
        if (done()) {
            return token(TokenType::Eof, pos);
        }
        char ch = current();

        if (whitespace_char(ch)) {
            step();
            continue;
        }

        if (id_start_char(ch)) {
            std::string value;
            value.push_back(ch);
            step();
            while (not done() and id_char(current())) {
                value.push_back(current());
                step();
            }
            // One lookup serves both the membership test and the fetch.
            auto keyword = this->keyword_map.find(value);
            if (keyword != this->keyword_map.end()) {
                return token(keyword->second, pos);
            }
            size_t id = this->symbol_values.size();
            this->symbol_values.push_back(value);
            return Token { TokenType::Id, pos, id };
        }

        if (in_range(ch, '1', '9')) {
            std::string value;
            value.push_back(ch);
            step();
            // Test the character under the cursor. Testing the first digit
            // `ch` would always succeed and swallow the rest of the input.
            while (not done() and in_range(current(), '0', '9')) {
                value.push_back(current());
                step();
            }
            int64_t int_value = std::strtoll(value.c_str(), nullptr, 10);
            // The id indexes int_values, the table the value is stored in.
            size_t id = this->int_values.size();
            this->int_values.push_back(int_value);
            return Token { TokenType::Int, pos, id };
        }

        if (ch == '0') {
            // A leading zero is a literal on its own; following digits start
            // a new token.
            step();
            int64_t int_value = 0;
            size_t id = this->int_values.size();
            this->int_values.push_back(int_value);
            return Token { TokenType::Int, pos, id };
        }

        if (ch == '"') {
            // TODO string
            // Until implemented, '"' falls through to the
            // "unrecognized character" error at the bottom.
        }

        if (ch == '#') {
            while (not done() and current() != '\n') {
                step();
            }
            continue;
        }

        if (ch == '/') {
            step();
            if (not done() and current() == '/') {
                while (not done() and current() != '\n') {
                    step();
                }
                continue;
            }
            return error_token(pos, "'/' not implemented");
        }

        if (ch == '-') {
            step();
            if (not done() and current() == '>') {
                step();
                // "->" is reported as TokenType::MinusLt.
                return token(TokenType::MinusLt, pos);
            }
            return token(TokenType::Minus, pos);
        }

        if (ch == ':') {
            step();
            if (not done() and current() == ':') {
                step();
                return token(TokenType::ColonColon, pos);
            }
            return token(TokenType::Colon, pos);
        }

        switch (ch) {
            case '(':
                return single_token(TokenType::LParen, pos);
            case ')':
                return single_token(TokenType::RParen, pos);
            case '{':
                return single_token(TokenType::LBrace, pos);
            case '}':
                return single_token(TokenType::RBrace, pos);
            case '[':
                return single_token(TokenType::LBracket, pos);
            case ']':
                return single_token(TokenType::RBracket, pos);
            case '.':
                return single_token(TokenType::Dot, pos);
            case ',':
                return single_token(TokenType::Comma, pos);
            case ';':
                return single_token(TokenType::Semicolon, pos);
        }

        // Consume the offending character so the next call makes progress.
        step();
        return error_token(pos, "unrecognized character");
    }
}
auto Lexer::populate_keyword_map()
{
this->keyword_map["error"] = TokenType::Error;
this->keyword_map["eof"] = TokenType::Eof;
this->keyword_map["if"] = TokenType::If;
this->keyword_map["else"] = TokenType::Else;
this->keyword_map["return"] = TokenType::Return;
this->keyword_map["public"] = TokenType::Public;
this->keyword_map["private"] = TokenType::Private;
this->keyword_map["class"] = TokenType::Class;
this->keyword_map["derivable"] = TokenType::Derivable;
this->keyword_map["derives"] = TokenType::Derives;
this->keyword_map["enumeration"] = TokenType::Enumeration;
this->keyword_map["associate"] = TokenType::Associate;
this->keyword_map["attribute"] = TokenType::Attribute;
this->keyword_map["operation"] = TokenType::Operation;
this->keyword_map["state_machine"] = TokenType::StateMachine;
this->keyword_map["transition"] = TokenType::Transition;
this->keyword_map["initial"] = TokenType::Initial;
this->keyword_map["final"] = TokenType::Final;
this->keyword_map["entry"] = TokenType::Entry;
this->keyword_map["exit"] = TokenType::Exit;
}

142
parser.hpp Normal file
View File

@ -0,0 +1,142 @@
#pragma once
#include <cstdint>
#include <fstream>
#include <string>
#include <unordered_map>
#include <vector>
namespace stela {
/// Location of a character in the input, as tracked by Lexer.
struct Pos {
// Number of characters consumed before this one; Lexer starts it at 0 and
// adds 1 for each character it steps over.
size_t index;
// Line number; Lexer starts it at 1 and adds 1 after stepping over '\n'.
int64_t line;
// Column number; Lexer starts it at 1, adds 1 per character and resets it
// to 1 after stepping over '\n'.
int64_t col;
};
/// A diagnostic: where it occurred and what went wrong.
/// Lexer::error_token appends one of these to the error list it was given.
struct Error {
Pos pos;
std::string message;
};
/// Kinds of token. Enumerator order determines the underlying values, so
/// insert new kinds with care.
enum class TokenType {
// Returned by Lexer::error_token after the error has been recorded.
Error,
// Returned by Lexer::next once the lexer reports end of input.
Eof,
// Keyword tokens; the spellings are registered in
// Lexer::populate_keyword_map (which also registers "error" and "eof").
If,
Else,
Return,
Public,
Private,
Class,
Derivable,
Derives,
Enumeration,
Associate,
Attribute,
Operation,
StateMachine,
Transition,
Initial,
Final,
Entry,
Exit,
// Punctuation: ( ) { } [ ] . , ;
LParen,
RParen,
LBrace,
RBrace,
LBracket,
RBracket,
Dot,
Comma,
Semicolon,
// ":" and "::"
Colon,
ColonColon,
// "-" and "->" (the latter is named MinusLt in this enum).
Minus,
MinusLt,
// NOTE(review): no lexer rule visible in this file produces Equal —
// presumably meant for '='; confirm.
Equal,
// Identifier that is not a keyword.
Id,
// Presumably an integer literal — confirm against Lexer::next.
Int,
};
/// One lexed token.
struct Token {
TokenType type;
// Position captured at the start of the token.
Pos pos;
// Lexer::token() sets this to 0. For Id tokens Lexer::next stores the index
// of the identifier text in the lexer's symbol_values table.
uint64_t id;
};
/// Hand-written lexer that turns the characters of an input file into Tokens.
///
/// The lexer keeps non-owning pointers to the stream and to the error list
/// passed to its constructor; both must outlive the lexer. One character of
/// lookahead is held in `current_char`.
class Lexer {
public:
    /// Binds the lexer to `file` and `errors` and reads the first character.
    ///
    /// @param file   Input stream to read characters from.
    /// @param errors List that lexing errors are appended to.
    Lexer(std::ifstream& file, std::vector<Error>& errors)
        : file(&file)
        , current_char(file.get())
        , errors(&errors)
    { }

    /// Returns the next token (defined in parser.cpp).
    auto next() -> Token;

private:
    /// Value `std::ifstream::get()` returns at end of input. Taken from the
    /// stream's traits so the header does not rely on the `EOF` macro being
    /// pulled in transitively.
    static constexpr int eof_char = std::ifstream::traits_type::eof();

    /// Consumes the current character: updates line/column/index and reads
    /// the next character. At end of input it only latches `eof_reached`.
    inline auto step() -> void
    {
        if (this->current_char == eof_char) {
            this->eof_reached = true;
            return;
        }
        if (this->current_char == '\n') {
            this->line += 1;
            this->col = 1;
        } else {
            this->col += 1;
        }
        this->current_char = this->file->get();
        this->index += 1;
    }

    /// Consumes one character and returns a token of `type` located at `pos`.
    inline auto single_token(TokenType type, Pos pos) -> Token
    {
        step();
        return token(type, pos);
    }

    /// Records `message` at `pos` in the error list and returns an Error
    /// token. Const because only the pointed-to list is modified.
    inline auto error_token(Pos pos, std::string message) const -> Token
    {
        this->errors->push_back(Error { pos, message });
        return token(TokenType::Error, pos);
    }

    /// Builds a token of `type` at `pos` with `id` set to 0.
    inline auto token(TokenType type, Pos pos) const -> Token
    {
        return Token { type, pos, 0 };
    }

    /// Snapshot of the current input position.
    inline auto pos() const -> Pos
    {
        return Pos {
            .index = this->index,
            .line = this->line,
            .col = this->col,
        };
    }

    /// True once the input is exhausted.
    ///
    /// The lookahead already holding the end-of-input sentinel counts as
    /// done. Waiting for the extra `step()` that latches `eof_reached` would
    /// let callers see the sentinel as if it were a real character, and
    /// report it as an unrecognized character.
    inline auto done() const -> bool
    {
        return this->eof_reached or this->current_char == eof_char;
    }

    /// The lookahead character, narrowed to `char`. Only meaningful while
    /// `done()` is false.
    inline auto current() const -> char
    {
        return static_cast<char>(this->current_char);
    }

    /// Fills `keyword_map` (defined in parser.cpp).
    /// NOTE(review): no member shown in this class calls this, so
    /// `keyword_map` stays empty unless something else invokes it. It also
    /// cannot be called from an inline member in this header, because its
    /// deduced return type is unknown until the definition is seen.
    auto populate_keyword_map();

    bool eof_reached = false;
    size_t index = 0;
    int64_t line = 1;
    int64_t col = 1;
    // Declaration order matters: `file` and `current_char` are initialized
    // in this order by the constructor.
    std::ifstream* file;
    int current_char;
    std::unordered_map<std::string, TokenType> keyword_map {};
    std::vector<std::string> symbol_values {};
    std::vector<int64_t> int_values {};
    std::vector<Error>* errors;
};
}

126
parser.y
View File

@ -1,126 +0,0 @@
%skeleton "lalr1.cc"
%require "3.8.2"
%defines
%define api.parser.class { Parser }
%define api.token.constructor
%define api.value.type variant
%define parse.assert
%define api.namespace { stela }
%code requires {
#include <iostream>
#include <string>
#include <vector>
#include <cstdint>
#include "command.hpp"
namespace stela {
class Lexer;
class Interpreter;
}
}
%code top {
#include <iostream>
#include <cstdint>
#include "lexer.hpp"
#include "parser.hpp"
#include "interpreter.hpp"
#include "location.hh"
// Token source for the generated parser: forwards to Lexer::next_token().
// The two parameters mirror the %lex-param declarations below; `interpreter`
// is not used in the body.
static stela::Parser::symbol_type yylex(stela::Lexer& lexer, stela::Interpreter& interpreter)
{
return lexer.next_token();
}
using namespace stela;
}
%lex-param { stela::Lexer& lexer }
%lex-param { stela::Interpreter& interpreter }
%parse-param { stela::Lexer& lexer }
%parse-param { stela::Interpreter& interpreter }
%locations
%define parse.trace
%define parse.error verbose
%define api.token.prefix {TOKEN_}
%token END 0 "end of file"
%token <std::string> STRING "string";
%token <uint64_t> NUMBER "number";
%token LEFTPAR "leftpar";
%token RIGHTPAR "rightpar";
%token SEMICOLON "semicolon";
%token COMMA "comma";
%type <stela::Command> command;
%type <std::vector<uint64_t>> arguments;
%start program
%%
program: {
std::cout << "*** RUN ***\n";
std::cout << "Type function with list of parmeters. Parameter list can be empty\n"
<< "or contain positive integers only. Examples: \n"
<< " * function()\n"
<< " * function(1,2,3)\n"
<< "Terminate listing with ; to see parsed AST\n"
<< "Terminate parser with Ctrl-D\n";
std::cout << '\n' << "prompt> ";
interpreter.clear();
}
| program command {
const Command &cmd = $2;
std::cout << "command parsed, updating AST\n";
interpreter.add_command(cmd);
std::cout << '\n' << "prompt> ";
}
| program SEMICOLON {
std::cout << "*** STOP RUN ***\n";
std::cout << interpreter.to_string() << '\n';
}
;
command : STRING LEFTPAR RIGHTPAR {
std::string &id = $1;
std::cout << "ID: " << id << '\n';
$$ = Command(id);
}
| STRING LEFTPAR arguments RIGHTPAR {
std::string &id = $1;
const std::vector<uint64_t> &args = $3;
std::cout << "function: " << id << ", " << args.size() << '\n';
$$ = Command(id, args);
}
;
arguments : NUMBER {
uint64_t number = $1;
$$ = std::vector<uint64_t>();
$$.push_back(number);
std::cout << "first argument: " << number << '\n';
}
| arguments COMMA NUMBER {
uint64_t number = $3;
std::vector<uint64_t> &args = $1;
args.push_back(number);
$$ = args;
std::cout << "next argument: " << number << ", arg list size = " << args.size() << '\n';
}
;
%%
void stela::Parser::error(const location &loc , const std::string &message) {
// Location should be initialized inside scanner action, but is not in this example.
// Let's grab location directly from driver class.
// std::cout << "Error: " << message << '\n' << "Location: " << loc << '\n';
std::cout << "Error: " << message << '\n' << "Error location: " << interpreter.location() << '\n';
}
// vim: ts=4 sw=4 et