diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..567609b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+build/
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..d546928
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,38 @@
+
+cmake_minimum_required(VERSION 3.29)
+
+project(stela VERSION 1.0.0 LANGUAGES CXX)
+
+find_package(BISON REQUIRED)
+find_package(FLEX REQUIRED)
+BISON_TARGET(
+    Parser
+    parser.y
+    ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
+)
+FLEX_TARGET(
+    Lexer
+    lexer.l
+    ${CMAKE_CURRENT_BINARY_DIR}/lexer.cpp
+)
+# The generated lexer includes the Bison-generated parser.hpp, so the parser
+# must be generated before lexer.cpp is compiled.
+ADD_FLEX_BISON_DEPENDENCY(Lexer Parser)
+
+add_executable(
+    stela
+    main.cpp
+    interpreter.cpp
+    command.cpp
+    ${BISON_Parser_OUTPUTS}
+    ${FLEX_Lexer_OUTPUTS}
+)
+
+# Generated headers live in the binary dir, hand-written ones in the source dir.
+target_include_directories(
+    stela
+    PRIVATE
+    ${CMAKE_CURRENT_BINARY_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
diff --git a/command.cpp b/command.cpp
new file mode 100644
index 0000000..2029358
--- /dev/null
+++ b/command.cpp
@@ -0,0 +1,22 @@
+#include <sstream>
+#include <string>
+#include "command.hpp"
+
+using namespace stela;
+
+/// Render the command as "name = [<name>], arguments = [<a0>, <a1>, ...]".
+std::string Command::to_string() const
+{
+    std::stringstream s;
+    s << "name = [" << this->m_name << "], ";
+    s << "arguments = [";
+    // size_t index avoids the signed/unsigned comparison against size().
+    for(std::size_t i = 0; i < this->arguments.size(); i++) {
+        if(i > 0) {
+            s << ", ";
+        }
+        s << this->arguments[i];
+    }
+    s << "]";
+    return s.str();
+}
diff --git a/command.hpp b/command.hpp
new file mode 100644
index 0000000..54d175e
--- /dev/null
+++ b/command.hpp
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace stela {
+
+/// A parsed command: a name plus an optional list of numeric arguments.
+class Command {
+public:
+    /// Build a command with a name and its argument list.
+    Command(std::string name, std::vector<uint64_t> arguments)
+        : m_name(std::move(name))
+        , arguments(std::move(arguments))
+    {}
+
+    /// Build a command without arguments.
+    Command(std::string name)
+        : m_name(std::move(name))
+    {}
+
+    Command() = default;
+
+    /// Human-readable dump of the name and arguments.
+    std::string to_string() const;
+
+    /// Name of the command.
+    std::string name() const
+    {
+        return this->m_name;
+    }
+private:
+    std::string m_name;
+    std::vector<uint64_t> arguments;
+};
+
+}
diff --git a/install_nvim.sh b/install_nvim.sh
new file mode 100644
index 0000000..84782a3
--- /dev/null
+++ b/install_nvim.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+set -xe
+
+mkdir -p ~/.config/nvim/syntax
+mkdir -p ~/.config/nvim/ftdetect
+
+cp stela.vim ~/.config/nvim/syntax/
+echo "au BufRead,BufNewFile *.stela set filetype=stela" > ~/.config/nvim/ftdetect/stela.vim
+
diff --git a/interpreter.cpp b/interpreter.cpp
new file mode 100644
index 0000000..3989c4f
--- /dev/null
+++ b/interpreter.cpp
@@ -0,0 +1,16 @@
+#include "interpreter.hpp"
+#include "command.hpp"
+#include <sstream>
+
+using namespace stela;
+
+/// Summarise the collected commands, one per line.
+std::string Interpreter::to_string() const
+{
+    std::stringstream s;
+    s << "Interpreter: " << this->commands.size() << " commands received from command line.\n";
+    for(const Command& command : this->commands) {
+        s << " * " << command.to_string() << '\n';
+    }
+    return s.str();
+}
diff --git a/interpreter.hpp b/interpreter.hpp
new file mode 100644
index 0000000..ecc9f70
--- /dev/null
+++ b/interpreter.hpp
@@ -0,0 +1,82 @@
+#pragma once
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "command.hpp"
+#include "lexer.hpp"
+#include "parser.hpp"
+
+namespace stela {
+
+/// Owns the lexer/parser pair and collects the commands they produce.
+class Interpreter {
+public:
+    // Initializers are listed in member declaration order, which is the
+    // order they actually run in: lexer must exist before parser binds to it.
+    Interpreter()
+        : lexer(*this)
+        , parser(lexer, *this)
+        , commands()
+        , m_location(0)
+    {}
+
+    /// Reset the location counter and run the parser.
+    /// Returns the value of Parser::parse().
+    inline int parse()
+    {
+        this->m_location = 0;
+        return this->parser.parse();
+    }
+
+    /// Drop all collected commands and reset the location counter.
+    inline void clear()
+    {
+        this->m_location = 0;
+        this->commands.clear();
+    }
+
+    std::string to_string() const;
+
+    /// Switch scanner input stream. Default is standard input (std::cin).
+    /// It will also reset AST and the location counter.
+    inline void switch_input_stream(std::istream* is)
+    {
+        this->lexer.switch_streams(is, nullptr);
+        this->clear();
+    }
+
+    friend class Parser;
+    friend class Lexer;
+
+private:
+    /// Append a parsed command (called from parser actions).
+    inline void add_command(const Command& command)
+    {
+        this->commands.push_back(command);
+    }
+
+    /// Advance the location counter by the length of the matched token
+    /// (called from the lexer's YY_USER_ACTION).
+    inline void increase_location(unsigned int location)
+    {
+        this->m_location += location;
+        std::cout << "increase_location(): "
+                  << location << ", total = " << this->m_location << '\n';
+    }
+
+    /// Number of characters scanned since the last reset.
+    inline unsigned int location() const
+    {
+        return this->m_location;
+    }
+
+private:
+    Lexer lexer;
+    Parser parser;
+    std::vector<Command> commands;
+    unsigned int m_location;
+};
+
+}
diff --git a/lexer.hpp b/lexer.hpp
new file mode 100644
index 0000000..4a673ed
--- /dev/null
+++ b/lexer.hpp
@@ -0,0 +1,33 @@
+#pragma once
+
+// Pull in flex's C++ scanner base class once, under the name stela_FlexLexer
+// (matches `%option prefix="stela_"` in lexer.l).
+#if !defined(yyFlexLexerOnce)
+#undef yyFlexLexer
+#define yyFlexLexer stela_FlexLexer
+#include <FlexLexer.h>
+#endif
+
+// Make the generated scanner function a Lexer member returning Bison symbols.
+#undef YY_DECL
+#define YY_DECL stela::Parser::symbol_type stela::Lexer::next_token()
+
+#include "parser.hpp"
+
+namespace stela {
+
+class Interpreter;
+
+/// Flex scanner that reports token lengths to the owning Interpreter.
+class Lexer : public yyFlexLexer {
+public:
+    Lexer(Interpreter& interpreter) : interpreter(interpreter) {}
+    virtual ~Lexer() = default;
+    virtual stela::Parser::symbol_type next_token();
+
+private:
+    Interpreter& interpreter;
+};
+
+}
+
diff --git a/lexer.l b/lexer.l
new file mode 100644
index 0000000..7458e26
--- /dev/null
+++ b/lexer.l
@@ -0,0 +1,81 @@
+
+%{
+    #include <cstdint>
+    #include <cstdlib>
+    #include <iostream>
+    #include "lexer.hpp"
+    #include "interpreter.hpp"
+    #include "parser.hpp"
+    #include "location.hh"
+
+    // Original yyterminate() macro returns int. Since we're using Bison 3 variants
+    // as tokens, we must redefine it to produce a `Parser::symbol_type` instead.
+    // No trailing semicolon: the macro is used as an expression in `return yyterminate();`.
+    #define yyterminate() stela::Parser::make_END(stela::location())
+
+    // This will track current scanner location.
+    // Action is called when length of the token is known.
+    #define YY_USER_ACTION this->interpreter.increase_location(yyleng);
+
+    // !!!WARNING!!!
+    // Location API is used, but the location is not initialized, 'cause I'm lazy. When making
+    // a token with make_{something} method you can pass detailed token location. Current location
+    // is accessible with interpreter.location() method. All puzzle elements are there - just
+    // pass location value in every action code block below. I'm going to waste more time writing
+    // this excuse than putting this boilerplate below...
+    //
+    // Location class can be found in location.hh and position.hh files. It's just a bit too much
+    // boilerplate for this small example. Bummer.
+%}
+
+%option nodefault
+%option noyywrap
+%option c++
+%option yyclass="Lexer"
+%option prefix="stela_"
+
+%%
+
+[a-z]+ {
+    std::cout << "Scanner: identifier [" << yytext << "]\n";
+    return stela::Parser::make_STRING(yytext, stela::location( /* put location data here if you want */ ));
+}
+
+\( {
+    std::cout << "Scanner: '('\n";
+    return stela::Parser::make_LEFTPAR(stela::location());
+}
+
+\) {
+    std::cout << "Scanner: ')'\n";
+    return stela::Parser::make_RIGHTPAR(stela::location());
+}
+
+; {
+    std::cout << "Scanner: ';'\n";
+    return stela::Parser::make_SEMICOLON(stela::location());
+}
+
+, {
+    std::cout << "Scanner: ','\n";
+    return stela::Parser::make_COMMA(stela::location());
+}
+
+[\n\t ] {
+    //cout << "Scanner: whitechar (ignored)" << endl;
+}
+
+[1-9][0-9]* {
+    std::cout << "Scanner: decimal number: " << yytext << '\n';
+    const uint64_t number = std::strtoull(yytext, nullptr, 10);
+    return stela::Parser::make_NUMBER(number, stela::location());
+}
+
+. {
+    std::cout << "Scanner: unknown character [" << yytext << "]\n";
+}
+
+<<EOF>> { return yyterminate(); }
+
+
+%%
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..89ce4a9
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,14 @@
+#include <iostream>
+#include "lexer.hpp"
+#include "parser.hpp"
+#include "interpreter.hpp"
+
+/// Run the parser once and use its result as the process exit code.
+int main()
+{
+    stela::Interpreter interpreter;
+    int result = interpreter.parse();
+    std::cout << "Parse completed with result: " << result << '\n';
+    return result;
+}
+
diff --git a/parser.y b/parser.y
new file mode 100644
index 0000000..2be43ef
--- /dev/null
+++ b/parser.y
@@ -0,0 +1,126 @@
+%skeleton "lalr1.cc"
+%require "3.8"
+%defines
+%define api.parser.class { Parser }
+%define api.token.constructor
+%define api.value.type variant
+%define parse.assert
+%define api.namespace { stela }
+
+%code requires {
+    #include <cstdint>
+    #include <iostream>
+    #include <string>
+    #include <vector>
+    #include "command.hpp"
+
+    namespace stela {
+        class Lexer;
+        class Interpreter;
+    }
+}
+
+%code top {
+    #include <iostream>
+    #include <utility>
+    #include "lexer.hpp"
+    #include "parser.hpp"
+    #include "interpreter.hpp"
+    #include "location.hh"
+
+    // Bison calls yylex(lexer, interpreter) because of the two %lex-param lines; only the lexer is used.
+    static stela::Parser::symbol_type yylex(stela::Lexer& lexer, stela::Interpreter& /* interpreter */)
+    {
+        return lexer.next_token();
+    }
+
+    using namespace stela;
+}
+
+%lex-param { stela::Lexer& lexer }
+%lex-param { stela::Interpreter& interpreter }
+%parse-param { stela::Lexer& lexer }
+%parse-param { stela::Interpreter& interpreter }
+%locations
+%define parse.trace
+%define parse.error verbose
+%define api.token.prefix {TOKEN_}
+
+%token END 0 "end of file"
+%token <std::string> STRING "string";
+%token <uint64_t> NUMBER "number";
+%token LEFTPAR "leftpar";
+%token RIGHTPAR "rightpar";
+%token SEMICOLON "semicolon";
+%token COMMA "comma";
+
+%type <stela::Command> command;
+%type <std::vector<uint64_t>> arguments;
+
+%start program
+
+%%
+
+program: {
+        std::cout << "*** RUN ***\n";
+        std::cout << "Type function with list of parameters. Parameter list can be empty\n"
+                  << "or contain positive integers only. Examples: \n"
+                  << " * function()\n"
+                  << " * function(1,2,3)\n"
+                  << "Terminate listing with ; to see parsed AST\n"
+                  << "Terminate parser with Ctrl-D\n";
+
+        std::cout << '\n' << "prompt> ";
+        interpreter.clear();
+    }
+    | program command {
+        const Command &cmd = $2;
+        std::cout << "command parsed, updating AST\n";
+        interpreter.add_command(cmd);
+        std::cout << '\n' << "prompt> ";
+    }
+    | program SEMICOLON {
+        std::cout << "*** STOP RUN ***\n";
+        std::cout << interpreter.to_string() << '\n';
+    }
+    ;
+
+
+command : STRING LEFTPAR RIGHTPAR {
+        std::string &id = $1;
+        std::cout << "ID: " << id << '\n';
+        $$ = Command(std::move(id));
+    }
+    | STRING LEFTPAR arguments RIGHTPAR {
+        std::string &id = $1;
+        std::vector<uint64_t> &args = $3;
+        std::cout << "function: " << id << ", " << args.size() << '\n';
+        $$ = Command(std::move(id), std::move(args));
+    }
+    ;
+
+arguments : NUMBER {
+        uint64_t number = $1;
+        $$ = std::vector<uint64_t>();
+        $$.push_back(number);
+        std::cout << "first argument: " << number << '\n';
+    }
+    | arguments COMMA NUMBER {
+        // Move the accumulated list instead of copying it for every new element.
+        uint64_t number = $3;
+        $$ = std::move($1);
+        $$.push_back(number);
+        std::cout << "next argument: " << number << ", arg list size = " << $$.size() << '\n';
+    }
+    ;
+
+%%
+
+void stela::Parser::error(const location &loc , const std::string &message) {
+    // Location should be initialized inside scanner action, but is not in this example.
+    // Let's grab location directly from driver class.
+    // std::cout << "Error: " << message << '\n' << "Location: " << loc << '\n';
+    std::cout << "Error: " << message << '\n' << "Error location: " << interpreter.location() << '\n';
+}
+
+// vim: ts=4 sw=4 et