diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8eb1803
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*\#*
+*~*
+build
+.cache
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..d8f71dc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
+# Convenience wrappers around Meson; 'install' runs the tests first.
+.PHONY: build tests install
+
+build:
+	meson setup build
+	meson compile -C build
+
+tests: build
+	build/zarn-tests
+
+install: tests
+	meson install -C build
diff --git a/doc/grammar.bnf b/doc/grammar.bnf
new file mode 100644
index 0000000..448143e
--- /dev/null
+++ b/doc/grammar.bnf
@@ -0,0 +1,3 @@
+MODULE ::= EXPR*
+EXPR ::=
+int
diff --git a/meson.build b/meson.build
new file mode 100644
index 0000000..3dd5a81
--- /dev/null
+++ b/meson.build
@@ -0,0 +1,43 @@
+project('zarn',
+  'cpp',
+  version: '0.0.0',
+  default_options: [
+    'prefix=/usr',
+    'warning_level=3',
+    'cpp_std=c++17'
+  ])
+
+extra_libdir = get_option('prefix') / get_option('libdir') / 'zarn'
+
+zarn_lib = shared_library('zarn',
+  sources: [
+    'src/Module.cpp',
+    'src/Loc.cpp',
+    'src/Node.cpp',
+    'src/Logger.cpp',
+    'src/Lexer.cpp',
+    'src/Parser.cpp'
+  ],
+  install: true)
+
+zarn_dep = declare_dependency(link_with: zarn_lib)
+
+executable('zarn',
+  sources: [
+    'src/main.cpp'
+  ],
+  dependencies: [
+    zarn_dep
+  ],
+  install: true)
+
+executable('zarn-tests',
+  sources: [
+    'tests/main.cpp',
+    'tests/Lexer.cpp',
+    'tests/Parser.cpp',
+  ],
+  dependencies: [
+    zarn_dep,
+    dependency('catch2')
+  ])
diff --git a/src/Lexer.cpp b/src/Lexer.cpp
new file mode 100644
index 0000000..51557c1
--- /dev/null
+++ b/src/Lexer.cpp
@@ -0,0 +1,151 @@
+#include "Lexer.hpp"
+
+namespace zn
+{
+  // Binds the lexer to LOGGER and to the location LOC of the first
+  // character, and registers the available token scanners.
+  /*explicit*/ Lexer::Lexer(Logger& logger, Loc const& loc)
+    : m_logger { logger }
+    , m_loc { loc }
+  {
+    m_scanners.push_back(std::bind(&Lexer::scan_int, this));
+  }
+
+  /*virtual*/ Lexer::~Lexer()
+  {
+  }
+
+  // Replaces the text being lexed and rewinds the cursor to its start.
+  void Lexer::scan(std::string const& source)
+  {
+    m_source = source;
+    m_cursor = 0;
+  }
+
+  // Returns the next token, or nullptr once the source is exhausted.
+  // Whitespace and ';' line comments are skipped. When several scanners
+  // match, the longest match wins. Text that no scanner accepts is
+  // reported as a lex_error through the logger.
+  std::shared_ptr<Node> Lexer::try_next()
+  {
+    std::optional<ScanInfo> info;
+
+    skip_spaces();
+
+    while (m_cursor < m_source.size()
+           && m_source[m_cursor] == ';')
+    {
+      while (m_cursor < m_source.size()
+             && m_source[m_cursor] != '\n')
+      {
+        m_cursor++;
+      }
+
+      skip_spaces();
+    }
+
+    for (auto const& scanner: m_scanners)
+    {
+      auto myinfo = scanner();
+
+      if (myinfo &&
+          (info == std::nullopt
+           || info->cursor < myinfo->cursor))
+      {
+        info = myinfo;
+      }
+    }
+
+    if (info == std::nullopt
+        && m_cursor < m_source.size())
+    {
+      std::string tok;
+
+      // <cctype> functions take an int that must be representable
+      // as unsigned char; a plain (possibly negative) char is
+      // undefined behaviour for non-ASCII bytes.
+      while (m_cursor < m_source.size()
+             && !std::isspace(
+                   static_cast<unsigned char>(m_source[m_cursor])))
+      {
+        tok += m_source[m_cursor];
+        m_cursor++;
+      }
+
+      m_logger.log<lex_error>(LOG_ERROR, m_loc,
+                              "unexpected token '" + tok + "'");
+    }
+
+    if (info)
+    {
+      m_cursor = info->cursor;
+      return std::make_shared<Node>(info->type, info->repr, m_loc);
+    }
+
+    return nullptr;
+  }
+
+  // Lexes the remaining source and returns every token in order.
+  std::vector<std::shared_ptr<Node>> Lexer::all()
+  {
+    std::vector<std::shared_ptr<Node>> result;
+    std::shared_ptr<Node> n;
+
+    while ( (n = try_next()) )
+    {
+      result.push_back(n);
+    }
+
+    return result;
+  }
+
+  // Advances past whitespace, bumping the line number on each '\n'.
+  void Lexer::skip_spaces()
+  {
+    while (m_cursor < m_source.size()
+           && std::isspace(
+                 static_cast<unsigned char>(m_source[m_cursor])))
+    {
+      if (m_source[m_cursor] == '\n')
+      {
+        m_loc = Loc {m_loc.file_path(), m_loc.line() + 1};
+      }
+
+      m_cursor++;
+    }
+  }
+
+  // Tries to read an optionally negative decimal integer at the cursor
+  // without consuming it. Returns std::nullopt when there is no digit.
+  std::optional<ScanInfo> Lexer::scan_int()
+  {
+    size_t cursor = m_cursor;
+    std::string repr;
+
+    if (cursor < m_source.size()
+        && m_source[cursor] == '-')
+    {
+      repr += '-';
+      cursor++;
+    }
+
+    while (cursor < m_source.size()
+           && std::isdigit(
+                 static_cast<unsigned char>(m_source[cursor])))
+    {
+      repr += m_source[cursor];
+      cursor++;
+    }
+
+    if (repr.empty() || repr.back() == '-')
+    {
+      return std::nullopt;
+    }
+
+    return ScanInfo {
+      cursor,
+      NODE_INT,
+      repr
+    };
+  }
+}
diff --git a/src/Lexer.hpp b/src/Lexer.hpp
new file mode 100644
index 0000000..5214c4b
--- /dev/null
+++ b/src/Lexer.hpp
@@ -0,0 +1,49 @@
+#ifndef zn_LEXER_HPP
+#define zn_LEXER_HPP
+
+#include "common.hpp"
+
+#include "Logger.hpp"
+#include "Node.hpp"
+#include "Loc.hpp"
+
+namespace zn
+{
+  ZN_ERROR(lex_error);
+
+  // Candidate token: end offset in the source, node type and text.
+  struct ScanInfo {
+    size_t cursor;
+    NodeType type;
+    std::string repr;
+  };
+
+  // A scanner inspects the source at the cursor without consuming it.
+  using scanner_t = std::function<std::optional<ScanInfo>()>;
+
+  // Splits source text into leaf nodes (tokens), tracking line numbers
+  // for diagnostics.
+  class Lexer
+  {
+  public:
+    explicit Lexer(Logger& logger, Loc const& loc);
+    virtual ~Lexer();
+
+    void scan(std::string const& source);
+    std::shared_ptr<Node> try_next();
+    std::vector<std::shared_ptr<Node>> all();
+
+  private:
+    Logger& m_logger;
+    std::string m_source;
+    // Zero until scan() is called, so try_next() is safe before it.
+    size_t m_cursor { 0 };
+    std::vector<scanner_t> m_scanners;
+    Loc m_loc;
+
+    void skip_spaces();
+    std::optional<ScanInfo> scan_int();
+  };
+}
+
+#endif
diff --git a/src/Loc.cpp b/src/Loc.cpp
new file mode 100644
index 0000000..f865306
--- /dev/null
+++ b/src/Loc.cpp
@@ -0,0 +1,14 @@
+#include "Loc.hpp"
+
+namespace zn
+{
+  /*explicit*/ Loc::Loc(std::filesystem::path file_path, int line)
+    : m_file_path { file_path }
+    , m_line { line }
+  {
+  }
+
+  /*virtual*/ Loc::~Loc()
+  {
+  }
+}
diff --git a/src/Loc.hpp b/src/Loc.hpp
new file mode 100644
index 0000000..4934800
--- /dev/null
+++ b/src/Loc.hpp
@@ -0,0 +1,25 @@
+#ifndef zn_LOC_HPP
+#define zn_LOC_HPP
+
+#include "common.hpp"
+
+namespace zn
+{
+  // Source position: a file path and a line number (1 by default).
+  class Loc
+  {
+  public:
+    explicit Loc(std::filesystem::path file_path, int line=1);
+    virtual ~Loc();
+
+    std::filesystem::path file_path() const { return m_file_path; }
+    int line() const { return m_line; }
+
+  private:
+    std::filesystem::path m_file_path;
+    int m_line;
+
+  };
+}
+
+#endif
diff --git a/src/Logger.cpp b/src/Logger.cpp
new file mode 100644
index 0000000..8e931dc
--- /dev/null
+++ b/src/Logger.cpp
@@ -0,0 +1,12 @@
+#include "Logger.hpp"
+
+namespace zn
+{
+  /*explicit*/ Logger::Logger()
+  {
+  }
+
+  /*virtual*/ Logger::~Logger()
+  {
+  }
+}
diff --git a/src/Logger.hpp b/src/Logger.hpp
new file mode 100644
index 0000000..970d483
--- /dev/null
+++ b/src/Logger.hpp
@@ -0,0 +1,49 @@
+#ifndef zn_LOGGER_HPP
+#define zn_LOGGER_HPP
+
+#include "common.hpp"
+
+#include "Loc.hpp"
+
+#define LOG_CATEGORIES(G) \
+  G(LOG_ERROR), \
+  G(LOG_WARNING)
+
+namespace zn
+{
+  ZN_MK_ENUM(LogCat, LOG_CATEGORIES);
+
+  // Turns diagnostics into exceptions; the exception type is chosen
+  // by the caller through the template argument of log().
+  class Logger
+  {
+  public:
+    explicit Logger();
+    virtual ~Logger();
+
+    template <typename T>
+    void log(LogCat category,
+             Loc const& loc,
+             std::string const& what);
+  private:
+  };
+
+  // Formats "<file>:<line> <CATEGORY> <what>" (category without its
+  // "LOG_" prefix) and throws it as a T. Note that every category,
+  // warnings included, currently throws.
+  template <typename T>
+  void Logger::log(LogCat category,
+                   Loc const& loc,
+                   std::string const& what)
+  {
+    std::stringstream ss;
+    ss << loc.file_path().string() << ":" << loc.line();
+    ss << " " << (LogCatStr[category] + strlen("LOG_"));
+    ss << " " << what;
+
+    throw T {ss.str()};
+  }
+
+}
+
+#endif
diff --git a/src/Module.cpp b/src/Module.cpp
new file mode 100644
index 0000000..cfb80f5
--- /dev/null
+++ b/src/Module.cpp
@@ -0,0 +1,47 @@
+#include "Module.hpp"
+#include "Lexer.hpp"
+#include "Parser.hpp"
+
+namespace zn
+{
+  /*explicit*/ Module::Module(Logger& logger)
+    : m_logger { logger }
+  {
+  }
+
+  /*virtual*/ Module::~Module()
+  {
+  }
+
+  // Reads FILE_PATH, lexes and parses it, then prints the resulting
+  // tree on standard output. Reports a module_error when the file
+  // cannot be opened.
+  void Module::load_from_file(std::filesystem::path file_path)
+  {
+    std::string line;
+    std::ifstream file { file_path };
+
+    if (!file)
+    {
+      m_logger.log<module_error>(LOG_ERROR,
+                                 Loc {file_path},
+                                 "cannot load module '" +
+                                 file_path.string() + "'");
+    }
+
+    m_source = "";
+
+    while (std::getline(file, line))
+    {
+      m_source += line + (file.eof() ? "":"\n");
+    }
+
+    Lexer lexer { m_logger, Loc {file_path} };
+    lexer.scan(m_source);
+
+    Parser parser { m_logger };
+
+    auto ast = parser.parse(lexer.all());
+    std::cout << ast->string() << std::endl;
+  }
+}
diff --git a/src/Module.hpp b/src/Module.hpp
new file mode 100644
index 0000000..fbc356a
--- /dev/null
+++ b/src/Module.hpp
@@ -0,0 +1,26 @@
+#ifndef zn_MODULE_HPP
+#define zn_MODULE_HPP
+
+#include "common.hpp"
+#include "Logger.hpp"
+
+namespace zn
+{
+  ZN_ERROR(module_error);
+
+  class Module
+  {
+  public:
+    explicit Module(Logger& logger);
+    virtual ~Module();
+
+    void load_from_file(std::filesystem::path file_path);
+
+  private:
+    Logger& m_logger;
+    std::string m_source;
+
+  };
+}
+
+#endif
diff --git a/src/Node.cpp b/src/Node.cpp
new file mode 100644
index 0000000..6059442
--- /dev/null
+++ b/src/Node.cpp
@@ -0,0 +1,58 @@
+#include "Node.hpp"
+
+namespace zn
+{
+  /*explicit*/ Node::Node(NodeType type,
+                          std::string const& repr,
+                          Loc const& loc)
+    : m_type { type }
+    , m_repr { repr }
+    , m_loc { loc }
+  {
+  }
+
+  /*virtual*/ Node::~Node()
+  {
+  }
+
+  void Node::add_child(std::shared_ptr<Node> child)
+  {
+    assert(child);
+    m_children.push_back(child);
+  }
+
+  std::shared_ptr<Node> Node::child_at(size_t index) const
+  {
+    assert(index < size());
+    return m_children.at(index);
+  }
+
+  // Renders the subtree as TYPE[repr](child,child,...); the bracket
+  // and parenthesis parts are omitted when empty.
+  std::string Node::string() const
+  {
+    std::stringstream ss;
+
+    ss << (NodeTypeStr[m_type] + strlen("NODE_"));
+
+    if (!m_repr.empty())
+    {
+      ss << "[" << m_repr << "]";
+    }
+
+    if (size() > 0)
+    {
+      ss << "(";
+      std::string sep;
+
+      for (auto const& child: m_children)
+      {
+        ss << sep << child->string();
+        sep = ",";
+      }
+      ss << ")";
+    }
+
+    return ss.str();
+  }
+}
diff --git a/src/Node.hpp b/src/Node.hpp
new file mode 100644
index 0000000..8f45838
--- /dev/null
+++ b/src/Node.hpp
@@ -0,0 +1,41 @@
+#ifndef zn_NODE_HPP
+#define zn_NODE_HPP
+
+#include "common.hpp"
+#include "Loc.hpp"
+
+#define NODE_TYPES(G) \
+  G(NODE_MODULE), \
+  G(NODE_INT)
+
+namespace zn
+{
+  ZN_MK_ENUM(NodeType, NODE_TYPES);
+
+  class Node
+  {
+  public:
+    explicit Node(NodeType type,
+                  std::string const& repr,
+                  Loc const& loc);
+    virtual ~Node();
+
+    NodeType type() const { return m_type; }
+    std::string repr() const { return m_repr; }
+    Loc loc() const { return m_loc; }
+    size_t size() const { return m_children.size(); }
+
+    void add_child(std::shared_ptr<Node> child);
+    std::shared_ptr<Node> child_at(size_t index) const;
+
+    std::string string() const;
+
+  private:
+    NodeType m_type;
+    std::string m_repr;
+    Loc m_loc;
+    std::vector<std::shared_ptr<Node>> m_children;
+  };
+}
+
+#endif
diff --git a/src/Parser.cpp b/src/Parser.cpp
new file mode 100644
index 0000000..4223373
--- /dev/null
+++ b/src/Parser.cpp
@@ -0,0 +1,147 @@
+#include "Parser.hpp"
+#include "Logger.hpp"
+#include "Node.hpp"
+
+namespace zn
+{
+  /*explicit*/ Parser::Parser(Logger& logger)
+    : m_logger { logger }
+  {
+  }
+
+  /*virtual*/ Parser::~Parser()
+  {
+  }
+
+  // Parses TOKENS (as produced by Lexer::all) into a MODULE tree.
+  // An empty token list yields a MODULE node without children.
+  std::shared_ptr<Node>
+  Parser::parse(std::vector<std::shared_ptr<Node>> tokens)
+  {
+    m_tokens = tokens;
+    m_cursor = 0;
+
+    return parse_module();
+  }
+
+  // Location used for new nodes and diagnostics: the current token,
+  // else the last token when the input is exhausted, else an empty
+  // path when there are no tokens at all.
+  Loc Parser::current_loc() const
+  {
+    if (m_cursor < m_tokens.size())
+    {
+      return m_tokens[m_cursor]->loc();
+    }
+
+    if (!m_tokens.empty())
+    {
+      return m_tokens.back()->loc();
+    }
+
+    return Loc { std::filesystem::path {} };
+  }
+
+  // Creates an inner node of TYPE with no textual representation.
+  std::shared_ptr<Node> Parser::mk_node(NodeType type)
+  {
+    return std::make_shared<Node>(type, "", current_loc());
+  }
+
+  // True when the upcoming tokens have exactly the given TYPES, in
+  // order; false when fewer tokens than that remain.
+  bool Parser::type_is(std::vector<NodeType> types) const
+  {
+    if (types.size() + m_cursor > m_tokens.size())
+    {
+      return false;
+    }
+
+    for (size_t i=0; i<types.size(); i++)
+    {
+      if (types[i] != m_tokens[m_cursor + i]->type())
+      {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  bool Parser::type_is(NodeType type) const
+  {
+    return type_is(std::vector<NodeType>{type});
+  }
+
+  // Consumes the current token if it has TYPE, otherwise reports a
+  // syntax_error. Running out of tokens is reported too instead of
+  // reading past the end of the token list.
+  std::shared_ptr<Node> Parser::consume(NodeType type)
+  {
+    if (!type_is(type))
+    {
+      std::stringstream ss;
+      ss << "expected '"
+         << (NodeTypeStr[type] + strlen("NODE_"))
+         << "', got '";
+
+      if (m_cursor < m_tokens.size())
+      {
+        ss << (NodeTypeStr[m_tokens[m_cursor]->type()]
+               + strlen("NODE_"));
+      }
+      else
+      {
+        ss << "end of input";
+      }
+
+      ss << "'";
+
+      m_logger.log<syntax_error>(LOG_ERROR,
+                                 current_loc(),
+                                 ss.str());
+    }
+
+    return consume();
+  }
+
+  // Returns the current token and moves to the next one.
+  std::shared_ptr<Node> Parser::consume()
+  {
+    assert(m_cursor < m_tokens.size());
+    m_cursor++;
+    return m_tokens[m_cursor - 1];
+  }
+
+  // MODULE ::= EXPR*
+  std::shared_ptr<Node> Parser::parse_module()
+  {
+    auto node = mk_node(NODE_MODULE);
+
+    while (m_cursor < m_tokens.size())
+    {
+      node->add_child(parse_expr());
+    }
+
+    return node;
+  }
+
+  // EXPR ::= int. Callers must ensure a token is available.
+  std::shared_ptr<Node> Parser::parse_expr()
+  {
+    if (type_is(NODE_INT))
+    {
+      return consume();
+    }
+
+    std::stringstream ss;
+    ss << "unknown expression '"
+       << m_tokens[m_cursor]->string()
+       << "'";
+
+    m_logger.log<syntax_error>(LOG_ERROR,
+                               m_tokens[m_cursor]->loc(),
+                               ss.str());
+    abort();
+  }
+}
diff --git a/src/Parser.hpp b/src/Parser.hpp
new file mode 100644
index 0000000..bce17ed
--- /dev/null
+++ b/src/Parser.hpp
@@ -0,0 +1,40 @@
+#ifndef zn_PARSER_HPP
+#define zn_PARSER_HPP
+
+#include "common.hpp"
+#include "Logger.hpp"
+#include "Node.hpp"
+
+namespace zn
+{
+  ZN_ERROR(syntax_error);
+
+  // Builds a Node tree from the token list produced by the Lexer.
+  class Parser
+  {
+  public:
+    explicit Parser(Logger& logger);
+    virtual ~Parser();
+
+    std::shared_ptr<Node>
+    parse(std::vector<std::shared_ptr<Node>> tokens);
+
+  private:
+    Logger& m_logger;
+    std::vector<std::shared_ptr<Node>> m_tokens;
+    size_t m_cursor { 0 };
+
+    Loc current_loc() const;
+    std::shared_ptr<Node> mk_node(NodeType type);
+    bool type_is(std::vector<NodeType> types) const;
+    bool type_is(NodeType type) const;
+    std::shared_ptr<Node> consume(NodeType type);
+    std::shared_ptr<Node> consume();
+
+    std::shared_ptr<Node> parse_module();
+    std::shared_ptr<Node> parse_expr();
+
+  };
+}
+
+#endif
diff --git a/src/common.hpp b/src/common.hpp
new file mode 100644
index 0000000..664a6cc
--- /dev/null
+++ b/src/common.hpp
@@ -0,0 +1,35 @@
+#ifndef zn_COMMON_HPP
+#define zn_COMMON_HPP
+
+#include <stdexcept>
+
+#define ZN_GEN_ENUM(X) X
+#define ZN_GEN_STRING(X) #X
+
+// Declares `enum PREFIX` from the X-macro list ENUM together with a
+// parallel table PREFIXStr holding each enumerator's name.
+#define ZN_MK_ENUM(PREFIX, ENUM) \
+  enum PREFIX { ENUM(ZN_GEN_ENUM) }; \
+  constexpr char const* PREFIX ## Str [] = { ENUM(ZN_GEN_STRING) }
+
+// Declares an exception type NAME derived from std::runtime_error.
+#define ZN_ERROR(NAME) \
+  struct NAME : public std::runtime_error { \
+    NAME (std::string const& what) : std::runtime_error(what) {} \
+  }
+
+#include <cassert>
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#endif
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..a5e70df
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,28 @@
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include "Module.hpp"
+
+// Loads the module named by the first argument, if any. Diagnostics
+// raised through the Logger arrive here as exceptions; print them and
+// exit with a failure status instead of letting them terminate us.
+int main(int argc, char** argv)
+{
+  if (argc > 1)
+  {
+    try
+    {
+      zn::Logger logger;
+      zn::Module mod { logger };
+
+      mod.load_from_file(argv[1]);
+    }
+    catch (std::exception const& err)
+    {
+      std::cerr << err.what() << std::endl;
+      return EXIT_FAILURE;
+    }
+  }
+
+  return 0;
+}
diff --git a/tests/Lexer.cpp b/tests/Lexer.cpp
new file mode 100644
index 0000000..0219f8c
--- /dev/null
+++ b/tests/Lexer.cpp
@@ -0,0 +1,38 @@
+#include <catch2/catch.hpp>
+#include "../src/Lexer.hpp"
+
+class LexerTest
+{
+public:
+  explicit LexerTest() {}
+  virtual ~LexerTest() {}
+
+  void test_next(zn::Lexer& lexer, std::string const& oracle)
+  {
+    auto node = lexer.try_next();
+    INFO("expected " << oracle << " got nullptr");
+    REQUIRE(nullptr != node);
+    REQUIRE(oracle == node->string());
+  }
+
+protected:
+  zn::Logger m_logger;
+  zn::Loc m_loc {"tests/lexer"};
+  zn::Lexer m_lexer { m_logger, m_loc };
+};
+
+TEST_CASE_METHOD(LexerTest, "Lexer_unknown_text")
+{
+  m_lexer.scan(" §§§ ");
+  REQUIRE_THROWS_AS(m_lexer.try_next(), zn::lex_error);
+}
+
+TEST_CASE_METHOD(LexerTest, "Lexer_int")
+{
+  m_lexer.scan(" 3 -2 167 ");
+
+  test_next(m_lexer, "INT[3]");
+  test_next(m_lexer, "INT[-2]");
+  test_next(m_lexer, "INT[167]");
+  REQUIRE(nullptr == m_lexer.try_next());
+}
diff --git a/tests/Parser.cpp b/tests/Parser.cpp
new file mode 100644
index 0000000..6024cff
--- /dev/null
+++ b/tests/Parser.cpp
@@ -0,0 +1,38 @@
+#include <catch2/catch.hpp>
+#include "../src/Parser.hpp"
+#include "../src/Lexer.hpp"
+
+class ParserTest
+{
+public:
+  explicit ParserTest() {}
+  virtual ~ParserTest() {}
+
+  void test_parse(std::string const& oracle,
+                  std::string const& source)
+  {
+    zn::Logger logger;
+    zn::Loc loc {"tests/parser"};
+    zn::Lexer lexer { logger, loc };
+    lexer.scan(source);
+    std::vector<std::shared_ptr<zn::Node>> tokens = lexer.all();
+
+    zn::Parser parser {logger};
+    auto node = parser.parse(tokens);
+
+    REQUIRE(oracle == node->string());
+  }
+protected:
+};
+
+TEST_CASE_METHOD(ParserTest, "Parser_int")
+{
+  test_parse("MODULE(INT[37])", " 37");
+}
+
+// An empty or comment-only source has no tokens; it must still parse.
+TEST_CASE_METHOD(ParserTest, "Parser_empty")
+{
+  test_parse("MODULE", "");
+  test_parse("MODULE", " ; only a comment\n");
+}
diff --git a/tests/main.cpp b/tests/main.cpp
new file mode 100644
index 0000000..4ed06df
--- /dev/null
+++ b/tests/main.cpp
@@ -0,0 +1,2 @@
+#define CATCH_CONFIG_MAIN
+#include <catch2/catch.hpp>