From 85a7af18b9448afa8549b2521a1c1116fe0c35e0 Mon Sep 17 00:00:00 2001 From: bog Date: Wed, 27 Sep 2023 20:18:05 +0200 Subject: [PATCH] ADD: init compiler and rename the project. --- .gitignore | 4 ++ Makefile | 11 ++++ doc/grammar.bnf | 4 ++ lib/Compiler.cpp | 17 +++++ lib/Compiler.hpp | 32 ++++++++++ lib/Lexer.cpp | 157 +++++++++++++++++++++++++++++++++++++++++++++++ lib/Lexer.hpp | 52 ++++++++++++++++ lib/Loc.cpp | 14 +++++ lib/Loc.hpp | 45 ++++++++++++++ lib/Node.cpp | 55 +++++++++++++++++ lib/Node.hpp | 42 +++++++++++++ lib/Parser.cpp | 109 ++++++++++++++++++++++++++++++++ lib/Parser.hpp | 39 ++++++++++++ lib/commons.hpp | 29 +++++++++ meson.build | 47 ++++++++++++++ src/main.cpp | 39 ++++++++++++ tests/Lexer.cpp | 36 +++++++++++ tests/Parser.cpp | 32 ++++++++++ tests/main.cpp | 2 + 19 files changed, 766 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 doc/grammar.bnf create mode 100644 lib/Compiler.cpp create mode 100644 lib/Compiler.hpp create mode 100644 lib/Lexer.cpp create mode 100644 lib/Lexer.hpp create mode 100644 lib/Loc.cpp create mode 100644 lib/Loc.hpp create mode 100644 lib/Node.cpp create mode 100644 lib/Node.hpp create mode 100644 lib/Parser.cpp create mode 100644 lib/Parser.hpp create mode 100644 lib/commons.hpp create mode 100644 meson.build create mode 100644 src/main.cpp create mode 100644 tests/Lexer.cpp create mode 100644 tests/Parser.cpp create mode 100644 tests/main.cpp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..10a7aa1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*~* +*\#* +build +.cache \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..5b94a2b --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +.PHONY: build tests + +build: + meson setup build + meson compile -C build + +test: build + build/wongotest + +install: test + meson install -C build diff --git a/doc/grammar.bnf b/doc/grammar.bnf new file mode 100644 index 0000000..1074c4d --- /dev/null +++ b/doc/grammar.bnf @@ -0,0 +1,4 @@ +PROG ::= INSTR* +INSTR ::= DIR +DIR ::= hash ident EXPR +EXPR ::= ident diff --git a/lib/Compiler.cpp b/lib/Compiler.cpp new file mode 100644 index 0000000..41f7115 --- /dev/null +++ b/lib/Compiler.cpp @@ -0,0 +1,17 @@ +#include "Compiler.hpp" + +namespace wg +{ + /*explicit*/ Compiler::Compiler() + { + } + + /*virtual*/ Compiler::~Compiler() + { + } + + void Compiler::compile(std::shared_ptr node) + { + std::cout << node->string() << std::endl; + } +} diff --git a/lib/Compiler.hpp b/lib/Compiler.hpp new file mode 100644 index 0000000..e026dd4 --- /dev/null +++ b/lib/Compiler.hpp @@ -0,0 +1,32 @@ +#ifndef wg_COMPILER_HPP +#define wg_COMPILER_HPP + +#include +#include +#include + +#include "commons.hpp" +#include "Node.hpp" + +namespace wg +{ + class Compiler + { + public: + explicit Compiler(); + virtual ~Compiler(); + + void compile(std::shared_ptr node); + private: + std::unique_ptr m_context = + std::make_unique(); + + std::unique_ptr> m_builder = + std::make_unique>(*m_context); + + std::unique_ptr m_module = + std::make_unique("my module", *m_context); + }; +} + +#endif diff --git a/lib/Lexer.cpp b/lib/Lexer.cpp new file mode 100644 index 0000000..de8f149 --- /dev/null +++ b/lib/Lexer.cpp @@ -0,0 +1,157 @@ +#include "Lexer.hpp" + +namespace wg +{ + /*explicit*/ Lexer::Lexer() + { + add_text("#", NODE_HASH); + + m_scanners.push_back(std::bind(&Lexer::scan_ident, this)); + } + + /*virtual*/ Lexer::~Lexer() + { + } + + void Lexer::scan(std::string const& source) + { + m_source = source; + m_cursor = 0; + } + + std::shared_ptr Lexer::next() + { + std::optional scan_info; + + skip_spaces(); + + for (auto scanner: m_scanners) + { + auto info = scanner(); + + if (info && (scan_info == std::nullopt + || info->cursor > scan_info->cursor)) + { + scan_info = info; + } + } + + if (scan_info) + { + m_cursor = scan_info->cursor; + + return std::make_shared(scan_info->type, + scan_info->repr, + m_loc); + } + + WG_ASSERT(m_cursor <= m_source.size(), "unexpected token"); + + return nullptr; + } + + std::vector> Lexer::all() + { + std::vector> result; + std::shared_ptr node; + + while ( (node = next()) != nullptr ) + { + result.push_back(node); + } + + return result; + } + + void Lexer::add_text(std::string const& text, + NodeType node, + bool has_value) + { + if (text.size() == 1) + { + m_seps.push_back(text[0]); + } + + m_scanners.push_back(std::bind(&Lexer::scan_text, + this, text, + node, has_value)); + } + + bool Lexer::is_sep(size_t index) const + { + WG_ASSERT(index < m_source.size(), "cannot find separator"); + + if (std::isspace(m_source[index])) + { + return true; + } + + auto itr = std::find(std::begin(m_seps), + std::end(m_seps), + m_source[index]); + + return itr != std::end(m_seps); + } + + void Lexer::skip_spaces() + { + while (m_cursor < m_source.size() + && std::isspace(m_source[m_cursor])) + { + if (m_source[m_cursor] == '\n') + { + m_loc = Loc {m_loc.origin(), m_loc.line() + 1}; + } + + m_cursor++; + } + } + + std::optional Lexer::scan_text(std::string const& text, + NodeType type, + bool has_value) const + { + if (m_cursor + text.size() > m_source.size()) + { + return std::nullopt; + } + + for (size_t i=0; i Lexer::scan_ident() const + { + size_t cursor = m_cursor; + std::string repr; + + while (cursor < m_source.size() + && !is_sep(cursor)) + { + repr += m_source[cursor]; + cursor++; + } + + if (repr.empty() == false) + { + return ScanInfo { + cursor, + NODE_IDENT, + repr + }; + } + + return std::nullopt; + } +} diff --git a/lib/Lexer.hpp b/lib/Lexer.hpp new file mode 100644 index 0000000..187424b --- /dev/null +++ b/lib/Lexer.hpp @@ -0,0 +1,52 @@ +#ifndef wg_LEXER_HPP +#define wg_LEXER_HPP + +#include "commons.hpp" +#include "Node.hpp" +#include "Loc.hpp" + +namespace wg +{ + struct ScanInfo { + size_t cursor; + NodeType type; + std::string repr; + }; + + using scanner_t = std::function()>; + + class Lexer + { + public: + explicit Lexer(); + virtual ~Lexer(); + + void scan(std::string const& source); + std::shared_ptr next(); + std::vector> all(); + + private: + std::string m_source; + size_t m_cursor = 0; + Loc m_loc; + std::vector m_scanners; + std::vector m_seps; + + void add_text(std::string const& text, + NodeType node, + bool has_value=false); + + bool is_sep(size_t index) const; + + void skip_spaces(); + + std::optional scan_text(std::string const& text, + NodeType type, + bool has_value) const; + + std::optional scan_ident() const; + + }; +} + +#endif diff --git a/lib/Loc.cpp b/lib/Loc.cpp new file mode 100644 index 0000000..62f0138 --- /dev/null +++ b/lib/Loc.cpp @@ -0,0 +1,14 @@ +#include "Loc.hpp" + +namespace wg +{ + /*explicit*/ Loc::Loc(std::filesystem::path origin, int line) + : m_origin { origin } + , m_line { line } + { + } + + /*virtual*/ Loc::~Loc() + { + } +} diff --git a/lib/Loc.hpp b/lib/Loc.hpp new file mode 100644 index 0000000..7b4f3b4 --- /dev/null +++ b/lib/Loc.hpp @@ -0,0 +1,45 @@ +#ifndef wg_LOC_HPP +#define wg_LOC_HPP + +#include "commons.hpp" +#include + +namespace wg +{ + class Loc + { + public: + explicit Loc(std::filesystem::path origin = "???", int line = 0); + virtual ~Loc(); + + std::filesystem::path origin() const { return m_origin; } + int line() const { return m_line; } + + template + void error(std::string const& what); + + template + void error(std::stringstream const& what); + + private: + std::filesystem::path m_origin; + int m_line = 0; + }; + + template + void Loc::error(std::string const& what) + { + std::stringstream ss; + ss << m_origin.string() << ": ERROR " << what; + + throw T {ss.str() }; + } + + template + void Loc::error(std::stringstream const& what) + { + error(what.str()); + } +} + +#endif diff --git a/lib/Node.cpp b/lib/Node.cpp new file mode 100644 index 0000000..fb5699e --- /dev/null +++ b/lib/Node.cpp @@ -0,0 +1,55 @@ +#include "Node.hpp" + +namespace wg +{ + /*explicit*/ Node::Node(NodeType type, std::string const& repr, Loc const& loc) + : m_type { type } + , m_repr { repr } + , m_loc { loc } + { + } + + /*virtual*/ Node::~Node() + { + } + + void Node::add_child(std::shared_ptr child) + { + m_children.push_back(child); + } + + std::shared_ptr Node::child(size_t index) const + { + WG_ASSERT(index < size(), "aze"); + return m_children.at(index); + } + + std::string Node::string() const + { + std::stringstream ss; + ss << (NodeTypeStr[m_type] + strlen("NODE_")); + + if (!m_repr.empty()) + { + ss << "[" << m_repr << "]"; + } + + if (size() > 0) + { + ss << "("; + + std::string sep; + + for (auto child: m_children) + { + ss << sep << child->string(); + sep = ","; + } + + ss << ")"; + } + + return ss.str(); + } + +} diff --git a/lib/Node.hpp b/lib/Node.hpp new file mode 100644 index 0000000..ddf49b1 --- /dev/null +++ b/lib/Node.hpp @@ -0,0 +1,42 @@ +#ifndef wg_NODE_HPP +#define wg_NODE_HPP + +#include "commons.hpp" +#include "Loc.hpp" + +#define NODE_TYPES(G) \ + G(NODE_PROG), \ + G(NODE_IDENT), \ + G(NODE_HASH), \ + G(NODE_DIR), + +namespace wg +{ + WG_ENUM(NodeType, NODE_TYPES); + + class Node + { + public: + explicit Node(NodeType type, std::string const& repr, Loc const& loc); + virtual ~Node(); + + NodeType type() const { return m_type; } + std::string repr() const { return m_repr; } + Loc loc() const { return m_loc; } + + size_t size() const { return m_children.size(); } + + void add_child(std::shared_ptr child); + std::shared_ptr child(size_t index) const; + + std::string string() const; + + private: + NodeType m_type; + std::string m_repr; + Loc m_loc; + std::vector> m_children; + }; +} + +#endif diff --git a/lib/Parser.cpp b/lib/Parser.cpp new file mode 100644 index 0000000..27ee134 --- /dev/null +++ b/lib/Parser.cpp @@ -0,0 +1,109 @@ +#include "Parser.hpp" + +namespace wg +{ + /*explicit*/ Parser::Parser() + { + } + + /*virtual*/ Parser::~Parser() + { + } + + std::shared_ptr Parser::parse(std::vector> + const& tokens) + { + m_cursor = 0; + m_tokens = tokens; + + return parse_prog(); + } + + Loc Parser::loc() const + { + return m_tokens[m_cursor]->loc(); + } + + std::shared_ptr Parser::consume(NodeType type) + { + auto current = m_tokens[m_cursor]; + + if (current->type() != type) + { + std::stringstream ss; + ss << "type mismatch, expected '" + << (NodeTypeStr[type] + strlen("NODE_")) + << "', got '" + << (NodeTypeStr[current->type()] + strlen("NODE_")) + << "'"; + + loc().error(ss); + } + else + { + return consume(); + } + + return nullptr; + } + + std::shared_ptr Parser::consume() + { + WG_ASSERT(m_cursor < m_tokens.size(), "cannot consume"); + auto node = m_tokens[m_cursor]; + m_cursor++; + return node; + } + + bool Parser::type_is(NodeType type, int lookahead) + { + if (m_cursor + lookahead >= m_tokens.size()) + { + return false; + } + + return m_tokens[m_cursor + lookahead]->type() == type; + } + + bool Parser::type_isnt(NodeType type, int lookahead) + { + return !type_is(type, lookahead); + } + + std::shared_ptr Parser::make_node(NodeType type) const + { + return std::make_shared(type, "", loc()); + } + + std::shared_ptr Parser::parse_prog() + { + auto node = std::make_shared(NODE_PROG, "", Loc {}); + + + while (m_cursor < m_tokens.size()) + { + node->add_child(parse_instr()); + } + + return node; + } + + std::shared_ptr Parser::parse_instr() + { + return parse_dir(); + } + + std::shared_ptr Parser::parse_dir() + { + auto node = make_node(NODE_DIR); + consume(NODE_HASH); + node->add_child(consume(NODE_IDENT)); + node->add_child(parse_expr()); + return node; + } + + std::shared_ptr Parser::parse_expr() + { + return consume(NODE_IDENT); + } +} diff --git a/lib/Parser.hpp b/lib/Parser.hpp new file mode 100644 index 0000000..75fc375 --- /dev/null +++ b/lib/Parser.hpp @@ -0,0 +1,39 @@ +#ifndef wg_PARSER_HPP +#define wg_PARSER_HPP + +#include "commons.hpp" +#include "Node.hpp" + +namespace wg +{ + WG_ERROR(syntax_error); + + class Parser + { + public: + explicit Parser(); + virtual ~Parser(); + + std::shared_ptr parse(std::vector> + const& tokens); + private: + std::vector> m_tokens; + size_t m_cursor; + + Loc loc() const; + std::shared_ptr consume(NodeType type); + std::shared_ptr consume(); + + bool type_is(NodeType type, int lookahead=0); + bool type_isnt(NodeType type, int lookahead=0); + std::shared_ptr make_node(NodeType type) const; + + std::shared_ptr parse_prog(); + std::shared_ptr parse_instr(); + std::shared_ptr parse_dir(); + std::shared_ptr parse_expr(); + + }; +} + +#endif diff --git a/lib/commons.hpp b/lib/commons.hpp new file mode 100644 index 0000000..0f5764d --- /dev/null +++ b/lib/commons.hpp @@ -0,0 +1,29 @@ +#ifndef wg_COMMONS_HPP +#define wg_COMMONS_HPP + +#define WG_GEN_ENUM(X) X +#define WG_GEN_STRING(X) #X + +#define WG_ENUM(PREFIX, TYPES) \ + enum PREFIX { TYPES(WG_GEN_ENUM) }; \ + constexpr char const* PREFIX ## Str [] = { TYPES(WG_GEN_STRING) } + +#define WG_ERROR(NAME) \ + struct NAME : public std::runtime_error { \ + NAME (std::string const& what) : std::runtime_error { what } {} \ + } + +#define WG_ASSERT(COND, MSG) \ + if ( ! (COND) ) { std::cerr << MSG << std::endl; abort(); } + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..283c87d --- /dev/null +++ b/meson.build @@ -0,0 +1,47 @@ +project('wongola', + 'cpp', + version: '0.0.0', + default_options: [ + 'prefix=/usr', + 'warning_level=3', + 'cpp_std=c++17' + ]) + +wongola_lib = static_library( + 'wongola', + sources: [ + 'lib/Node.cpp', + 'lib/Lexer.cpp', + 'lib/Parser.cpp', + 'lib/Compiler.cpp', + 'lib/Loc.cpp', + ], + dependencies: [ + dependency('LLVM') + ] +) + +wongola_dep = declare_dependency( + link_with: [wongola_lib], + include_directories: ['lib'] +) + +executable('wongoc', + sources: [ + 'src/main.cpp', + ], + dependencies: [ + wongola_dep + ], + install: true) + +executable('wongotest', + sources: [ + 'tests/main.cpp', + 'tests/Lexer.cpp', + 'tests/Parser.cpp', + ], + dependencies: [ + wongola_dep, + dependency('catch2') + ]) diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..2175a6e --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include +#include + +int main(int argc, char** argv) +{ + if (argc < 2) + { + return -1; + } + + std::string source; + + // Get Sources + { + std::ifstream file {argv[1]}; + std::string line; + + while (std::getline(file, line)) + { + source += line + (file.eof() ? "" : "\n"); + } + } + + // Scan Sources + wg::Lexer lexer; + lexer.scan(source); + auto tokens = lexer.all(); + + wg::Parser parser; + auto ast = parser.parse(tokens); + + wg::Compiler compiler; + compiler.compile(ast); + + return 0; +} diff --git a/tests/Lexer.cpp b/tests/Lexer.cpp new file mode 100644 index 0000000..a08b9b1 --- /dev/null +++ b/tests/Lexer.cpp @@ -0,0 +1,36 @@ +#include +#include "../lib/Lexer.hpp" + +class LexerTest +{ +public: + explicit LexerTest() {} + virtual ~LexerTest() {} + + void test_next(wg::Lexer& lexer, std::string const& oracle) + { + auto n = lexer.next(); + REQUIRE(nullptr != n); + + REQUIRE(oracle == n->string()); + } + + void test_end(wg::Lexer& lexer) + { + auto n = lexer.next(); + REQUIRE(nullptr == n); + } + +protected: +}; + +TEST_CASE_METHOD(LexerTest, "Lexer_") +{ + wg::Lexer lex; + lex.scan(" # canard #canard"); + test_next(lex, "HASH"); + test_next(lex, "IDENT[canard]"); + test_next(lex, "HASH"); + test_next(lex, "IDENT[canard]"); + test_end(lex); +} diff --git a/tests/Parser.cpp b/tests/Parser.cpp new file mode 100644 index 0000000..03fbb82 --- /dev/null +++ b/tests/Parser.cpp @@ -0,0 +1,32 @@ +#include +#include "../lib/Lexer.hpp" +#include "../lib/Parser.hpp" + +class ParserTest +{ +public: + explicit ParserTest() {} + virtual ~ParserTest() {} + + void test_parse(std::string const& oracle, + std::string const& source) + { + wg::Lexer lex; + lex.scan(source); + auto tokens = lex.all(); + + wg::Parser parser; + + auto node = parser.parse(tokens); + + REQUIRE(oracle == node->string()); + } + +protected: +}; + +TEST_CASE_METHOD(ParserTest, "Parser_") +{ + test_parse("PROG(DIR(IDENT[hello],IDENT[world]))", + "#hello world"); +} diff --git a/tests/main.cpp b/tests/main.cpp new file mode 100644 index 0000000..4ed06df --- /dev/null +++ b/tests/main.cpp @@ -0,0 +1,2 @@ +#define CATCH_CONFIG_MAIN +#include