ADD: init compiler and rename the project.

main
bog 2023-09-27 20:18:05 +02:00
parent 6d4f0a6211
commit 85a7af18b9
19 changed files with 766 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
*~*
*\#*
build
.cache

11
Makefile Normal file
View File

@ -0,0 +1,11 @@
# Convenience wrapper around Meson: compile, run the tests, install.
# None of these targets produces a file named after itself, so all of them
# are phony. "build" in particular is also the name of the Meson build
# directory, which would otherwise make the target look up to date.
.PHONY: build test install

# Configure and compile the project into ./build.
build:
	meson setup build
	meson compile -C build

# Run the test executable produced by the build.
test: build
	build/wongotest

# Install only once the tests have passed.
install: test
	meson install -C build

4
doc/grammar.bnf Normal file
View File

@ -0,0 +1,4 @@
PROG ::= INSTR*
INSTR ::= DIR
DIR ::= hash ident EXPR
EXPR ::= ident

17
lib/Compiler.cpp Normal file
View File

@ -0,0 +1,17 @@
#include "Compiler.hpp"
namespace wg
{
// Nothing to do in the body: the constructor performs no work of its own.
/*explicit*/ Compiler::Compiler() = default;
// No explicit cleanup is performed here.
/*virtual*/ Compiler::~Compiler() = default;
// Currently only prints the textual form of the tree rooted at `node` on
// standard output, followed by a line break (std::endl also flushes).
void Compiler::compile(std::shared_ptr<Node> node)
{
  const std::string dump = node->string();
  std::cout << dump << std::endl;
}
}

32
lib/Compiler.hpp Normal file
View File

@ -0,0 +1,32 @@
#ifndef wg_COMPILER_HPP
#define wg_COMPILER_HPP
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Module.h>
#include "commons.hpp"
#include "Node.hpp"
namespace wg
{
// Code-generation entry point. Owns an LLVM context, an IR builder and a
// module; compile() is its only public operation.
class Compiler
{
public:
explicit Compiler();
virtual ~Compiler();
// Processes the tree rooted at `node`.
void compile(std::shared_ptr<Node> node);
private:
// Declaration order matters: the builder and the module are both
// constructed from *m_context, so the context has to be declared (and
// therefore initialized) first. Members are destroyed in reverse order,
// which means the context goes away last.
std::unique_ptr<llvm::LLVMContext> m_context =
std::make_unique<llvm::LLVMContext>();
std::unique_ptr<llvm::IRBuilder<>> m_builder =
std::make_unique<llvm::IRBuilder<>>(*m_context);
std::unique_ptr<llvm::Module> m_module =
std::make_unique<llvm::Module>("my module", *m_context);
};
}
#endif

157
lib/Lexer.cpp Normal file
View File

@ -0,0 +1,157 @@
#include "Lexer.hpp"
namespace wg
{
// Registers the scanners in a fixed order: the '#' text scanner first
// (add_text also records '#' as a separator), then the identifier scanner.
/*explicit*/ Lexer::Lexer()
{
  add_text("#", NODE_HASH);
  m_scanners.push_back([this]() { return scan_ident(); });
}
/*virtual*/ Lexer::~Lexer()
{
}
void Lexer::scan(std::string const& source)
{
m_source = source;
m_cursor = 0;
}
std::shared_ptr<Node> Lexer::next()
{
std::optional<ScanInfo> scan_info;
skip_spaces();
for (auto scanner: m_scanners)
{
auto info = scanner();
if (info && (scan_info == std::nullopt
|| info->cursor > scan_info->cursor))
{
scan_info = info;
}
}
if (scan_info)
{
m_cursor = scan_info->cursor;
return std::make_shared<Node>(scan_info->type,
scan_info->repr,
m_loc);
}
WG_ASSERT(m_cursor <= m_source.size(), "unexpected token");
return nullptr;
}
std::vector<std::shared_ptr<Node>> Lexer::all()
{
std::vector<std::shared_ptr<Node>> result;
std::shared_ptr<Node> node;
while ( (node = next()) != nullptr )
{
result.push_back(node);
}
return result;
}
void Lexer::add_text(std::string const& text,
NodeType node,
bool has_value)
{
if (text.size() == 1)
{
m_seps.push_back(text[0]);
}
m_scanners.push_back(std::bind(&Lexer::scan_text,
this, text,
node, has_value));
}
// Tells whether the source character at `index` ends an identifier: either
// whitespace or one of the registered separator characters.
// `index` must be inside the source; otherwise WG_ASSERT aborts.
bool Lexer::is_sep(size_t index) const
{
  WG_ASSERT(index < m_source.size(), "cannot find separator");

  const char c = m_source[index];
  // std::isspace has undefined behavior for negative values other than EOF,
  // which a plain char can hold for non-ASCII bytes: go through unsigned char.
  if (std::isspace(static_cast<unsigned char>(c)))
  {
    return true;
  }

  return std::find(std::begin(m_seps), std::end(m_seps), c)
    != std::end(m_seps);
}
// Advances the cursor past any whitespace, bumping the line number stored
// in m_loc for every newline that is crossed.
void Lexer::skip_spaces()
{
  // The cast keeps std::isspace well-defined for bytes >= 0x80, which are
  // negative when char is signed.
  while (m_cursor < m_source.size()
         && std::isspace(static_cast<unsigned char>(m_source[m_cursor])))
  {
    if (m_source[m_cursor] == '\n')
    {
      m_loc = Loc {m_loc.origin(), m_loc.line() + 1};
    }
    m_cursor++;
  }
}
// Tries to match the literal `text` at the current cursor.
// On success the result points just past the match and carries `type`; its
// repr is `text` when `has_value` is set and empty otherwise. Returns
// std::nullopt when the source is too short or the characters differ.
std::optional<ScanInfo> Lexer::scan_text(std::string const& text,
                                         NodeType type,
                                         bool has_value) const
{
  const size_t length = text.size();
  const bool fits = (m_cursor + length <= m_source.size());

  if (!fits || m_source.compare(m_cursor, length, text) != 0)
  {
    return std::nullopt;
  }

  std::string repr;
  if (has_value)
  {
    repr = text;
  }
  return ScanInfo { m_cursor + length, type, repr };
}
std::optional<ScanInfo> Lexer::scan_ident() const
{
size_t cursor = m_cursor;
std::string repr;
while (cursor < m_source.size()
&& !is_sep(cursor))
{
repr += m_source[cursor];
cursor++;
}
if (repr.empty() == false)
{
return ScanInfo {
cursor,
NODE_IDENT,
repr
};
}
return std::nullopt;
}
}

52
lib/Lexer.hpp Normal file
View File

@ -0,0 +1,52 @@
#ifndef wg_LEXER_HPP
#define wg_LEXER_HPP
#include "commons.hpp"
#include "Node.hpp"
#include "Loc.hpp"
namespace wg
{
// Outcome of one successful scanner run.
struct ScanInfo {
// Position in the source just past the matched text.
size_t cursor;
// Type given to the node built from this match.
NodeType type;
// Text stored in the node; may be empty.
std::string repr;
};
// A scanner tries to recognize one kind of token at the lexer's current
// position and yields std::nullopt when it does not match.
using scanner_t = std::function<std::optional<ScanInfo>()>;
// Splits a source text into token nodes.
//
// Usage: scan() a source, then pull tokens with next() or all().
class Lexer
{
public:
  explicit Lexer();
  virtual ~Lexer();

  // The callables stored in m_scanners hold a pointer to the Lexer that
  // created them. A copied Lexer would therefore keep scanning through the
  // original object (dangling once that object is gone), so copying is
  // disabled.
  Lexer(Lexer const&) = delete;
  Lexer& operator=(Lexer const&) = delete;

  // Loads `source` and rewinds the cursor to its beginning.
  void scan(std::string const& source);
  // Returns the next token, or nullptr when no scanner matches.
  std::shared_ptr<Node> next();
  // Returns every remaining token, in order.
  std::vector<std::shared_ptr<Node>> all();

private:
  std::string m_source;
  size_t m_cursor = 0;
  Loc m_loc;
  std::vector<scanner_t> m_scanners;
  // Single characters that end an identifier, in addition to whitespace.
  std::vector<char> m_seps;

  // Registers a scanner for the literal `text`.
  void add_text(std::string const& text,
                NodeType node,
                bool has_value=false);
  bool is_sep(size_t index) const;
  void skip_spaces();
  std::optional<ScanInfo> scan_text(std::string const& text,
                                    NodeType type,
                                    bool has_value) const;
  std::optional<ScanInfo> scan_ident() const;
};
}
#endif

14
lib/Loc.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "Loc.hpp"
namespace wg
{
// Builds a location from the path it originates from and a line number.
// `origin` is received by value, so it is moved into the member rather than
// copied a second time.
/*explicit*/ Loc::Loc(std::filesystem::path origin, int line)
  : m_origin { std::move(origin) }
  , m_line { line }
{
}
// No explicit cleanup is performed here.
/*virtual*/ Loc::~Loc() = default;
}

45
lib/Loc.hpp Normal file
View File

@ -0,0 +1,45 @@
#ifndef wg_LOC_HPP
#define wg_LOC_HPP
#include "commons.hpp"
#include <sstream>
namespace wg
{
// A position in a source: the path it comes from and a line number.
class Loc
{
public:
  explicit Loc(std::filesystem::path origin = "???", int line = 0);
  virtual ~Loc();

  std::filesystem::path origin() const { return m_origin; }
  int line() const { return m_line; }

  // Throws a T built from "<origin>: ERROR <what>". T must be constructible
  // from a std::string. Both overloads only read the location and never
  // return, hence const and [[noreturn]].
  template <typename T>
  [[noreturn]] void error(std::string const& what) const;

  // Same as above, taking the message from a string stream.
  template <typename T>
  [[noreturn]] void error(std::stringstream const& what) const;

private:
  std::filesystem::path m_origin;
  int m_line = 0;
};

template <typename T>
void Loc::error(std::string const& what) const
{
  std::stringstream ss;
  ss << m_origin.string() << ": ERROR " << what;
  throw T {ss.str() };
}

template <typename T>
void Loc::error(std::stringstream const& what) const
{
  error<T>(what.str());
}
}
#endif

55
lib/Node.cpp Normal file
View File

@ -0,0 +1,55 @@
#include "Node.hpp"
namespace wg
{
// Creates a childless node with the given type, textual representation and
// source location.
/*explicit*/ Node::Node(NodeType type, std::string const& repr, Loc const& loc)
  : m_type(type)
  , m_repr(repr)
  , m_loc(loc)
{
}
// No explicit cleanup is performed here.
/*virtual*/ Node::~Node() = default;
// Appends `child` after the children already attached to this node.
void Node::add_child(std::shared_ptr<Node> child)
{
  m_children.emplace_back(child);
}
// Returns the child at position `index` (0-based).
// An index past the last child aborts through WG_ASSERT.
std::shared_ptr<Node> Node::child(size_t index) const
{
  WG_ASSERT(index < size(), "child index out of range");
  // The bound was checked just above, so the unchecked accessor is enough.
  return m_children[index];
}
// Renders the subtree as text: the type name without its "NODE_" prefix,
// then "[repr]" when the repr is not empty, then the children as a
// comma-separated list in parentheses when there are any.
// Example: PROG(DIR(IDENT[hello],IDENT[world]))
std::string Node::string() const
{
  std::stringstream out;

  const char* type_name = NodeTypeStr[m_type];
  out << (type_name + strlen("NODE_"));

  if (m_repr.empty() == false)
  {
    out << "[" << m_repr << "]";
  }

  if (m_children.empty())
  {
    return out.str();
  }

  out << "(";
  for (size_t i = 0; i < m_children.size(); i++)
  {
    if (i > 0)
    {
      out << ",";
    }
    out << m_children[i]->string();
  }
  out << ")";

  return out.str();
}
}

42
lib/Node.hpp Normal file
View File

@ -0,0 +1,42 @@
#ifndef wg_NODE_HPP
#define wg_NODE_HPP
#include "commons.hpp"
#include "Loc.hpp"
// List of every node type. G is a generator macro applied to each entry;
// add new node types here.
#define NODE_TYPES(G) \
G(NODE_PROG), \
G(NODE_IDENT), \
G(NODE_HASH), \
G(NODE_DIR),
namespace wg
{
// Declares enum NodeType and NodeTypeStr, the matching array holding each
// enumerator's name as a string.
WG_ENUM(NodeType, NODE_TYPES);
// Tree node with a type, a textual representation, a source location and
// an ordered list of children.
class Node
{
public:
explicit Node(NodeType type, std::string const& repr, Loc const& loc);
virtual ~Node();
NodeType type() const { return m_type; }
std::string repr() const { return m_repr; }
Loc loc() const { return m_loc; }
// Number of direct children.
size_t size() const { return m_children.size(); }
// Appends `child` to the list of children.
void add_child(std::shared_ptr<Node> child);
// Returns the child at `index`; index must be lower than size().
std::shared_ptr<Node> child(size_t index) const;
// Textual rendering of the whole subtree.
std::string string() const;
private:
NodeType m_type;
std::string m_repr;
Loc m_loc;
std::vector<std::shared_ptr<Node>> m_children;
};
}
#endif

109
lib/Parser.cpp Normal file
View File

@ -0,0 +1,109 @@
#include "Parser.hpp"
namespace wg
{
// The constructor performs no work of its own.
/*explicit*/ Parser::Parser() = default;
// No explicit cleanup is performed here.
/*virtual*/ Parser::~Parser() = default;
// Parses a token list into a tree and returns its PROG root.
// The parser keeps its own copy of `tokens` and restarts from the first one.
std::shared_ptr<Node> Parser::parse(std::vector<std::shared_ptr<Node>>
                                    const& tokens)
{
  m_tokens = tokens;
  m_cursor = 0;

  auto root = parse_prog();
  return root;
}
// Location of the token under the cursor.
// Once the cursor has run past the last token there is nothing to index, so
// the location of the last token is reported instead, or a default Loc when
// there are no tokens at all.
Loc Parser::loc() const
{
  if (m_cursor < m_tokens.size())
  {
    return m_tokens[m_cursor]->loc();
  }

  if (!m_tokens.empty())
  {
    return m_tokens.back()->loc();
  }

  return Loc {};
}
// Consumes the current token, which must be of type `type`, and returns it.
//
// Throws syntax_error when the token has another type, and also when the
// input ends before the expected token (e.g. "#hello" with no expression);
// that second case used to read past the end of the token list.
std::shared_ptr<Node> Parser::consume(NodeType type)
{
  if (m_cursor >= m_tokens.size())
  {
    std::stringstream ss;
    ss << "unexpected end of input, expected '"
       << (NodeTypeStr[type] + strlen("NODE_"))
       << "'";
    // There is no current token: report at the last one if there is any.
    Loc where = m_tokens.empty() ? Loc {} : m_tokens.back()->loc();
    where.error<syntax_error>(ss);
  }

  auto current = m_tokens[m_cursor];
  if (current->type() != type)
  {
    std::stringstream ss;
    ss << "type mismatch, expected '"
       << (NodeTypeStr[type] + strlen("NODE_"))
       << "', got '"
       << (NodeTypeStr[current->type()] + strlen("NODE_"))
       << "'";
    current->loc().error<syntax_error>(ss);
  }

  return consume();
}
// Returns the token under the cursor and steps to the next one.
// Aborts through WG_ASSERT when no token is left.
std::shared_ptr<Node> Parser::consume()
{
  WG_ASSERT(m_cursor < m_tokens.size(), "cannot consume");
  return m_tokens[m_cursor++];
}
// True when the token `lookahead` positions after the cursor exists and has
// type `type`.
bool Parser::type_is(NodeType type, int lookahead)
{
  const size_t index = m_cursor + lookahead;
  return index < m_tokens.size()
    && m_tokens[index]->type() == type;
}
// Negation of type_is: also true when there is no token at that position.
bool Parser::type_isnt(NodeType type, int lookahead)
{
  const bool matches = type_is(type, lookahead);
  return matches == false;
}
// Creates a node of type `type` with an empty repr, located at the token
// currently under the cursor.
std::shared_ptr<Node> Parser::make_node(NodeType type) const
{
  const Loc where = loc();
  return std::make_shared<Node>(type, "", where);
}
// PROG ::= INSTR*
// Parses instructions until every token has been consumed and collects
// them, in order, under a PROG node.
std::shared_ptr<Node> Parser::parse_prog()
{
  auto prog = std::make_shared<Node>(NODE_PROG, "", Loc {});

  for (;;)
  {
    if (m_cursor >= m_tokens.size())
    {
      break;
    }
    prog->add_child(parse_instr());
  }

  return prog;
}
// INSTR ::= DIR
// A directive is the only kind of instruction handled here.
std::shared_ptr<Node> Parser::parse_instr()
{
  auto instr = parse_dir();
  return instr;
}
// DIR ::= hash ident EXPR
// The hash token is consumed but not kept; the DIR node gets two children,
// the identifier and then the expression.
std::shared_ptr<Node> Parser::parse_dir()
{
  auto dir = make_node(NODE_DIR);

  consume(NODE_HASH);

  auto name = consume(NODE_IDENT);
  dir->add_child(name);

  auto value = parse_expr();
  dir->add_child(value);

  return dir;
}
// EXPR ::= ident
// An expression is a single identifier token.
std::shared_ptr<Node> Parser::parse_expr()
{
  auto ident = consume(NODE_IDENT);
  return ident;
}
}

39
lib/Parser.hpp Normal file
View File

@ -0,0 +1,39 @@
#ifndef wg_PARSER_HPP
#define wg_PARSER_HPP
#include "commons.hpp"
#include "Node.hpp"
namespace wg
{
// Exception type thrown by the parser when the tokens do not match the
// grammar; WG_ERROR derives it from std::runtime_error.
WG_ERROR(syntax_error);
// Recursive-descent parser turning a token list into a tree.
// Grammar handled:
//   PROG  ::= INSTR*
//   INSTR ::= DIR
//   DIR   ::= hash ident EXPR
//   EXPR  ::= ident
class Parser
{
public:
  explicit Parser();
  virtual ~Parser();

  // Parses `tokens` and returns the PROG root; throws syntax_error when a
  // token does not have the expected type.
  std::shared_ptr<Node> parse(std::vector<std::shared_ptr<Node>>
                              const& tokens);

private:
  std::vector<std::shared_ptr<Node>> m_tokens;
  // Index of the next token to consume. Initialized here so that it never
  // holds an indeterminate value, even before the first parse().
  size_t m_cursor = 0;

  // Location of the token under the cursor.
  Loc loc() const;

  // Consume the current token, with or without a check on its type.
  std::shared_ptr<Node> consume(NodeType type);
  std::shared_ptr<Node> consume();

  // Type tests on the token `lookahead` positions after the cursor.
  bool type_is(NodeType type, int lookahead=0);
  bool type_isnt(NodeType type, int lookahead=0);

  std::shared_ptr<Node> make_node(NodeType type) const;

  // One method per grammar rule.
  std::shared_ptr<Node> parse_prog();
  std::shared_ptr<Node> parse_instr();
  std::shared_ptr<Node> parse_dir();
  std::shared_ptr<Node> parse_expr();
};
}
#endif

29
lib/commons.hpp Normal file
View File

@ -0,0 +1,29 @@
#ifndef wg_COMMONS_HPP
#define wg_COMMONS_HPP
// Generators handed to a type-list macro: the first yields the enumerator
// itself, the second yields its name as a string literal.
#define WG_GEN_ENUM(X) X
#define WG_GEN_STRING(X) #X

// Declares `enum PREFIX` from the list macro TYPES, together with
// `PREFIXStr`, an array holding the name of each enumerator at the index
// of its value.
#define WG_ENUM(PREFIX, TYPES) \
  enum PREFIX { TYPES(WG_GEN_ENUM) }; \
  constexpr char const* PREFIX ## Str [] = { TYPES(WG_GEN_STRING) }

// Declares an exception type NAME deriving from std::runtime_error and
// constructible from a message string.
#define WG_ERROR(NAME) \
  struct NAME : public std::runtime_error { \
    NAME (std::string const& what) : std::runtime_error { what } {} \
  }

// Always-on assertion: prints MSG on std::cerr and aborts when COND is
// false. The do/while wrapper makes the macro a single statement, so that
// `if (x) WG_ASSERT(...); else ...` compiles and binds as written.
#define WG_ASSERT(COND, MSG) \
  do { \
    if ( ! (COND) ) { std::cerr << MSG << std::endl; std::abort(); } \
  } while (false)
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <functional>
#include <iostream>
#include <memory>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#endif

47
meson.build Normal file
View File

@ -0,0 +1,47 @@
project('wongola',
  'cpp',
  version: '0.0.0',
  default_options: [
    'prefix=/usr',
    'warning_level=3',
    'cpp_std=c++17'
  ])

# Looked up once so the same object can be given both to the library and to
# whatever links against it.
llvm_dep = dependency('LLVM')

wongola_lib = static_library(
  'wongola',
  sources: [
    'lib/Node.cpp',
    'lib/Lexer.cpp',
    'lib/Parser.cpp',
    'lib/Compiler.cpp',
    'lib/Loc.cpp',
  ],
  dependencies: [
    llvm_dep
  ]
)

# lib/Compiler.hpp includes LLVM headers, so every user of the library needs
# LLVM's include directories and flags too: forward the dependency.
wongola_dep = declare_dependency(
  link_with: [wongola_lib],
  include_directories: ['lib'],
  dependencies: [llvm_dep]
)

# Compiler executable.
executable('wongoc',
  sources: [
    'src/main.cpp',
  ],
  dependencies: [
    wongola_dep
  ],
  install: true)

# Test executable (not installed).
executable('wongotest',
  sources: [
    'tests/main.cpp',
    'tests/Lexer.cpp',
    'tests/Parser.cpp',
  ],
  dependencies: [
    wongola_dep,
    dependency('catch2')
  ])

39
src/main.cpp Normal file
View File

@ -0,0 +1,39 @@
#include <iostream>
#include <fstream>
#include <Lexer.hpp>
#include <Parser.hpp>
#include <Compiler.hpp>
// Entry point: reads the file named by the first argument, then runs it
// through the lexer, the parser and the compiler.
//
// Exit status: 0 on success, -1 when no file was given, 1 when the file
// cannot be opened or when processing throws.
int main(int argc, char** argv)
{
  if (argc < 2)
  {
    std::cerr << "usage: " << (argc > 0 ? argv[0] : "wongoc")
              << " <source-file>" << std::endl;
    return -1;
  }

  std::string source;

  // Get Sources
  {
    std::ifstream file {argv[1]};
    if (!file)
    {
      // Without this check an unreadable file is compiled as an empty one.
      std::cerr << argv[1] << ": cannot open file" << std::endl;
      return 1;
    }

    std::string line;
    while (std::getline(file, line))
    {
      // Put back the newline getline removed, except after a last line
      // that had none.
      source += line + (file.eof() ? "" : "\n");
    }
  }

  try
  {
    // Scan Sources
    wg::Lexer lexer;
    lexer.scan(source);
    auto tokens = lexer.all();

    wg::Parser parser;
    auto ast = parser.parse(tokens);

    wg::Compiler compiler;
    compiler.compile(ast);
  }
  catch (std::exception const& err)
  {
    // Syntax errors arrive here as exceptions: report them instead of
    // letting the process terminate on an uncaught exception.
    std::cerr << err.what() << std::endl;
    return 1;
  }

  return 0;
}

36
tests/Lexer.cpp Normal file
View File

@ -0,0 +1,36 @@
#include <catch2/catch.hpp>
#include "../lib/Lexer.hpp"
// Fixture providing helpers to check a lexer one token at a time.
class LexerTest
{
public:
  explicit LexerTest() = default;
  virtual ~LexerTest() = default;

  // Requires the next token to exist and to render as `oracle`.
  void test_next(wg::Lexer& lexer, std::string const& oracle)
  {
    auto token = lexer.next();
    REQUIRE(nullptr != token);
    REQUIRE(oracle == token->string());
  }

  // Requires the lexer to have no token left.
  void test_end(wg::Lexer& lexer)
  {
    auto token = lexer.next();
    REQUIRE(nullptr == token);
  }

protected:
};
// Hash and identifier tokens are recognized whether or not whitespace
// separates them.
TEST_CASE_METHOD(LexerTest, "Lexer_")
{
  wg::Lexer lex;
  lex.scan(" # canard #canard");

  const char* const expected[] = {
    "HASH",
    "IDENT[canard]",
    "HASH",
    "IDENT[canard]",
  };

  for (const char* oracle : expected)
  {
    test_next(lex, oracle);
  }

  test_end(lex);
}

32
tests/Parser.cpp Normal file
View File

@ -0,0 +1,32 @@
#include <catch2/catch.hpp>
#include "../lib/Lexer.hpp"
#include "../lib/Parser.hpp"
class ParserTest
{
public:
explicit ParserTest() {}
virtual ~ParserTest() {}
void test_parse(std::string const& oracle,
std::string const& source)
{
wg::Lexer lex;
lex.scan(source);
auto tokens = lex.all();
wg::Parser parser;
auto node = parser.parse(tokens);
REQUIRE(oracle == node->string());
}
protected:
};
// A directive parses into a DIR node holding its name and its expression.
TEST_CASE_METHOD(ParserTest, "Parser_")
{
  const std::string source = "#hello world";
  const std::string oracle = "PROG(DIR(IDENT[hello],IDENT[world]))";

  test_parse(oracle, source);
}

2
tests/main.cpp Normal file
View File

@ -0,0 +1,2 @@
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>