ADD: parser for integers.

ADD: lexer for comments.
main
bog 2023-09-17 19:26:22 +02:00
parent 62fd1956d8
commit 04f57c631e
21 changed files with 762 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
*\#*
*~*
build
.cache

11
Makefile Normal file
View File

@ -0,0 +1,11 @@
# Thin convenience wrapper around meson.
# None of these targets produces a file named after itself, so all of them
# (including `install`, which was previously missing) are declared phony.
.PHONY: build tests install

# Configure the ./build directory and compile everything into it.
build:
	meson setup build
	meson compile -C build

# Run the test binary produced by the build.
tests: build
	build/zarn-tests

# Install only after the test-suite has been run successfully.
install: tests
	meson install -C build

3
doc/grammar.bnf Normal file
View File

@ -0,0 +1,3 @@
MODULE ::= EXPR*
EXPR ::=
int

43
meson.build Normal file
View File

@ -0,0 +1,43 @@
# Project definition: C++17, highest regular warning level, installed under /usr.
project('zarn',
'cpp',
version: '0.0.0',
default_options: [
'prefix=/usr',
'warning_level=3',
'cpp_std=c++17'
])
# <prefix>/<libdir>/zarn.
# NOTE(review): this variable is not referenced anywhere else in this build file.
extra_libdir = get_option('prefix') / get_option('libdir') / 'zarn'
# Shared library holding the lexer, the parser and their support classes.
zarn_lib = shared_library('zarn',
sources: [
'src/Module.cpp',
'src/Loc.cpp',
'src/Node.cpp',
'src/Logger.cpp',
'src/Lexer.cpp',
'src/Parser.cpp'
],
install: true)
# Dependency object used by the executables below to link against the library.
zarn_dep = declare_dependency(link_with: zarn_lib)
# Command-line front-end (installed).
executable('zarn',
sources: [
'src/main.cpp'
],
dependencies: [
zarn_dep
],
install: true)
# Catch2 test runner (built but not installed).
executable('zarn-tests',
sources: [
'tests/main.cpp',
'tests/Lexer.cpp',
'tests/Parser.cpp',
],
dependencies: [
zarn_dep,
dependency('catch2')
])

134
src/Lexer.cpp Normal file
View File

@ -0,0 +1,134 @@
#include "Lexer.hpp"
namespace zn
{
// Build a lexer that reports errors through `logger` and stamps the tokens it
// produces with a location that starts at `loc`.
//
// The cursor is explicitly zero-initialised so that calling try_next() or
// all() before scan() reads a well-defined value (the source is then empty
// and no token is produced) instead of an indeterminate one.
/*explicit*/ Lexer::Lexer(Logger& logger, Loc const& loc)
  : m_logger { logger }
  , m_cursor { 0 }
  , m_loc { loc }
{
  // Register the available scanners; try_next() asks each of them in turn.
  m_scanners.push_back(std::bind(&Lexer::scan_int, this));
}
// Nothing to release by hand: every member cleans up after itself.
/*virtual*/ Lexer::~Lexer() = default;
// Install `source` as the text to tokenize and rewind the cursor to its
// first character. The current location (m_loc) is left untouched.
void Lexer::scan(std::string const& source)
{
  m_cursor = 0;
  m_source = source;
}
// Produce the next token of the current source.
//
// Leading whitespace and ';' line comments are skipped first. Every
// registered scanner is then tried at the cursor and the match that reaches
// the furthest position wins.
//
// Returns the token node, or nullptr once the input is exhausted.
// Text that no scanner recognises is reported through the logger as a
// lex_error (Logger::log throws), quoting the offending run of characters.
std::shared_ptr<Node> Lexer::try_next()
{
  std::optional<ScanInfo> info;

  skip_spaces();

  // A ';' starts a comment that runs up to (not including) the end of line.
  while (m_cursor < m_source.size()
         && m_source[m_cursor] == ';')
  {
    while (m_cursor < m_source.size()
           && m_source[m_cursor] != '\n')
    {
      m_cursor++;
    }

    skip_spaces();
  }

  // Longest match: keep the scanner result with the greatest end cursor.
  for (auto const& scanner: m_scanners)
  {
    auto myinfo = scanner();

    if (myinfo &&
        (info == std::nullopt
         || info->cursor < myinfo->cursor))
    {
      info = myinfo;
    }
  }

  if (info == std::nullopt
      && m_cursor < m_source.size())
  {
    std::string tok;

    // The <cctype> classifiers require a value representable as
    // unsigned char; a plain (possibly negative) char such as a UTF-8
    // byte would be undefined behaviour, hence the cast.
    while (m_cursor < m_source.size()
           && !std::isspace(static_cast<unsigned char>(m_source[m_cursor])))
    {
      tok += m_source[m_cursor];
      m_cursor++;
    }

    m_logger.log<lex_error>(LOG_ERROR, m_loc,
                            "unexpected token '" + tok + "'");
  }

  if (info)
  {
    m_cursor = info->cursor;
    return std::make_shared<Node>(info->type, info->repr, m_loc);
  }

  return nullptr;
}
std::vector<std::shared_ptr<Node>> Lexer::all()
{
std::vector<std::shared_ptr<Node>> result;
std::shared_ptr<Node> n;
while ( (n = try_next()) )
{
result.push_back(n);
}
return result;
}
// Advance the cursor past any run of whitespace, bumping the line number of
// the current location for every newline crossed.
void Lexer::skip_spaces()
{
  // std::isspace is only defined for values representable as unsigned char
  // (or EOF); cast so that bytes >= 0x80 stored in a signed char are safe.
  while (m_cursor < m_source.size()
         && std::isspace(static_cast<unsigned char>(m_source[m_cursor])))
  {
    if (m_source[m_cursor] == '\n')
    {
      m_loc = Loc {m_loc.file_path(), m_loc.line() + 1};
    }

    m_cursor++;
  }
}
// Try to recognise an integer literal (optional '-' followed by at least one
// decimal digit) at the current cursor, without moving the lexer's cursor.
//
// Returns the end position, NODE_INT and the literal's text on success, or
// std::nullopt when there is no digit (including a lone '-').
std::optional<ScanInfo> Lexer::scan_int()
{
  size_t cursor = m_cursor;
  std::string repr;

  if (cursor < m_source.size()
      && m_source[cursor] == '-')
  {
    repr += '-';
    cursor++;
  }

  // std::isdigit must not be handed a negative char value (undefined
  // behaviour), so the character is converted to unsigned char first.
  while (cursor < m_source.size()
         && std::isdigit(static_cast<unsigned char>(m_source[cursor])))
  {
    repr += m_source[cursor];
    cursor++;
  }

  // Nothing consumed, or only the sign: not an integer.
  if (repr.empty() || repr.back() == '-')
  {
    return std::nullopt;
  }

  return ScanInfo {
    cursor,
    NODE_INT,
    repr
  };
}
}

44
src/Lexer.hpp Normal file
View File

@ -0,0 +1,44 @@
#ifndef zn_LEXER_HPP
#define zn_LEXER_HPP
#include "common.hpp"
#include "Logger.hpp"
#include "Node.hpp"
#include "Loc.hpp"
namespace zn
{
// Exception type used by the lexer to report text it cannot tokenize.
ZN_ERROR(lex_error);
// Result of a successful scanner attempt.
struct ScanInfo {
// Cursor position just past the recognised text.
size_t cursor;
// Type of the node to create for the token.
NodeType type;
// Text of the token.
std::string repr;
};
// A scanner looks at the input and returns a ScanInfo on a match,
// std::nullopt otherwise.
using scanner_t = std::function<std::optional<ScanInfo>()>;
// Splits a source string into token nodes (currently integers), skipping
// whitespace and ';' line comments.
class Lexer
{
public:
// `logger` receives lexing errors; `loc` is the starting location attached
// to the tokens produced.
explicit Lexer(Logger& logger, Loc const& loc);
virtual ~Lexer();
// Set the text to tokenize and rewind the cursor to its beginning.
void scan(std::string const& source);
// Return the next token, or nullptr when the input is exhausted.
// Unrecognised text is reported through the logger as a lex_error.
std::shared_ptr<Node> try_next();
// Collect every remaining token by calling try_next() until it returns nullptr.
std::vector<std::shared_ptr<Node>> all();
private:
Logger& m_logger;
// Text being tokenized.
std::string m_source;
// Index in m_source of the next unread character; reset to 0 by scan().
size_t m_cursor;
// Token recognisers tried by try_next(); the furthest-reaching match wins.
std::vector<scanner_t> m_scanners;
// Current location; its line number is advanced by skip_spaces().
Loc m_loc;
// Skip whitespace at the cursor, counting newlines.
void skip_spaces();
// Recognise an optionally negative decimal integer at the cursor.
std::optional<ScanInfo> scan_int();
};
}
#endif

14
src/Loc.cpp Normal file
View File

@ -0,0 +1,14 @@
#include "Loc.hpp"
namespace zn
{
// Record a position in a source file: the file's path and a line number.
/*explicit*/ Loc::Loc(std::filesystem::path file_path, int line)
  : m_file_path(file_path)
  , m_line(line)
{
}
// No manual cleanup required.
/*virtual*/ Loc::~Loc() = default;
}

24
src/Loc.hpp Normal file
View File

@ -0,0 +1,24 @@
#ifndef zn_LOC_HPP
#define zn_LOC_HPP
#include "common.hpp"
namespace zn
{
// A location in a source file: a file path plus a line number.
class Loc
{
public:
// `line` defaults to 1 when omitted.
explicit Loc(std::filesystem::path file_path, int line=1);
virtual ~Loc();
// Path of the file this location refers to (returned by value).
std::filesystem::path file_path() const { return m_file_path; }
// Line number of this location.
int line() const { return m_line; }
private:
std::filesystem::path m_file_path;
int m_line;
};
}
#endif

12
src/Logger.cpp Normal file
View File

@ -0,0 +1,12 @@
#include "Logger.hpp"
namespace zn
{
// The logger holds no state, so construction has nothing to do.
/*explicit*/ Logger::Logger() = default;
// Nothing to release.
/*virtual*/ Logger::~Logger() = default;
}

44
src/Logger.hpp Normal file
View File

@ -0,0 +1,44 @@
#ifndef zn_LOGGER_HPP
#define zn_LOGGER_HPP
#include "common.hpp"
#include "Loc.hpp"
// List of log categories. `G` is applied to each entry so the same list can
// generate both the enumerators and their names (see ZN_MK_ENUM).
#define LOG_CATEGORIES(G) \
G(LOG_ERROR), \
G(LOG_WARNING)
namespace zn
{
// Declares enum LogCat and the matching name table LogCatStr.
ZN_MK_ENUM(LogCat, LOG_CATEGORIES);
// Reports diagnostics by throwing an exception that carries the formatted message.
class Logger
{
public:
explicit Logger();
virtual ~Logger();
// Format "<file>:<line> <CATEGORY> <what>" and throw it as an exception of
// type T (T must be constructible from a std::string).
template <typename T>
void log(LogCat category,
Loc const& loc,
std::string const& what);
private:
};
// Build the diagnostic "<file>:<line> <CATEGORY> <what>" and throw it as a T.
// The category name is printed without its "LOG_" prefix.
template <typename T>
void Logger::log(LogCat category,
                 Loc const& loc,
                 std::string const& what)
{
  // Skip the "LOG_" prefix of the enumerator's name.
  char const* label = LogCatStr[category] + strlen("LOG_");

  std::stringstream message;
  message << loc.file_path().string() << ":" << loc.line()
          << " " << label
          << " " << what;

  throw T {message.str()};
}
}
#endif

44
src/Module.cpp Normal file
View File

@ -0,0 +1,44 @@
#include "Module.hpp"
#include "Lexer.hpp"
#include "Parser.hpp"
namespace zn
{
// Keep a reference to the logger used to report loading errors.
/*explicit*/ Module::Module(Logger& logger)
  : m_logger(logger)
{
}
// Nothing to release by hand.
/*virtual*/ Module::~Module() = default;
// Read the file at `file_path`, lex and parse its content, then print the
// resulting tree on standard output.
//
// A file that cannot be opened is reported through the logger as a
// module_error (Logger::log throws).
void Module::load_from_file(std::filesystem::path file_path)
{
  std::ifstream input { file_path };

  if (!input)
  {
    m_logger.log<module_error>(LOG_ERROR,
                               Loc {file_path},
                               "cannot load module '"
                               + file_path.string() + "'");
  }

  // Rebuild the source line by line; a newline is appended after every
  // line except one that ended at end-of-file.
  m_source = "";
  std::string current;

  while (std::getline(input, current))
  {
    m_source += current;

    if (!input.eof())
    {
      m_source += "\n";
    }
  }

  Lexer lexer { m_logger, Loc {file_path} };
  lexer.scan(m_source);
  auto tokens = lexer.all();

  Parser parser { m_logger };
  auto tree = parser.parse(tokens);

  std::cout << tree->string() << std::endl;
}
}

26
src/Module.hpp Normal file
View File

@ -0,0 +1,26 @@
#ifndef zn_MODULE_HPP
#define zn_MODULE_HPP
#include "common.hpp"
#include "Logger.hpp"
namespace zn
{
// Exception type used to report module loading failures.
ZN_ERROR(module_error);
// A unit of source code loaded from a file.
class Module
{
public:
// `logger` is used to report errors.
explicit Module(Logger& logger);
virtual ~Module();
// Load, lex and parse the given file, then print its syntax tree on
// standard output.
void load_from_file(std::filesystem::path file_path);
private:
Logger& m_logger;
// Text of the most recently loaded file.
std::string m_source;
};
}
#endif

56
src/Node.cpp Normal file
View File

@ -0,0 +1,56 @@
#include "Node.hpp"
namespace zn
{
// Create a childless node of the given type, with its textual
// representation and the source location it comes from.
/*explicit*/ Node::Node(NodeType type,
                        std::string const& repr,
                        Loc const& loc)
  : m_type(type)
  , m_repr(repr)
  , m_loc(loc)
{
}
// Children are held by shared_ptr and released automatically.
/*virtual*/ Node::~Node() = default;
// Append `child` after the existing children. A null child is a programming
// error and is caught by the assertion.
void Node::add_child(std::shared_ptr<Node> child)
{
  assert(child);

  m_children.emplace_back(child);
}
// Return the child stored at position `index`, which must be lower than
// size() (asserted, then checked again by vector::at).
std::shared_ptr<Node> Node::child_at(size_t index) const
{
  assert(index < size());

  std::shared_ptr<Node> const& child = m_children.at(index);
  return child;
}
// Render the node as text: the type name without its "NODE_" prefix,
// followed by "[repr]" when the representation is not empty and by
// "(child,child,...)" when the node has children.
std::string Node::string() const
{
  std::string text = NodeTypeStr[m_type] + strlen("NODE_");

  if (!m_repr.empty())
  {
    text += "[";
    text += m_repr;
    text += "]";
  }

  if (size() > 0)
  {
    text += "(";

    for (size_t i = 0; i < m_children.size(); i++)
    {
      // Comma between children, none before the first one.
      if (i > 0)
      {
        text += ",";
      }

      text += m_children[i]->string();
    }

    text += ")";
  }

  return text;
}
}

41
src/Node.hpp Normal file
View File

@ -0,0 +1,41 @@
#ifndef zn_NODE_HPP
#define zn_NODE_HPP
#include "common.hpp"
#include "Loc.hpp"
// List of node types. `G` is applied to each entry so the same list can
// generate both the enumerators and their names (see ZN_MK_ENUM).
#define NODE_TYPES(G) \
G(NODE_MODULE), \
G(NODE_INT)
namespace zn
{
// Declares enum NodeType and the matching name table NodeTypeStr.
ZN_MK_ENUM(NodeType, NODE_TYPES);
// Tree node used both for the lexer's tokens and for the parser's syntax tree.
class Node
{
public:
// `repr` is the node's text (may be empty); `loc` is where it comes from.
explicit Node(NodeType type,
std::string const& repr,
Loc const& loc);
virtual ~Node();
NodeType type() const { return m_type; }
std::string repr() const { return m_repr; }
Loc loc() const { return m_loc; }
// Number of direct children.
size_t size() const { return m_children.size(); }
// Append a non-null child.
void add_child(std::shared_ptr<Node> child);
// Child at position `index`; `index` must be lower than size().
std::shared_ptr<Node> child_at(size_t index) const;
// Textual form: TYPE, then "[repr]" if any, then "(children)" if any.
std::string string() const;
private:
NodeType m_type;
std::string m_repr;
Loc m_loc;
std::vector<std::shared_ptr<Node>> m_children;
};
}
#endif

107
src/Parser.cpp Normal file
View File

@ -0,0 +1,107 @@
#include "Parser.hpp"
#include "src/Logger.hpp"
#include "src/Node.hpp"
namespace zn
{
// Keep a reference to the logger used to report syntax errors.
/*explicit*/ Parser::Parser(Logger& logger)
  : m_logger(logger)
{
}
// Nothing to release by hand.
/*virtual*/ Parser::~Parser() = default;
// Parse `tokens` from the first one and return the resulting module node.
std::shared_ptr<Node>
Parser::parse(std::vector<std::shared_ptr<Node>> tokens)
{
  m_cursor = 0;
  m_tokens = tokens;

  std::shared_ptr<Node> root = parse_module();
  return root;
}
// Create an empty node of the given type located at the current token.
//
// The token list may be empty (e.g. an empty or comment-only source) or the
// cursor may already be past the last token; indexing m_tokens[m_cursor]
// unconditionally would then read out of bounds. In those cases the node
// takes the location of the last token, or a location with an empty path
// when there is no token at all.
std::shared_ptr<Node> Parser::mk_node(NodeType type)
{
  if (m_tokens.empty())
  {
    return std::make_shared<Node>(type, "",
                                  Loc { std::filesystem::path {} });
  }

  size_t const index = (m_cursor < m_tokens.size())
    ? m_cursor
    : m_tokens.size() - 1;

  return std::make_shared<Node>(type, "", m_tokens[index]->loc());
}
bool Parser::type_is(std::vector<NodeType> types) const
{
if (types.size() + m_cursor > m_tokens.size())
{
return false;
}
for (size_t i=0; i<types.size(); i++)
{
if (types[i] != m_tokens[m_cursor + i]->type())
{
return false;
}
}
return true;
}
// Single-type convenience overload: is the current token of type `type`?
bool Parser::type_is(NodeType type) const
{
  std::vector<NodeType> const single(1, type);
  return type_is(single);
}
// Consume and return the current token, which must be of type `type`.
//
// A mismatch is reported through the logger as a syntax_error
// (Logger::log throws). Running out of tokens is reported the same way
// instead of reading m_tokens[m_cursor] past the end of the vector.
std::shared_ptr<Node> Parser::consume(NodeType type)
{
  if (!type_is(type))
  {
    std::stringstream ss;

    if (m_cursor >= m_tokens.size())
    {
      // No current token: report at the last known location, if any.
      ss << "expected '"
         << (NodeTypeStr[type] + strlen("NODE_"))
         << "', got end of input";

      Loc const where = m_tokens.empty()
        ? Loc { std::filesystem::path {} }
        : m_tokens.back()->loc();

      m_logger.log<syntax_error>(LOG_ERROR, where, ss.str());
    }

    ss << "expected '"
       << (NodeTypeStr[type] + strlen("NODE_"))
       << "', got '"
       << (NodeTypeStr[m_tokens[m_cursor]->type()] + strlen("NODE_"))
       << "'";

    m_logger.log<syntax_error>(LOG_ERROR,
                               m_tokens[m_cursor]->loc(),
                               ss.str());
  }

  return consume();
}
// Return the current token and move the cursor to the next one.
// The caller is responsible for making sure a current token exists.
std::shared_ptr<Node> Parser::consume()
{
  std::shared_ptr<Node> current = m_tokens[m_cursor];
  m_cursor++;

  return current;
}
std::shared_ptr<Node> Parser::parse_module()
{
auto node = mk_node(NODE_MODULE);
while (m_cursor < m_tokens.size())
{
node->add_child(parse_expr());
}
return node;
}
// EXPR ::= int
// Consume and return the current token when it is an integer; anything else
// is reported through the logger as a syntax_error.
std::shared_ptr<Node> Parser::parse_expr()
{
  if (!type_is(NODE_INT))
  {
    std::stringstream ss;
    ss << "unknown expression '"
       << m_tokens[m_cursor]->string()
       << "'";

    m_logger.log<syntax_error>(LOG_ERROR,
                               m_tokens[m_cursor]->loc(),
                               ss.str());

    // Logger::log throws; this only guards against falling through.
    abort();
  }

  return consume();
}
}

38
src/Parser.hpp Normal file
View File

@ -0,0 +1,38 @@
#ifndef zn_PARSER_HPP
#define zn_PARSER_HPP
#include "common.hpp"
#include "Logger.hpp"
#include "Node.hpp"
namespace zn
{
// Exception type used by the parser to report invalid token sequences.
ZN_ERROR(syntax_error);
// Builds a syntax tree from the token list produced by the lexer.
class Parser
{
public:
// `logger` is used to report syntax errors.
explicit Parser(Logger& logger);
virtual ~Parser();
// Parse `tokens` and return the root MODULE node.
std::shared_ptr<Node>
parse(std::vector<std::shared_ptr<Node>> tokens);
private:
Logger& m_logger;
// Tokens being parsed (copied by parse()).
std::vector<std::shared_ptr<Node>> m_tokens;
// Index of the current token in m_tokens; reset to 0 by parse().
size_t m_cursor;
// Create an empty node of type `type` located at the current token.
std::shared_ptr<Node> mk_node(NodeType type);
// Do the tokens starting at the cursor have exactly these types?
bool type_is(std::vector<NodeType> types) const;
// Is the current token of this type?
bool type_is(NodeType type) const;
// Consume the current token, reporting a syntax_error if its type differs.
std::shared_ptr<Node> consume(NodeType type);
// Consume the current token unconditionally.
std::shared_ptr<Node> consume();
// MODULE ::= EXPR*
std::shared_ptr<Node> parse_module();
// EXPR ::= int
std::shared_ptr<Node> parse_expr();
};
}
#endif

30
src/common.hpp Normal file
View File

@ -0,0 +1,30 @@
#ifndef zn_COMMON_HPP
#define zn_COMMON_HPP
#include <stdexcept>
// Generator yielding the enumerator itself.
#define ZN_GEN_ENUM(X) X
// Generator yielding the enumerator's name as a string literal.
#define ZN_GEN_STRING(X) #X
// Declare `enum PREFIX` from the list macro ENUM, together with a parallel
// array `PREFIXStr` holding each enumerator's name, indexable by enumerator.
#define ZN_MK_ENUM(PREFIX, ENUM) \
enum PREFIX { ENUM(ZN_GEN_ENUM) }; \
constexpr char const* PREFIX ## Str [] = { ENUM(ZN_GEN_STRING) }
// Declare an exception type NAME deriving from std::runtime_error and
// constructible from a message string.
#define ZN_ERROR(NAME) \
struct NAME : public std::runtime_error { \
NAME (std::string const& what) : std::runtime_error(what) {} \
}
#include <cassert>
#include <fstream>
#include <iostream>
#include <functional>
#include <unordered_map>
#include <optional>
#include <vector>
#include <memory>
#include <string>
#include <cstring>
#include <filesystem>
#endif

16
src/main.cpp Normal file
View File

@ -0,0 +1,16 @@
#include <iostream>
#include "Module.hpp"
// Entry point: load the module whose path is given as first argument.
//
// Every diagnostic is delivered as an exception (Logger::log throws), so
// errors are caught here, printed on standard error and turned into a
// non-zero exit status instead of terminating the process abnormally.
// Without an argument the program does nothing and succeeds.
int main(int argc, char** argv)
{
  if (argc > 1)
  {
    try
    {
      zn::Logger logger;
      zn::Module mod { logger };

      mod.load_from_file(argv[1]);
    }
    catch (std::exception const& error)
    {
      std::cerr << error.what() << std::endl;
      return 1;
    }
  }

  return 0;
}

38
tests/Lexer.cpp Normal file
View File

@ -0,0 +1,38 @@
#include <catch2/catch.hpp>
#include "../src/Lexer.hpp"
// Fixture shared by the lexer test cases: provides a ready-to-use lexer.
class LexerTest
{
public:
explicit LexerTest() {}
virtual ~LexerTest() {}
// Fetch the next token from `lexer` and require that it exists and that its
// textual form equals `oracle`.
void test_next(zn::Lexer& lexer, std::string const& oracle)
{
auto node = lexer.try_next();
INFO("expected " << oracle << " got nullptr");
REQUIRE(nullptr != node);
REQUIRE(oracle == node->string());
}
protected:
zn::Logger m_logger;
zn::Loc m_loc {"tests/lexer"};
zn::Lexer m_lexer { m_logger, m_loc };
};
// Text that no scanner recognises must raise a lex_error.
TEST_CASE_METHOD(LexerTest, "Lexer_unknown_text")
{
m_lexer.scan(" §§§ ");
REQUIRE_THROWS_AS(m_lexer.try_next(), zn::lex_error);
}
// Positive, negative and multi-digit integers are tokenized in order, and
// the lexer returns nullptr once the input is exhausted.
TEST_CASE_METHOD(LexerTest, "Lexer_int")
{
m_lexer.scan(" 3 -2 167 ");
test_next(m_lexer, "INT[3]");
test_next(m_lexer, "INT[-2]");
test_next(m_lexer, "INT[167]");
REQUIRE(nullptr == m_lexer.try_next());
}

31
tests/Parser.cpp Normal file
View File

@ -0,0 +1,31 @@
#include <catch2/catch.hpp>
#include "../src/Parser.hpp"
#include "../src/Lexer.hpp"
// Fixture shared by the parser test cases.
class ParserTest
{
public:
explicit ParserTest() {}
virtual ~ParserTest() {}
// Lex and parse `source`, then require that the textual form of the
// resulting tree equals `oracle`.
void test_parse(std::string const& oracle,
std::string const& source)
{
zn::Logger logger;
zn::Loc loc {"tests/parser"};
zn::Lexer lexer { logger, loc };
lexer.scan(source);
std::vector<std::shared_ptr<zn::Node>> tokens = lexer.all();
zn::Parser parser {logger};
auto node = parser.parse(tokens);
REQUIRE(oracle == node->string());
}
protected:
};
// A single integer parses to a MODULE node with one INT child.
TEST_CASE_METHOD(ParserTest, "Parser_int")
{
test_parse("MODULE(INT[37])", " 37");
}

2
tests/main.cpp Normal file
View File

@ -0,0 +1,2 @@
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>