From 149f866172dba9e39f53aa90ae1a3d37654dac4f Mon Sep 17 00:00:00 2001 From: bog Date: Sat, 9 Sep 2023 15:09:43 +0200 Subject: [PATCH] ADD: int literal. --- lib/Factory.cpp | 26 ++++++++++ lib/Factory.hpp | 26 ++++++++++ lib/Lexer.cpp | 130 +++++++++++++++++++++++++++++++++++++++++++++++ lib/Lexer.hpp | 45 ++++++++++++++++ lib/Loc.cpp | 15 ++++++ lib/Loc.hpp | 25 +++++++++ lib/Logger.cpp | 12 +++++ lib/Logger.hpp | 39 ++++++++++++++ lib/Node.cpp | 57 +++++++++++++++++++++ lib/Node.hpp | 39 ++++++++++++++ lib/Parser.cpp | 19 +++++++ lib/Parser.hpp | 24 +++++++++ lib/Type.cpp | 13 +++++ lib/Type.hpp | 26 ++++++++++ lib/Value.cpp | 25 +++++++++ lib/Value.hpp | 29 +++++++++++ lib/commons.hpp | 12 +++++ lib/mutils.hpp | 14 +++++ meson.build | 16 +++++- src/main.cpp | 53 ++++++++++++++++++- tests/Lexer.cpp | 52 +++++++++++++++++++ tests/Parser.cpp | 17 +++++++ 22 files changed, 712 insertions(+), 2 deletions(-) create mode 100644 lib/Factory.cpp create mode 100644 lib/Factory.hpp create mode 100644 lib/Lexer.cpp create mode 100644 lib/Lexer.hpp create mode 100644 lib/Loc.cpp create mode 100644 lib/Loc.hpp create mode 100644 lib/Logger.cpp create mode 100644 lib/Logger.hpp create mode 100644 lib/Node.cpp create mode 100644 lib/Node.hpp create mode 100644 lib/Parser.cpp create mode 100644 lib/Parser.hpp create mode 100644 lib/Type.cpp create mode 100644 lib/Type.hpp create mode 100644 lib/Value.cpp create mode 100644 lib/Value.hpp create mode 100644 tests/Lexer.cpp create mode 100644 tests/Parser.cpp diff --git a/lib/Factory.cpp b/lib/Factory.cpp new file mode 100644 index 0000000..765aa99 --- /dev/null +++ b/lib/Factory.cpp @@ -0,0 +1,26 @@ +#include "Factory.hpp" + +namespace jk +{ + /*explicit*/ Factory::Factory(Logger& logger, std::filesystem::path path) + : m_logger { logger } + , m_path { path } + { + } + + /*virtual*/ Factory::~Factory() + { + } + + std::shared_ptr Factory::make_lexer() + { + Loc loc {m_path, 1, 0}; + return std::make_shared(m_logger, loc); + } + + std::shared_ptr Factory::make_parser() + { + auto lexer = make_lexer(); + return std::make_shared(m_logger, lexer); + } +} diff --git a/lib/Factory.hpp b/lib/Factory.hpp new file mode 100644 index 0000000..5b79633 --- /dev/null +++ b/lib/Factory.hpp @@ -0,0 +1,26 @@ +#ifndef jk_FACTORY_HPP +#define jk_FACTORY_HPP + +#include "Lexer.hpp" +#include "Parser.hpp" +#include "Logger.hpp" +#include "commons.hpp" + +namespace jk +{ + class Factory + { + public: + explicit Factory(Logger& logger, std::filesystem::path path); + virtual ~Factory(); + + std::shared_ptr make_lexer(); + std::shared_ptr make_parser(); + + private: + Logger& m_logger; + std::filesystem::path m_path; + }; +} + +#endif diff --git a/lib/Lexer.cpp b/lib/Lexer.cpp new file mode 100644 index 0000000..0262818 --- /dev/null +++ b/lib/Lexer.cpp @@ -0,0 +1,130 @@ +#include "Lexer.hpp" + +namespace jk +{ + /*explicit*/ Lexer::Lexer(Logger& logger, Loc const& loc) + : m_logger { logger } + , m_loc { loc } + { + m_scanners.push_back(std::bind(&Lexer::scan_int, this)); + } + + /*virtual*/ Lexer::~Lexer() + { + } + + void Lexer::scan(std::string const& source) + { + m_source = source; + m_cursor = 0; + } + + std::shared_ptr Lexer::next() + { + skip_spaces(); + + while (more(m_cursor) + && current(m_cursor) == '#') + { + while (more(m_cursor) + && current(m_cursor) != '\n') + { + m_cursor++; + } + + skip_spaces(); + } + + std::optional info; + + for (auto scanner: m_scanners) + { + auto my_info = scanner(); + + if ((!info && my_info) + || (info && my_info + && my_info->cursor > info->cursor)) + { + info = my_info; + } + } + + if (info) + { + m_cursor = info->cursor; + return std::make_shared(info->type, info->repr, m_loc); + } + + if (more(m_cursor)) + { + std::string text; + + while (more(m_cursor) + && !std::isspace(current(m_cursor))) + { + text += current(m_cursor); + m_cursor++; + } + + std::stringstream ss; + ss << "unknown text '" << text << "'"; + m_logger.log(LOG_ERROR, m_loc, ss.str()); + } + + return nullptr; + } + + bool Lexer::more(size_t index) const + { + return index < m_source.size(); + } + + char Lexer::current(size_t index) const + { + assert(more(index)); + + return m_source[index]; + } + + void Lexer::skip_spaces() + { + while (more(m_cursor) + && std::isspace(current(m_cursor))) + { + if (current(m_cursor) == '\n') + { + m_loc = Loc { + m_loc.path(), + m_loc.line() + 1, + m_loc.column() + }; + } + + m_cursor++; + } + } + + std::optional Lexer::scan_int() const + { + size_t cursor = m_cursor; + std::string repr; + + while (more(cursor) + && std::isdigit(current(cursor))) + { + repr += current(cursor); + cursor++; + } + + if (repr.empty() == false) + { + return ScanInfo { + cursor, + NODE_INT, + repr + }; + } + + return std::nullopt; + } +} diff --git a/lib/Lexer.hpp b/lib/Lexer.hpp new file mode 100644 index 0000000..14cf035 --- /dev/null +++ b/lib/Lexer.hpp @@ -0,0 +1,45 @@ +#ifndef jk_LEXER_HPP +#define jk_LEXER_HPP + +#include "commons.hpp" +#include "Logger.hpp" +#include "Node.hpp" + +namespace jk +{ + JK_ERROR(lexical_error); + + struct ScanInfo { + size_t cursor; + NodeType type; + std::string repr; + }; + + using scanner_t = std::function()>; + + class Lexer + { + public: + explicit Lexer(Logger& logger, Loc const& loc); + virtual ~Lexer(); + + void scan(std::string const& source); + std::shared_ptr next(); + + private: + Logger& m_logger; + Loc m_loc; + size_t m_cursor; + std::string m_source; + std::vector m_scanners; + + bool more(size_t index) const; + char current(size_t index) const; + + void skip_spaces(); + + std::optional scan_int() const; + }; +} + +#endif diff --git a/lib/Loc.cpp b/lib/Loc.cpp new file mode 100644 index 0000000..1f37928 --- /dev/null +++ b/lib/Loc.cpp @@ -0,0 +1,15 @@ +#include "Loc.hpp" + +namespace jk +{ + /*explicit*/ Loc::Loc(std::filesystem::path path, int line, int column) + : m_path { path } + , m_line { line } + , m_column { column } + { + } + + /*virtual*/ Loc::~Loc() + { + } +} diff --git a/lib/Loc.hpp b/lib/Loc.hpp new file mode 100644 index 0000000..088b603 --- /dev/null +++ b/lib/Loc.hpp @@ -0,0 +1,25 @@ +#ifndef jk_LOC_HPP +#define jk_LOC_HPP + +#include "commons.hpp" + +namespace jk +{ + class Loc + { + public: + explicit Loc(std::filesystem::path path, int line, int column); + virtual ~Loc(); + + std::filesystem::path path() const { return m_path; } + int line() const { return m_line; } + int column() const { return m_column; } + + private: + std::filesystem::path m_path; + int m_line; + int m_column; + }; +} + +#endif diff --git a/lib/Logger.cpp b/lib/Logger.cpp new file mode 100644 index 0000000..db87e1a --- /dev/null +++ b/lib/Logger.cpp @@ -0,0 +1,12 @@ +#include "Logger.hpp" + +namespace jk +{ + /*explicit*/ Logger::Logger() + { + } + + /*virtual*/ Logger::~Logger() + { + } +} diff --git a/lib/Logger.hpp b/lib/Logger.hpp new file mode 100644 index 0000000..ab585d2 --- /dev/null +++ b/lib/Logger.hpp @@ -0,0 +1,39 @@ +#ifndef jk_LOGGER_HPP +#define jk_LOGGER_HPP + +#include "commons.hpp" +#include "Loc.hpp" + +#define LOG_TYPE(G) \ + G(LOG_ERROR) + +namespace jk +{ + JK_ENUM(Log, LOG_TYPE); + + class Logger + { + public: + explicit Logger(); + virtual ~Logger(); + + template + void log(LogType type, Loc const& loc, std::string const& what); + + private: + }; + + template + void Logger::log(LogType type, Loc const& loc, std::string const& what) + { + std::stringstream ss; + ss << loc.path().string() << ":" << loc.line(); + ss << " " << (std::string(LogTypeStr[type]) + .substr(std::string("LOG_").size())); + ss << " " << what; + + throw T { ss.str() }; + } +} + +#endif diff --git a/lib/Node.cpp b/lib/Node.cpp new file mode 100644 index 0000000..b08b666 --- /dev/null +++ b/lib/Node.cpp @@ -0,0 +1,57 @@ +#include "Node.hpp" + +namespace jk +{ + /*explicit*/ Node::Node(NodeType type, + std::string const& repr, + Loc const& loc) + : m_type { type } + , m_repr { repr } + , m_loc { loc } + { + } + + /*virtual*/ Node::~Node() + { + } + + void Node::add_child(std::shared_ptr child) + { + m_children.push_back(child); + } + + std::weak_ptr Node::child(size_t index) const + { + assert(index < size()); + return m_children[index]; + } + + std::string Node::string() const + { + std::stringstream ss; + ss << std::string(NodeTypeStr[m_type]) + .substr(std::string("NODE_").size()); + + if (m_repr.empty() == false) + { + ss << "[" << m_repr << "]"; + } + + if (size() > 0) + { + ss << "("; + + std::string sep; + + for (auto child: m_children) + { + ss << sep << child->string(); + sep = ","; + } + + ss << ")"; + } + + return ss.str(); + } +} diff --git a/lib/Node.hpp b/lib/Node.hpp new file mode 100644 index 0000000..97da022 --- /dev/null +++ b/lib/Node.hpp @@ -0,0 +1,39 @@ +#ifndef jk_NODE_HPP +#define jk_NODE_HPP + +#include "commons.hpp" +#include "Loc.hpp" + +#define NODE_TYPE(G) \ + G(NODE_PROG), \ + G(NODE_INT) + +namespace jk +{ + JK_ENUM(Node, NODE_TYPE); + + class Node + { + public: + explicit Node(NodeType type, std::string const& repr, Loc const& loc); + virtual ~Node(); + + NodeType type() const { return m_type; } + std::string repr() const { return m_repr; } + Loc const& loc() const { return m_loc; } + size_t size() const { return m_children.size(); } + + void add_child(std::shared_ptr child); + std::weak_ptr child(size_t index) const; + + std::string string() const; + + private: + NodeType m_type; + std::string m_repr; + Loc m_loc; + std::vector> m_children; + }; +} + +#endif diff --git a/lib/Parser.cpp b/lib/Parser.cpp new file mode 100644 index 0000000..2a67b3b --- /dev/null +++ b/lib/Parser.cpp @@ -0,0 +1,19 @@ +#include "Parser.hpp" + +namespace jk +{ + /*explicit*/ Parser::Parser(Logger& logger, std::shared_ptr lexer) + : m_logger { logger } + , m_lexer { lexer } + { + } + + /*virtual*/ Parser::~Parser() + { + } + + std::shared_ptr Parser::parse(std::string const&) + { + return nullptr; + } +} diff --git a/lib/Parser.hpp b/lib/Parser.hpp new file mode 100644 index 0000000..d316f24 --- /dev/null +++ b/lib/Parser.hpp @@ -0,0 +1,24 @@ +#ifndef jk_PARSER_HPP +#define jk_PARSER_HPP + +#include "commons.hpp" +#include "Logger.hpp" +#include "Lexer.hpp" + +namespace jk +{ + class Parser + { + public: + explicit Parser(Logger& logger, std::shared_ptr lexer); + virtual ~Parser(); + + std::shared_ptr parse(std::string const& source); + + private: + Logger& m_logger; + std::shared_ptr m_lexer; + }; +} + +#endif diff --git a/lib/Type.cpp b/lib/Type.cpp new file mode 100644 index 0000000..2665c69 --- /dev/null +++ b/lib/Type.cpp @@ -0,0 +1,13 @@ +#include "Type.hpp" + +namespace jk +{ + /*explicit*/ Type::Type(TypeType type) + : m_type { type } + { + } + + /*virtual*/ Type::~Type() + { + } +} diff --git a/lib/Type.hpp b/lib/Type.hpp new file mode 100644 index 0000000..3f49eb5 --- /dev/null +++ b/lib/Type.hpp @@ -0,0 +1,26 @@ +#ifndef jk_TYPE_HPP +#define jk_TYPE_HPP + +#include "commons.hpp" + +#define TYPE_TYPE(G) \ + G(TYPE_NIL), \ + G(TYPE_INT) + +namespace jk +{ + JK_ENUM(Type, TYPE_TYPE); + + class Type + { + public: + explicit Type(TypeType type); + virtual ~Type(); + + TypeType type() const { return m_type; } + private: + TypeType m_type; + }; +} + +#endif diff --git a/lib/Value.cpp b/lib/Value.cpp new file mode 100644 index 0000000..c3547f8 --- /dev/null +++ b/lib/Value.cpp @@ -0,0 +1,25 @@ +#include "Value.hpp" +#include "Type.hpp" + +namespace jk +{ + /*static*/ std::shared_ptr Value::make_nil() + { + auto value = std::make_shared(); + value->m_type = std::make_shared(TYPE_NIL); + return value; + } + + /*static*/ std::shared_ptr Value::make_int(int val) + { + auto value = std::make_shared(); + value->m_type = std::make_shared(TYPE_INT); + value->m_int_val = val; + return value; + } + + std::weak_ptr Value::type() const + { + return m_type; + } +} diff --git a/lib/Value.hpp b/lib/Value.hpp new file mode 100644 index 0000000..44cc922 --- /dev/null +++ b/lib/Value.hpp @@ -0,0 +1,29 @@ +#ifndef jk_VALUE_HPP +#define jk_VALUE_HPP + +#include "commons.hpp" + +namespace jk +{ + class Type; + + class Value + { + public: + static std::shared_ptr make_nil(); + static std::shared_ptr make_int(int val); + + explicit Value() = default; + virtual ~Value() = default; + + int as_int() const { return *m_int_val; } + + std::weak_ptr type() const; + + private: + std::shared_ptr m_type; + std::optional m_int_val; + }; +} + +#endif diff --git a/lib/commons.hpp b/lib/commons.hpp index caed942..69fe580 100644 --- a/lib/commons.hpp +++ b/lib/commons.hpp @@ -1,6 +1,18 @@ #ifndef jk_COMMONS_HPP #define jk_COMMONS_HPP +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include "mutils.hpp" #endif diff --git a/lib/mutils.hpp b/lib/mutils.hpp index fc4fcfd..b1e89bd 100644 --- a/lib/mutils.hpp +++ b/lib/mutils.hpp @@ -1,4 +1,18 @@ #ifndef jk_MUTILS_HPP #define jk_MUTILS_HPP +#include + +#define ENUM_ENUM(X) X +#define ENUM_STRING(X) #X + +#define JK_ENUM(PREFIX, DECL) \ + enum PREFIX ## Type { DECL(ENUM_ENUM) }; \ + constexpr char const* PREFIX ## TypeStr [] { DECL(ENUM_STRING) } + +#define JK_ERROR(NAME) \ + struct NAME : public std::runtime_error { \ + NAME (std::string const& what): std::runtime_error {what} {} \ + } + #endif diff --git a/meson.build b/meson.build index a1eeaf7..74c1b18 100644 --- a/meson.build +++ b/meson.build @@ -21,6 +21,18 @@ configure_file( joko_lib = static_library( 'joko', sources: [ + 'lib/Node.cpp', + 'lib/Loc.cpp', + + 'lib/Factory.cpp', + + 'lib/Logger.cpp', + 'lib/Lexer.cpp', + 'lib/Parser.cpp', + + 'lib/Type.cpp', + 'lib/Value.cpp', + ], dependencies: [ ]) @@ -38,7 +50,9 @@ executable('joko', executable('joko-tests', sources: [ - 'tests/main.cpp' + 'tests/main.cpp', + 'tests/Lexer.cpp', + 'tests/Parser.cpp', ], dependencies: [ joko_dep, diff --git a/src/main.cpp b/src/main.cpp index 00f1972..ea82c86 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,21 +1,26 @@ #include #include #include "config.hpp" +#include "../lib/Lexer.hpp" int main(int argc, char** argv) { int index; + bool debug_mode = false; struct option options[] = { + {"debug", no_argument, 0, 'd'}, {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'v'}, {0, 0, 0, 0} }; - int c = getopt_long(argc, argv, "hv", options, &index); + int c = getopt_long(argc, argv, "dhv", options, &index); switch (c) { + case 'd': debug_mode = true; break; + case 'h': { std::cout << "Usage: joko [OPTIONS] source_file" << std::endl; std::cout << "OPTIONS" << std::endl; @@ -37,5 +42,51 @@ int main(int argc, char** argv) } + if (optind < argc) + { + jk::Logger logger; + jk::Loc loc {argv[optind], 1, 1}; + + std::ifstream file { argv[optind] }; + + if (!file) + { + std::stringstream ss; + ss << "cannot find file '" << argv[optind] << "'"; + logger.log(jk::LOG_ERROR, loc, ss.str()); + } + + std::string source; + std::string line; + + while (std::getline(file, line)) + { + source += line + (file.eof() ? "" : "\n"); + } + + jk::Lexer lexer {logger, loc}; + lexer.scan(source); + + if (debug_mode) + { + std::cout << "--- tokens ---" << std::endl; + std::shared_ptr tok; + + std::string sep; + while ( (tok = lexer.next()) ) + { + std::cout << sep << tok->string(); + sep = " "; + } + + std::cout << std::endl; + } + + return 0; + } + + + + return 0; } diff --git a/tests/Lexer.cpp b/tests/Lexer.cpp new file mode 100644 index 0000000..de204af --- /dev/null +++ b/tests/Lexer.cpp @@ -0,0 +1,52 @@ +#include +#include "../lib/Lexer.hpp" +#include "../lib/Factory.hpp" + +class LexerTest +{ +public: + explicit LexerTest() {} + virtual ~LexerTest() {} + + void test_next(jk::Lexer& lexer, std::string const& oracle) + { + auto token = lexer.next(); + REQUIRE(token); + REQUIRE(oracle == token->string()); + } + + void test_end(jk::Lexer& lexer) + { + auto token = lexer.next(); + REQUIRE(!token); + } + +protected: + jk::Logger m_logger; +}; + +TEST_CASE_METHOD(LexerTest, "Lexer_int") +{ + auto lexer = jk::Factory(m_logger, "tests/lexer").make_lexer(); + lexer->scan("4 128 333"); + test_next(*lexer, "INT[4]"); + test_next(*lexer, "INT[128]"); + test_next(*lexer, "INT[333]"); + test_end(*lexer); +} + +TEST_CASE_METHOD(LexerTest, "Lexer_comments") +{ + auto lexer = jk::Factory(m_logger, "tests/lexer").make_lexer(); + lexer->scan("4 # 128 \n 333"); + test_next(*lexer, "INT[4]"); + test_next(*lexer, "INT[333]"); + test_end(*lexer); +} + +TEST_CASE_METHOD(LexerTest, "Lexer_error") +{ + auto lexer = jk::Factory(m_logger, "tests/lexer").make_lexer(); + lexer->scan(" ยง "); + REQUIRE_THROWS_AS(lexer->next(), jk::lexical_error); +} diff --git a/tests/Parser.cpp b/tests/Parser.cpp new file mode 100644 index 0000000..fd87b66 --- /dev/null +++ b/tests/Parser.cpp @@ -0,0 +1,17 @@ +#include +#include "../lib/Parser.hpp" +#include "../lib/Factory.hpp" + +class ParserTest +{ +public: + explicit ParserTest() {} + virtual ~ParserTest() {} + +protected: + jk::Logger m_logger; +}; + +TEST_CASE_METHOD(ParserTest, "Parser_") +{ +}