From eb95032dfa68254768e91198bf82d1e9e41b9123 Mon Sep 17 00:00:00 2001 From: bog Date: Tue, 30 Jan 2024 20:27:30 +0100 Subject: [PATCH] :sparkles: comments and line on error messages. --- lib/Compiler.cpp | 2 +- lib/Lexer.cpp | 40 ++++++++++++++++++++++++++++++++++------ lib/Lexer.hpp | 5 +++++ lib/Node.cpp | 2 ++ lib/Node.hpp | 3 +++ lib/Parser.cpp | 34 ++++++++++++++++++++++++---------- lib/Parser.hpp | 1 + lib/commons.hpp | 7 +++++++ tests/Lexer.cpp | 22 ++++++++++++++++++++++ 9 files changed, 99 insertions(+), 17 deletions(-) diff --git a/lib/Compiler.cpp b/lib/Compiler.cpp index 5514e16..f0150d7 100644 --- a/lib/Compiler.cpp +++ b/lib/Compiler.cpp @@ -98,7 +98,7 @@ namespace muz void Compiler::check_cmd_arity(Node const& node, int arity) { - if (node.size() - 1 != arity) + if (node.size() - 1 != static_cast(arity)) { throw compile_error { std::string() diff --git a/lib/Lexer.cpp b/lib/Lexer.cpp index c292e23..103a9c8 100644 --- a/lib/Lexer.cpp +++ b/lib/Lexer.cpp @@ -18,6 +18,7 @@ namespace muz { m_source = source; m_cursor = 0; + m_line = 1; } std::vector> Lexer::all() @@ -44,10 +45,18 @@ namespace muz std::shared_ptr Lexer::next() { // consume spaces + skip_spaces(); + while (m_cursor < m_source.size() - && isspace(m_source[m_cursor])) + && m_source[m_cursor] == '#') { - m_cursor++; + while (m_cursor < m_source.size() + && m_source[m_cursor] != '\n') + { + m_cursor++; + } + + skip_spaces(); } // check word @@ -61,7 +70,7 @@ namespace muz if (tok_info && f(tok_info->value)) { - auto node = std::make_shared(type, tok_info->value); + auto node = std::make_shared(type, m_line, tok_info->value); m_cursor = tok_info->position; return node; } @@ -69,17 +78,16 @@ namespace muz return nullptr; }; - if (tok_info && tok_info->value == "[") { - auto node = std::make_shared(NODE_OSQUARE); + auto node = std::make_shared(NODE_OSQUARE, m_line); m_cursor = tok_info->position; return node; } if (tok_info && tok_info->value == "]") { - auto node = 
std::make_shared(NODE_CSQUARE); + auto node = std::make_shared(NODE_CSQUARE, m_line); m_cursor = tok_info->position; return node; } @@ -102,9 +110,29 @@ namespace muz return res; } + if (m_cursor < m_source.size()) + { + format_error(m_line, + "unknown token <" + tok_info->value + ">"); + } + return nullptr; } + void Lexer::skip_spaces() + { + while (m_cursor < m_source.size() + && isspace(m_source[m_cursor])) + { + if (m_source[m_cursor] == '\n') + { + m_line++; + } + + m_cursor++; + } + } + std::optional Lexer::next_word() { size_t cursor = m_cursor; diff --git a/lib/Lexer.hpp b/lib/Lexer.hpp index 02dd99f..7825bfa 100644 --- a/lib/Lexer.hpp +++ b/lib/Lexer.hpp @@ -6,6 +6,8 @@ namespace muz { + MUZ_ERROR(lexical_error); + struct TokenInfo { size_t position; @@ -30,8 +32,11 @@ namespace muz private: std::string m_source; size_t m_cursor = 0; + int m_line = 1; std::vector m_seps; + void skip_spaces(); + std::optional next_word(); bool is_sep(size_t index) const; bool is_num(std::string const& word) const; diff --git a/lib/Node.cpp b/lib/Node.cpp index e17dee1..e2a1d29 100644 --- a/lib/Node.cpp +++ b/lib/Node.cpp @@ -3,8 +3,10 @@ namespace muz { /*explicit*/ Node::Node(NodeType type, + int line, std::string const& value) : m_type { type } + , m_line { line } , m_value { value } { } diff --git a/lib/Node.hpp b/lib/Node.hpp index 39a2672..e7d321e 100644 --- a/lib/Node.hpp +++ b/lib/Node.hpp @@ -21,12 +21,14 @@ namespace muz { public: explicit Node(NodeType type, + int line, std::string const& value=""); virtual ~Node(); // properties // ---------- inline NodeType type() const { return m_type; } + inline int line() const { return m_line; } inline std::string value() const { return m_value; } // children @@ -39,6 +41,7 @@ namespace muz private: NodeType m_type; + int m_line; std::string m_value; std::vector> m_children; }; diff --git a/lib/Parser.cpp b/lib/Parser.cpp index 6c9eea5..e44bd1b 100644 --- a/lib/Parser.cpp +++ b/lib/Parser.cpp @@ -19,14 +19,26 @@ namespace 
muz return parse_prog(); } + int Parser::current_line() + { + if (m_cursor < m_tokens.size()) + { + return m_tokens[m_cursor]->line(); + } + + return 0; + } + std::shared_ptr<Node> Parser::consume(std::optional<NodeType> type) { if (m_cursor >= m_tokens.size()) { std::string ty_desired = NodeTypeStr[*type] + strlen("NODE_"); - throw syntax_error {"unexpected end: expected <" - + ty_desired - + ">, got nothing."}; + + format_error<syntax_error>(current_line(), + "unexpected end: expected <" + + ty_desired + + ">, got nothing."); } auto node = m_tokens[m_cursor]; @@ -35,10 +47,12 @@ namespace muz { std::string ty_got = NodeTypeStr[node->type()] + strlen("NODE_"); std::string ty_desired = NodeTypeStr[*type] + strlen("NODE_"); - throw syntax_error {"expected <" - + ty_desired - + ">, got <" - + ty_got + ">."}; + + format_error<syntax_error>(current_line(), + "expected <" + + ty_desired + + ">, got <" + + ty_got + ">."); } m_cursor++; @@ -59,7 +73,7 @@ namespace muz std::shared_ptr<Node> Parser::parse_prog() { - auto node = std::make_shared<Node>(NODE_PROG); + auto node = std::make_shared<Node>(NODE_PROG, current_line()); while (m_cursor < m_tokens.size()) { @@ -81,7 +95,7 @@ namespace muz std::shared_ptr<Node> Parser::parse_dir() { - auto node = std::make_shared<Node>(NODE_DIR); + auto node = std::make_shared<Node>(NODE_DIR, current_line()); node->add_child(consume(NODE_DIR_IDENT)); node->add_child(parse_cmd()); @@ -92,7 +106,7 @@ namespace muz { consume(NODE_OSQUARE); - auto node = std::make_shared<Node>(NODE_CMD); + auto node = std::make_shared<Node>(NODE_CMD, current_line()); node->add_child(consume(NODE_IDENT)); while (!next_is(NODE_CSQUARE)) diff --git a/lib/Parser.hpp b/lib/Parser.hpp index e4c6f12..932ab3a 100644 --- a/lib/Parser.hpp +++ b/lib/Parser.hpp @@ -25,6 +25,7 @@ namespace muz std::vector<std::shared_ptr<Node>> m_tokens; size_t m_cursor = 0; + int current_line(); std::shared_ptr<Node> consume(std::optional<NodeType> type=std::nullopt); NodeType peek(size_t lookahead=0) const; bool next_is(NodeType type, size_t lookahead=0) const; diff --git a/lib/commons.hpp b/lib/commons.hpp
index 9bc3681..1a6c701 100644 --- a/lib/commons.hpp +++ b/lib/commons.hpp @@ -29,5 +29,12 @@ enum Prefix {Macro(MUZ_ENUM_IDENT)}; \ constexpr char const* Prefix ## Str [] = {Macro(MUZ_ENUM_STRING)}; +template <typename T> +void format_error(int line, std::string const& what) +{ + std::stringstream ss; + ss << "line " << line << ": " << what; + throw T { ss.str() }; +} #endif diff --git a/tests/Lexer.cpp b/tests/Lexer.cpp index 25bf0e9..4932946 100644 --- a/tests/Lexer.cpp +++ b/tests/Lexer.cpp @@ -22,6 +22,11 @@ static std::string next_val(muz::Lexer& lexer) return ""; } +static void next_val_err(muz::Lexer& lexer) +{ + REQUIRE_THROWS_AS(lexer.next(), muz::lexical_error); +} + TEST_CASE_METHOD(LexerTest, "Lexer_num") { muz::Lexer lexer; @@ -60,3 +65,20 @@ TEST_CASE_METHOD(LexerTest, "Lexer_commands") REQUIRE("" == next_val(lexer)); } + +TEST_CASE_METHOD(LexerTest, "Lexer_unknown_sym_error") +{ + muz::Lexer lexer; + lexer.scan(" § [[ \n ]"); + next_val_err(lexer); +} + +TEST_CASE_METHOD(LexerTest, "Lexer_comments") +{ + muz::Lexer lexer; + lexer.scan(" # [[ \n ]"); + + REQUIRE("CSQUARE" == next_val(lexer)); + + REQUIRE("" == next_val(lexer)); +}