From eb95032dfa68254768e91198bf82d1e9e41b9123 Mon Sep 17 00:00:00 2001
From: bog <bog@gozmail.net>
Date: Tue, 30 Jan 2024 20:27:30 +0100
Subject: [PATCH] :sparkles: comments and line on error messages.

---
 lib/Compiler.cpp |  2 +-
 lib/Lexer.cpp    | 40 ++++++++++++++++++++++++++++++++++------
 lib/Lexer.hpp    |  5 +++++
 lib/Node.cpp     |  2 ++
 lib/Node.hpp     |  3 +++
 lib/Parser.cpp   | 34 ++++++++++++++++++++++++----------
 lib/Parser.hpp   |  1 +
 lib/commons.hpp  |  7 +++++++
 tests/Lexer.cpp  | 22 ++++++++++++++++++++++
 9 files changed, 99 insertions(+), 17 deletions(-)
diff --git a/lib/Compiler.cpp b/lib/Compiler.cpp
index 5514e16..f0150d7 100644
--- a/lib/Compiler.cpp
+++ b/lib/Compiler.cpp
@@ -98,7 +98,7 @@ namespace muz
 
   void Compiler::check_cmd_arity(Node const& node, int arity)
   {
-    if (node.size() - 1 != arity)
+    if (node.size() - 1 != static_cast<size_t>(arity))
       {
         throw compile_error {
           std::string()
diff --git a/lib/Lexer.cpp b/lib/Lexer.cpp
index c292e23..103a9c8 100644
--- a/lib/Lexer.cpp
+++ b/lib/Lexer.cpp
@@ -18,6 +18,7 @@ namespace muz
   {
     m_source = source;
     m_cursor = 0;
+    m_line = 1;
   }
 
   std::vector<std::shared_ptr<Node>> Lexer::all()
@@ -44,10 +45,18 @@ namespace muz
   std::shared_ptr<Node> Lexer::next()
   {
     // consume spaces
+    skip_spaces();
+
     while (m_cursor < m_source.size()
-           && isspace(m_source[m_cursor]))
+           && m_source[m_cursor] == '#')
       {
-        m_cursor++;
+        while (m_cursor < m_source.size()
+               && m_source[m_cursor] != '\n')
+          {
+            m_cursor++;
+          }
+
+        skip_spaces();
       }
 
     // check word
@@ -61,7 +70,7 @@ namespace muz
 
       if (tok_info && f(tok_info->value))
         {
-          auto node = std::make_shared<Node>(type, tok_info->value);
+          auto node = std::make_shared<Node>(type, m_line, tok_info->value);
           m_cursor = tok_info->position;
           return node;
         }
@@ -69,17 +78,16 @@ namespace muz
       return nullptr;
     };
 
-
     if (tok_info && tok_info->value == "[")
       {
-        auto node = std::make_shared<Node>(NODE_OSQUARE);
+        auto node = std::make_shared<Node>(NODE_OSQUARE, m_line);
         m_cursor = tok_info->position;
         return node;
       }
 
     if (tok_info && tok_info->value == "]")
       {
-        auto node = std::make_shared<Node>(NODE_CSQUARE);
+        auto node = std::make_shared<Node>(NODE_CSQUARE, m_line);
         m_cursor = tok_info->position;
         return node;
       }
@@ -102,9 +110,29 @@ namespace muz
         return res;
       }
 
+    if (m_cursor < m_source.size())
+      {
+        format_error<lexical_error>(m_line,
+                                    "unknown token <" + tok_info->value + ">");
+      }
+
     return nullptr;
   }
 
+  // Skips whitespace starting at m_cursor and bumps m_line on every '\n'.
+  // The cast keeps isspace() defined for non-ASCII bytes, which are
+  // negative values wherever plain char is signed.
+  void Lexer::skip_spaces()
+  {
+    while (m_cursor < m_source.size()
+           && isspace(static_cast<unsigned char>(m_source[m_cursor])))
+      {
+        if (m_source[m_cursor] == '\n') { m_line++; }
+
+        m_cursor++;
+      }
+  }
+
   std::optional<TokenInfo> Lexer::next_word()
   {
     size_t cursor = m_cursor;
diff --git a/lib/Lexer.hpp b/lib/Lexer.hpp
index 02dd99f..7825bfa 100644
--- a/lib/Lexer.hpp
+++ b/lib/Lexer.hpp
@@ -6,6 +6,8 @@
 
 namespace muz
 {
+  MUZ_ERROR(lexical_error);
+
   struct TokenInfo
   {
     size_t position;
@@ -30,8 +32,11 @@ namespace muz
   private:
     std::string m_source;
     size_t m_cursor = 0;
+    int m_line = 1;
     std::vector<char> m_seps;
 
+    void skip_spaces();
+
     std::optional<TokenInfo> next_word();
     bool is_sep(size_t index) const;
     bool is_num(std::string const& word) const;
diff --git a/lib/Node.cpp b/lib/Node.cpp
index e17dee1..e2a1d29 100644
--- a/lib/Node.cpp
+++ b/lib/Node.cpp
@@ -3,8 +3,10 @@
 namespace muz
 {
   /*explicit*/ Node::Node(NodeType type,
+                          int line, // line the node is attributed to (Lexer passes its m_line)
                           std::string const& value)
     : m_type { type }
+    , m_line { line } // exposed through Node::line()
     , m_value { value }
   {
   }
diff --git a/lib/Node.hpp b/lib/Node.hpp
index 39a2672..e7d321e 100644
--- a/lib/Node.hpp
+++ b/lib/Node.hpp
@@ -21,12 +21,14 @@ namespace muz
   {
   public:
     explicit Node(NodeType type,
+                  int line,
                   std::string const& value="");
     virtual ~Node();
 
     // properties
     // ----------
     inline NodeType type() const { return m_type; }
+    inline int line() const { return m_line; }
     inline std::string value() const { return m_value; }
 
     // children
@@ -39,6 +41,7 @@ namespace muz
 
   private:
     NodeType m_type;
+    int m_line;
     std::string m_value;
     std::vector<std::shared_ptr<Node>> m_children;
   };
diff --git a/lib/Parser.cpp b/lib/Parser.cpp
index 6c9eea5..e44bd1b 100644
--- a/lib/Parser.cpp
+++ b/lib/Parser.cpp
@@ -19,14 +19,26 @@ namespace muz
     return parse_prog();
   }
 
+  // Line of the token at m_cursor. Past the end of input, fall back to the
+  // last token's line so "unexpected end" errors do not report line 0;
+  // 0 is only returned when there are no tokens at all.
+  int Parser::current_line()
+  {
+    if (m_tokens.empty()) { return 0; }
+    bool const at_end = m_cursor >= m_tokens.size();
+    return (at_end ? m_tokens.back() : m_tokens[m_cursor])->line();
+  }
+
   std::shared_ptr<Node> Parser::consume(std::optional<NodeType> type)
   {
     if (m_cursor >= m_tokens.size())
       {
         std::string ty_desired = NodeTypeStr[*type] + strlen("NODE_");
-        throw syntax_error {"unexpected end: expected <"
-                            + ty_desired
-                            + ">, got nothing."};
+
+        format_error<syntax_error>(current_line(),
+                                   "unexpected end: expected <"
+                                   + ty_desired
+                                   + ">, got nothing.");
       }
 
     auto node = m_tokens[m_cursor];
@@ -35,10 +47,12 @@ namespace muz
       {
         std::string ty_got = NodeTypeStr[node->type()] + strlen("NODE_");
         std::string ty_desired = NodeTypeStr[*type] + strlen("NODE_");
-        throw syntax_error {"expected <"
-                            + ty_desired
-                            + ">, got <"
-                            + ty_got + ">."};
+
+        format_error<syntax_error>(current_line(),
+                                   "expected <"
+                                   + ty_desired
+                                   + ">, got <"
+                                   + ty_got + ">.");
       }
 
     m_cursor++;
@@ -59,7 +73,7 @@ namespace muz
 
   std::shared_ptr<Node> Parser::parse_prog()
   {
-    auto node = std::make_shared<Node>(NODE_PROG);
+    auto node = std::make_shared<Node>(NODE_PROG, current_line());
 
     while (m_cursor < m_tokens.size())
       {
@@ -81,7 +95,7 @@ namespace muz
 
   std::shared_ptr<Node> Parser::parse_dir()
   {
-    auto node = std::make_shared<Node>(NODE_DIR);
+    auto node = std::make_shared<Node>(NODE_DIR, current_line());
     node->add_child(consume(NODE_DIR_IDENT));
     node->add_child(parse_cmd());
 
@@ -92,7 +106,7 @@ namespace muz
   {
     consume(NODE_OSQUARE);
 
-    auto node = std::make_shared<Node>(NODE_CMD);
+    auto node = std::make_shared<Node>(NODE_CMD, current_line());
     node->add_child(consume(NODE_IDENT));
 
     while (!next_is(NODE_CSQUARE))
diff --git a/lib/Parser.hpp b/lib/Parser.hpp
index e4c6f12..932ab3a 100644
--- a/lib/Parser.hpp
+++ b/lib/Parser.hpp
@@ -25,6 +25,7 @@ namespace muz
     std::vector<std::shared_ptr<Node>> m_tokens;
     size_t m_cursor = 0;
 
+    int current_line();
     std::shared_ptr<Node> consume(std::optional<NodeType> type=std::nullopt);
     NodeType peek(size_t lookahead=0) const;
     bool next_is(NodeType type, size_t lookahead=0) const;
diff --git a/lib/commons.hpp b/lib/commons.hpp
index 9bc3681..1a6c701 100644
--- a/lib/commons.hpp
+++ b/lib/commons.hpp
@@ -29,5 +29,12 @@
   enum Prefix {Macro(MUZ_ENUM_IDENT)};                                  \
   constexpr char const* Prefix ## Str [] = {Macro(MUZ_ENUM_STRING)};
 
+// Throws a T whose message is "line <line>: <what>".
+template <typename T>
+void format_error(int line, std::string const& what)
+{
+  std::string const prefix = "line " + std::to_string(line) + ": ";
+  throw T { prefix + what };
+}
 
 #endif
diff --git a/tests/Lexer.cpp b/tests/Lexer.cpp
index 25bf0e9..4932946 100644
--- a/tests/Lexer.cpp
+++ b/tests/Lexer.cpp
@@ -22,6 +22,11 @@ static std::string next_val(muz::Lexer& lexer)
   return "";
 }
 
+static void next_val_err(muz::Lexer& lexer) // helper: the next token request is expected to fail
+{
+  REQUIRE_THROWS_AS(lexer.next(), muz::lexical_error); // any other outcome fails the test
+}
+
 TEST_CASE_METHOD(LexerTest, "Lexer_num")
 {
   muz::Lexer lexer;
@@ -60,3 +65,20 @@ TEST_CASE_METHOD(LexerTest, "Lexer_commands")
 
   REQUIRE("" == next_val(lexer));
 }
+
+TEST_CASE_METHOD(LexerTest, "Lexer_unknown_sym_error")
+{
+  muz::Lexer lexer;
+  lexer.scan(" § [[ \n ]"); // starts with a symbol the test expects the lexer to reject
+  next_val_err(lexer); // the very first next() call must throw muz::lexical_error
+}
+
+TEST_CASE_METHOD(LexerTest, "Lexer_comments")
+{
+  muz::Lexer lexer;
+  lexer.scan(" # [[ \n ]"); // '#' opens a comment that runs up to the newline
+
+  REQUIRE("CSQUARE" == next_val(lexer)); // "[[" sits inside the comment, so "]" is the first token
+
+  REQUIRE("" == next_val(lexer)); // nothing else is expected after it
+}