// wongola/lib/Lexer.cpp

#include "Lexer.hpp"
#include "lib/Node.hpp"

namespace wg
{
  /**
   * Sets up the default scanners.
   *
   * One literal scanner is registered per punctuation token listed below
   * (in that order), followed by the integer scanner and finally the
   * identifier scanner. Registration order matters: next() keeps the
   * earliest scanner when two of them consume the same number of characters.
   */
  /*explicit*/ Lexer::Lexer()
  {
    struct Punctuation
    {
      char const* text;
      NodeType type;
    };

    Punctuation const punctuations[] = {
      {"#", NODE_HASH},
      {"+", NODE_ADD},
      {"-", NODE_SUB},
      {"*", NODE_MUL},
      {"/", NODE_DIV},
      {"%", NODE_MOD},
      {"(", NODE_OPAR},
      {")", NODE_CPAR},
      {";", NODE_SEMICOLON},
    };

    for (Punctuation const& punctuation: punctuations)
      {
        add_text(punctuation.text, punctuation.type);
      }

    m_scanners.push_back([this] { return scan_int(); });
    m_scanners.push_back([this] { return scan_ident(); });
  }

  /** The destructor performs no manual cleanup. */
  /*virtual*/ Lexer::~Lexer() = default;

  /**
   * Loads a new input and rewinds the cursor to its first character.
   *
   * @param source text to tokenize; it is stored in m_source.
   *
   * NOTE(review): m_loc is not touched here, so a lexer that is reused keeps
   * the location it had reached on the previous input -- confirm this is
   * intended.
   */
  void Lexer::scan(std::string const& source)
  {
    m_source.assign(source);
    m_cursor = 0;
  }

  std::shared_ptr<Node> Lexer::next()
  {
    std::optional<ScanInfo> scan_info;

    skip_spaces();

    for (auto scanner: m_scanners)
      {
        auto info = scanner();

        if (info && (scan_info == std::nullopt
                     || info->cursor > scan_info->cursor))
          {
            scan_info = info;
          }
      }

    if (scan_info)
      {
        m_cursor = scan_info->cursor;

        return std::make_shared<Node>(scan_info->type,
                                      scan_info->repr,
                                      m_loc);
      }

    WG_ASSERT(m_cursor <= m_source.size(), "unexpected token");

    return nullptr;
  }

  std::vector<std::shared_ptr<Node>> Lexer::all()
  {
    std::vector<std::shared_ptr<Node>> result;
    std::shared_ptr<Node> node;

    while ( (node = next()) != nullptr )
      {
        result.push_back(node);
      }

    return result;
  }

  void Lexer::add_text(std::string const& text,
                       NodeType node,
                       bool has_value)
  {
    if (text.size() == 1)
      {
        m_seps.push_back(text[0]);
      }

    m_scanners.push_back(std::bind(&Lexer::scan_text,
                                   this, text,
                                   node, has_value));
  }

  /**
   * Tells whether the source character at a given position ends an
   * identifier.
   *
   * @param index position in the source; must be smaller than the source
   *              size (checked with WG_ASSERT).
   * @return true when the character is whitespace or one of the registered
   *         separator characters, false otherwise.
   */
  bool Lexer::is_sep(size_t index) const
  {
    WG_ASSERT(index < m_source.size(), "cannot find separator");

    auto const current = m_source[index];

    // <cctype> classifiers have undefined behavior for negative values, which
    // a plain char holds for non-ASCII bytes: go through unsigned char.
    if (std::isspace(static_cast<unsigned char>(current)))
      {
        return true;
      }

    auto itr = std::find(std::begin(m_seps),
                         std::end(m_seps),
                         current);

    return itr != std::end(m_seps);
  }

  /**
   * Moves the cursor past any whitespace found at the current position.
   *
   * Every '\n' skipped replaces m_loc with a location that keeps the same
   * origin and has its line number increased by one. The cursor stops on the
   * first non-whitespace character or at the end of the source.
   */
  void Lexer::skip_spaces()
  {
    // <cctype> classifiers have undefined behavior for negative values, which
    // a plain char holds for non-ASCII bytes: go through unsigned char.
    while (m_cursor < m_source.size()
           && std::isspace(static_cast<unsigned char>(m_source[m_cursor])))
      {
        if (m_source[m_cursor] == '\n')
          {
            m_loc = Loc {m_loc.origin(), m_loc.line() + 1};
          }

        ++m_cursor;
      }
  }

  std::optional<ScanInfo> Lexer::scan_text(std::string const& text,
                                           NodeType type,
                                           bool has_value) const
  {
    if (m_cursor + text.size() > m_source.size())
      {
        return std::nullopt;
      }

    for (size_t i=0; i<text.size(); i++)
      {
        if (m_source[m_cursor + i] != text[i])
          {
            return std::nullopt;
          }
      }

    return ScanInfo {
      m_cursor + text.size(),
      type,
      has_value ? text : ""
    };
  }

  std::optional<ScanInfo> Lexer::scan_ident() const
  {
    size_t cursor = m_cursor;
    std::string repr;

    while (cursor < m_source.size()
           && !is_sep(cursor))
      {
        repr += m_source[cursor];
        cursor++;
      }

    if (repr.empty() == false)
      {
        return ScanInfo {
          cursor,
          NODE_IDENT,
          repr
        };
      }

    return std::nullopt;
  }

  /**
   * Tries to read an integer literal at the cursor.
   *
   * An integer is an optional leading '-' followed by one or more decimal
   * digits.
   *
   * @return a NODE_INT scan result holding the literal (sign included) and
   *         the position right after it, or std::nullopt when no digit is
   *         found (nothing at all, or a lone '-').
   */
  std::optional<ScanInfo> Lexer::scan_int() const
  {
    size_t cursor = m_cursor;
    std::string repr;

    if (cursor < m_source.size()
        && m_source[cursor] == '-')
      {
        repr += '-';
        ++cursor;
      }

    // <cctype> classifiers have undefined behavior for negative values, which
    // a plain char holds for non-ASCII bytes: go through unsigned char.
    while (cursor < m_source.size()
           && std::isdigit(static_cast<unsigned char>(m_source[cursor])))
      {
        repr += m_source[cursor];
        ++cursor;
      }

    // Either nothing was read, or only a sign with no digit after it.
    if (repr.empty() || repr.back() == '-')
      {
        return std::nullopt;
      }

    return ScanInfo {
      cursor,
      NODE_INT,
      repr
    };
  }
}