#include "Lexer.hpp"

#include <algorithm>
#include <cctype>
#include <functional>

// NOTE(review): this file had lost every `<...>` span (template arguments and
// part of scan_text). The pieces marked NOTE(review) below were reconstructed
// and must be checked against the declarations in Lexer.hpp.

namespace wg
{
    /**
     * Builds a lexer that recognises the "#" token and identifiers.
     *
     * The registered scanners are bound to `this`, so the callables stored in
     * m_scanners refer to this very object.
     */
    /*explicit*/ Lexer::Lexer()
    {
        add_text("#", NODE_HASH);
        m_scanners.push_back(std::bind(&Lexer::scan_ident, this));
    }

    /*virtual*/ Lexer::~Lexer()
    {
    }

    /**
     * Sets the text to tokenize and rewinds the cursor to its first character.
     *
     * @param source text that subsequent calls to next() will scan.
     */
    void Lexer::scan(std::string const& source)
    {
        m_source = source;
        m_cursor = 0;
    }

    /**
     * Produces the next token of the current source.
     *
     * Leading whitespace is skipped, every registered scanner is tried at the
     * cursor, and the match that ends the furthest is kept (the first
     * registered scanner wins a tie). The cursor is then moved past the match.
     *
     * @return the node built from the winning match, or nullptr when no
     *         scanner matched.
     */
    // NOTE(review): element type reconstructed as `Node` — confirm.
    std::shared_ptr<Node> Lexer::next()
    {
        std::optional<ScanInfo> scan_info;

        skip_spaces();

        // Iterate by reference: copying each scanner callable is needless.
        for (auto const& scanner : m_scanners)
        {
            auto info = scanner();

            if (info && (!scan_info || info->cursor > scan_info->cursor))
            {
                scan_info = info;
            }
        }

        if (scan_info)
        {
            m_cursor = scan_info->cursor;

            return std::make_shared<Node>(scan_info->type,
                                          scan_info->repr,
                                          m_loc);
        }

        // Nothing matched: that is only legitimate once the whole source has
        // been consumed. The previous condition (`<=`) could never fail, so
        // unexpected input was silently reported as end of stream.
        WG_ASSERT(m_cursor >= m_source.size(), "unexpected token");

        return nullptr;
    }

    /**
     * Collects the tokens returned by next() until it returns nullptr.
     *
     * @return the tokens in the order they were produced.
     */
    std::vector<std::shared_ptr<Node>> Lexer::all()
    {
        std::vector<std::shared_ptr<Node>> result;
        std::shared_ptr<Node> node;

        while ((node = next()) != nullptr)
        {
            result.push_back(node);
        }

        return result;
    }

    /**
     * Registers a scanner matching the literal `text`.
     *
     * A one-character literal is also recorded as a separator, which makes
     * is_sep() report it and therefore ends identifiers in scan_ident().
     *
     * @param text      literal to recognise.
     * @param node      node type attached to a match.
     * @param has_value forwarded unchanged to scan_text().
     */
    void Lexer::add_text(std::string const& text,
                         NodeType node,
                         bool has_value)
    {
        if (text.size() == 1)
        {
            m_seps.push_back(text[0]);
        }

        m_scanners.push_back(std::bind(&Lexer::scan_text,
                                       this, text, node, has_value));
    }

    /**
     * Tells whether the source character at `index` is a separator, i.e.
     * whitespace or one of the characters recorded in m_seps.
     *
     * @param index position in the source; asserted to be in bounds.
     */
    bool Lexer::is_sep(size_t index) const
    {
        WG_ASSERT(index < m_source.size(), "cannot find separator");

        // std::isspace has undefined behaviour for negative arguments other
        // than EOF, so the character goes through unsigned char first.
        if (std::isspace(static_cast<unsigned char>(m_source[index])))
        {
            return true;
        }

        auto itr = std::find(std::begin(m_seps), std::end(m_seps),
                             m_source[index]);

        return itr != std::end(m_seps);
    }

    /**
     * Moves the cursor past any whitespace, incrementing the line of m_loc
     * for every newline that is skipped.
     */
    void Lexer::skip_spaces()
    {
        while (m_cursor < m_source.size()
               && std::isspace(static_cast<unsigned char>(m_source[m_cursor])))
        {
            if (m_source[m_cursor] == '\n')
            {
                m_loc = Loc {m_loc.origin(), m_loc.line() + 1};
            }

            m_cursor++;
        }
    }

    /**
     * Tries to match the literal `text` at the cursor without consuming it.
     *
     * @param text      literal to compare against the source.
     * @param type      node type reported on success.
     * @param has_value see the review note in the body.
     * @return the match description, or std::nullopt when the source does not
     *         contain `text` at the cursor.
     */
    std::optional<ScanInfo> Lexer::scan_text(std::string const& text,
                                             NodeType type,
                                             bool has_value) const
    {
        if (m_cursor + text.size() > m_source.size())
        {
            return std::nullopt;
        }

        // NOTE(review): everything from the loop condition to the end of this
        // function was missing and has been reconstructed — confirm against
        // the original implementation.
        for (size_t i = 0; i < text.size(); i++)
        {
            if (m_source[m_cursor + i] != text[i])
            {
                return std::nullopt;
            }
        }

        // NOTE(review): presumably has_value decides whether the matched text
        // is kept as the representation — confirm.
        return ScanInfo {
            m_cursor + text.size(),
            type,
            has_value ? text : std::string {}
        };
    }

    /**
     * Tries to match an identifier at the cursor without consuming it. An
     * identifier is the longest non-empty run of non-separator characters.
     *
     * @return the match description, or std::nullopt when the cursor is at
     *         the end of the source or on a separator.
     */
    std::optional<ScanInfo> Lexer::scan_ident() const
    {
        size_t cursor = m_cursor;

        while (cursor < m_source.size() && !is_sep(cursor))
        {
            cursor++;
        }

        if (cursor == m_cursor)
        {
            return std::nullopt;
        }

        return ScanInfo {
            cursor,
            NODE_IDENT,
            m_source.substr(m_cursor, cursor - m_cursor)
        };
    }
}