#include "Lexer.hpp"
#include "Node.hpp"

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

// NOTE(review): the element/return types below assume `Node` (Node.hpp) and
// `TokenInfo` (Lexer.hpp) — confirm they match the declarations in Lexer.hpp.

namespace muz
{
    namespace
    {
        // The <cctype> classifiers have undefined behaviour for negative
        // arguments other than EOF, so every char goes through unsigned char.

        /// True when `c` is a whitespace character.
        bool is_space_char(char c)
        {
            return std::isspace(static_cast<unsigned char>(c)) != 0;
        }

        /// True when `c` is a decimal digit.
        bool is_digit_char(char c)
        {
            return std::isdigit(static_cast<unsigned char>(c)) != 0;
        }

        /// True when `c` is a letter or a decimal digit.
        bool is_alnum_char(char c)
        {
            return std::isalnum(static_cast<unsigned char>(c)) != 0;
        }
    }

    /// Builds a lexer whose separator characters are '[' and ']'.
    /*explicit*/ Lexer::Lexer()
        : m_seps { {'[', ']'} }
    {
    }

    /*virtual*/ Lexer::~Lexer()
    {
    }

    /// Stores a copy of `source` as the text to lex and rewinds the cursor
    /// to its beginning.
    void Lexer::scan(std::string const& source)
    {
        m_source = source;
        m_cursor = 0;
    }

    /// Collects nodes by calling next() until it returns a null pointer.
    ///
    /// @return The nodes produced so far, in source order. Because next()
    ///         also returns null for a word it cannot classify, the result
    ///         stops at the first such word.
    std::vector<std::shared_ptr<Node>> Lexer::all()
    {
        std::vector<std::shared_ptr<Node>> res;

        while (auto tok = next())
        {
            res.push_back(std::move(tok));
        }

        return res;
    }

    /// Produces the node for the next word of the source.
    ///
    /// Leading whitespace is always consumed. The cursor moves past the word
    /// only when the word is recognised.
    ///
    /// @return A node of type NODE_OSQUARE, NODE_CSQUARE, NODE_NUM,
    ///         NODE_IDENT or NODE_DIR_IDENT, or nullptr when the input is
    ///         exhausted or the word matches none of these.
    std::shared_ptr<Node> Lexer::next()
    {
        // consume spaces
        while (m_cursor < m_source.size() && is_space_char(m_source[m_cursor]))
        {
            ++m_cursor;
        }

        auto const tok_info = next_word();

        if (!tok_info)
        {
            return nullptr;
        }

        // Commits the word: the cursor jumps to the position recorded by
        // next_word(), then the freshly built node is handed back.
        auto const accept = [&](std::shared_ptr<Node> node) {
            m_cursor = tok_info->position;
            return node;
        };

        if (tok_info->value == "[")
        {
            return accept(std::make_shared<Node>(NODE_OSQUARE));
        }

        if (tok_info->value == "]")
        {
            return accept(std::make_shared<Node>(NODE_CSQUARE));
        }

        // Value-carrying node kinds, tried in priority order.
        struct Rule
        {
            NodeType type;
            bool (Lexer::*matches)(std::string const&) const;
        };

        Rule const rules[] = {
            { NODE_NUM, &Lexer::is_num },
            { NODE_IDENT, &Lexer::is_ident },
            { NODE_DIR_IDENT, &Lexer::is_dir_ident },
        };

        for (auto const& rule : rules)
        {
            if ((this->*rule.matches)(tok_info->value))
            {
                return accept(std::make_shared<Node>(rule.type,
                                                     tok_info->value));
            }
        }

        return nullptr;
    }

    /// Peeks at the next word without moving the lexer cursor.
    ///
    /// A word is either a single separator character or a maximal run of
    /// characters that are neither whitespace nor separators.
    ///
    /// @return The word together with the index just past it, tagged
    ///         NODE_UNDEFINED, or std::nullopt when only whitespace (or
    ///         nothing) remains.
    // Trailing return type: `TokenInfo` is looked up in Lexer's scope, so this
    // resolves whether it is declared inside the class or in the namespace.
    auto Lexer::next_word() -> std::optional<TokenInfo>
    {
        std::size_t cursor = m_cursor;

        // consume spaces
        while (cursor < m_source.size() && is_space_char(m_source[cursor]))
        {
            ++cursor;
        }

        // End of input: report "no word" instead of reading past the text.
        if (cursor >= m_source.size())
        {
            return std::nullopt;
        }

        std::string value;

        if (is_sep(cursor))
        {
            // Neither whitespace nor end of input here, so this is one of
            // the separator characters; it forms a word on its own.
            value.assign(1, m_source[cursor]);
            ++cursor;
        }
        else
        {
            // read next word
            std::size_t const start = cursor;

            while (!is_sep(cursor))
            {
                ++cursor;
            }

            value = m_source.substr(start, cursor - start);
        }

        return TokenInfo { cursor, NODE_UNDEFINED, value };
    }

    /// Tells whether a word must end at `index`.
    ///
    /// @param index Position in the source text; may be past the end.
    /// @return true when `index` is out of range, or the character there is
    ///         whitespace or one of the separator characters.
    bool Lexer::is_sep(std::size_t index) const
    {
        if (index >= m_source.size())
        {
            return true;
        }

        char const current = m_source[index];

        if (is_space_char(current))
        {
            return true;
        }

        return std::any_of(std::begin(m_seps), std::end(m_seps),
                           [current](char c) { return c == current; });
    }

    /// Checks whether `word` is a decimal number literal.
    ///
    /// Accepted form: an optional leading '-', then digits with at most one
    /// '.', and at least one digit overall.
    bool Lexer::is_num(std::string const& word) const
    {
        auto it = std::begin(word);
        auto const end = std::end(word);

        if (it != end && *it == '-')
        {
            ++it;
        }

        std::size_t digits = 0;
        std::size_t dots = 0;

        for (; it != end; ++it)
        {
            if (*it == '.')
            {
                ++dots;
            }
            else if (is_digit_char(*it))
            {
                ++digits;
            }
            else
            {
                return false;
            }
        }

        // A digit is required explicitly: without it "", "-" and "." would
        // all count as numbers.
        return digits > 0 && dots <= 1;
    }

    /// Checks whether `word` is a plain identifier: non-empty, made only of
    /// letters, digits and '_', and not starting with a digit or '@'.
    bool Lexer::is_ident(std::string const& word) const
    {
        if (word.empty())
        {
            return false;
        }

        char const first = word.front();

        if (first == '@' || is_digit_char(first))
        {
            return false;
        }

        return std::all_of(std::begin(word), std::end(word), [](char c) {
            return is_alnum_char(c) || c == '_';
        });
    }

    /// Checks whether `word` is a directive identifier: it starts with '@'
    /// and contains only letters, digits, '_' and '@'. A lone "@" is accepted.
    bool Lexer::is_dir_ident(std::string const& word) const
    {
        if (word.empty() || word.front() != '@')
        {
            return false;
        }

        return std::all_of(std::begin(word), std::end(word), [](char c) {
            return is_alnum_char(c) || c == '_' || c == '@';
        });
    }
}