muzgen/lib/Lexer.cpp

244 lines
4.5 KiB
C++
Raw Normal View History

#include "Lexer.hpp"
#include "Node.hpp"
namespace muz
{
// Constructs a lexer whose separator list (m_seps) holds '[' and ']'.
// is_sep() additionally treats whitespace and end-of-input as
// separators. The scanning state (m_source, m_cursor, m_line) is not set
// here; scan() assigns it.
/*explicit*/ Lexer::Lexer()
: m_seps {
{'[', ']'}
}
{
}
// Nothing to release explicitly: the members clean up after themselves.
/*virtual*/ Lexer::~Lexer() = default;
// Loads a new source text into the lexer and rewinds it: the text is
// copied into m_source, line counting restarts at 1 and the read
// position goes back to the first character.
void Lexer::scan(std::string const& source)
{
    m_source = source;

    // rewind
    m_line = 1;
    m_cursor = 0;
}
std::vector<std::shared_ptr<Node>> Lexer::all()
{
std::vector<std::shared_ptr<Node>> res;
while (true)
{
auto tok = next();
if (tok)
{
res.push_back(tok);
}
else
{
return res;
}
}
return res;
}
// Produces the next token of the scanned source as a Node, or nullptr
// when no token is produced.
//
// Steps:
//   1. skip whitespace and any number of '#' line comments;
//   2. peek the next word with next_word() (which works on a copy of
//      the cursor, so m_cursor is not moved yet);
//   3. classify the word as '[', ']', number, identifier or directive
//      identifier, in that order, and only on a match move m_cursor to
//      the position reported by next_word().
//
// If input remains but the word matches no category, the problem is
// reported through format_error<lexical_error> with the current line.
std::shared_ptr<Node> Lexer::next()
{
    // consume spaces
    skip_spaces();

    // consume comments: each one runs from '#' up to the end of the line
    while (m_cursor < m_source.size()
           && m_source[m_cursor] == '#')
    {
        while (m_cursor < m_source.size()
               && m_source[m_cursor] != '\n')
        {
            m_cursor++;
        }

        // the newline itself is consumed here, so m_line stays accurate
        skip_spaces();
    }

    // check word
    auto tok_info = next_word();

    // Builds a node of the given type when the peeked word satisfies the
    // given predicate; returns nullptr (and leaves m_cursor alone)
    // otherwise.
    auto try_node = [&](NodeType type,
                        bool (Lexer::*fn)(std::string const&) const)
        -> std::shared_ptr<Node>
    {
        if (tok_info && (this->*fn)(tok_info->value))
        {
            auto node = std::make_shared<Node>(type, m_line, tok_info->value);
            m_cursor = tok_info->position;
            return node;
        }

        return nullptr;
    };

    if (tok_info && tok_info->value == "[")
    {
        auto node = std::make_shared<Node>(NODE_OSQUARE, m_line);
        m_cursor = tok_info->position;
        return node;
    }

    if (tok_info && tok_info->value == "]")
    {
        auto node = std::make_shared<Node>(NODE_CSQUARE, m_line);
        m_cursor = tok_info->position;
        return node;
    }

    if (auto res = try_node(NODE_NUM, &Lexer::is_num);
        res)
    {
        return res;
    }

    if (auto res = try_node(NODE_IDENT, &Lexer::is_ident);
        res)
    {
        return res;
    }

    if (auto res = try_node(NODE_DIR_IDENT, &Lexer::is_dir_ident);
        res)
    {
        return res;
    }

    if (m_cursor < m_source.size())
    {
        // Never dereference an empty optional: if no word could be read,
        // fall back to the offending character for the message.
        std::string const unknown = tok_info
            ? tok_info->value
            : std::string(1, m_source[m_cursor]);

        format_error<lexical_error>(m_line,
                                    "unknown token <" + unknown + ">");
    }

    return nullptr;
}
// Advances m_cursor past a run of whitespace, incrementing m_line for
// every '\n' that is skipped. Stops at the first non-whitespace
// character or at the end of the source.
void Lexer::skip_spaces()
{
    while (m_cursor < m_source.size())
    {
        // The <cctype> classifiers need a value representable as
        // unsigned char; a plain char may be negative for non-ASCII
        // bytes, which would be undefined behaviour.
        auto const current = static_cast<unsigned char>(m_source[m_cursor]);

        if (!isspace(current))
        {
            break;
        }

        if (current == '\n')
        {
            m_line++;
        }

        m_cursor++;
    }
}
// Peeks at the next word without moving m_cursor.
//
// Leading whitespace is skipped on a local copy of the cursor. The word
// is then either a single non-whitespace separator character (as decided
// by is_sep()), or the run of characters up to the next separator.
//
// Returns a TokenInfo holding the position just past the word,
// NODE_UNDEFINED and the word text, or std::nullopt when only whitespace
// (or nothing) remains.
std::optional<TokenInfo> Lexer::next_word()
{
    size_t cursor = m_cursor;
    std::string value;

    // consume spaces (cast: <cctype> functions must not receive a
    // negative char)
    while (cursor < m_source.size()
           && isspace(static_cast<unsigned char>(m_source[cursor])))
    {
        cursor++;
    }

    // End of input: is_sep() reports true past the end and
    // m_source[size()] reads as '\0', so without this guard a bogus
    // one-character "\0" token positioned beyond the end would be built.
    if (cursor >= m_source.size())
    {
        return std::nullopt;
    }

    if (is_sep(cursor)
        && !isspace(static_cast<unsigned char>(m_source[cursor])))
    {
        // a separator character is a word on its own
        value = std::string(1, m_source[cursor]);
        cursor++;
    }
    else
    {
        // read next word
        while (!is_sep(cursor))
        {
            value += m_source[cursor];
            cursor++;
        }
    }

    if (value.empty())
    {
        return std::nullopt;
    }

    return TokenInfo {
        cursor,
        NODE_UNDEFINED,
        value
    };
}
// Tells whether the character at `index` ends a word.
//
// True when `index` is at or past the end of the source, when the
// character is whitespace, or when it is one of the characters listed in
// m_seps.
bool Lexer::is_sep(size_t index) const
{
    if (index >= m_source.size())
    {
        return true;
    }

    char const current = m_source[index];

    // Cast before classifying: passing a negative char to isspace is
    // undefined behaviour.
    if (isspace(static_cast<unsigned char>(current)))
    {
        return true;
    }

    return std::any_of(std::begin(m_seps), std::end(m_seps),
                       [current](char sep) {
                           return sep == current;
                       });
}
// Tells whether `word` is a numeric literal: an optional leading '-',
// then digits with at most one '.', and at least one digit overall.
//
// Accepted: "42", "-3.14", ".5", "1."
// Rejected: "", "-", ".", "-.", "1.2.3", "1-2", "12a"
bool Lexer::is_num(std::string const& word) const
{
    auto it = std::begin(word);
    auto const end = std::end(word);

    // optional sign
    if (it != end && *it == '-')
    {
        ++it;
    }

    int count_dot = 0;
    int count_digit = 0;

    for (; it != end; ++it)
    {
        if (*it == '.')
        {
            count_dot++;
        }
        // cast: passing a negative char to isdigit is undefined behaviour
        else if (isdigit(static_cast<unsigned char>(*it)))
        {
            count_digit++;
        }
        else
        {
            return false;
        }
    }

    // Requiring a digit keeps a lone "-" or "." (or an empty word) from
    // being classified as a number.
    return count_digit > 0 && count_dot <= 1;
}
// Tells whether `word` is a plain identifier: non-empty, not starting
// with '@' or a digit, and made only of alphanumeric characters and '_'.
bool Lexer::is_ident(std::string const& word) const
{
    if (word.empty())
    {
        return false;
    }

    // All classifier calls go through unsigned char: passing a negative
    // char to isdigit/isalnum is undefined behaviour.
    if (word[0] == '@') { return false; }
    if (isdigit(static_cast<unsigned char>(word[0]))) { return false; }

    return std::all_of(std::begin(word), std::end(word), [](char c) {
        return isalnum(static_cast<unsigned char>(c)) || c == '_';
    });
}
// Tells whether `word` is a directive identifier: non-empty, starting
// with '@', and made only of alphanumeric characters, '_' and '@'.
bool Lexer::is_dir_ident(std::string const& word) const
{
    if (word.empty())
    {
        return false;
    }

    if (word[0] != '@') { return false; }

    return std::all_of(std::begin(word), std::end(word), [](char c) {
        // cast: passing a negative char to isalnum is undefined behaviour
        return isalnum(static_cast<unsigned char>(c))
            || c == '_'
            || c == '@';
    });
}
}