#include "Lexer.hpp"
#include "Node.hpp"

#include <algorithm> // std::all_of, std::any_of, std::none_of
#include <cctype>    // isspace, isdigit, isalnum

namespace muz
{

/*explicit*/ Lexer::Lexer()
    : m_seps {
        {'[', ']'}
    }
{
}

/*virtual*/ Lexer::~Lexer()
{
}
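
// Load a new source buffer and reset the scanning state
// (cursor back to the start, line counter back to 1).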
void Lexer::scan(std::string const& source)
{
    m_source = source;
    m_cursor = 0;
    m_line = 1;
}
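
// Collect every token until next() signals end of input.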
std::vector<std::shared_ptr<Node>> Lexer::all()
{
    std::vector<std::shared_ptr<Node>> res;

    // next() returns nullptr once the source is exhausted
    while (auto tok = next())
    {
        res.push_back(tok);
    }

    return res;
}
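
// Usage sketch (hypothetical input; the node type names are the ones
// used below):
//
//   muz::Lexer lex;
//   lex.scan("[ add 1 -2.5 @main ]  # trailing comment");
//   auto toks = lex.all(); // NODE_OSQUARE, NODE_IDENT, NODE_NUM,
//                          // NODE_NUM, NODE_DIR_IDENT, NODE_CSQUARE

// Produce the next token: skip whitespace and '#' line comments, then
// classify the next word; returns nullptr once the source is exhausted.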
std::shared_ptr<Node> Lexer::next()
{
    // consume spaces
    skip_spaces();

    // skip '#' line comments and any whitespace that follows them
    while (m_cursor < m_source.size()
           && m_source[m_cursor] == '#')
    {
        while (m_cursor < m_source.size()
               && m_source[m_cursor] != '\n')
        {
            m_cursor++;
        }

        skip_spaces();
    }

    // check word
    auto tok_info = next_word();

    // build a node of the given type if the word passes the predicate,
    // committing the cursor past the word
    auto try_node = [&](NodeType type,
                        bool (Lexer::*fn)(std::string const&) const)
        -> std::shared_ptr<Node>
    {
        if (tok_info && (this->*fn)(tok_info->value))
        {
            auto node = std::make_shared<Node>(type, m_line, tok_info->value);
            m_cursor = tok_info->position;
            return node;
        }

        return nullptr;
    };

    if (tok_info && tok_info->value == "[")
    {
        auto node = std::make_shared<Node>(NODE_OSQUARE, m_line);
        m_cursor = tok_info->position;
        return node;
    }

    if (tok_info && tok_info->value == "]")
    {
        auto node = std::make_shared<Node>(NODE_CSQUARE, m_line);
        m_cursor = tok_info->position;
        return node;
    }

    if (auto res = try_node(NODE_NUM, &Lexer::is_num); res)
    {
        return res;
    }

    if (auto res = try_node(NODE_IDENT, &Lexer::is_ident); res)
    {
        return res;
    }

    if (auto res = try_node(NODE_DIR_IDENT, &Lexer::is_dir_ident); res)
    {
        return res;
    }

    if (m_cursor < m_source.size())
    {
        format_error<lexical_error>(m_line,
            "unknown token <" + tok_info->value + ">");
    }

    return nullptr;
}
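
// Advance the cursor past whitespace, bumping m_line on each newline.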
void Lexer::skip_spaces()
{
    while (m_cursor < m_source.size()
           && isspace(static_cast<unsigned char>(m_source[m_cursor])))
    {
        if (m_source[m_cursor] == '\n')
        {
            m_line++;
        }

        m_cursor++;
    }
}
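
// Look ahead to the next word without moving m_cursor; on success the
// returned TokenInfo carries the word and the cursor position just past it.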
std::optional<TokenInfo> Lexer::next_word()
{
    size_t cursor = m_cursor;
    std::string value;

    // consume spaces
    while (cursor < m_source.size()
           && isspace(static_cast<unsigned char>(m_source[cursor])))
    {
        cursor++;
    }

    // nothing left to read: bail out instead of fabricating
    // a one-character token from the terminating '\0'
    if (cursor >= m_source.size())
    {
        return std::nullopt;
    }

    if (is_sep(cursor) && !isspace(static_cast<unsigned char>(m_source[cursor])))
    {
        // a separator character is a one-character word of its own
        value = std::string(1, m_source[cursor]);
        cursor++;
    }
    else
    {
        // read next word
        while (!is_sep(cursor))
        {
            value += m_source[cursor];
            cursor++;
        }
    }

    if (value.size() > 0)
    {
        return TokenInfo {
            cursor,
            NODE_UNDEFINED,
            value
        };
    }

    return std::nullopt;
}
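
// A position is a separator if it is past the end of the source, holds
// whitespace, or holds one of the configured separator characters.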
bool Lexer::is_sep(size_t index) const
{
    if (index >= m_source.size())
    {
        return true;
    }

    if (isspace(static_cast<unsigned char>(m_source[index])))
    {
        return true;
    }

    return std::any_of(std::begin(m_seps), std::end(m_seps), [&](char c){
        return c == m_source[index];
    });
}
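
// A number is an optional leading '-', then digits with at most one '.'.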
bool Lexer::is_num(std::string const& word) const
{
    auto beg = std::begin(word);

    if (word.size() > 0 && word[0] == '-')
    {
        beg++;
    }

    // require at least one digit so a bare "-" or "." is not a number
    if (std::none_of(beg, std::end(word), [](char c){
        return isdigit(static_cast<unsigned char>(c)) != 0;
    }))
    {
        return false;
    }

    int count_dot = 0;

    return std::all_of(beg, std::end(word), [&](char c){
        if (c == '.')
        {
            count_dot++;
        }

        return isdigit(static_cast<unsigned char>(c)) || c == '.';
    }) && count_dot <= 1;
}
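
// An identifier starts with a non-digit, non-'@' character and contains
// only alphanumerics and underscores.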
bool Lexer::is_ident(std::string const& word) const
{
    if (word.size() == 0)
    {
        return false;
    }

    if (word[0] == '@') { return false; }
    if (isdigit(static_cast<unsigned char>(word[0]))) { return false; }

    return std::all_of(std::begin(word), std::end(word), [&](char c){
        return isalnum(static_cast<unsigned char>(c)) || c == '_';
    });
}
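
// A directive identifier starts with '@'; the remaining characters are
// alphanumerics, underscores, or further '@' characters.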
bool Lexer::is_dir_ident(std::string const& word) const
{
    if (word.size() == 0)
    {
        return false;
    }

    if (word[0] != '@') { return false; }

    return std::all_of(std::begin(word), std::end(word), [&](char c){
        return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '@';
    });
}

}