199 lines
4.0 KiB
C++
199 lines
4.0 KiB
C++
#include "Lexer.hpp"
|
|
#include "lib/Node.hpp"
|
|
|
|
namespace wg
|
|
{
|
|
/**
 * Builds the lexer's scanner table.
 *
 * Each literal in the table below is registered through add_text(); the
 * integer and identifier scanners are appended afterwards. next() keeps
 * the first scanner among those that consume the most input, so the
 * registration order decides ties (scan_int is listed before scan_ident).
 */
/*explicit*/ Lexer::Lexer()
{
    struct Literal
    {
        char const* text;
        NodeType type;
    };

    Literal const literals[] = {
        {"#", NODE_HASH},
        {"+", NODE_ADD},
        {"-", NODE_SUB},
        {"*", NODE_MUL},
        {"/", NODE_DIV},
        {"%", NODE_MOD},
        {"(", NODE_OPAR},
        {")", NODE_CPAR},
        {";", NODE_SEMICOLON},
    };

    for (auto const& literal : literals)
    {
        add_text(literal.text, literal.type);
    }

    // The scanners hold a pointer to this instance.
    m_scanners.push_back([this] { return scan_int(); });
    m_scanners.push_back([this] { return scan_ident(); });
}
|
|
|
|
/** Destructor: nothing to release beyond what the members clean up themselves. */
/*virtual*/ Lexer::~Lexer() = default;
|
|
|
|
void Lexer::scan(std::string const& source)
|
|
{
|
|
m_source = source;
|
|
m_cursor = 0;
|
|
}
|
|
|
|
std::shared_ptr<Node> Lexer::next()
|
|
{
|
|
std::optional<ScanInfo> scan_info;
|
|
|
|
skip_spaces();
|
|
|
|
for (auto scanner: m_scanners)
|
|
{
|
|
auto info = scanner();
|
|
|
|
if (info && (scan_info == std::nullopt
|
|
|| info->cursor > scan_info->cursor))
|
|
{
|
|
scan_info = info;
|
|
}
|
|
}
|
|
|
|
if (scan_info)
|
|
{
|
|
m_cursor = scan_info->cursor;
|
|
|
|
return std::make_shared<Node>(scan_info->type,
|
|
scan_info->repr,
|
|
m_loc);
|
|
}
|
|
|
|
WG_ASSERT(m_cursor <= m_source.size(), "unexpected token");
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
std::vector<std::shared_ptr<Node>> Lexer::all()
|
|
{
|
|
std::vector<std::shared_ptr<Node>> result;
|
|
std::shared_ptr<Node> node;
|
|
|
|
while ( (node = next()) != nullptr )
|
|
{
|
|
result.push_back(node);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
void Lexer::add_text(std::string const& text,
|
|
NodeType node,
|
|
bool has_value)
|
|
{
|
|
if (text.size() == 1)
|
|
{
|
|
m_seps.push_back(text[0]);
|
|
}
|
|
|
|
m_scanners.push_back(std::bind(&Lexer::scan_text,
|
|
this, text,
|
|
node, has_value));
|
|
}
|
|
|
|
bool Lexer::is_sep(size_t index) const
|
|
{
|
|
WG_ASSERT(index < m_source.size(), "cannot find separator");
|
|
|
|
if (std::isspace(m_source[index]))
|
|
{
|
|
return true;
|
|
}
|
|
|
|
auto itr = std::find(std::begin(m_seps),
|
|
std::end(m_seps),
|
|
m_source[index]);
|
|
|
|
return itr != std::end(m_seps);
|
|
}
|
|
|
|
void Lexer::skip_spaces()
|
|
{
|
|
while (m_cursor < m_source.size()
|
|
&& std::isspace(m_source[m_cursor]))
|
|
{
|
|
if (m_source[m_cursor] == '\n')
|
|
{
|
|
m_loc = Loc {m_loc.origin(), m_loc.line() + 1};
|
|
}
|
|
|
|
m_cursor++;
|
|
}
|
|
}
|
|
|
|
std::optional<ScanInfo> Lexer::scan_text(std::string const& text,
|
|
NodeType type,
|
|
bool has_value) const
|
|
{
|
|
if (m_cursor + text.size() > m_source.size())
|
|
{
|
|
return std::nullopt;
|
|
}
|
|
|
|
for (size_t i=0; i<text.size(); i++)
|
|
{
|
|
if (m_source[m_cursor + i] != text[i])
|
|
{
|
|
return std::nullopt;
|
|
}
|
|
}
|
|
|
|
return ScanInfo {
|
|
m_cursor + text.size(),
|
|
type,
|
|
has_value ? text : ""
|
|
};
|
|
}
|
|
|
|
std::optional<ScanInfo> Lexer::scan_ident() const
|
|
{
|
|
size_t cursor = m_cursor;
|
|
std::string repr;
|
|
|
|
while (cursor < m_source.size()
|
|
&& !is_sep(cursor))
|
|
{
|
|
repr += m_source[cursor];
|
|
cursor++;
|
|
}
|
|
|
|
if (repr.empty() == false)
|
|
{
|
|
return ScanInfo {
|
|
cursor,
|
|
NODE_IDENT,
|
|
repr
|
|
};
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
std::optional<ScanInfo> Lexer::scan_int() const
|
|
{
|
|
size_t cursor = m_cursor;
|
|
std::string repr;
|
|
|
|
if (cursor < m_source.size()
|
|
&& m_source[cursor] == '-')
|
|
{
|
|
repr += '-';
|
|
cursor++;
|
|
}
|
|
|
|
while (cursor < m_source.size()
|
|
&& std::isdigit(m_source[cursor]))
|
|
{
|
|
repr += m_source[cursor];
|
|
cursor++;
|
|
}
|
|
|
|
if (repr.empty() || repr.back() == '-')
|
|
{
|
|
return std::nullopt;
|
|
}
|
|
|
|
return ScanInfo {
|
|
cursor,
|
|
NODE_INT,
|
|
repr
|
|
};
|
|
}
|
|
}
|