// roza/lib/Lexer.cpp

#include "Lexer.hpp"
#include "lib/Node.hpp"
namespace roza
{
/**
 * Builds a lexer that reports through `log` and starts counting from `loc`.
 *
 * The constructor also registers the token scanners; for now the only one
 * is scan_int().
 *
 * @param log status log stored in m_log.
 * @param loc initial source location stored in m_loc.
 */
/*explicit*/ Lexer::Lexer(StatusLog& log, SrcLoc loc)
    : m_log { log }
    , m_loc { loc }
{
    // The callback captures `this`, so it is only meaningful for this very
    // instance.
    m_scanners.push_back([this]() { return scan_int(); });
}
/** Nothing to release by hand: the destructor body is empty. */
/*virtual*/ Lexer::~Lexer() = default;
void Lexer::scan(std::string const& source)
{
m_source = source;
m_cursor = 0;
skip_blanks();
while (m_cursor < source.size())
{
while (m_cursor < source.size()
&& source[m_cursor] == '#')
{
while (m_cursor < source.size()
&& source[m_cursor] != '\n')
{
m_cursor++;
}
m_cursor++;
}
skip_blanks();
ScanInfo info;
std::shared_ptr<Node> node;
size_t cursor = m_cursor;
for (auto& scanner: m_scanners)
{
ScanInfo info = scanner();
if (info.node && info.cursor > cursor)
{
node = info.node;
cursor = info.cursor;
}
}
m_nodes.push_back(node);
m_cursor = cursor;
skip_blanks();
if (!node)
{
std::string symb;
while (m_cursor < m_source.size()
&& !std::isblank(m_source[m_cursor]))
{
symb += m_source[m_cursor];
m_cursor++;
}
m_log.fatal(m_loc
, std::string()
+ "unexpected symbol '"
+ symb + "'");
}
}
}
/**
 * Advances m_cursor past consecutive whitespace characters of m_source.
 *
 * For every '\n' crossed, the line of m_loc is incremented and a NODE_EOI
 * node with an empty representation is appended to m_nodes.
 */
void Lexer::skip_blanks()
{
    while (m_cursor < m_source.size())
    {
        char const current = m_source[m_cursor];

        // <cctype> functions require a value representable as unsigned
        // char; passing a negative char (non-ASCII byte) is undefined.
        if (!std::isspace(static_cast<unsigned char>(current)))
        {
            break;
        }

        if (current == '\n')
        {
            m_loc.set_line(m_loc.line() + 1);
            // NOTE(review): the node is created after the line bump, so it
            // carries the number of the line that starts here - confirm
            // this is intended.
            m_nodes.push_back(std::make_shared<Node>(NODE_EOI, "", m_loc));
        }

        ++m_cursor;
    }
}
/**
 * Bounds-checked access to the scanned nodes.
 *
 * @param index position in m_nodes.
 * @return the node stored at `index`, or nullptr when `index` is past the
 *         end of m_nodes.
 */
std::shared_ptr<Node> Lexer::get_or_nullptr(size_t index) const
{
    bool const in_range = index < m_nodes.size();

    if (in_range)
    {
        return m_nodes[index];
    }

    return nullptr;
}
/**
 * Tries to read an integer literal (optional '-' followed by one or more
 * decimal digits) starting at m_cursor. m_cursor itself is not modified.
 *
 * @return a ScanInfo holding a NODE_INT node and the position just after
 *         the literal, or a default-constructed ScanInfo when there is no
 *         integer at the current position (a '-' without digits is not an
 *         integer).
 */
ScanInfo Lexer::scan_int() const
{
    size_t cursor = m_cursor;
    std::string repr;

    // Bounds check first: the cursor may already be at the end of input.
    if (cursor < m_source.size() && m_source[cursor] == '-')
    {
        repr += '-';
        ++cursor;
    }

    size_t const digits_begin = cursor;

    while (cursor < m_source.size()
           && std::isdigit(static_cast<unsigned char>(m_source[cursor])))
    {
        repr += m_source[cursor];
        ++cursor;
    }

    // Require at least one digit so that a lone sign is rejected.
    if (cursor == digits_begin)
    {
        return ScanInfo {};
    }

    return ScanInfo {
        std::make_shared<Node>(NodeType::NODE_INT, repr, m_loc),
        cursor
    };
}
}