roza/lib/Lexer.cpp

227 lines
5.0 KiB
C++
Raw Normal View History

2023-08-30 18:06:26 +00:00
#include "Lexer.hpp"
#include "lib/Node.hpp"
namespace roza
{
/**
 * Builds a lexer that reports through `log` and tags nodes with `loc`.
 *
 * Registers the scanners tried at every position, in this order: the
 * integer scanner, one keyword scanner per keyword, then one text scanner
 * per operator/punctuation symbol. Each table entry is
 * (spelling, node type, keep-spelling-as-node-value).
 */
/*explicit*/ Lexer::Lexer(StatusLog& log, SrcLoc loc)
  : m_log { log }
  , m_loc { loc }
{
  using Entry = std::tuple<std::string, NodeType, bool>;

  // Words: matched through scan_keyword().
  std::vector<Entry> const keyword_table = {
    {"true", NODE_BOOL, true},
    {"false", NODE_BOOL, true},
    {"and", NODE_AND, false},
    {"or", NODE_OR, false},
    {"not", NODE_NOT, false},
  };

  // Operators and parentheses: matched through scan_text().
  std::vector<Entry> const symbol_table = {
    {"==", NODE_EQ, false},
    {"!=", NODE_NE, false},
    {"<=", NODE_LE, false},
    {">=", NODE_GE, false},
    {"<", NODE_LT, false},
    {">", NODE_GT, false},
    {"=>", NODE_IMP, false},
    {"+", NODE_ADD, false},
    {"-", NODE_SUB, false},
    {"*", NODE_MUL, false},
    {"/", NODE_DIV, false},
    {"%", NODE_MOD, false},
    {"^", NODE_POW, false},
    {"(", NODE_OPAR, false},
    {")", NODE_CPAR, false},
  };

  m_scanners.push_back([this]() { return scan_int(); });

  for (Entry const& item: keyword_table)
  {
    std::string const word = std::get<0>(item);
    NodeType const type = std::get<1>(item);
    bool const keep = std::get<2>(item);

    // Captured by value: the table is local to this constructor.
    m_scanners.push_back([this, word, type, keep]() {
      return scan_keyword(word, type, keep);
    });
  }

  for (Entry const& item: symbol_table)
  {
    std::string const symbol = std::get<0>(item);
    NodeType const type = std::get<1>(item);
    bool const keep = std::get<2>(item);

    m_scanners.push_back([this, symbol, type, keep]() {
      return scan_text(symbol, type, keep);
    });
  }
}
// Nothing to release explicitly; members clean up after themselves.
/*virtual*/ Lexer::~Lexer() = default;
2023-08-30 22:31:19 +00:00
/**
 * Tells whether the cursor has reached or passed size().
 *
 * NOTE(review): size() is declared outside this file; confirm what it
 * measures before relying on this during scanning.
 */
bool Lexer::is_at_end() const
{
  return !(m_cursor < size());
}
2023-08-30 18:06:26 +00:00
/**
 * Tokenizes `source` and appends the resulting nodes to m_nodes.
 *
 * The text is copied into m_source and the cursor is reset to 0. At each
 * position every registered scanner is tried; the longest match wins and,
 * on equal length, the scanner registered first is kept. When nothing
 * matches, the offending run of non-whitespace characters is reported
 * through m_log.fatal() and no node is stored for it.
 *
 * A '#' starts a comment that runs to the end of the line. Comments may
 * appear anywhere between tokens, including after blank lines and at the
 * very end of the input.
 *
 * @param source text to tokenize.
 */
void Lexer::scan(std::string const& source)
{
  m_source = source;
  m_cursor = 0;

  for (;;)
  {
    // Skip any interleaving of whitespace and comments. Re-running
    // skip_blanks() after every comment is what lets several comment
    // lines be separated by blank lines.
    for (;;)
    {
      skip_blanks();

      if (m_cursor >= m_source.size() || m_source[m_cursor] != '#')
      {
        break;
      }

      while (m_cursor < m_source.size() && m_source[m_cursor] != '\n')
      {
        m_cursor++;
      }

      if (m_cursor < m_source.size())
      {
        // Consume the newline that ends the comment. As before, this
        // newline only bumps the line counter; it does not go through
        // skip_blanks(), so no NODE_EOI is emitted for it.
        m_loc.set_line(m_loc.line() + 1);
        m_cursor++;
      }
    }

    // Nothing left but blanks/comments: do not run the scanners past
    // the end of the input.
    if (m_cursor >= m_source.size())
    {
      break;
    }

    // Longest-match selection among all scanners.
    std::shared_ptr<Node> node;
    size_t cursor = m_cursor;

    for (auto& scanner: m_scanners)
    {
      ScanInfo info = scanner();

      if (info.node && info.cursor > cursor)
      {
        node = info.node;
        cursor = info.cursor;
      }
    }

    if (!node)
    {
      // Collect the unrecognized run up to the next whitespace so the
      // message names it. The cursor sits on a non-space character here,
      // so at least one character is always consumed.
      std::string symb;

      while (m_cursor < m_source.size()
             && !std::isspace(static_cast<unsigned char>(m_source[m_cursor])))
      {
        symb += m_source[m_cursor];
        m_cursor++;
      }

      m_log.fatal(m_loc
                  , std::string()
                  + "unexpected symbol '"
                  + symb + "'");
      continue;
    }

    m_nodes.push_back(node);
    m_cursor = cursor;
  }
}
/**
 * Advances the cursor past consecutive whitespace characters.
 *
 * Every newline crossed increments the current line in m_loc and appends
 * a NODE_EOI node (carrying the updated location) to m_nodes.
 */
void Lexer::skip_blanks()
{
  while (m_cursor < m_source.size())
  {
    char const current = m_source[m_cursor];

    // <cctype> predicates require a value representable as unsigned char;
    // a plain char may be negative for non-ASCII bytes.
    if (!std::isspace(static_cast<unsigned char>(current)))
    {
      break;
    }

    if (current == '\n')
    {
      m_loc.set_line(m_loc.line() + 1);
      auto root = std::make_shared<Node>(NODE_EOI, "", m_loc);
      m_nodes.push_back(root);
    }

    m_cursor++;
  }
}
/**
 * Bounds-checked access to the scanned nodes.
 *
 * @param index position in m_nodes.
 * @return the node stored at `index`, or nullptr when `index` is out of
 *         range.
 */
std::shared_ptr<Node> Lexer::get_or_nullptr(size_t index) const
{
  if (index < m_nodes.size())
  {
    return m_nodes[index];
  }

  return nullptr;
}
2023-08-31 09:07:03 +00:00
/**
 * Tells whether position `index` of m_source ends a word.
 *
 * @param index position in m_source; may be at or past the end.
 * @return true when `index` is past the end of the source or the
 *         character there is not alphanumeric.
 */
bool Lexer::is_sep(size_t index) const
{
  if (index >= m_source.size())
  {
    return true;
  }

  // Cast through unsigned char: calling a <cctype> predicate with a
  // negative plain char is undefined behavior.
  unsigned char const c = static_cast<unsigned char>(m_source[index]);
  return !std::isalnum(c);
}
2023-08-30 18:06:26 +00:00
/**
 * Tries to read a run of decimal digits starting at the cursor.
 *
 * The lexer's own cursor is left untouched; the end position is reported
 * in the result instead.
 *
 * @return a NODE_INT node holding the digits and the position just past
 *         them, or an empty ScanInfo when no digit is found.
 */
ScanInfo Lexer::scan_int() const
{
  size_t cursor = m_cursor;
  std::string repr;

  // Cast through unsigned char: calling a <cctype> predicate with a
  // negative plain char is undefined behavior.
  while (cursor < m_source.size()
         && std::isdigit(static_cast<unsigned char>(m_source[cursor])))
  {
    repr += m_source[cursor];
    cursor++;
  }

  if (repr.empty())
  {
    return ScanInfo {};
  }

  return ScanInfo {
    std::make_shared<Node>(NodeType::NODE_INT, repr, m_loc),
    cursor
  };
}
2023-08-30 22:31:19 +00:00
/**
 * Tries to match the exact spelling `text` at the cursor.
 *
 * @param text  characters that must appear, in order, from the cursor on.
 * @param type  node type given to the produced node.
 * @param value when true the node carries `text` as its value, otherwise
 *              an empty string.
 * @return the node and the position just past the match, or an empty
 *         ScanInfo on mismatch or when the source ends too early.
 */
ScanInfo Lexer::scan_text(std::string const& text
                          , NodeType type
                          , bool value) const
{
  bool matched = true;
  size_t offset = 0;

  // Compare character by character, stopping at the first difference.
  while (matched && offset < text.size())
  {
    size_t const pos = m_cursor + offset;
    matched = pos < m_source.size() && m_source[pos] == text[offset];
    ++offset;
  }

  if (!matched)
  {
    return ScanInfo {};
  }

  std::string const payload = value ? text : "";

  return ScanInfo {
    std::make_shared<Node>(type, payload, m_loc),
    m_cursor + text.size()
  };
}
2023-08-31 09:07:03 +00:00
/**
 * Tries to match `keyword` at the cursor as a whole word.
 *
 * The spelling is matched with scan_text(); the match is kept only when
 * the position following it is a separator according to is_sep().
 *
 * @param keyword word to match.
 * @param type    node type given to the produced node.
 * @param value   forwarded to scan_text().
 * @return the scan_text() result when it is followed by a separator,
 *         otherwise an empty ScanInfo.
 */
ScanInfo Lexer::scan_keyword(std::string const& keyword,
                             NodeType type,
                             bool value) const
{
  ScanInfo const candidate = scan_text(keyword, type, value);

  // The spelling itself did not match.
  if (!candidate.node)
  {
    return ScanInfo {};
  }

  // Matched, but only as the prefix of a longer alphanumeric run.
  if (!is_sep(candidate.cursor))
  {
    return ScanInfo {};
  }

  return candidate;
}
2023-08-30 18:06:26 +00:00
}