2023-08-30 18:06:26 +00:00
|
|
|
#include "Lexer.hpp"
|
|
|
|
#include "lib/Node.hpp"
|
|
|
|
|
|
|
|
namespace roza
|
|
|
|
{
|
|
|
|
/*explicit*/ Lexer::Lexer(StatusLog& log, SrcLoc loc)
|
|
|
|
: m_log { log }
|
|
|
|
, m_loc { loc }
|
|
|
|
{
|
2023-08-31 09:07:03 +00:00
|
|
|
std::vector<std::tuple<std::string, NodeType, bool>> texts = {
|
2023-08-31 09:37:13 +00:00
|
|
|
{"==", NODE_EQ, false},
|
|
|
|
{"!=", NODE_NE, false},
|
|
|
|
{"<=", NODE_LE, false},
|
|
|
|
{">=", NODE_GE, false},
|
|
|
|
{"<", NODE_LT, false},
|
|
|
|
{">", NODE_GT, false},
|
2023-08-31 09:07:03 +00:00
|
|
|
{"=>", NODE_IMP, false},
|
|
|
|
{"+", NODE_ADD, false},
|
|
|
|
{"-", NODE_SUB, false},
|
|
|
|
{"*", NODE_MUL, false},
|
|
|
|
{"/", NODE_DIV, false},
|
|
|
|
{"%", NODE_MOD, false},
|
|
|
|
{"^", NODE_POW, false},
|
|
|
|
{"(", NODE_OPAR, false},
|
|
|
|
{")", NODE_CPAR, false},
|
2023-08-31 19:25:00 +00:00
|
|
|
{"=", NODE_ASSIGN, false},
|
2023-08-31 09:07:03 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
std::vector<std::tuple<std::string, NodeType, bool>> keywords = {
|
2023-08-31 19:25:00 +00:00
|
|
|
{"let!", NODE_LET_MUT, false},
|
|
|
|
{"let", NODE_LET, false},
|
2023-08-31 09:07:03 +00:00
|
|
|
{"true", NODE_BOOL, true},
|
|
|
|
{"false", NODE_BOOL, true},
|
|
|
|
{"and", NODE_AND, false},
|
|
|
|
{"or", NODE_OR, false},
|
|
|
|
{"not", NODE_NOT, false},
|
2023-08-31 12:41:43 +00:00
|
|
|
{"assert_static_fail", NODE_ASSERT_STATIC_FAIL, false},
|
|
|
|
{"assert", NODE_ASSERT, false},
|
2023-08-31 09:07:03 +00:00
|
|
|
};
|
|
|
|
|
2023-08-30 18:06:26 +00:00
|
|
|
m_scanners.push_back(std::bind(&Lexer::scan_int, this));
|
2023-08-31 09:07:03 +00:00
|
|
|
|
|
|
|
for (auto const& entry: keywords)
|
|
|
|
{
|
|
|
|
m_scanners.push_back(std::bind(&Lexer::scan_keyword, this,
|
|
|
|
std::get<0>(entry),
|
|
|
|
std::get<1>(entry),
|
|
|
|
std::get<2>(entry)));
|
|
|
|
}
|
|
|
|
|
|
|
|
for (auto const& entry: texts)
|
|
|
|
{
|
|
|
|
m_scanners.push_back(std::bind(&Lexer::scan_text, this,
|
|
|
|
std::get<0>(entry),
|
|
|
|
std::get<1>(entry),
|
|
|
|
std::get<2>(entry)));
|
|
|
|
}
|
2023-08-30 22:31:19 +00:00
|
|
|
|
2023-08-31 19:25:00 +00:00
|
|
|
m_scanners.push_back(std::bind(&Lexer::scan_ident, this));
|
2023-08-30 18:06:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/** The destructor has no work of its own to perform. */
/*virtual*/ Lexer::~Lexer() = default;
|
|
|
|
|
2023-08-30 22:31:19 +00:00
|
|
|
bool Lexer::is_at_end() const
|
|
|
|
{
|
|
|
|
return m_cursor >= size();
|
|
|
|
}
|
|
|
|
|
2023-08-30 18:06:26 +00:00
|
|
|
void Lexer::scan(std::string const& source)
|
|
|
|
{
|
|
|
|
m_source = source;
|
|
|
|
m_cursor = 0;
|
|
|
|
|
|
|
|
skip_blanks();
|
|
|
|
|
|
|
|
while (m_cursor < source.size())
|
|
|
|
{
|
|
|
|
while (m_cursor < source.size()
|
|
|
|
&& source[m_cursor] == '#')
|
|
|
|
{
|
|
|
|
while (m_cursor < source.size()
|
|
|
|
&& source[m_cursor] != '\n')
|
|
|
|
{
|
|
|
|
m_cursor++;
|
|
|
|
}
|
|
|
|
|
2023-08-30 22:31:19 +00:00
|
|
|
m_loc.set_line(m_loc.line() + 1);
|
2023-08-30 18:06:26 +00:00
|
|
|
m_cursor++;
|
|
|
|
}
|
|
|
|
|
|
|
|
skip_blanks();
|
|
|
|
|
|
|
|
ScanInfo info;
|
|
|
|
|
|
|
|
std::shared_ptr<Node> node;
|
|
|
|
size_t cursor = m_cursor;
|
|
|
|
|
|
|
|
for (auto& scanner: m_scanners)
|
|
|
|
{
|
|
|
|
ScanInfo info = scanner();
|
|
|
|
|
|
|
|
if (info.node && info.cursor > cursor)
|
|
|
|
{
|
|
|
|
node = info.node;
|
|
|
|
cursor = info.cursor;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
m_nodes.push_back(node);
|
|
|
|
m_cursor = cursor;
|
|
|
|
|
|
|
|
skip_blanks();
|
|
|
|
|
|
|
|
if (!node)
|
|
|
|
{
|
|
|
|
std::string symb;
|
|
|
|
while (m_cursor < m_source.size()
|
|
|
|
&& !std::isblank(m_source[m_cursor]))
|
|
|
|
{
|
|
|
|
symb += m_source[m_cursor];
|
|
|
|
m_cursor++;
|
|
|
|
}
|
|
|
|
|
|
|
|
m_log.fatal(m_loc
|
|
|
|
, std::string()
|
|
|
|
+ "unexpected symbol '"
|
|
|
|
+ symb + "'");
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void Lexer::skip_blanks()
|
|
|
|
{
|
|
|
|
while (m_cursor < m_source.size()
|
|
|
|
&& std::isspace(m_source.at(m_cursor)))
|
|
|
|
{
|
|
|
|
if (m_source.at(m_cursor) == '\n')
|
|
|
|
{
|
|
|
|
m_loc.set_line(m_loc.line() + 1);
|
|
|
|
auto root = std::make_shared<Node>(NODE_EOI, "", m_loc);
|
|
|
|
m_nodes.push_back(root);
|
|
|
|
}
|
|
|
|
|
|
|
|
m_cursor++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::shared_ptr<Node> Lexer::get_or_nullptr(size_t index) const
|
|
|
|
{
|
|
|
|
if (index >= m_nodes.size())
|
|
|
|
{
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
return m_nodes.at(index);
|
|
|
|
}
|
|
|
|
|
2023-08-31 09:07:03 +00:00
|
|
|
bool Lexer::is_sep(size_t index) const
|
|
|
|
{
|
|
|
|
if (index >= m_source.size())
|
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
char c = m_source[index];
|
|
|
|
|
|
|
|
return !std::isalnum(c);
|
|
|
|
}
|
|
|
|
|
2023-08-30 18:06:26 +00:00
|
|
|
/**
 * Tries to read a run of decimal digits starting at m_cursor.
 *
 * @return a ScanInfo holding a NODE_INT node (whose text is the digits)
 *         and the position just after them, or an empty ScanInfo when the
 *         cursor is not on a digit. m_cursor itself is left untouched.
 */
ScanInfo Lexer::scan_int() const
{
  size_t cursor = m_cursor;

  // <cctype> predicates are only defined for values representable as
  // unsigned char (or EOF); a plain char may be negative.
  while (cursor < m_source.size()
         && std::isdigit(static_cast<unsigned char>(m_source[cursor])))
  {
    cursor++;
  }

  if (cursor == m_cursor)
  {
    return ScanInfo {};
  }

  return ScanInfo {
    std::make_shared<Node>(NodeType::NODE_INT,
                           m_source.substr(m_cursor, cursor - m_cursor),
                           m_loc),
    cursor
  };
}
|
2023-08-30 22:31:19 +00:00
|
|
|
|
|
|
|
/**
 * Checks whether m_source contains exactly `text` at m_cursor.
 *
 * @param text  spelling to look for.
 * @param type  type given to the produced node.
 * @param value when true the node's text is `text`, otherwise it is empty.
 * @return a ScanInfo with the node and the position just after `text`, or
 *         an empty ScanInfo when the source does not match. m_cursor is
 *         left untouched.
 */
ScanInfo Lexer::scan_text(std::string const& text
                          , NodeType type
                          , bool value) const
{
  size_t const length = text.size();

  // An empty `text` matches trivially, wherever the cursor is.
  if (length > 0)
  {
    // Guard the range first: compare() throws when the start position
    // lies beyond the end of the string.
    bool const fits = m_cursor < m_source.size()
      && length <= m_source.size() - m_cursor;

    if (!fits || m_source.compare(m_cursor, length, text) != 0)
    {
      return ScanInfo {};
    }
  }

  return ScanInfo {
    std::make_shared<Node>(type, value ? text : "", m_loc),
    m_cursor + length
  };
}
|
2023-08-31 09:07:03 +00:00
|
|
|
|
|
|
|
/**
 * Like scan_text(), but the match is only accepted when the position right
 * after it is a separator according to is_sep() (end of source or a
 * non-alphanumeric character).
 *
 * When scan_text() finds nothing, its empty result is what gets tested, and
 * an empty result is returned whatever is_sep() answers.
 *
 * @param keyword spelling to look for.
 * @param type    type given to the produced node.
 * @param value   forwarded to scan_text().
 * @return the scan_text() result when accepted, an empty ScanInfo otherwise.
 */
ScanInfo Lexer::scan_keyword(std::string const& keyword,
                             NodeType type,
                             bool value) const
{
  ScanInfo match = scan_text(keyword, type, value);

  if (!is_sep(match.cursor))
  {
    return ScanInfo {};
  }

  return match;
}
|
|
|
|
|
2023-08-31 19:25:00 +00:00
|
|
|
/**
 * Tries to read an identifier starting at m_cursor.
 *
 * An identifier starts with a letter or '_' and continues with letters,
 * digits or '_'.
 *
 * @return a ScanInfo holding a NODE_IDENT node (whose text is the
 *         identifier) and the position just after it, or an empty ScanInfo
 *         when the cursor is not on an identifier start. m_cursor itself
 *         is left untouched.
 */
ScanInfo Lexer::scan_ident() const
{
  // <cctype> predicates are only defined for values representable as
  // unsigned char (or EOF); a plain char may be negative, hence the casts.
  auto const is_start = [](char c) {
    return c == '_' || std::isalpha(static_cast<unsigned char>(c));
  };

  auto const is_part = [&is_start](char c) {
    return is_start(c) || std::isdigit(static_cast<unsigned char>(c));
  };

  size_t cursor = m_cursor;

  if (cursor < m_source.size() && is_start(m_source[cursor]))
  {
    cursor++;

    while (cursor < m_source.size() && is_part(m_source[cursor]))
    {
      cursor++;
    }
  }

  if (cursor == m_cursor)
  {
    return ScanInfo {};
  }

  return ScanInfo {
    std::make_shared<Node>(NODE_IDENT,
                           m_source.substr(m_cursor, cursor - m_cursor),
                           loc()),
    cursor
  };
}
|
2023-08-30 18:06:26 +00:00
|
|
|
}
|