287 lines
5.8 KiB
C++
287 lines
5.8 KiB
C++
#include "Lexer.hpp"
|
|
#include "lib/Node.hpp"
|
|
|
|
namespace sk
|
|
{
|
|
/*explicit*/ Lexer::Lexer(std::filesystem::path path, Logger& logger)
|
|
: m_loc {path, 1}
|
|
, m_logger { logger }
|
|
{
|
|
std::vector<std::tuple<std::string, NodeType, bool>> texts = {
|
|
};
|
|
|
|
std::vector<std::tuple<std::string, NodeType, bool>> keywords = {
|
|
{"true", NODE_BOOL, true},
|
|
{"false", NODE_BOOL, true}
|
|
};
|
|
|
|
for (auto entry: texts)
|
|
{
|
|
m_scanners.push_back(std::bind(&Lexer::scan_text,
|
|
this,
|
|
std::get<0>(entry),
|
|
std::get<1>(entry),
|
|
std::get<2>(entry)));
|
|
}
|
|
|
|
for (auto entry: keywords)
|
|
{
|
|
m_scanners.push_back(std::bind(&Lexer::scan_keyword,
|
|
this,
|
|
std::get<0>(entry),
|
|
std::get<1>(entry),
|
|
std::get<2>(entry)));
|
|
}
|
|
|
|
m_scanners.push_back(std::bind(&Lexer::scan_string, this));
|
|
m_scanners.push_back(std::bind(&Lexer::scan_float, this));
|
|
m_scanners.push_back(std::bind(&Lexer::scan_int, this));
|
|
|
|
}
|
|
|
|
/// Nothing to release explicitly: members clean up after themselves.
/*virtual*/ Lexer::~Lexer() = default;
|
|
|
|
void Lexer::scan(std::string const& source)
|
|
{
|
|
m_source = source;
|
|
m_cursor = 0;
|
|
}
|
|
|
|
std::shared_ptr<Node> Lexer::next()
|
|
{
|
|
size_t cursor = m_cursor;
|
|
std::shared_ptr<Node> node;
|
|
|
|
skip_spaces();
|
|
|
|
while (m_cursor < m_source.size()
|
|
&& m_source[m_cursor] == ';')
|
|
{
|
|
while (m_cursor < m_source.size()
|
|
&& m_source[m_cursor] != '\n')
|
|
{
|
|
m_cursor++;
|
|
}
|
|
|
|
skip_spaces();
|
|
}
|
|
|
|
for (auto scanner: m_scanners)
|
|
{
|
|
ScanInfo info = scanner();
|
|
|
|
if (info.ok && info.cursor > cursor)
|
|
{
|
|
node = std::make_shared<Node>(info.type, info.repr, m_loc);
|
|
cursor = info.cursor;
|
|
}
|
|
}
|
|
|
|
if (!node
|
|
&& m_cursor < m_source.size())
|
|
{
|
|
std::stringstream ss;
|
|
std::string text;
|
|
size_t cursor = m_cursor;
|
|
while (cursor < m_source.size()
|
|
&& !std::isspace(m_source[cursor]))
|
|
{
|
|
text += m_source[cursor];
|
|
cursor++;
|
|
}
|
|
|
|
ss << "invalid token '" << text << "'.";
|
|
|
|
m_logger.log<lexical_error>(m_loc,
|
|
LOGGER_ERROR,
|
|
ss);
|
|
}
|
|
|
|
m_cursor = cursor;
|
|
|
|
return node;
|
|
}
|
|
|
|
void Lexer::skip_spaces()
|
|
{
|
|
while (m_cursor < m_source.size()
|
|
&& std::isspace(m_source[m_cursor]))
|
|
{
|
|
if (m_source[m_cursor] == '\n')
|
|
{
|
|
m_loc = Loc {m_loc.path(), m_loc.line() + 1};
|
|
}
|
|
|
|
m_cursor++;
|
|
}
|
|
}
|
|
|
|
/// Tries to read an integer literal (one or more decimal digits) at the
/// cursor. The cursor itself is not moved.
///
/// @return a successful ScanInfo of type NODE_INT holding the digits and
///         the position just past them, or a default ScanInfo when the
///         cursor is not on a digit.
ScanInfo Lexer::scan_int()
{
    size_t end = m_cursor;

    // The cast keeps std::isdigit well-defined for bytes >= 0x80, which
    // are negative when plain char is signed.
    while (end < m_source.size()
           && std::isdigit(static_cast<unsigned char>(m_source[end])))
    {
        end++;
    }

    if (end == m_cursor)
    {
        return ScanInfo {};
    }

    return ScanInfo {
        true,
        end,
        m_source.substr(m_cursor, end - m_cursor),
        NODE_INT
    };
}
|
|
|
|
/// Tries to read a float literal of the form `digits '.' digits` at the
/// cursor. Both digit runs are mandatory, so `1.` and `.5` are rejected.
/// The cursor itself is not moved.
///
/// @return a successful ScanInfo of type NODE_FLOAT holding the literal
///         text and the position just past it, or a default ScanInfo.
ScanInfo Lexer::scan_float()
{
    // Bounds-checked digit test. The cast keeps std::isdigit well-defined
    // for bytes that are negative as plain char.
    auto const digit_at = [this](size_t i) {
        return i < m_source.size()
            && std::isdigit(static_cast<unsigned char>(m_source[i]));
    };

    size_t cursor = m_cursor;

    // Integer part.
    while (digit_at(cursor))
    {
        cursor++;
    }

    bool const has_integer_part = cursor > m_cursor;

    // Mandatory decimal point.
    if (cursor >= m_source.size()
        || m_source[cursor] != '.')
    {
        return ScanInfo {};
    }

    cursor++;

    // Fractional part.
    size_t const fraction_begin = cursor;

    while (digit_at(cursor))
    {
        cursor++;
    }

    bool const has_fraction_part = cursor > fraction_begin;

    if (!has_integer_part || !has_fraction_part)
    {
        return ScanInfo {};
    }

    return ScanInfo {
        true,
        cursor,
        m_source.substr(m_cursor, cursor - m_cursor),
        NODE_FLOAT
    };
}
|
|
|
|
/// Tries to match the exact character sequence `text` at the cursor.
/// No check is made on what follows the match. The cursor is not moved.
///
/// @param text  literal to match.
/// @param type  node type reported on success.
/// @param value when true the matched text becomes the node repr,
///              otherwise the repr is left empty.
/// @return a successful ScanInfo ending just past the match, or a default
///         ScanInfo when the source does not contain `text` here.
ScanInfo Lexer::scan_text(std::string const& text,
                          NodeType type,
                          bool value)
{
    size_t const end = m_cursor + text.size();

    // Not enough characters left for the literal to fit.
    if (end > m_source.size())
    {
        return ScanInfo {};
    }

    if (m_source.compare(m_cursor, text.size(), text) != 0)
    {
        return ScanInfo {};
    }

    return ScanInfo {
        true,
        end,
        value ? text : "",
        type
    };
}
|
|
|
|
/// Tries to match the keyword `text` at the cursor. Unlike scan_text(),
/// the match is rejected when the keyword is immediately followed by an
/// alphanumeric character (so `truest` is not read as `true`).
/// The cursor is not moved.
///
/// @param text  keyword to match.
/// @param type  node type reported on success.
/// @param value when true the keyword text becomes the node repr,
///              otherwise the repr is left empty.
/// @return a successful ScanInfo ending just past the keyword, or a
///         default ScanInfo when there is no keyword match here.
ScanInfo Lexer::scan_keyword(std::string const& text,
                             NodeType type,
                             bool value)
{
    // The literal comparison is exactly what scan_text() does; reuse it
    // instead of duplicating the loop.
    ScanInfo const match = scan_text(text, type, value);

    if (!match.ok)
    {
        return ScanInfo {};
    }

    size_t const index = match.cursor;

    // The cast keeps std::isalnum well-defined for bytes that are negative
    // as plain char (e.g. UTF-8 continuation bytes).
    if (index < m_source.size()
        && std::isalnum(static_cast<unsigned char>(m_source[index])))
    {
        return ScanInfo {};
    }

    return match;
}
|
|
|
|
/// Tries to read a single-quoted string literal at the cursor. The literal
/// runs up to the next `'`; there is no escape mechanism. The cursor is
/// not moved.
///
/// @return a successful ScanInfo of type NODE_STRING whose repr includes
///         both quotes, or a default ScanInfo when the cursor is not on a
///         quote or the closing quote is missing.
ScanInfo Lexer::scan_string()
{
    bool const starts_with_quote =
        m_cursor < m_source.size() && m_source[m_cursor] == '\'';

    if (!starts_with_quote)
    {
        return ScanInfo {};
    }

    size_t const closing = m_source.find('\'', m_cursor + 1);

    // Unterminated literal.
    if (closing == std::string::npos)
    {
        return ScanInfo {};
    }

    size_t const end = closing + 1;

    return ScanInfo {
        true,
        end,
        m_source.substr(m_cursor, end - m_cursor),
        NODE_STRING
    };
}
|
|
}
|