This repository has been archived on 2023-09-09. You can view files and clone it, but cannot push or open issues/pull-requests.
skemla/lib/Lexer.cpp

287 lines
5.8 KiB
C++

#include "Lexer.hpp"
#include "lib/Node.hpp"
namespace sk
{
/**
 * Creates a lexer whose reported locations refer to `path`, starting at
 * line 1, and registers every token scanner.
 *
 * Registration order matters: next() keeps the longest match and, when two
 * scanners consume the same number of characters, the one registered first.
 *
 * @param path   file the locations attached to nodes and errors refer to.
 * @param logger sink used to report lexical errors.
 */
/*explicit*/ Lexer::Lexer(std::filesystem::path path, Logger& logger)
    : m_loc {path, 1}
    , m_logger { logger }
{
    // (text to match, node type to produce, keep the text as the node repr)
    using Entry = std::tuple<std::string, NodeType, bool>;

    // Texts matched verbatim by scan_text; none are registered yet.
    const std::vector<Entry> texts = {
    };

    // Words matched by scan_keyword.
    const std::vector<Entry> keywords = {
        {"true", NODE_BOOL, true},
        {"false", NODE_BOOL, true}
    };

    // Iterate by const reference: std::bind stores its own copy of each
    // argument, so copying the whole tuple first is wasted work.
    for (const auto& [text, type, value] : texts)
    {
        m_scanners.push_back(std::bind(&Lexer::scan_text,
                                       this,
                                       text,
                                       type,
                                       value));
    }

    for (const auto& [text, type, value] : keywords)
    {
        m_scanners.push_back(std::bind(&Lexer::scan_keyword,
                                       this,
                                       text,
                                       type,
                                       value));
    }

    m_scanners.push_back(std::bind(&Lexer::scan_string, this));
    m_scanners.push_back(std::bind(&Lexer::scan_float, this));
    m_scanners.push_back(std::bind(&Lexer::scan_int, this));
}
// The destructor performs no explicit work of its own.
/*virtual*/ Lexer::~Lexer() = default;
/**
 * Loads a new text to tokenize and rewinds the read position to its start.
 *
 * NOTE(review): m_loc is left untouched here, so line numbers keep counting
 * from wherever a previous scan stopped - confirm this is intended.
 *
 * @param source text that subsequent next() calls will tokenize (copied).
 */
void Lexer::scan(std::string const& source)
{
    m_source.assign(source);
    m_cursor = 0;
}
std::shared_ptr<Node> Lexer::next()
{
size_t cursor = m_cursor;
std::shared_ptr<Node> node;
skip_spaces();
while (m_cursor < m_source.size()
&& m_source[m_cursor] == ';')
{
while (m_cursor < m_source.size()
&& m_source[m_cursor] != '\n')
{
m_cursor++;
}
skip_spaces();
}
for (auto scanner: m_scanners)
{
ScanInfo info = scanner();
if (info.ok && info.cursor > cursor)
{
node = std::make_shared<Node>(info.type, info.repr, m_loc);
cursor = info.cursor;
}
}
if (!node
&& m_cursor < m_source.size())
{
std::stringstream ss;
std::string text;
size_t cursor = m_cursor;
while (cursor < m_source.size()
&& !std::isspace(m_source[cursor]))
{
text += m_source[cursor];
cursor++;
}
ss << "invalid token '" << text << "'.";
m_logger.log<lexical_error>(m_loc,
LOGGER_ERROR,
ss);
}
m_cursor = cursor;
return node;
}
/**
 * Advances the read position past any run of whitespace, bumping the line
 * number of m_loc once for every '\n' that is crossed.
 */
void Lexer::skip_spaces()
{
    // The cast keeps std::isspace defined for bytes above 0x7F, which are
    // negative when char is signed.
    while (m_cursor < m_source.size()
           && std::isspace(static_cast<unsigned char>(m_source[m_cursor])))
    {
        if (m_source[m_cursor] == '\n')
        {
            m_loc = Loc {m_loc.path(), m_loc.line() + 1};
        }
        m_cursor++;
    }
}
/**
 * Tries to read an integer literal (one or more decimal digits) at the
 * current read position. The read position itself is not modified.
 *
 * @return a successful NODE_INT ScanInfo holding the digits and the position
 *         just past them, or an empty ScanInfo when no digit is present.
 */
ScanInfo Lexer::scan_int()
{
    size_t cursor = m_cursor;

    // The cast keeps std::isdigit defined for bytes above 0x7F, which are
    // negative when char is signed.
    while (cursor < m_source.size()
           && std::isdigit(static_cast<unsigned char>(m_source[cursor])))
    {
        cursor++;
    }

    if (cursor == m_cursor)
    {
        return ScanInfo {};
    }

    return ScanInfo {
        true,
        cursor,
        m_source.substr(m_cursor, cursor - m_cursor),
        NODE_INT
    };
}
/**
 * Tries to read a floating point literal at the current read position:
 * one or more digits, a '.', then one or more digits. Forms such as ".5" or
 * "5." are rejected. The read position itself is not modified.
 *
 * @return a successful NODE_FLOAT ScanInfo holding the literal text and the
 *         position just past it, or an empty ScanInfo when the text at the
 *         read position does not have that shape.
 */
ScanInfo Lexer::scan_float()
{
    // The cast keeps std::isdigit defined for bytes above 0x7F, which are
    // negative when char is signed.
    auto const is_digit_at = [this](size_t index) {
        return index < m_source.size()
            && std::isdigit(static_cast<unsigned char>(m_source[index]));
    };

    // Integer part.
    size_t cursor = m_cursor;
    while (is_digit_at(cursor))
    {
        cursor++;
    }

    size_t const dot = cursor;
    bool const has_integer_part = dot > m_cursor;
    bool const has_dot = dot < m_source.size() && m_source[dot] == '.';
    if (!has_integer_part || !has_dot)
    {
        return ScanInfo {};
    }

    // Fractional part.
    cursor = dot + 1;
    while (is_digit_at(cursor))
    {
        cursor++;
    }

    if (cursor == dot + 1)
    {
        // Nothing after the dot.
        return ScanInfo {};
    }

    return ScanInfo {
        true,
        cursor,
        m_source.substr(m_cursor, cursor - m_cursor),
        NODE_FLOAT
    };
}
/**
 * Checks whether the source continues with exactly `text` at the current
 * read position. The read position itself is not modified.
 *
 * @param text  characters that must appear verbatim.
 * @param type  node type reported on success.
 * @param value when true the matched text is reported as the repr,
 *              otherwise the repr is left empty.
 * @return a successful ScanInfo pointing just past the text, or an empty
 *         ScanInfo when the source is too short or differs.
 */
ScanInfo Lexer::scan_text(std::string const& text,
                          NodeType type,
                          bool value)
{
    size_t const length = text.size();
    bool const fits = m_cursor + length <= m_source.size();

    // compare() is only reached when enough characters remain.
    if (!fits || m_source.compare(m_cursor, length, text) != 0)
    {
        return ScanInfo {};
    }

    std::string repr;
    if (value)
    {
        repr = text;
    }

    return ScanInfo {
        true,
        m_cursor + length,
        repr,
        type
    };
}
/**
 * Like scan_text, but additionally requires the match to end at a word
 * boundary: the keyword must not be directly followed by a letter or a
 * digit (so "truer" does not yield the keyword "true"). Only alphanumeric
 * characters count as a continuation. The read position is not modified.
 *
 * @param text  keyword that must appear verbatim.
 * @param type  node type reported on success.
 * @param value when true the keyword is reported as the repr, otherwise the
 *              repr is left empty.
 * @return a successful ScanInfo pointing just past the keyword, or an empty
 *         ScanInfo when the keyword is absent or runs into a longer word.
 */
ScanInfo Lexer::scan_keyword(std::string const& text,
                             NodeType type,
                             bool value)
{
    // The verbatim comparison is exactly what scan_text already does.
    ScanInfo match = scan_text(text, type, value);
    if (!match.ok)
    {
        return ScanInfo {};
    }

    // The cast keeps std::isalnum defined for bytes above 0x7F, which are
    // negative when char is signed.
    size_t const index = match.cursor;
    if (index < m_source.size()
        && std::isalnum(static_cast<unsigned char>(m_source[index])))
    {
        return ScanInfo {};
    }

    return match;
}
/**
 * Tries to read a single-quoted string literal at the current read
 * position. The literal runs from an opening ' to the next ' (there is no
 * escape handling). The read position itself is not modified.
 *
 * @return a successful NODE_STRING ScanInfo whose repr includes both quotes
 *         and whose cursor points just past the closing quote, or an empty
 *         ScanInfo when there is no opening quote or no closing quote.
 */
ScanInfo Lexer::scan_string()
{
    size_t const opening = m_cursor;
    bool const starts_with_quote =
        opening < m_source.size() && m_source[opening] == '\'';
    if (!starts_with_quote)
    {
        return ScanInfo {};
    }

    size_t const closing = m_source.find('\'', opening + 1);
    if (closing == std::string::npos)
    {
        // Unterminated literal.
        return ScanInfo {};
    }

    size_t const past_end = closing + 1;
    return ScanInfo {
        true,
        past_end,
        m_source.substr(opening, past_end - opening),
        NODE_STRING
    };
}
}