This repository has been archived on 2024-03-07. You can view files and clone it, but cannot push or open issues/pull-requests.
wongola/lib/Lexer.cpp

277 lines
5.9 KiB
C++

#include "Lexer.hpp"
#include "lib/Node.hpp"
namespace wg
{
/**
 * Builds a lexer with its default token set.
 *
 * Scanners are registered in this order: keywords, operators and
 * punctuation, integers, identifiers. next() keeps the first scanner among
 * those that consume the most characters, so the registration order is
 * what breaks ties between scanners matching the same length.
 */
/*explicit*/ Lexer::Lexer()
{
  // Keywords. Passing `true` as third argument makes the produced token
  // carry the keyword text as its value.
  add_keyword("true", NODE_BOOL, true);
  add_keyword("false", NODE_BOOL, true);
  add_keyword("as", NODE_AS);
  add_keyword("int", NODE_TYPE, true);
  add_keyword("fun", NODE_FUN);
  add_keyword("return", NODE_RETURN);
  add_keyword("extern", NODE_EXTERN);
  add_keyword("and", NODE_AND);
  add_keyword("or", NODE_OR);
  add_keyword("not", NODE_NOT);

  // Operators and punctuation, registered in table order. The
  // single-character entries are also recorded as separators by add_text().
  struct Symbol
  {
    char const* text;
    NodeType type;
  };

  Symbol const symbols[] = {
    {"==", NODE_EQ},
    {"<>", NODE_NE},
    {"<", NODE_LT},
    {"<=", NODE_LE},
    {">", NODE_GT},
    {">=", NODE_GE},
    {".", NODE_DOT},
    {"{", NODE_OBRACE},
    {"}", NODE_CBRACE},
    {",", NODE_COMMA},
    {"#", NODE_HASH},
    {"+", NODE_ADD},
    {"-", NODE_SUB},
    {"*", NODE_MUL},
    {"/", NODE_DIV},
    {"%", NODE_MOD},
    {"(", NODE_OPAR},
    {")", NODE_CPAR},
    {";", NODE_SEMICOLON},
  };

  for (Symbol const& symbol : symbols)
  {
    add_text(symbol.text, symbol.type);
  }

  // Generic scanners come last so that, at equal length, the more specific
  // scanners registered above are preferred.
  m_scanners.push_back([this] { return scan_int(); });
  m_scanners.push_back([this] { return scan_ident(); });
}
/** Destructor: nothing to release beyond what the members clean up themselves. */
/*virtual*/ Lexer::~Lexer() = default;
/**
 * Starts lexing a new source text.
 *
 * Stores `source` in m_source and rewinds the read cursor to the first
 * character. Tokens are then pulled with next() or all().
 *
 * NOTE(review): m_loc is not touched here, so line counting presumably
 * continues from a previous scan() call — confirm whether that is intended.
 *
 * @param source text to tokenize.
 */
void Lexer::scan(std::string const& source)
{
  m_cursor = 0;
  m_source = source;
}
std::shared_ptr<Node> Lexer::next()
{
std::optional<ScanInfo> scan_info;
skip_spaces();
while (m_cursor + 1 < m_source.size()
&& m_source[m_cursor] == ':'
&& m_source[m_cursor + 1] == ':')
{
while (m_source[m_cursor] != '\n')
{
m_cursor++;
}
skip_spaces();
}
for (auto scanner: m_scanners)
{
auto info = scanner();
if (info && (scan_info == std::nullopt
|| info->cursor > scan_info->cursor))
{
scan_info = info;
}
}
if (scan_info)
{
m_cursor = scan_info->cursor;
return std::make_shared<Node>(scan_info->type,
scan_info->repr,
m_loc);
}
WG_ASSERT(m_cursor <= m_source.size(), "unexpected token");
return nullptr;
}
std::vector<std::shared_ptr<Node>> Lexer::all()
{
std::vector<std::shared_ptr<Node>> result;
std::shared_ptr<Node> node;
while ( (node = next()) != nullptr )
{
result.push_back(node);
}
return result;
}
void Lexer::add_text(std::string const& text,
NodeType node,
bool has_value)
{
if (text.size() == 1)
{
m_seps.push_back(text[0]);
}
m_scanners.push_back(std::bind(&Lexer::scan_text,
this, text,
node, has_value));
}
void Lexer::add_keyword(std::string const& text,
NodeType node,
bool has_value)
{
if (text.size() == 1)
{
m_seps.push_back(text[0]);
}
m_scanners.push_back(std::bind(&Lexer::scan_keyword,
this, text,
node, has_value));
}
/**
 * Tells whether the source character at `index` separates words.
 *
 * A separator is either a whitespace character or one of the characters
 * recorded in m_seps.
 *
 * @param index position in m_source; must be strictly less than its size
 *              (enforced with WG_ASSERT).
 * @return true when the character is a separator.
 */
bool Lexer::is_sep(size_t index) const
{
  WG_ASSERT(index < m_source.size(), "cannot find separator");

  char const c = m_source[index];

  // std::isspace has undefined behaviour for negative values other than
  // EOF, which a plain char can hold for non-ASCII bytes.
  if (std::isspace(static_cast<unsigned char>(c)))
  {
    return true;
  }

  return std::find(std::begin(m_seps), std::end(m_seps), c)
         != std::end(m_seps);
}
/**
 * Advances the cursor past any run of whitespace.
 *
 * Each newline crossed replaces m_loc with a location that has the same
 * origin and a line number increased by one.
 */
void Lexer::skip_spaces()
{
  // std::isspace has undefined behaviour for negative values other than
  // EOF, so the (possibly signed) char is converted to unsigned char first.
  while (m_cursor < m_source.size()
         && std::isspace(static_cast<unsigned char>(m_source[m_cursor])))
  {
    if (m_source[m_cursor] == '\n')
    {
      m_loc = Loc {m_loc.origin(), m_loc.line() + 1};
    }

    m_cursor++;
  }
}
/**
 * Tries to match `text` literally at the cursor, without moving it.
 *
 * @param text      exact character sequence expected at the cursor.
 * @param type      node type reported on success.
 * @param has_value when true the reported repr is `text`, otherwise it is
 *                  empty.
 * @return the position just after the match, `type` and the repr; or
 *         std::nullopt when the source does not continue with `text`.
 */
std::optional<ScanInfo> Lexer::scan_text(std::string const& text,
                                         NodeType type,
                                         bool has_value) const
{
  size_t const length = text.size();

  // Not enough characters left to hold the whole text.
  if (m_cursor + length > m_source.size())
  {
    return std::nullopt;
  }

  // The guard above keeps the compared range inside the source.
  bool const matches = (m_source.compare(m_cursor, length, text) == 0);

  if (!matches)
  {
    return std::nullopt;
  }

  return ScanInfo {
    m_cursor + length,
    type,
    has_value ? text : ""
  };
}
std::optional<ScanInfo> Lexer::scan_keyword(std::string const& text,
NodeType type,
bool has_value) const
{
if (m_cursor + text.size() > m_source.size())
{
return std::nullopt;
}
for (size_t i=0; i<text.size(); i++)
{
if (m_source[m_cursor + i] != text[i])
{
return std::nullopt;
}
}
if (!is_sep(m_cursor + text.size()))
{
return std::nullopt;
}
return ScanInfo {
m_cursor + text.size(),
type,
has_value ? text : ""
};
}
std::optional<ScanInfo> Lexer::scan_ident() const
{
size_t cursor = m_cursor;
std::string repr;
while (cursor < m_source.size()
&& !is_sep(cursor))
{
repr += m_source[cursor];
cursor++;
}
if (repr.empty() == false)
{
return ScanInfo {
cursor,
NODE_IDENT,
repr
};
}
return std::nullopt;
}
/**
 * Tries to read an integer literal at the cursor, without moving it.
 *
 * The literal is an optional leading '-' followed by one or more decimal
 * digits.
 *
 * @return the position just after the literal, NODE_INT and the literal
 *         text (sign included); or std::nullopt when there is no digit.
 */
std::optional<ScanInfo> Lexer::scan_int() const
{
  size_t cursor = m_cursor;

  // Optional sign.
  if (cursor < m_source.size()
      && m_source[cursor] == '-')
  {
    cursor++;
  }

  size_t const first_digit = cursor;

  // std::isdigit has undefined behaviour for negative values other than
  // EOF, so the (possibly signed) char is converted to unsigned char first.
  while (cursor < m_source.size()
         && std::isdigit(static_cast<unsigned char>(m_source[cursor])))
  {
    cursor++;
  }

  // Nothing at all, or a lone '-', is not an integer.
  if (cursor == first_digit)
  {
    return std::nullopt;
  }

  return ScanInfo {
    cursor,
    NODE_INT,
    m_source.substr(m_cursor, cursor - m_cursor)
  };
}
}