219 lines
4.2 KiB
C++
219 lines
4.2 KiB
C++
#include "Lexer.hpp"
|
|
|
|
namespace jk
|
|
{
|
|
/*explicit*/ Lexer::Lexer(Logger& logger, Loc const& loc)
|
|
: m_logger { logger }
|
|
, m_loc { loc }
|
|
{
|
|
std::vector<std::tuple<NodeType, std::string, bool>> texts = {
|
|
{NODE_RARROW, "->", false},
|
|
{NODE_DECL, "$", false},
|
|
{NODE_OPAR, "(", false},
|
|
{NODE_CPAR, ")", false}
|
|
};
|
|
|
|
for (auto text: texts)
|
|
{
|
|
m_scanners.push_back(std::bind(&Lexer::scan_text,
|
|
this,
|
|
std::get<0>(text),
|
|
std::get<1>(text),
|
|
std::get<2>(text)));
|
|
}
|
|
|
|
m_scanners.push_back(std::bind(&Lexer::scan_ident, this));
|
|
m_scanners.push_back(std::bind(&Lexer::scan_int, this));
|
|
}
|
|
|
|
// Nothing to release by hand: the members clean up after themselves.
/*virtual*/ Lexer::~Lexer() = default;
|
|
|
|
void Lexer::scan(std::string const& source)
|
|
{
|
|
m_source = source;
|
|
m_cursor = 0;
|
|
}
|
|
|
|
std::shared_ptr<Node> Lexer::next()
|
|
{
|
|
skip_spaces();
|
|
|
|
while (more(m_cursor)
|
|
&& at(m_cursor) == '#')
|
|
{
|
|
while (more(m_cursor)
|
|
&& at(m_cursor) != '\n')
|
|
{
|
|
m_cursor++;
|
|
}
|
|
|
|
skip_spaces();
|
|
}
|
|
|
|
std::optional<ScanInfo> info;
|
|
|
|
for (auto scanner: m_scanners)
|
|
{
|
|
auto my_info = scanner();
|
|
|
|
if ((!info && my_info)
|
|
|| (info && my_info
|
|
&& my_info->cursor > info->cursor))
|
|
{
|
|
info = my_info;
|
|
}
|
|
}
|
|
|
|
if (info)
|
|
{
|
|
m_cursor = info->cursor;
|
|
return std::make_shared<Node>(info->type, info->repr, m_loc);
|
|
}
|
|
|
|
if (more(m_cursor))
|
|
{
|
|
std::string text;
|
|
|
|
while (more(m_cursor)
|
|
&& !std::isspace(at(m_cursor)))
|
|
{
|
|
text += at(m_cursor);
|
|
m_cursor++;
|
|
}
|
|
|
|
std::stringstream ss;
|
|
ss << "unknown text '" << text << "'";
|
|
m_logger.log<lexical_error>(LOG_ERROR, m_loc, ss.str());
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
bool Lexer::more(size_t index) const
|
|
{
|
|
return index < m_source.size();
|
|
}
|
|
|
|
char Lexer::at(size_t index) const
|
|
{
|
|
assert(more(index));
|
|
|
|
return m_source[index];
|
|
}
|
|
|
|
void Lexer::skip_spaces()
|
|
{
|
|
while (more(m_cursor)
|
|
&& std::isspace(at(m_cursor)))
|
|
{
|
|
if (at(m_cursor) == '\n')
|
|
{
|
|
m_loc = Loc {
|
|
m_loc.path(),
|
|
m_loc.line() + 1,
|
|
m_loc.column()
|
|
};
|
|
}
|
|
|
|
m_cursor++;
|
|
}
|
|
}
|
|
|
|
std::optional<ScanInfo> Lexer::scan_int() const
|
|
{
|
|
size_t cursor = m_cursor;
|
|
std::string repr;
|
|
|
|
while (more(cursor)
|
|
&& std::isdigit(at(cursor)))
|
|
{
|
|
repr += at(cursor);
|
|
cursor++;
|
|
}
|
|
|
|
if (repr.empty() == false)
|
|
{
|
|
return ScanInfo {
|
|
cursor,
|
|
NODE_INT,
|
|
repr
|
|
};
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
/**
 * Try to match the exact spelling `text` at the cursor, without moving it.
 *
 * @param type      node type reported on a match.
 * @param text      literal spelling to look for.
 * @param has_value when true the matched spelling is reported as the
 *                  token's representation, otherwise it is left empty.
 * @return the position just past the match together with `type` and the
 *         representation; std::nullopt when `text` is not found here.
 */
std::optional<ScanInfo> Lexer::scan_text(NodeType type,
                                         std::string const& text,
                                         bool has_value) const
{
  size_t const end = m_cursor + text.size();

  // The bounds test comes first so compare() is only asked about a
  // range that lies entirely inside the source.
  bool const matches = end <= m_source.size()
    && m_source.compare(m_cursor, text.size(), text) == 0;

  if (!matches)
  {
    return std::nullopt;
  }

  std::string repr;

  if (has_value)
  {
    repr = text;
  }

  return ScanInfo {
    end,
    type,
    repr
  };
}
|
|
|
|
std::optional<ScanInfo> Lexer::scan_ident() const
|
|
{
|
|
auto car = [](char c){
|
|
return std::isalpha(c)
|
|
|| c == '_'
|
|
|| c == '-'
|
|
|| c == '+'
|
|
|| c == '*'
|
|
|| c == '^'
|
|
|| c == '%'
|
|
|| c == '?'
|
|
|| c == '!'
|
|
|| c == '/';
|
|
};
|
|
|
|
auto cdr = [car](char c){
|
|
return car(c)
|
|
|| std::isdigit(c)
|
|
;
|
|
};
|
|
|
|
size_t cursor = m_cursor;
|
|
std::string repr;
|
|
|
|
if (!more(cursor)
|
|
|| !car(at(cursor)))
|
|
{
|
|
return std::nullopt;
|
|
}
|
|
|
|
repr += at(cursor);
|
|
cursor++;
|
|
|
|
while (more(cursor)
|
|
&& cdr(at(cursor)))
|
|
{
|
|
repr += at(cursor);
|
|
cursor++;
|
|
}
|
|
|
|
return ScanInfo {
|
|
cursor,
|
|
NODE_IDENT,
|
|
repr
|
|
};
|
|
}
|
|
}
|