// joko/lib/Lexer.cpp (C++; listed upstream as 215 lines, 4.1 KiB)
//
// Note: the hosting repository was archived on 2023-09-10. Files can still be
// viewed and cloned, but pushing and opening issues or pull requests is
// disabled.

#include "Lexer.hpp"
namespace jk
{
/**
 * Builds a lexer that logs through `logger` and tags produced nodes with
 * `loc`.
 *
 * One scanner is registered per fixed token text, followed by the identifier
 * scanner and then the integer scanner. next() keeps the longest match and,
 * on equal lengths, the scanner registered first, so the registration order
 * below is significant.
 */
/*explicit*/ Lexer::Lexer(Logger& logger, Loc const& loc)
  : m_logger { logger }
  , m_loc { loc }
{
  // Each entry: node type, literal source text, and whether the produced
  // node keeps that text as its representation.
  std::vector<std::tuple<NodeType, std::string, bool>> const fixed_tokens = {
    {NODE_RARROW, "->", false},
    {NODE_DECL, "$", false},
    {NODE_OPAR, "(", false},
    {NODE_CPAR, ")", false}
  };

  for (auto const& [type, text, has_value] : fixed_tokens)
  {
    // std::bind stores its own copies of the arguments, so the scanner
    // stays valid after `fixed_tokens` goes out of scope.
    m_scanners.push_back(
      std::bind(&Lexer::scan_text, this, type, text, has_value));
  }

  m_scanners.push_back(std::bind(&Lexer::scan_ident, this));
  m_scanners.push_back(std::bind(&Lexer::scan_int, this));
}
/** Nothing to release by hand: every member cleans up after itself. */
/*virtual*/ Lexer::~Lexer() = default;
/**
 * Loads a new input text and rewinds the cursor to its first character.
 *
 * @param source text that subsequent next() calls will tokenise; it is
 *               copied into the lexer.
 */
void Lexer::scan(std::string const& source)
{
  m_cursor = 0;
  m_source = source;
}
std::shared_ptr<Node> Lexer::next()
{
skip_spaces();
while (more(m_cursor)
2023-09-09 22:03:28 +00:00
&& at(m_cursor) == '#')
2023-09-09 13:09:43 +00:00
{
while (more(m_cursor)
2023-09-09 22:03:28 +00:00
&& at(m_cursor) != '\n')
2023-09-09 13:09:43 +00:00
{
m_cursor++;
}
skip_spaces();
}
std::optional<ScanInfo> info;
for (auto scanner: m_scanners)
{
auto my_info = scanner();
if ((!info && my_info)
|| (info && my_info
&& my_info->cursor > info->cursor))
{
info = my_info;
}
}
if (info)
{
m_cursor = info->cursor;
return std::make_shared<Node>(info->type, info->repr, m_loc);
}
if (more(m_cursor))
{
std::string text;
while (more(m_cursor)
2023-09-09 22:03:28 +00:00
&& !std::isspace(at(m_cursor)))
2023-09-09 13:09:43 +00:00
{
2023-09-09 22:03:28 +00:00
text += at(m_cursor);
2023-09-09 13:09:43 +00:00
m_cursor++;
}
std::stringstream ss;
ss << "unknown text '" << text << "'";
m_logger.log<lexical_error>(LOG_ERROR, m_loc, ss.str());
}
return nullptr;
}
/**
 * Tells whether `index` designates a character of the current source.
 *
 * @param index position to test.
 * @return true when `index` is strictly below the source length.
 */
bool Lexer::more(size_t index) const
{
  return m_source.size() > index;
}
2023-09-09 22:03:28 +00:00
char Lexer::at(size_t index) const
2023-09-09 13:09:43 +00:00
{
assert(more(index));
return m_source[index];
}
void Lexer::skip_spaces()
{
while (more(m_cursor)
2023-09-09 22:03:28 +00:00
&& std::isspace(at(m_cursor)))
2023-09-09 13:09:43 +00:00
{
2023-09-09 22:03:28 +00:00
if (at(m_cursor) == '\n')
2023-09-09 13:09:43 +00:00
{
m_loc = Loc {
m_loc.path(),
m_loc.line() + 1,
m_loc.column()
};
}
m_cursor++;
}
}
std::optional<ScanInfo> Lexer::scan_int() const
{
size_t cursor = m_cursor;
std::string repr;
while (more(cursor)
2023-09-09 22:03:28 +00:00
&& std::isdigit(at(cursor)))
2023-09-09 13:09:43 +00:00
{
2023-09-09 22:03:28 +00:00
repr += at(cursor);
2023-09-09 13:09:43 +00:00
cursor++;
}
if (repr.empty() == false)
{
return ScanInfo {
cursor,
NODE_INT,
repr
};
}
return std::nullopt;
}
2023-09-09 22:03:28 +00:00
/**
 * Tries to match the literal `text` at the cursor.
 *
 * The lexer state is left untouched; the caller decides whether to accept
 * the match.
 *
 * @param type      node type reported on success.
 * @param text      exact character sequence expected at the cursor.
 * @param has_value when true the match carries `text` as its representation,
 *                  otherwise an empty string.
 * @return a ScanInfo ending just past `text`, or std::nullopt when the
 *         remaining input is shorter than `text` or differs from it.
 */
std::optional<ScanInfo> Lexer::scan_text(NodeType type,
                                         std::string const& text,
                                         bool has_value) const
{
  size_t const end = m_cursor + text.size();

  // Not enough input left to hold the whole literal.
  if (m_source.size() < end)
  {
    return std::nullopt;
  }

  if (m_source.compare(m_cursor, text.size(), text) != 0)
  {
    return std::nullopt;
  }

  std::string repr;
  if (has_value)
  {
    repr = text;
  }

  return ScanInfo {
    end,
    type,
    repr
  };
}
/**
 * Tries to read an identifier at the cursor.
 *
 * An identifier starts with a letter or one of `_ - ? ! /` and continues
 * with any of those characters or decimal digits. The lexer state is left
 * untouched; the caller decides whether to accept the match.
 *
 * @return a NODE_IDENT ScanInfo holding the identifier text and the position
 *         just past it, or std::nullopt when the cursor is at the end of the
 *         input or not on a valid first character.
 */
std::optional<ScanInfo> Lexer::scan_ident() const
{
  // The <cctype> classifiers require a value representable as unsigned char;
  // a negative plain char would be undefined behaviour, hence the casts.
  auto is_head = [](char c) {
    return std::isalpha(static_cast<unsigned char>(c))
      || c == '_'
      || c == '-'
      || c == '?'
      || c == '!'
      || c == '/';
  };

  auto is_tail = [is_head](char c) {
    return is_head(c)
      || std::isdigit(static_cast<unsigned char>(c));
  };

  size_t cursor = m_cursor;

  if (!more(cursor) || !is_head(at(cursor)))
  {
    return std::nullopt;
  }

  std::string name(1, at(cursor));
  cursor++;

  while (more(cursor) && is_tail(at(cursor)))
  {
    name += at(cursor);
    cursor++;
  }

  return ScanInfo {
    cursor,
    NODE_IDENT,
    name
  };
}
2023-09-09 13:09:43 +00:00
}