#include "Lexer.hpp"
#include "lib/Node.hpp"

// NOTE(review): this file looks like it went through a lossy extraction step.
// Template argument lists are missing (e.g. `std::shared_ptr Lexer::next()`,
// `std::optional scan_info`, `std::make_shared(...)`) and the tails of
// scan_text / scan_keyword are gone. The code below is kept token-for-token
// as found; restore the missing pieces from the upstream source rather than
// from guesswork.
namespace wg
{
  // Registers every scanner the lexer tries in next(): keywords first, then
  // punctuation/operator texts, then the integer and identifier scanners.
  // Registration order matters: next() keeps the earliest-registered scanner
  // when two scanners consume the same number of characters.
  /*explicit*/ Lexer::Lexer()
  {
    // Keywords. Only "int" passes has_value = true; the two-argument calls
    // presumably rely on a default declared in Lexer.hpp -- TODO confirm.
    add_keyword("as", NODE_AS);
    add_keyword("int", NODE_TYPE, true);
    add_keyword("fun", NODE_FUN);
    add_keyword("return", NODE_RETURN);
    add_keyword("extern", NODE_EXTERN);

    // Single-character texts. add_text() also records each of them in m_seps,
    // so they terminate identifiers (see is_sep / scan_ident).
    add_text(".", NODE_DOT);
    add_text("{", NODE_OBRACE);
    add_text("}", NODE_CBRACE);
    add_text(",", NODE_COMMA);
    add_text("#", NODE_HASH);
    add_text("+", NODE_ADD);
    add_text("-", NODE_SUB);
    add_text("*", NODE_MUL);
    add_text("/", NODE_DIV);
    add_text("%", NODE_MOD);
    add_text("(", NODE_OPAR);
    add_text(")", NODE_CPAR);
    add_text(";", NODE_SEMICOLON);

    // scan_int is registered before scan_ident: for a pure digit run both
    // consume the same length, and the earlier registration wins the tie.
    m_scanners.push_back(std::bind(&Lexer::scan_int, this));
    m_scanners.push_back(std::bind(&Lexer::scan_ident, this));
  }

  /*virtual*/ Lexer::~Lexer()
  {
  }

  // Loads a new source text and rewinds the cursor to its start.
  // NOTE(review): m_loc is not reset here, so line numbers carry over from a
  // previous scan() -- confirm whether that is intended.
  void Lexer::scan(std::string const& source)
  {
    m_source = source;
    m_cursor = 0;
  }

  // Returns the next token node, or nullptr when no scanner matches at the
  // current position (which includes reaching the end of the source).
  // Skips whitespace and `::` line comments first, then runs every registered
  // scanner and keeps the match that advances the cursor the furthest.
  std::shared_ptr Lexer::next()
  {
    std::optional scan_info;
    skip_spaces();

    // A `::` pair starts a comment that runs to the end of the line.
    while (m_cursor + 1 < m_source.size()
           && m_source[m_cursor] == ':'
           && m_source[m_cursor + 1] == ':')
    {
      // NOTE(review): this inner loop has no `m_cursor < m_source.size()`
      // guard. A comment on the last line without a trailing '\n' walks the
      // cursor past the end of m_source -- needs a bounds check.
      while (m_source[m_cursor] != '\n')
      {
        m_cursor++;
      }

      // Consumes the newline (and bumps the line counter) plus any following
      // blank space, so consecutive comment lines are handled by the loop.
      skip_spaces();
    }

    // Longest match wins; the comparison is strict, so on equal length the
    // scanner registered first is kept.
    for (auto scanner: m_scanners)
    {
      auto info = scanner();

      if (info && (scan_info == std::nullopt || info->cursor > scan_info->cursor))
      {
        scan_info = info;
      }
    }

    if (scan_info)
    {
      m_cursor = scan_info->cursor;
      return std::make_shared(scan_info->type, scan_info->repr, m_loc);
    }

    // NOTE(review): this condition holds for any in-range cursor, so it does
    // not appear to flag an unmatched character left in the input. It looks
    // like `m_cursor >= m_source.size()` may have been intended -- confirm.
    WG_ASSERT(m_cursor <= m_source.size(), "unexpected token");
    return nullptr;
  }

  // Drains the lexer: calls next() until it returns nullptr and returns the
  // collected nodes in order.
  std::vector> Lexer::all()
  {
    std::vector> result;
    std::shared_ptr node;

    while ( (node = next()) != nullptr )
    {
      result.push_back(node);
    }

    return result;
  }

  // Registers a fixed-text scanner (scan_text) producing `node`.
  // A one-character text is additionally recorded as a separator.
  void Lexer::add_text(std::string const& text, NodeType node, bool has_value)
  {
    if (text.size() == 1)
    {
      m_seps.push_back(text[0]);
    }

    m_scanners.push_back(std::bind(&Lexer::scan_text, this, text, node, has_value));
  }

  // Registers a keyword scanner (scan_keyword) producing `node`.
  // Same bookkeeping as add_text: a one-character keyword becomes a separator.
  void Lexer::add_keyword(std::string const& text, NodeType node, bool has_value)
  {
    if (text.size() == 1)
    {
      m_seps.push_back(text[0]);
    }

    m_scanners.push_back(std::bind(&Lexer::scan_keyword, this, text, node, has_value));
  }

  // True when the character at `index` is whitespace or one of the registered
  // single-character separators. Asserts that `index` is inside the source.
  // NOTE(review): std::isspace receives a plain `char`; a negative value is
  // undefined behaviour -- consider casting to unsigned char.
  bool Lexer::is_sep(size_t index) const
  {
    WG_ASSERT(index < m_source.size(), "cannot find separator");

    if (std::isspace(m_source[index]))
    {
      return true;
    }

    auto itr = std::find(std::begin(m_seps), std::end(m_seps), m_source[index]);
    return itr != std::end(m_seps);
  }

  // Advances the cursor past consecutive whitespace. Each '\n' crossed
  // replaces m_loc with a Loc that has the same origin and line + 1.
  void Lexer::skip_spaces()
  {
    while (m_cursor < m_source.size() && std::isspace(m_source[m_cursor]))
    {
      if (m_source[m_cursor] == '\n')
      {
        m_loc = Loc {m_loc.origin(), m_loc.line() + 1};
      }

      m_cursor++;
    }
  }

  // Scanner for a fixed text at the current cursor; bails out early when the
  // remaining source is shorter than `text`.
  // NOTE(review): everything after `for (size_t i=0; i` was lost in
  // extraction (the comparison loop, the return statement, the closing brace
  // and the return type of the next function). Restore from upstream.
  std::optional Lexer::scan_text(std::string const& text, NodeType type, bool has_value) const
  {
    if (m_cursor + text.size() > m_source.size()) { return std::nullopt; }

    for (size_t i=0; i Lexer::scan_keyword(std::string const& text, NodeType type, bool has_value) const
  {
    // Scanner for a keyword at the current cursor; same length guard as
    // scan_text.
    // NOTE(review): the rest of this body was lost in extraction as well,
    // up to and including the return type of scan_ident. Restore from
    // upstream.
    if (m_cursor + text.size() > m_source.size()) { return std::nullopt; }

    for (size_t i=0; i Lexer::scan_ident() const
  {
    // Identifier scanner: collects every character from the cursor up to the
    // next separator (whitespace or registered one-character text) or the end
    // of the source. Digits are not separators, so a digit run matches too.
    size_t cursor = m_cursor;
    std::string repr;

    while (cursor < m_source.size() && !is_sep(cursor))
    {
      repr += m_source[cursor];
      cursor++;
    }

    if (repr.empty() == false)
    {
      return ScanInfo { cursor, NODE_IDENT, repr };
    }

    return std::nullopt;
  }

  // Integer scanner: an optional leading '-' followed by one or more decimal
  // digits. Yields nothing when no digit was consumed (empty, or a lone '-').
  // NOTE(review): std::isdigit receives a plain `char`; a negative value is
  // undefined behaviour -- consider casting to unsigned char.
  std::optional Lexer::scan_int() const
  {
    size_t cursor = m_cursor;
    std::string repr;

    if (cursor < m_source.size() && m_source[cursor] == '-')
    {
      repr += '-';
      cursor++;
    }

    while (cursor < m_source.size() && std::isdigit(m_source[cursor]))
    {
      repr += m_source[cursor];
      cursor++;
    }

    if (repr.empty() || repr.back() == '-')
    {
      return std::nullopt;
    }

    return ScanInfo { cursor, NODE_INT, repr };
  }
}