fol parser.

main
bog 2023-10-10 15:29:22 +02:00
parent 3fe44655cd
commit 93f7430e11
15 changed files with 930 additions and 1 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
.cache
*~*
*\#*
build

View File

@ -1,6 +1,6 @@
MIT No Attribution
Copyright <YEAR> <COPYRIGHT HOLDER>
Copyright 2023 bog
Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software

11
Makefile Normal file
View File

@ -0,0 +1,11 @@
# Thin convenience wrapper around the Meson build.
# Every target here is a command, not a file, so all of them are phony;
# `install` was missing from the list and would have been skipped if a
# file or directory named `install` ever existed.
.PHONY: build tests install

# Configure the build directory and compile everything.
build:
	meson setup build
	meson compile -C build

# Run the test executable produced by the build.
tests: build
	build/sine-patre-tests

# Install only after the tests have run.
install: tests
	meson install -C build

39
meson.build Normal file
View File

@ -0,0 +1,39 @@
# Build description for sine-patre: one static library holding the FOL
# lexer/parser, a command-line executable, and a Catch2 test executable.
project('sine-patre',
'cpp',
version: '0.0.0',
default_options: [
'prefix=/usr',
'cpp_std=c++17',
'warning_level=3'
])
# Library with the first-order-logic front end (nodes, lexer, parser).
sp_lib = static_library(
'sine-patre',
sources: [
'src/fol/Node.cpp',
'src/fol/Lexer.cpp',
'src/fol/Parser.cpp',
]
)
# Dependency object shared by both executables below.
sp_dep = declare_dependency(link_with: [sp_lib])
# Main program; the only target that gets installed.
executable('sine-patre',
sources: [
'src/main.cpp',
],
dependencies: [
sp_dep
],
install: true)
# Test runner; links the library plus Catch2 and is not installed.
executable('sine-patre-tests',
sources: [
'tests/main.cpp',
'tests/Lexer.cpp',
'tests/Parser.cpp',
],
dependencies: [
sp_dep,
dependency('catch2')
])

34
src/commons.hpp Normal file
View File

@ -0,0 +1,34 @@
#ifndef sp_COMMONS_HPP
#define sp_COMMONS_HPP
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <memory>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
// Generators applied to an X-macro list: the first yields the bare
// identifier, the second its spelling as a string literal.
#define SP_GEN_ENUM(X) X
#define SP_GEN_STRING(X) #X

// SP_ENUM(PREFIX, KINDS) declares `enum PREFIX` from the X-macro list KINDS
// together with a parallel array `PREFIXStr` holding each enumerator's name,
// so that `PREFIXStr[value]` is the textual name of `value`.
#define SP_ENUM(PREFIX, KINDS) \
    enum PREFIX { KINDS(SP_GEN_ENUM) }; \
    constexpr char const* PREFIX ## Str [] = { KINDS(SP_GEN_STRING) }

// SP_ASSERT(COND, MSG) writes MSG to std::cerr and aborts the process when
// COND is false. The do/while(false) wrapper makes the expansion a single
// statement, so `if (x) SP_ASSERT(...); else ...` parses as intended.
#define SP_ASSERT(COND, MSG) \
    do \
    { \
        if ( !(COND) ) \
        { \
            std::cerr << MSG; \
            std::abort(); \
        } \
    } while (false)

// SP_ERROR(NAME) declares an exception type NAME deriving from
// std::runtime_error and constructible from a message string.
#define SP_ERROR(NAME) \
    struct NAME : public std::runtime_error { \
        NAME (std::string const& what) : std::runtime_error(what) {} \
    }
#endif

207
src/fol/Lexer.cpp Normal file
View File

@ -0,0 +1,207 @@
#include "Lexer.hpp"
namespace sp
{
namespace fol
{
// Registers every scanner the lexer tries at each position: first the
// fixed-text tokens, then the identifier-like scanners. Registration order
// is kept because next() iterates the scanners in this order.
/*explicit*/ Lexer::Lexer()
{
    // Operators and punctuation, matched literally.
    add_text(NODE_IMP, "->");
    add_text(NODE_OR, "|");
    add_text(NODE_AND, "&");
    add_text(NODE_OPAR, "(");
    add_text(NODE_CPAR, ")");
    add_text(NODE_NOT, "!");
    add_text(NODE_COMMA, ",");

    // Alphabetic tokens: variables, constants, predicates.
    m_scanners.push_back([this]() { return scan_var(); });
    m_scanners.push_back([this]() { return scan_const(); });
    m_scanners.push_back([this]() { return scan_pred(); });
}
// No explicit cleanup: the members release themselves.
/*virtual*/ Lexer::~Lexer() = default;
// Loads a new input string and rewinds the read position to its start.
// The text is copied, so the caller's string need not outlive the lexer.
void Lexer::scan(std::string const& text)
{
    m_cursor = 0;
    m_text = text;
}
std::vector<std::shared_ptr<Node>> Lexer::all()
{
std::vector<std::shared_ptr<Node>> tokens;
std::shared_ptr<Node> tok;
while ( (tok = next()) )
{
tokens.push_back(tok);
}
return tokens;
}
std::shared_ptr<Node> Lexer::next()
{
std::shared_ptr<Node> token;
std::optional<ScanInfo> curr_info;
skip_spaces();
for (auto& scanner: m_scanners)
{
auto info = scanner();
if ((info && !curr_info)
|| (info && curr_info && info->cursor > curr_info->cursor))
{
curr_info = info;
}
}
if (curr_info)
{
token = std::make_shared<Node>(curr_info->type,
curr_info->value);
m_cursor = curr_info->cursor;
}
else
{
if (m_cursor < m_text.size())
{
std::string text;
while (m_cursor < m_text.size()
&& !std::isspace(m_text.at(m_cursor)))
{
text += m_text[m_cursor];
m_cursor++;
}
throw lex_error {"unexpected end near '"
+ text
+ "'"};
}
}
return token;
}
// Advances the cursor past any run of whitespace at the current position.
void Lexer::skip_spaces()
{
    // Cast first: std::isspace is undefined for negative char values.
    while (m_cursor < m_text.size()
           && std::isspace(static_cast<unsigned char>(m_text[m_cursor])))
    {
        m_cursor++;
    }
}
// Registers a scanner that recognises the literal `text` and yields a token
// of kind `type`. `has_value` is forwarded to scan_text and decides whether
// the token carries the matched text as its value.
void Lexer::add_text(NodeType type,
                     std::string const& text,
                     bool has_value)
{
    // Captured by value so the scanner owns its own copy of the literal.
    m_scanners.push_back([this, type, text, has_value]() {
        return scan_text(type, text, has_value);
    });
}
// Tries to match the literal `text` at the current cursor, without moving it.
// Returns std::nullopt when the remaining input is too short or differs;
// otherwise returns the position just past the match, the token kind, and
// either `text` (when has_value is set) or an empty value.
std::optional<ScanInfo> Lexer::scan_text(NodeType type,
                                         std::string const& text,
                                         bool has_value)
{
    size_t const end = m_cursor + text.size();

    if (end > m_text.size())
    {
        return std::nullopt;
    }

    if (m_text.compare(m_cursor, text.size(), text) != 0)
    {
        return std::nullopt;
    }

    std::string value;

    if (has_value)
    {
        value = text;
    }

    return ScanInfo { end, type, value };
}
// Tries to read a variable at the current cursor, without moving it.
// A variable is a run of letters whose first letter is lowercase; later
// letters may be of either case. Returns std::nullopt otherwise.
std::optional<ScanInfo> Lexer::scan_var()
{
    size_t cursor = m_cursor;
    std::string value;

    // The casts keep the <cctype> calls defined for bytes >= 0x80, which a
    // plain char may represent as negative values.
    while (cursor < m_text.size()
           && std::isalpha(static_cast<unsigned char>(m_text[cursor])))
    {
        value += m_text[cursor];
        cursor++;
    }

    if (value.empty()
        || !std::islower(static_cast<unsigned char>(value[0])))
    {
        return std::nullopt;
    }

    return ScanInfo {
        cursor,
        NODE_VAR,
        value
    };
}
// Tries to read a predicate name at the current cursor, without moving it.
// A predicate name is a run of at least two letters whose first letter is
// uppercase and whose second letter is not. Returns std::nullopt otherwise.
std::optional<ScanInfo> Lexer::scan_pred()
{
    size_t cursor = m_cursor;
    std::string value;

    // The casts keep the <cctype> calls defined for bytes >= 0x80, which a
    // plain char may represent as negative values.
    while (cursor < m_text.size()
           && std::isalpha(static_cast<unsigned char>(m_text[cursor])))
    {
        value += m_text[cursor];
        cursor++;
    }

    if (value.size() < 2
        || !std::isupper(static_cast<unsigned char>(value[0]))
        || std::isupper(static_cast<unsigned char>(value[1])))
    {
        return std::nullopt;
    }

    return ScanInfo {
        cursor,
        NODE_PRED,
        value
    };
}
// Tries to read a constant at the current cursor, without moving it.
// A constant is a non-empty run of uppercase letters; the run stops at the
// first character that is not one. Returns std::nullopt when the run is empty.
std::optional<ScanInfo> Lexer::scan_const()
{
    size_t cursor = m_cursor;
    std::string value;

    // The casts keep the <cctype> calls defined for bytes >= 0x80, which a
    // plain char may represent as negative values.
    while (cursor < m_text.size()
           && std::isalpha(static_cast<unsigned char>(m_text[cursor]))
           && std::isupper(static_cast<unsigned char>(m_text[cursor])))
    {
        value += m_text[cursor];
        cursor++;
    }

    if (value.empty())
    {
        return std::nullopt;
    }

    return ScanInfo {
        cursor,
        NODE_CONST,
        value
    };
}
}
}

53
src/fol/Lexer.hpp Normal file
View File

@ -0,0 +1,53 @@
#ifndef sp_fol_LEXER_HPP
#define sp_fol_LEXER_HPP
#include "../commons.hpp"
#include "Node.hpp"
namespace sp
{
namespace fol
{
// Exception type thrown by the lexer when no scanner matches the input.
SP_ERROR(lex_error);
// Result of one successful scan attempt.
struct ScanInfo {
// Position in the input just past the matched text.
size_t cursor;
// Kind of token that was recognised.
NodeType type;
// Text carried by the token; may be empty.
std::string value;
};
// A scanner inspects the input at the lexer's current position and returns
// a ScanInfo on success or std::nullopt when it does not match.
using scanner_t = std::function<std::optional<ScanInfo>()>;
// Splits formula text into a sequence of token Nodes.
//
// Usage: call scan() with the input, then either next() repeatedly or all().
//
// NOTE(review): the registered scanners capture `this`, so a copied Lexer
// would still call into the original object — confirm whether copying
// should be disabled.
class Lexer
{
public:
    explicit Lexer();
    virtual ~Lexer();

    // Sets the text to tokenize and rewinds to its beginning.
    void scan(std::string const& text);

    // Returns every remaining token, in order.
    std::vector<std::shared_ptr<Node>> all();

    // Returns the next token, or a null pointer at end of input.
    // Throws lex_error when the input cannot be tokenized.
    std::shared_ptr<Node> next();

private:
    std::string m_text;

    // Initialized in-class so that calling next() before scan() reads a
    // well-defined position instead of an indeterminate value.
    size_t m_cursor = 0;

    std::vector<scanner_t> m_scanners;

    // Registers a scanner for the literal `text` producing tokens of `type`.
    void add_text(NodeType type,
                  std::string const& text,
                  bool has_value=false);

    void skip_spaces();

    // Each scan_* function tries to match at m_cursor without moving it.
    std::optional<ScanInfo> scan_text(NodeType type,
                                      std::string const& text,
                                      bool has_value=false);
    std::optional<ScanInfo> scan_var();
    std::optional<ScanInfo> scan_pred();
    std::optional<ScanInfo> scan_const();
};
}
}
#endif

58
src/fol/Node.cpp Normal file
View File

@ -0,0 +1,58 @@
#include "Node.hpp"
namespace sp
{
namespace fol
{
// Creates a childless node of the given kind carrying `value` (may be empty).
/*explicit*/ Node::Node(NodeType type, std::string const& value)
    : m_type(type)
    , m_value(value)
{}
// No explicit cleanup: the members release themselves.
/*virtual*/ Node::~Node() = default;
// Appends `child` as the last child of this node.
void Node::add_child(std::shared_ptr<Node> child)
{
    // The parameter is already our own copy; moving it into the vector
    // avoids a second reference-count increment/decrement.
    m_children.push_back(std::move(child));
}
// Returns the child at position `index`.
// An out-of-range index is treated as a programming error: SP_ASSERT
// reports it and aborts the process.
std::shared_ptr<Node> Node::child(size_t index)
{
    SP_ASSERT(index < m_children.size(), "cannot get child at index '"
              + std::to_string(index)
              + "'");

    std::shared_ptr<Node> found = m_children[index];
    return found;
}
// Renders the subtree as text: the kind name without its "NODE_" prefix,
// then "[value]" when the value is non-empty, then the children's renderings
// comma-separated inside parentheses when there are any.
// Example shape: PRED[Pr](VAR[x],CONST[Y])
std::string Node::string() const
{
    std::stringstream out;

    // Skip the common "NODE_" prefix of the enumerator name.
    out << (NodeTypeStr[m_type] + strlen("NODE_"));

    if (!m_value.empty())
    {
        out << "[" << m_value << "]";
    }

    if (!m_children.empty())
    {
        out << "(";

        for (size_t i = 0; i < m_children.size(); i++)
        {
            if (i > 0)
            {
                out << ",";
            }

            out << m_children[i]->string();
        }

        out << ")";
    }

    return out.str();
}
}
}

55
src/fol/Node.hpp Normal file
View File

@ -0,0 +1,55 @@
#ifndef sp_fol_NODE_HPP
#define sp_fol_NODE_HPP
#include "../commons.hpp"
#include <memory>
// X-macro list of every node kind. G is applied to each entry, so the same
// list can generate both the enumerators and their names (see SP_ENUM).
// The list covers both lexer tokens and parser-built tree nodes.
#define NODE_TYPES(G) \
G(NODE_FORMULA), \
G(NODE_VAR), \
G(NODE_CONST), \
G(NODE_FUNC), \
G(NODE_PRED), \
G(NODE_OR), \
G(NODE_AND), \
G(NODE_IMP), \
G(NODE_OPAR), \
G(NODE_CPAR), \
G(NODE_NOT), \
G(NODE_COMMA), \
G(NODE_LITERAL), \
G(NODE_TERM), \
G(NODE_TERM_LST)
namespace sp
{
namespace fol
{
// Declares `enum NodeType` and the parallel name table `NodeTypeStr`,
// indexed by enumerator value.
SP_ENUM(NodeType, NODE_TYPES);
// A node of a formula tree; also used by the lexer as its token type.
// Each node has a kind, an optional text value, and an ordered child list.
class Node
{
public:
// Builds a childless node; `value` may be empty.
explicit Node(NodeType type, std::string const& value);
virtual ~Node();
// Kind of this node.
NodeType type() const { return m_type; }
// Text attached to this node (returned by copy).
std::string value() const { return m_value; }
// Number of direct children.
size_t size() const { return m_children.size(); }
// Appends `child` after the existing children.
void add_child(std::shared_ptr<Node> child);
// Returns the child at `index`; aborts through SP_ASSERT when out of range.
std::shared_ptr<Node> child(size_t index);
// Textual rendering of the whole subtree.
std::string string() const;
private:
NodeType m_type;
std::string m_value;
std::vector<std::shared_ptr<Node>> m_children;
};
}
}
#endif

223
src/fol/Parser.cpp Normal file
View File

@ -0,0 +1,223 @@
#include "Parser.hpp"
#include "src/fol/Node.hpp"
namespace sp
{
namespace fol
{
// Nothing to set up: the members carry their own initial state.
/*explicit*/ Parser::Parser() = default;
// No explicit cleanup: the members release themselves.
/*virtual*/ Parser::~Parser() = default;
// Parses a complete token sequence into a FORMULA tree.
// The tokens are copied into the parser and the cursor is rewound, so one
// Parser may be reused. Throws parse_error when the grammar is violated or
// when tokens remain after a formula has been read.
std::shared_ptr<Node>
Parser::parse(std::vector<std::shared_ptr<Node>> const& tokens)
{
    m_tokens = tokens;
    m_cursor = 0;

    auto root = parse_formula();

    bool const everything_consumed = m_cursor >= m_tokens.size();

    if (!everything_consumed)
    {
        throw parse_error {"unexpected end near '"
                           + m_tokens[m_cursor]->string()
                           + "'"};
    }

    return root;
}
// Creates a fresh node of kind `type` with an empty value and no children.
std::shared_ptr<Node> Parser::make_node(NodeType type) const
{
    std::string const no_value;
    return std::make_shared<Node>(type, no_value);
}
// Tells whether the token `lookahead` positions after the cursor exists and
// has kind `type`. Never consumes anything; returns false past the end.
bool Parser::type_is(NodeType type, size_t lookahead) const
{
    size_t const position = m_cursor + lookahead;

    return position < m_tokens.size()
        && m_tokens[position]->type() == type;
}
// Returns the current token and advances past it.
// Throws parse_error when the input is exhausted.
std::shared_ptr<Node> Parser::consume()
{
    bool const exhausted = m_cursor >= m_tokens.size();

    if (exhausted)
    {
        throw parse_error {"no token to consume"};
    }

    return m_tokens[m_cursor++];
}
// Consumes and returns the current token, which must be of kind `type`.
// On a mismatch, throws parse_error naming the expected kind and what was
// found instead. The cursor is not advanced on failure, and running out of
// tokens is reported as "end of input" rather than through the generic
// "no token to consume" error.
std::shared_ptr<Node> Parser::consume(NodeType type)
{
    if (type_is(type))
    {
        return consume();
    }

    // Kind names are printed without their "NODE_" prefix.
    std::string expected = NodeTypeStr[type] + strlen("NODE_");
    std::string got = "end of input";

    if (m_cursor < m_tokens.size())
    {
        got = NodeTypeStr[m_tokens[m_cursor]->type()] + strlen("NODE_");
    }

    throw parse_error {"expected '"+expected+"', got '"+got+"'"};
}
// formula := or-expression
// Wraps the parsed expression in a FORMULA root node.
std::shared_ptr<Node> Parser::parse_formula()
{
    auto root = make_node(NODE_FORMULA);
    auto body = parse_or();
    root->add_child(body);
    return root;
}
// or-expression := and-expression ('|' and-expression)*
// Left-associative. The OR token itself becomes the tree node and receives
// the two operands as children.
std::shared_ptr<Node> Parser::parse_or()
{
    auto left = parse_and();

    while (type_is(NODE_OR))
    {
        auto op = consume();
        op->add_child(left);

        auto right = parse_and();
        op->add_child(right);

        left = op;
    }

    return left;
}
// and-expression := imp-expression ('&' imp-expression)*
// Left-associative. The AND token itself becomes the tree node and receives
// the two operands as children.
std::shared_ptr<Node> Parser::parse_and()
{
    auto left = parse_imp();

    while (type_is(NODE_AND))
    {
        auto op = consume();
        op->add_child(left);

        auto right = parse_imp();
        op->add_child(right);

        left = op;
    }

    return left;
}
// imp-expression := not-expression ('->' not-expression)?
// The IMP token becomes the tree node with the two operands as children.
// NOTE(review): at most one '->' is read here, so in `A -> B -> C` the
// second '->' is left unconsumed by this function — confirm whether
// chained implication is meant to be supported.
std::shared_ptr<Node> Parser::parse_imp()
{
    auto left = parse_not();

    if (!type_is(NODE_IMP))
    {
        return left;
    }

    auto op = consume();
    op->add_child(left);

    auto right = parse_not();
    op->add_child(right);

    return op;
}
// not-expression := '!' not-expression | group
// The NOT token becomes the tree node with its operand as the only child.
// The operand is parsed recursively so that stacked negations such as
// `!!Pr(x)` are accepted and yield NOT(NOT(...)); inputs with a single
// negation parse exactly as before.
std::shared_ptr<Node> Parser::parse_not()
{
    if (type_is(NODE_NOT))
    {
        auto node = consume(NODE_NOT);
        node->add_child(parse_not());
        return node;
    }

    return parse_group();
}
// group := '(' or-expression ')' | predicate
// The parentheses leave no node in the tree; only the inner expression
// is returned.
std::shared_ptr<Node> Parser::parse_group()
{
    if (!type_is(NODE_OPAR))
    {
        return parse_pred();
    }

    consume();                  // opening parenthesis
    auto inner = parse_or();
    consume(NODE_CPAR);

    return inner;
}
// predicate := PRED '(' term-list ')'
// The PRED token becomes the tree node. The parsed terms are attached
// directly to it; the temporary TERM_LST node is discarded.
std::shared_ptr<Node> Parser::parse_pred()
{
    auto pred = consume(NODE_PRED);
    consume(NODE_OPAR);

    auto terms = parse_term_lst();
    size_t const count = terms->size();

    for (size_t idx = 0; idx < count; idx++)
    {
        pred->add_child(terms->child(idx));
    }

    consume(NODE_CPAR);
    return pred;
}
// term := function-call | VAR | CONST
// A VAR immediately followed by '(' is read as a function call. Any other
// token is consumed and reported through a parse_error naming its kind.
std::shared_ptr<Node> Parser::parse_term()
{
    bool const starts_with_var = type_is(NODE_VAR);

    if (starts_with_var && type_is(NODE_OPAR, 1))
    {
        return parse_func();
    }

    if (starts_with_var || type_is(NODE_CONST))
    {
        return consume();
    }

    auto offending = consume();
    std::string kind_name = NodeTypeStr[offending->type()] + strlen("NODE_");

    throw parse_error {"unknown term '" + kind_name + "'"};
}
// function-call := VAR '(' term-list ')'
// Builds a new FUNC node named after the VAR token and attaches the parsed
// terms directly to it; the temporary TERM_LST node is discarded.
std::shared_ptr<Node> Parser::parse_func()
{
    auto name_token = consume(NODE_VAR);
    auto func = std::make_shared<Node>(NODE_FUNC, name_token->value());

    consume(NODE_OPAR);

    auto args = parse_term_lst();
    size_t const count = args->size();

    for (size_t idx = 0; idx < count; idx++)
    {
        func->add_child(args->child(idx));
    }

    consume(NODE_CPAR);
    return func;
}
// term-list := (term (',' term)*)?
// Returns a TERM_LST node holding the terms in order. The list is empty
// when the next token is the closing parenthesis, which is left for the
// caller to consume.
std::shared_ptr<Node> Parser::parse_term_lst()
{
    auto list = make_node(NODE_TERM_LST);

    if (!type_is(NODE_CPAR))
    {
        list->add_child(parse_term());

        while (type_is(NODE_COMMA))
        {
            consume();          // the comma
            list->add_child(parse_term());
        }
    }

    return list;
}
}
}

50
src/fol/Parser.hpp Normal file
View File

@ -0,0 +1,50 @@
#ifndef sp_fol_PARSER_HPP
#define sp_fol_PARSER_HPP
#include "../commons.hpp"
#include "Node.hpp"
namespace sp
{
namespace fol
{
// Exception type thrown by the parser on malformed token sequences.
SP_ERROR(parse_error);
// Recursive-descent parser turning lexer tokens into a formula tree.
// Binding strength as implemented by the call chain, loosest first:
// '|', '&', '->', '!', then parenthesised groups and predicates.
class Parser
{
public:
explicit Parser();
virtual ~Parser();
// Parses `tokens` and returns the FORMULA root; throws parse_error on
// malformed input or leftover tokens.
std::shared_ptr<Node>
parse(std::vector<std::shared_ptr<Node>> const& tokens);
private:
// Copy of the tokens being parsed and the index of the next one to read.
std::vector<std::shared_ptr<Node>> m_tokens;
size_t m_cursor = 0;
// Creates an empty-valued node of the given kind.
std::shared_ptr<Node> make_node(NodeType type) const;
// Peeks at the token `lookahead` positions ahead without consuming it.
bool type_is(NodeType type, size_t lookahead=0) const;
// Consume the current token, optionally requiring a specific kind.
std::shared_ptr<Node> consume();
std::shared_ptr<Node> consume(NodeType type);
// One function per grammar rule.
std::shared_ptr<Node> parse_formula();
std::shared_ptr<Node> parse_or();
std::shared_ptr<Node> parse_and();
std::shared_ptr<Node> parse_imp();
std::shared_ptr<Node> parse_not();
std::shared_ptr<Node> parse_group();
std::shared_ptr<Node> parse_pred();
std::shared_ptr<Node> parse_term();
std::shared_ptr<Node> parse_func();
std::shared_ptr<Node> parse_term_lst();
};
}
}
#endif

29
src/main.cpp Normal file
View File

@ -0,0 +1,29 @@
#include <iostream>
#include "fol/Lexer.hpp"
#include "fol/Parser.hpp"
// Reads formulas from standard input, one per line, and prints the parsed
// tree for each. Lexing and parsing errors are reported on standard error
// prefixed with "E: " and do not stop the loop.
int main(int, char**)
{
    std::cout << "Sine Patre" << std::endl;

    for (std::string line; std::getline(std::cin, line); )
    {
        try
        {
            // Fresh lexer and parser for every line.
            sp::fol::Lexer lex;
            lex.scan(line);

            sp::fol::Parser parser;
            auto tree = parser.parse(lex.all());

            std::cout << tree->string() << std::endl;
        }
        catch (std::exception const& err)
        {
            std::cerr << "E: " << err.what() << std::endl;
        }
    }

    return 0;
}

70
tests/Lexer.cpp Normal file
View File

@ -0,0 +1,70 @@
#include <catch2/catch.hpp>
#include "../src/fol/Lexer.hpp"
using namespace sp::fol;
// Fixture with helpers shared by the lexer test cases.
class LexerTest
{
public:
explicit LexerTest() {}
virtual ~LexerTest() {}
// Requires that the lexer yields one more token whose string() rendering
// equals `oracle`.
void test_next(Lexer& lexer, std::string const& oracle)
{
auto token = lexer.next();
REQUIRE(nullptr != token);
REQUIRE(oracle == token->string());
}
// Requires that the lexer has no more tokens to yield.
void test_end(Lexer& lexer)
{
auto token = lexer.next();
REQUIRE(nullptr == token);
}
protected:
};
// An all-lowercase word lexes as a single VAR token.
TEST_CASE_METHOD(LexerTest, "Lexer_var")
{
Lexer lexer;
lexer.scan("hello");
test_next(lexer, "VAR[hello]");
test_end(lexer);
}
// Operators and parentheses are recognised without surrounding spaces and
// carry no value; the words between them lex as variables.
TEST_CASE_METHOD(LexerTest, "Lexer_operators")
{
Lexer lexer;
lexer.scan("&!salut|(monde)->hello");
test_next(lexer, "AND");
test_next(lexer, "NOT");
test_next(lexer, "VAR[salut]");
test_next(lexer, "OR");
test_next(lexer, "OPAR");
test_next(lexer, "VAR[monde]");
test_next(lexer, "CPAR");
test_next(lexer, "IMP");
test_next(lexer, "VAR[hello]");
test_end(lexer);
}
// A capitalised word (uppercase first letter, lowercase rest) lexes as a
// single PRED token.
TEST_CASE_METHOD(LexerTest, "Lexer_pred")
{
Lexer lexer;
lexer.scan("Hello");
test_next(lexer, "PRED[Hello]");
test_end(lexer);
}
// An all-uppercase word lexes as a single CONST token.
TEST_CASE_METHOD(LexerTest, "Lexer_const")
{
Lexer lexer;
lexer.scan("HELLO");
test_next(lexer, "CONST[HELLO]");
test_end(lexer);
}

94
tests/Parser.cpp Normal file
View File

@ -0,0 +1,94 @@
#include <catch2/catch.hpp>
#include "../src/fol/Lexer.hpp"
#include "../src/fol/Parser.hpp"
using namespace sp::fol;
// Fixture with the helper shared by the parser test cases.
class ParserTest
{
public:
explicit ParserTest() {}
virtual ~ParserTest() {}
// Lexes and parses `text`, then requires that the resulting tree's
// string() rendering equals `oracle`.
void test_parser(std::string const& oracle, std::string const& text)
{
Lexer lexer;
lexer.scan(text);
Parser parser;
auto node = parser.parse(lexer.all());
REQUIRE(oracle == node->string());
}
protected:
};
// A predicate applied to a variable; surrounding spaces are ignored.
TEST_CASE_METHOD(ParserTest, "Parser_var")
{
test_parser("FORMULA(PRED[Pr](VAR[hello]))", " Pr(hello) ");
}
// A predicate applied to a constant.
TEST_CASE_METHOD(ParserTest, "Parser_const")
{
test_parser("FORMULA(PRED[Pr](CONST[WORLD]))", " Pr(WORLD) ");
}
// Function terms with zero, one and two arguments, and a function nested
// inside another function's argument list.
TEST_CASE_METHOD(ParserTest, "Parser_func")
{
test_parser("FORMULA(PRED[Pr](FUNC[father]))",
" Pr(father()) ");
test_parser("FORMULA(PRED[Pr](FUNC[mother](VAR[x])))",
" Pr(mother(x)) ");
test_parser("FORMULA(PRED[Pr](FUNC[brother](VAR[x],VAR[y])))",
" Pr(brother(x, y)) ");
test_parser("FORMULA(PRED[Pr]("
"FUNC[sister](VAR[x],FUNC[mother](VAR[y],VAR[z]))))",
" Pr(sister(x, mother(y, z))) ");
}
// Negation wraps the predicate that follows it.
TEST_CASE_METHOD(ParserTest, "Parser_not")
{
test_parser("FORMULA(NOT(PRED[Pr](VAR[x])))",
" !Pr(x) ");
}
// Predicates with one and with two arguments.
TEST_CASE_METHOD(ParserTest, "Parser_pred")
{
test_parser("FORMULA(PRED[Happy](CONST[PIERRE]))",
" Happy(PIERRE) ");
test_parser("FORMULA(PRED[Sad](VAR[x],VAR[y]))",
" Sad(x, y) ");
}
// Operator precedence without parentheses: '->' groups more tightly than
// '&', and '&' more tightly than '|'.
TEST_CASE_METHOD(ParserTest, "Parser_pred_'or'_'imp'_'and'")
{
test_parser("FORMULA(AND("
"PRED[Pr](VAR[x])"
",IMP("
"PRED[Pr](VAR[y]),"
"PRED[Pr](VAR[z]))))",
"Pr(x) & Pr(y) -> Pr(z)");
test_parser("FORMULA(OR("
"PRED[Ab](VAR[x]),"
"AND("
"PRED[Cd](VAR[y]),"
"PRED[Ef](CONST[Z]))))",
" Ab(x) | Cd(y) & Ef(Z)");
}
// Parentheses override the default precedence: the conjunction becomes the
// left operand of the implication.
TEST_CASE_METHOD(ParserTest, "Parser_group")
{
test_parser("FORMULA(IMP(AND("
"PRED[Pr](VAR[x]),"
"PRED[Pr](VAR[y])),"
"PRED[Pr](VAR[z])))",
"(Pr(x) & Pr(y)) -> Pr(z)");
}

2
tests/main.cpp Normal file
View File

@ -0,0 +1,2 @@
// Asks Catch2 to generate the test runner's main() in this translation unit.
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>