fol parser.

main
bog 2023-10-10 15:29:22 +02:00
parent 3fe44655cd
commit 93f7430e11
15 changed files with 930 additions and 1 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
.cache
*~*
*\#*
build

View File

@ -1,6 +1,6 @@
MIT No Attribution MIT No Attribution
Copyright <YEAR> <COPYRIGHT HOLDER> Copyright 2023 bog
Permission is hereby granted, free of charge, to any person obtaining a copy of this Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software software and associated documentation files (the "Software"), to deal in the Software

11
Makefile Normal file
View File

@ -0,0 +1,11 @@
# Convenience wrapper around the Meson build.
# None of these targets produces a file named after itself, so every one of
# them is declared phony; otherwise a stray file called `install` (or `build`,
# which is also the build directory) would make the target look up to date.
.PHONY: build tests install

# Configure the build directory and compile everything into ./build.
build:
	meson setup build
	meson compile -C build

# Build, then run the test binary.
tests: build
	build/sine-patre-tests

# Install only after the test binary has run successfully.
install: tests
	meson install -C build

39
meson.build Normal file
View File

@ -0,0 +1,39 @@
# Project-wide settings: C++17, warning level 3, default install prefix /usr.
project('sine-patre',
  'cpp',
  version: '0.0.0',
  default_options: [
    'prefix=/usr',
    'cpp_std=c++17',
    'warning_level=3'
  ])

# Static library holding the first-order-logic lexer, parser and AST node.
sp_lib = static_library(
  'sine-patre',
  sources: [
    'src/fol/Node.cpp',
    'src/fol/Lexer.cpp',
    'src/fol/Parser.cpp',
  ]
)

sp_dep = declare_dependency(link_with: [sp_lib])

# Command-line front end (installed).
executable('sine-patre',
  sources: [
    'src/main.cpp',
  ],
  dependencies: [
    sp_dep
  ],
  install: true)

# Catch2 test binary (not installed).
sp_tests = executable('sine-patre-tests',
  sources: [
    'tests/main.cpp',
    'tests/Lexer.cpp',
    'tests/Parser.cpp',
  ],
  dependencies: [
    sp_dep,
    dependency('catch2')
  ])

# Register the binary so that `meson test -C build` actually runs it.
test('sine-patre-tests', sp_tests)

34
src/commons.hpp Normal file
View File

@ -0,0 +1,34 @@
#ifndef sp_COMMONS_HPP
#define sp_COMMONS_HPP
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <memory>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
// X-macro helpers. SP_GEN_ENUM yields its argument as an identifier,
// SP_GEN_STRING yields the spelling of its argument as a string literal.
#define SP_GEN_ENUM(X) X
#define SP_GEN_STRING(X) #X
// Declares `enum PREFIX` and a parallel table `PREFIX##Str` of names.
// KINDS must be a function-like macro such that KINDS(G) expands to a
// comma-separated list G(a), G(b), ...; the same list feeds both the
// enumerators and the strings, so PREFIXStr[value] is the spelling of value.
// The macro does not end with a semicolon: the invocation supplies it.
#define SP_ENUM(PREFIX, KINDS) \
enum PREFIX { KINDS(SP_GEN_ENUM) }; \
constexpr char const* PREFIX ## Str [] = { KINDS(SP_GEN_STRING) }
// Writes MSG to std::cerr and aborts the process when COND is false.
// MSG is inserted after `std::cerr <<` unparenthesised, so it may be a
// single streamable expression or a `<<` chain.
// The body is wrapped in do { } while (0) so that the macro is exactly one
// statement: `if (x) SP_ASSERT(a, b); else ...` then parses as written
// instead of the `else` failing to find its `if`. The invocation supplies
// the trailing semicolon.
#define SP_ASSERT(COND, MSG) \
    do \
    { \
        if ( !(COND) ) \
        { \
            std::cerr << MSG; \
            std::abort(); \
        } \
    } while (0)
// Declares an exception type NAME that publicly derives from
// std::runtime_error; its constructor forwards the message to the base class.
// The macro does not end with a semicolon: the invocation supplies it.
#define SP_ERROR(NAME) \
struct NAME : public std::runtime_error { \
NAME (std::string const& what) : std::runtime_error(what) {} \
}
#endif

207
src/fol/Lexer.cpp Normal file
View File

@ -0,0 +1,207 @@
#include "Lexer.hpp"
namespace sp
{
namespace fol
{
// Registers every scanner. Registration order matters: next() only replaces
// its current candidate when a later scanner matches strictly more text, so
// on equal length the scanner registered first is kept.
/*explicit*/ Lexer::Lexer()
{
    // Fixed-text tokens (operators and punctuation).
    add_text(NODE_IMP, "->");
    add_text(NODE_OR, "|");
    add_text(NODE_AND, "&");
    add_text(NODE_OPAR, "(");
    add_text(NODE_CPAR, ")");
    add_text(NODE_NOT, "!");
    add_text(NODE_COMMA, ",");

    // Identifier-like tokens.
    m_scanners.emplace_back([this] { return scan_var(); });
    m_scanners.emplace_back([this] { return scan_const(); });
    m_scanners.emplace_back([this] { return scan_pred(); });
}
// Nothing to release explicitly: all members clean up after themselves.
/*virtual*/ Lexer::~Lexer() = default;
// Loads a new input string and rewinds the cursor to its first character.
void Lexer::scan(std::string const& text)
{
    m_text.assign(text);
    m_cursor = 0;
}
std::vector<std::shared_ptr<Node>> Lexer::all()
{
std::vector<std::shared_ptr<Node>> tokens;
std::shared_ptr<Node> tok;
while ( (tok = next()) )
{
tokens.push_back(tok);
}
return tokens;
}
std::shared_ptr<Node> Lexer::next()
{
std::shared_ptr<Node> token;
std::optional<ScanInfo> curr_info;
skip_spaces();
for (auto& scanner: m_scanners)
{
auto info = scanner();
if ((info && !curr_info)
|| (info && curr_info && info->cursor > curr_info->cursor))
{
curr_info = info;
}
}
if (curr_info)
{
token = std::make_shared<Node>(curr_info->type,
curr_info->value);
m_cursor = curr_info->cursor;
}
else
{
if (m_cursor < m_text.size())
{
std::string text;
while (m_cursor < m_text.size()
&& !std::isspace(m_text.at(m_cursor)))
{
text += m_text[m_cursor];
m_cursor++;
}
throw lex_error {"unexpected end near '"
+ text
+ "'"};
}
}
return token;
}
// Advances the cursor past any whitespace at the current position.
void Lexer::skip_spaces()
{
    // The cast keeps std::isspace defined for bytes >= 0x80, which are
    // negative when plain char is signed.
    while (m_cursor < m_text.size()
           && std::isspace(static_cast<unsigned char>(m_text[m_cursor])))
    {
        m_cursor++;
    }
}
// Registers a scanner that recognises the literal `text` as a token of the
// given type. The literal is copied into the stored callable.
void Lexer::add_text(NodeType type,
                     std::string const& text,
                     bool has_value)
{
    m_scanners.emplace_back([this, type, text, has_value] {
        return scan_text(type, text, has_value);
    });
}
// Tries to match the literal `text` at the cursor without moving it.
// On success returns the position just past the literal, the token type and,
// when has_value is set, the literal itself as the token value (otherwise an
// empty value). Returns std::nullopt when the literal does not fit in the
// remaining input or differs from it.
std::optional<ScanInfo> Lexer::scan_text(NodeType type,
                                         std::string const& text,
                                         bool has_value)
{
    size_t const end = m_cursor + text.size();

    // The length check comes first so that compare() is never handed a
    // start position beyond the end of the input.
    bool const matches =
        end <= m_text.size()
        && m_text.compare(m_cursor, text.size(), text) == 0;

    if (!matches)
    {
        return std::nullopt;
    }

    return ScanInfo {
        end,
        type,
        has_value ? text : std::string {}
    };
}
// Tries to match a variable at the cursor without moving it: a run of
// alphabetic characters whose first character is lower case.
// Returns std::nullopt when the run is empty or starts otherwise.
std::optional<ScanInfo> Lexer::scan_var()
{
    size_t cursor = m_cursor;
    std::string value;

    // <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF), hence the casts.
    while (cursor < m_text.size()
           && std::isalpha(static_cast<unsigned char>(m_text[cursor])))
    {
        value += m_text[cursor];
        cursor++;
    }

    if (value.empty()
        || !std::islower(static_cast<unsigned char>(value[0])))
    {
        return std::nullopt;
    }

    return ScanInfo {
        cursor,
        NODE_VAR,
        value
    };
}
// Tries to match a predicate name at the cursor without moving it: a run of
// at least two alphabetic characters whose first character is upper case and
// whose second character is not upper case.
// Returns std::nullopt otherwise.
std::optional<ScanInfo> Lexer::scan_pred()
{
    size_t cursor = m_cursor;
    std::string value;

    // <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF), hence the casts.
    while (cursor < m_text.size()
           && std::isalpha(static_cast<unsigned char>(m_text[cursor])))
    {
        value += m_text[cursor];
        cursor++;
    }

    if (value.size() < 2
        || !std::isupper(static_cast<unsigned char>(value[0]))
        || std::isupper(static_cast<unsigned char>(value[1])))
    {
        return std::nullopt;
    }

    return ScanInfo {
        cursor,
        NODE_PRED,
        value
    };
}
// Tries to match a constant at the cursor without moving it: a non-empty run
// of upper-case alphabetic characters. Returns std::nullopt when the run is
// empty.
std::optional<ScanInfo> Lexer::scan_const()
{
    size_t cursor = m_cursor;
    std::string value;

    // <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF), hence the casts.
    while (cursor < m_text.size()
           && std::isalpha(static_cast<unsigned char>(m_text[cursor]))
           && std::isupper(static_cast<unsigned char>(m_text[cursor])))
    {
        value += m_text[cursor];
        cursor++;
    }

    if (value.empty())
    {
        return std::nullopt;
    }

    return ScanInfo {
        cursor,
        NODE_CONST,
        value
    };
}
}
}

53
src/fol/Lexer.hpp Normal file
View File

@ -0,0 +1,53 @@
#ifndef sp_fol_LEXER_HPP
#define sp_fol_LEXER_HPP
#include "../commons.hpp"
#include "Node.hpp"
namespace sp
{
namespace fol
{
// Exception type for lexing failures (a std::runtime_error, see SP_ERROR).
SP_ERROR(lex_error);
// Result of one successful scanner attempt.
struct ScanInfo {
// Position in the input just past the matched text.
size_t cursor;
// Type of the token the match produces.
NodeType type;
// Value of the token; may be empty.
std::string value;
};
// A scanner takes no argument and returns the match found at the lexer's
// current position, or std::nullopt when it does not match.
using scanner_t = std::function<std::optional<ScanInfo>()>;
// Splits an input string into token nodes.
// Usage: call scan() to load the text, then next() repeatedly or all() once.
// NOTE(review): the registered scanners hold a pointer to the Lexer that
// created them, so a copied Lexer would still scan through the original
// object; avoid copying instances.
class Lexer
{
public:
    explicit Lexer();
    virtual ~Lexer();

    // Loads `text` as the new input and rewinds to its beginning.
    void scan(std::string const& text);

    // Returns every remaining token; throws lex_error on unrecognised input.
    std::vector<std::shared_ptr<Node>> all();

    // Returns the next token, or null at the end of the input; throws
    // lex_error on unrecognised input.
    std::shared_ptr<Node> next();

private:
    // Input being scanned.
    std::string m_text;

    // Index of the next unread character of m_text. Initialised here so that
    // calling next() before scan() sees an empty input at position 0 rather
    // than an indeterminate cursor.
    size_t m_cursor = 0;

    // Scanners tried by next(), in registration order.
    std::vector<scanner_t> m_scanners;

    // Registers a scanner for the fixed literal `text`.
    void add_text(NodeType type,
                  std::string const& text,
                  bool has_value=false);

    // Moves the cursor past leading whitespace.
    void skip_spaces();

    // Scanners: each inspects the input at the cursor without moving it.
    std::optional<ScanInfo> scan_text(NodeType type,
                                      std::string const& text,
                                      bool has_value=false);
    std::optional<ScanInfo> scan_var();
    std::optional<ScanInfo> scan_pred();
    std::optional<ScanInfo> scan_const();
};
}
}
#endif

58
src/fol/Node.cpp Normal file
View File

@ -0,0 +1,58 @@
#include "Node.hpp"
namespace sp
{
namespace fol
{
// Creates a childless node of the given type carrying `value`.
/*explicit*/ Node::Node(NodeType type, std::string const& value)
    : m_type(type)
    , m_value(value)
{
}
// Nothing to release explicitly: all members clean up after themselves.
/*virtual*/ Node::~Node() = default;
// Appends `child` after the children already present.
void Node::add_child(std::shared_ptr<Node> child)
{
    m_children.emplace_back(child);
}
// Returns the child at position `index`.
// An out-of-range index is treated as a programming error: SP_ASSERT prints
// the message and aborts the process.
std::shared_ptr<Node> Node::child(size_t index)
{
    bool const in_range = index < m_children.size();

    SP_ASSERT(in_range, "cannot get child at index '"
              + std::to_string(index)
              + "'");

    return m_children[index];
}
// Renders the subtree rooted at this node as text:
//   TYPE            the type name without its "NODE_" prefix
//   [value]         appended only when the value is non-empty
//   (child,child)   appended only when there is at least one child
std::string Node::string() const
{
    std::stringstream ss;

    // Names in NodeTypeStr start with "NODE_"; step over that prefix.
    ss << (NodeTypeStr[m_type] + std::strlen("NODE_"));

    if (!m_value.empty())
    {
        ss << "[" << m_value << "]";
    }

    if (!m_children.empty())
    {
        char const* sep = "";

        ss << "(";

        // Iterate by reference: copying each shared_ptr would only add
        // reference-count traffic.
        for (auto const& child: m_children)
        {
            ss << sep << child->string();
            sep = ",";
        }

        ss << ")";
    }

    return ss.str();
}
}
}

55
src/fol/Node.hpp Normal file
View File

@ -0,0 +1,55 @@
#ifndef sp_fol_NODE_HPP
#define sp_fol_NODE_HPP
#include "../commons.hpp"
#include <memory>
// X-macro listing every node type; G is applied to each name in turn.
// Passed to SP_ENUM, the list order fixes both the enumerator values and the
// indices of the matching name table, so new entries change the numbering of
// whatever follows them.
#define NODE_TYPES(G) \
G(NODE_FORMULA), \
G(NODE_VAR), \
G(NODE_CONST), \
G(NODE_FUNC), \
G(NODE_PRED), \
G(NODE_OR), \
G(NODE_AND), \
G(NODE_IMP), \
G(NODE_OPAR), \
G(NODE_CPAR), \
G(NODE_NOT), \
G(NODE_COMMA), \
G(NODE_LITERAL), \
G(NODE_TERM), \
G(NODE_TERM_LST)
namespace sp
{
namespace fol
{
// Defines `enum NodeType` and the name table `NodeTypeStr` from NODE_TYPES.
SP_ENUM(NodeType, NODE_TYPES);
// A typed node with an optional string value and an ordered list of children.
// The lexer creates childless nodes as tokens; the parser links nodes into a
// tree.
class Node
{
public:
// Creates a childless node.
explicit Node(NodeType type, std::string const& value);
virtual ~Node();
// Type given at construction.
NodeType type() const { return m_type; }
// Value given at construction (returned as a copy).
std::string value() const { return m_value; }
// Number of direct children.
size_t size() const { return m_children.size(); }
// Appends a child after the existing ones.
void add_child(std::shared_ptr<Node> child);
// Returns the child at `index`; an out-of-range index aborts the process.
std::shared_ptr<Node> child(size_t index);
// Text rendering of the subtree, e.g. "PRED[Pr](VAR[x])".
std::string string() const;
private:
NodeType m_type;
std::string m_value;
std::vector<std::shared_ptr<Node>> m_children;
};
}
}
#endif

223
src/fol/Parser.cpp Normal file
View File

@ -0,0 +1,223 @@
#include "Parser.hpp"
#include "src/fol/Node.hpp"
namespace sp
{
namespace fol
{
// Nothing to set up: the members carry their own initial state.
/*explicit*/ Parser::Parser() = default;
// Nothing to release explicitly: all members clean up after themselves.
/*virtual*/ Parser::~Parser() = default;
// Parses a complete formula from `tokens` and returns its FORMULA root.
// Throws parse_error when the tokens do not form a formula or when tokens
// are left over once the formula has been read.
std::shared_ptr<Node>
Parser::parse(std::vector<std::shared_ptr<Node>> const& tokens)
{
    m_cursor = 0;
    m_tokens = tokens;

    auto root = parse_formula();

    bool const fully_consumed = m_cursor >= m_tokens.size();

    if (!fully_consumed)
    {
        std::string const leftover = m_tokens[m_cursor]->string();

        throw parse_error {"unexpected end near '"
                           + leftover
                           + "'"};
    }

    return root;
}
// Creates a fresh node of the given type with an empty value.
std::shared_ptr<Node> Parser::make_node(NodeType type) const
{
    std::string const no_value;

    return std::make_shared<Node>(type, no_value);
}
// Tells whether the token `lookahead` positions after the cursor exists and
// has the given type. Does not move the cursor.
bool Parser::type_is(NodeType type, size_t lookahead) const
{
    size_t const index = m_cursor + lookahead;

    return index < m_tokens.size()
        && m_tokens[index]->type() == type;
}
// Returns the token under the cursor and moves the cursor past it.
// Throws parse_error when no token is left.
std::shared_ptr<Node> Parser::consume()
{
    if (m_cursor < m_tokens.size())
    {
        return m_tokens[m_cursor++];
    }

    throw parse_error {"no token to consume"};
}
// Consumes and returns the current token, which must have the given type.
// Throws parse_error naming the expected type and either the type actually
// found or the fact that the input ended. The cursor is left untouched when
// the check fails.
std::shared_ptr<Node> Parser::consume(NodeType type)
{
    if (type_is(type))
    {
        return consume();
    }

    // Names in NodeTypeStr start with "NODE_"; step over that prefix.
    size_t const prefix = std::strlen("NODE_");
    std::string const expected = NodeTypeStr[type] + prefix;

    // Running out of tokens gets its own message; reporting only that no
    // token is left would hide what the parser was waiting for.
    if (m_cursor >= m_tokens.size())
    {
        throw parse_error {"expected '"+expected+"', got end of input"};
    }

    std::string const got = NodeTypeStr[m_tokens[m_cursor]->type()] + prefix;

    throw parse_error {"expected '"+expected+"', got '"+got+"'"};
}
// formula := or
// Wraps the parsed expression in a FORMULA root node.
std::shared_ptr<Node> Parser::parse_formula()
{
    auto formula = make_node(NODE_FORMULA);
    auto body = parse_or();

    formula->add_child(body);

    return formula;
}
// or := and ('|' and)*
// Left-associative: each OR token becomes the parent of the tree built so
// far and of the operand that follows it.
std::shared_ptr<Node> Parser::parse_or()
{
    auto tree = parse_and();

    while (true)
    {
        if (!type_is(NODE_OR))
        {
            return tree;
        }

        auto op = consume();
        op->add_child(tree);
        op->add_child(parse_and());
        tree = op;
    }
}
// and := imp ('&' imp)*
// Left-associative: each AND token becomes the parent of the tree built so
// far and of the operand that follows it.
std::shared_ptr<Node> Parser::parse_and()
{
    auto tree = parse_imp();

    while (true)
    {
        if (!type_is(NODE_AND))
        {
            return tree;
        }

        auto op = consume();
        op->add_child(tree);
        op->add_child(parse_imp());
        tree = op;
    }
}
// imp := not ('->' not)?
// At most one implication is read here; the IMP token becomes the parent of
// its two operands.
std::shared_ptr<Node> Parser::parse_imp()
{
    auto premise = parse_not();

    if (!type_is(NODE_IMP))
    {
        return premise;
    }

    auto arrow = consume();
    arrow->add_child(premise);
    arrow->add_child(parse_not());

    return arrow;
}
// not := '!' not | group
// The NOT token becomes the parent of its operand. The operand is parsed
// with parse_not itself so that stacked negations such as `!!Pr(x)` are
// accepted; inputs with a single '!' parse exactly as before.
std::shared_ptr<Node> Parser::parse_not()
{
    if (!type_is(NODE_NOT))
    {
        return parse_group();
    }

    auto node = consume(NODE_NOT);
    node->add_child(parse_not());

    return node;
}
// group := '(' or ')' | pred
// Parentheses only steer the parse; they leave no node in the tree.
std::shared_ptr<Node> Parser::parse_group()
{
    if (!type_is(NODE_OPAR))
    {
        return parse_pred();
    }

    consume();
    auto inner = parse_or();
    consume(NODE_CPAR);

    return inner;
}
// pred := PRED '(' term_lst ')'
// The terms become direct children of the PRED token; the temporary
// TERM_LST node is dropped.
std::shared_ptr<Node> Parser::parse_pred()
{
    auto pred = consume(NODE_PRED);
    consume(NODE_OPAR);

    auto args = parse_term_lst();
    size_t const count = args->size();

    for (size_t k = 0; k != count; ++k)
    {
        pred->add_child(args->child(k));
    }

    consume(NODE_CPAR);

    return pred;
}
// term := func | VAR | CONST
// A VAR immediately followed by '(' is a function application.
// Throws parse_error for any other token, or when no token is left.
std::shared_ptr<Node> Parser::parse_term()
{
    bool const starts_with_var = type_is(NODE_VAR);

    if (starts_with_var && type_is(NODE_OPAR, 1))
    {
        return parse_func();
    }

    if (starts_with_var || type_is(NODE_CONST))
    {
        return consume();
    }

    auto offending = consume();
    std::string type_str = NodeTypeStr[offending->type()] + strlen("NODE_");

    throw parse_error {"unknown term '" + type_str + "'"};
}
// func := VAR '(' term_lst ')'
// Builds a new FUNC node named after the VAR token; the terms become its
// direct children.
std::shared_ptr<Node> Parser::parse_func()
{
    auto name = consume(NODE_VAR);
    auto func = std::make_shared<Node>(NODE_FUNC, name->value());

    consume(NODE_OPAR);

    auto args = parse_term_lst();
    size_t const count = args->size();

    for (size_t k = 0; k != count; ++k)
    {
        func->add_child(args->child(k));
    }

    consume(NODE_CPAR);

    return func;
}
// term_lst := (term (',' term)*)?
// Returns a TERM_LST node holding the terms in order. The list is empty when
// the next token is ')', which is left for the caller to consume.
std::shared_ptr<Node> Parser::parse_term_lst()
{
    auto list = make_node(NODE_TERM_LST);

    if (!type_is(NODE_CPAR))
    {
        list->add_child(parse_term());

        while (type_is(NODE_COMMA))
        {
            consume();
            list->add_child(parse_term());
        }
    }

    return list;
}
}
}

50
src/fol/Parser.hpp Normal file
View File

@ -0,0 +1,50 @@
#ifndef sp_fol_PARSER_HPP
#define sp_fol_PARSER_HPP
#include "../commons.hpp"
#include "Node.hpp"
namespace sp
{
namespace fol
{
// Exception type for parsing failures (a std::runtime_error, see SP_ERROR).
SP_ERROR(parse_error);
// Recursive-descent parser that turns a token list into a tree of nodes.
class Parser
{
public:
explicit Parser();
virtual ~Parser();
// Parses `tokens` as one formula and returns the root node.
// Throws parse_error when the tokens cannot be parsed or some are left over.
std::shared_ptr<Node>
parse(std::vector<std::shared_ptr<Node>> const& tokens);
private:
// Tokens of the formula being parsed (a copy of parse()'s argument).
std::vector<std::shared_ptr<Node>> m_tokens;
// Index of the next token to read.
size_t m_cursor = 0;
// Creates a node of the given type with an empty value.
std::shared_ptr<Node> make_node(NodeType type) const;
// Tests the type of the token `lookahead` positions after the cursor.
bool type_is(NodeType type, size_t lookahead=0) const;
// Returns the current token and advances; throws when none is left.
std::shared_ptr<Node> consume();
// Same, but throws unless the current token has the given type.
std::shared_ptr<Node> consume(NodeType type);
// One function per grammar rule. The binary-operator rules call each other
// in the order or -> and -> imp -> not -> group, so '|' binds loosest and
// '->' binds tighter than '&'.
std::shared_ptr<Node> parse_formula();
std::shared_ptr<Node> parse_or();
std::shared_ptr<Node> parse_and();
std::shared_ptr<Node> parse_imp();
std::shared_ptr<Node> parse_not();
std::shared_ptr<Node> parse_group();
std::shared_ptr<Node> parse_pred();
std::shared_ptr<Node> parse_term();
std::shared_ptr<Node> parse_func();
std::shared_ptr<Node> parse_term_lst();
};
}
}
#endif

29
src/main.cpp Normal file
View File

@ -0,0 +1,29 @@
#include <iostream>
#include "fol/Lexer.hpp"
#include "fol/Parser.hpp"
int main(int, char**)
{
std::cout << "Sine Patre" << std::endl;
std::string line;
while (std::getline(std::cin, line))
{
try
{
sp::fol::Lexer lex;
lex.scan(line);
sp::fol::Parser parser;
auto node = parser.parse(lex.all());
std::cout << node->string() << std::endl;
}
catch (std::exception const& err)
{
std::cerr << "E: " << err.what() << std::endl;
}
}
return 0;
}

70
tests/Lexer.cpp Normal file
View File

@ -0,0 +1,70 @@
#include <catch2/catch.hpp>
#include "../src/fol/Lexer.hpp"
using namespace sp::fol;
// Fixture with helpers shared by the lexer test cases.
class LexerTest
{
public:
explicit LexerTest() {}
virtual ~LexerTest() {}
// Pulls one token from `lexer` and requires that it exists and that its
// text rendering equals `oracle`.
void test_next(Lexer& lexer, std::string const& oracle)
{
auto token = lexer.next();
REQUIRE(nullptr != token);
REQUIRE(oracle == token->string());
}
// Requires that `lexer` has no token left.
void test_end(Lexer& lexer)
{
auto token = lexer.next();
REQUIRE(nullptr == token);
}
protected:
};
// An all-lower-case word is lexed as a single variable.
TEST_CASE_METHOD(LexerTest, "Lexer_var")
{
Lexer lexer;
lexer.scan("hello");
test_next(lexer, "VAR[hello]");
test_end(lexer);
}
// Operators and parentheses are recognised without any separating
// whitespace, and operator tokens carry no value.
TEST_CASE_METHOD(LexerTest, "Lexer_operators")
{
Lexer lexer;
lexer.scan("&!salut|(monde)->hello");
test_next(lexer, "AND");
test_next(lexer, "NOT");
test_next(lexer, "VAR[salut]");
test_next(lexer, "OR");
test_next(lexer, "OPAR");
test_next(lexer, "VAR[monde]");
test_next(lexer, "CPAR");
test_next(lexer, "IMP");
test_next(lexer, "VAR[hello]");
test_end(lexer);
}
// A capitalised word (upper-case first letter, lower-case second) is lexed
// as a predicate name.
TEST_CASE_METHOD(LexerTest, "Lexer_pred")
{
Lexer lexer;
lexer.scan("Hello");
test_next(lexer, "PRED[Hello]");
test_end(lexer);
}
// An all-upper-case word is lexed as a constant.
TEST_CASE_METHOD(LexerTest, "Lexer_const")
{
Lexer lexer;
lexer.scan("HELLO");
test_next(lexer, "CONST[HELLO]");
test_end(lexer);
}

94
tests/Parser.cpp Normal file
View File

@ -0,0 +1,94 @@
#include <catch2/catch.hpp>
#include "../src/fol/Lexer.hpp"
#include "../src/fol/Parser.hpp"
using namespace sp::fol;
// Fixture with the helper shared by the parser test cases.
class ParserTest
{
public:
explicit ParserTest() {}
virtual ~ParserTest() {}
// Lexes and parses `text`, then requires that the text rendering of the
// resulting tree equals `oracle`. Note the argument order: expected first.
void test_parser(std::string const& oracle, std::string const& text)
{
Lexer lexer;
lexer.scan(text);
Parser parser;
auto node = parser.parse(lexer.all());
REQUIRE(oracle == node->string());
}
protected:
};
// A predicate applied to one variable; surrounding spaces are ignored.
TEST_CASE_METHOD(ParserTest, "Parser_var")
{
test_parser("FORMULA(PRED[Pr](VAR[hello]))", " Pr(hello) ");
}
// A predicate applied to one constant.
TEST_CASE_METHOD(ParserTest, "Parser_const")
{
test_parser("FORMULA(PRED[Pr](CONST[WORLD]))", " Pr(WORLD) ");
}
// Function terms: no argument, one argument, two arguments, and a function
// nested inside another function's argument list.
TEST_CASE_METHOD(ParserTest, "Parser_func")
{
test_parser("FORMULA(PRED[Pr](FUNC[father]))",
" Pr(father()) ");
test_parser("FORMULA(PRED[Pr](FUNC[mother](VAR[x])))",
" Pr(mother(x)) ");
test_parser("FORMULA(PRED[Pr](FUNC[brother](VAR[x],VAR[y])))",
" Pr(brother(x, y)) ");
test_parser("FORMULA(PRED[Pr]("
"FUNC[sister](VAR[x],FUNC[mother](VAR[y],VAR[z]))))",
" Pr(sister(x, mother(y, z))) ");
}
// Negation wraps the predicate that follows it.
TEST_CASE_METHOD(ParserTest, "Parser_not")
{
test_parser("FORMULA(NOT(PRED[Pr](VAR[x])))",
" !Pr(x) ");
}
// Predicates with other names and with one or two arguments.
TEST_CASE_METHOD(ParserTest, "Parser_pred")
{
test_parser("FORMULA(PRED[Happy](CONST[PIERRE]))",
" Happy(PIERRE) ");
test_parser("FORMULA(PRED[Sad](VAR[x],VAR[y]))",
" Sad(x, y) ");
}
// Operator precedence without parentheses: '->' groups before '&', and '&'
// groups before '|'.
TEST_CASE_METHOD(ParserTest, "Parser_pred_'or'_'imp'_'and'")
{
test_parser("FORMULA(AND("
"PRED[Pr](VAR[x])"
",IMP("
"PRED[Pr](VAR[y]),"
"PRED[Pr](VAR[z]))))",
"Pr(x) & Pr(y) -> Pr(z)");
test_parser("FORMULA(OR("
"PRED[Ab](VAR[x]),"
"AND("
"PRED[Cd](VAR[y]),"
"PRED[Ef](CONST[Z]))))",
" Ab(x) | Cd(y) & Ef(Z)");
}
// Parentheses override the default precedence and leave no node of their
// own in the tree.
TEST_CASE_METHOD(ParserTest, "Parser_group")
{
test_parser("FORMULA(IMP(AND("
"PRED[Pr](VAR[x]),"
"PRED[Pr](VAR[y])),"
"PRED[Pr](VAR[z])))",
"(Pr(x) & Pr(y)) -> Pr(z)");
}

2
tests/main.cpp Normal file
View File

@ -0,0 +1,2 @@
#define CATCH_CONFIG_MAIN
#include <catch2/catch.hpp>