From badb08ec0525c8faa68fba017ce35f56b9ac71b5 Mon Sep 17 00:00:00 2001 From: bog Date: Wed, 24 Apr 2024 14:40:43 +0200 Subject: [PATCH] :sparkles: parser for formula. --- conftest.py | 11 +++ doc/grammar.bnf | 7 +- fol/__init__.py | 10 +++ fol/lexer.py | 14 ++++ fol/parser.py | 170 +++++++++++++++++++++++++++++++++++++++ fol/tests/test_parser.py | 45 +++++++++++ 6 files changed, 255 insertions(+), 2 deletions(-) create mode 100644 fol/__init__.py create mode 100644 fol/parser.py create mode 100644 fol/tests/test_parser.py diff --git a/conftest.py b/conftest.py index f68b808..20ff16d 100644 --- a/conftest.py +++ b/conftest.py @@ -1,7 +1,18 @@ import pytest from fol.lexer import Lexer +from fol.parser import Parser @pytest.fixture def lexer(): return Lexer() + + +@pytest.fixture +def parser(): + def inner(text): + lexer = Lexer() + lexer.scan(text) + parser = Parser(lexer) + return parser + return inner diff --git a/doc/grammar.bnf b/doc/grammar.bnf index fdf195a..4d66d6e 100644 --- a/doc/grammar.bnf +++ b/doc/grammar.bnf @@ -4,10 +4,13 @@ FORALL ::= forall var IMP | IMP IMP ::= OR imp OR OR ::= AND (or AND)* AND ::= NOT (and NOT)* -NOT ::= not? PRED +NOT ::= not? GROUP +GROUP ::= NOT | opar F cpar PRED ::= pred opar TERM (comma TERM)* cpar TERM ::= | var | const -| var opar TERM (comma TERM)* cpar +| FUN + +FUN ::= var opar TERM (comma TERM)* cpar diff --git a/fol/__init__.py b/fol/__init__.py new file mode 100644 index 0000000..0967ba8 --- /dev/null +++ b/fol/__init__.py @@ -0,0 +1,10 @@ +from fol import lexer +from fol import parser + + +def parse(text): + lex = lexer.Lexer() + lex.scan(text) + p = parser.Parser(lex) + root = p.parse() + return root diff --git a/fol/lexer.py b/fol/lexer.py index fe87702..07c02da 100644 --- a/fol/lexer.py +++ b/fol/lexer.py @@ -3,6 +3,9 @@ class Token: self.name = name self.value = value + def is_a(self, name): + return self.name == name + def __str__(self): if self.value is not None: return f'{self.name}[{self.value}]' @@ -43,6 +46,17 @@ class Lexer: return None + def peek(self, lookahead=0): + tok = None + cursor = self.cursor + + for i in range(lookahead + 1): + tok = self.next() + + self.cursor = cursor + + return tok + def scan_spaces(self): while self.cursor < len(self.text) \ and self.text[self.cursor].isspace(): diff --git a/fol/parser.py b/fol/parser.py new file mode 100644 index 0000000..af894d1 --- /dev/null +++ b/fol/parser.py @@ -0,0 +1,170 @@ +class Node: + def __init__(self, name, value=None): + self.name = name + self.value = value + self.children = [] + + def add_child(self, child): + self.children.append(child) + + def __str__(self): + res = self.name + if self.value is not None: + res += f'[{self.value}]' + + if len(self.children) > 0: + res += '(' + sep = '' + + for child in self.children: + res += sep + str(child) + sep = ',' + res += ')' + return res + + +class Parser: + def __init__(self, lexer): + self.lexer = lexer + + def consume(self, name): + tok = self.lexer.peek() + + if tok and tok.is_a(name): + self.lexer.next() + return tok + + return None + + def ensure(self, name): + tok = self.consume(name) + + if tok is None: + raise Exception(f'invalid {name}') + + if tok and not tok.is_a(name): + raise Exception(f'expected {name}, got {tok.name}') + + return tok + + def parse(self): + return self.parse_f() + + def parse_f(self): + return self.parse_exists() + + def parse_exists(self): + if self.consume('EXISTS'): + tok_var = self.ensure('VAR') + rhs = self.parse_forall() + node = Node('EXISTS') + node.add_child(Node('VAR', tok_var.value)) + node.add_child(rhs) + return node + + return self.parse_forall() + + def parse_forall(self): + if self.consume('FORALL'): + tok_var = self.ensure('VAR') + rhs = self.parse_imp() + node = Node('FORALL') + node.add_child(Node('VAR', tok_var.value)) + node.add_child(rhs) + return node + + return self.parse_imp() + + def parse_imp(self): + lhs = self.parse_or() + + if self.consume('IMP'): + rhs = self.parse_or() + node = Node('IMP') + node.add_child(lhs) + node.add_child(rhs) + lhs = node + + return lhs + + def parse_or(self): + lhs = self.parse_and() + + while self.consume('OR'): + rhs = self.parse_and() + node = Node('OR') + node.add_child(lhs) + node.add_child(rhs) + lhs = node + + return lhs + + def parse_and(self): + lhs = self.parse_not() + + while self.consume('AND'): + rhs = self.parse_not() + node = Node('AND') + node.add_child(lhs) + node.add_child(rhs) + lhs = node + + return lhs + + def parse_not(self): + if self.consume('NOT'): + node = Node('NOT') + node.add_child(self.parse_pred()) + return node + + return self.parse_group() + + def parse_group(self): + if self.consume('OPAR'): + node = self.parse_f() + self.ensure('CPAR') + return node + return self.parse_pred() + + def parse_pred(self): + tok = self.ensure('PRED') + self.ensure('OPAR') + + node = Node('PRED', tok.value) + node.add_child(self.parse_term()) + + while self.consume('COMMA'): + node.add_child(self.parse_term()) + + self.ensure('CPAR') + return node + + def parse_term(self): + maybe_var = self.lexer.peek(0) + maybe_opar = self.lexer.peek(1) + + if maybe_var and maybe_opar and maybe_var.is_a('VAR') \ + and maybe_opar.is_a('OPAR'): + return self.parse_function() + + tok = self.lexer.next() + if tok is None: + raise Exception('unexpected end') + + if tok.name in ['VAR', 'CONST']: + return Node(tok.name, tok.value) + + raise Exception(f'unknown term {tok.value}') + + def parse_function(self): + tok = self.ensure('VAR') + self.ensure('OPAR') + + node = Node('FUN', tok.value) + node.add_child(self.parse_term()) + + while self.consume('COMMA'): + node.add_child(self.parse_term()) + + self.ensure('CPAR') + return node diff --git a/fol/tests/test_parser.py b/fol/tests/test_parser.py new file mode 100644 index 0000000..33f19e0 --- /dev/null +++ b/fol/tests/test_parser.py @@ -0,0 +1,45 @@ +import pytest + + +@pytest.mark.parametrize(['oracle', 'text'], [ + ('PRED[Happy](VAR[x])', 'Happy(x)'), + ('PRED[Sad](VAR[y],VAR[z])', 'Sad(y, z)'), + # EXISTS + ('EXISTS(VAR[x],PRED[Happy](VAR[x]))', '\\exists x Happy(x)'), + # FORALL + ('FORALL(VAR[z],PRED[Sad](VAR[z]))', '\\forall z Sad(z)'), + # IMP + ('IMP(PRED[Rich](VAR[x]),PRED[Happy](VAR[x]))', + 'Rich(x) -> Happy(x)'), + # OR + ('OR(OR(PRED[Rich](VAR[x]),PRED[Happy](VAR[x])),' + 'PRED[Smart](VAR[x]))', + 'Rich(x) | Happy(x) | Smart(x)'), + # AND + ('AND(AND(PRED[Rich](VAR[x]),PRED[Happy](VAR[x])),' + 'PRED[Smart](VAR[x]))', + 'Rich(x) & Happy(x) & Smart(x)'), + + ('OR(AND(PRED[Rich](VAR[x]),' + 'PRED[Happy](VAR[x])),PRED[Smart](VAR[x]))', + 'Rich(x) & Happy(x) | Smart(x)'), + + ('AND(PRED[Rich](VAR[x]),OR(PRED[Happy](VAR[x]),' + 'PRED[Smart](VAR[x])))', + 'Rich(x) & (Happy(x) | Smart(x))'), + + # NOT + ('FORALL(VAR[x],NOT(PRED[Happy](VAR[x])))', + '\\forall x ~Happy(x)'), + # PRED + ('PRED[Rich](CONST[BOB])', + 'Rich(BOB)'), + ('PRED[Rich](FUN[father](CONST[BOB]))', + 'Rich(father(BOB))'), +]) +def test_parser(parser, oracle, text): + + p = parser(text) + root = p.parse() + + assert oracle == str(root)