parser for formula.

main
bog 2024-04-24 14:40:43 +02:00
parent 5cecb1f1ed
commit badb08ec05
6 changed files with 255 additions and 2 deletions

View File

@ -1,7 +1,18 @@
import pytest import pytest
from fol.lexer import Lexer from fol.lexer import Lexer
from fol.parser import Parser
@pytest.fixture @pytest.fixture
def lexer(): def lexer():
return Lexer() return Lexer()
@pytest.fixture
def parser():
def inner(text):
lexer = Lexer()
lexer.scan(text)
parser = Parser(lexer)
return parser
return inner

View File

@ -4,10 +4,13 @@ FORALL ::= forall var IMP | IMP
IMP ::= OR imp OR IMP ::= OR imp OR
OR ::= AND (or AND)* OR ::= AND (or AND)*
AND ::= NOT (and NOT)* AND ::= NOT (and NOT)*
NOT ::= not? PRED NOT ::= not? GROUP
GROUP ::= NOT | opar F cpar
PRED ::= pred opar TERM (comma TERM)* cpar PRED ::= pred opar TERM (comma TERM)* cpar
TERM ::= TERM ::=
| var | var
| const | const
| var opar TERM (comma TERM)* cpar | FUN
FUN ::= var opar TERM (comma TERM)* cpar

10
fol/__init__.py Normal file
View File

@ -0,0 +1,10 @@
from fol import lexer
from fol import parser
def parse(text):
lex = lexer.Lexer()
lex.scan(text)
p = parser.Parser(lex)
root = p.parse()
return root

View File

@ -3,6 +3,9 @@ class Token:
self.name = name self.name = name
self.value = value self.value = value
def is_a(self, name):
return self.name == name
def __str__(self): def __str__(self):
if self.value is not None: if self.value is not None:
return f'{self.name}[{self.value}]' return f'{self.name}[{self.value}]'
@ -43,6 +46,17 @@ class Lexer:
return None return None
def peek(self, lookahead=0):
tok = None
cursor = self.cursor
for i in range(lookahead + 1):
tok = self.next()
self.cursor = cursor
return tok
def scan_spaces(self): def scan_spaces(self):
while self.cursor < len(self.text) \ while self.cursor < len(self.text) \
and self.text[self.cursor].isspace(): and self.text[self.cursor].isspace():

170
fol/parser.py Normal file
View File

@ -0,0 +1,170 @@
class Node:
def __init__(self, name, value=None):
self.name = name
self.value = value
self.children = []
def add_child(self, child):
self.children.append(child)
def __str__(self):
res = self.name
if self.value is not None:
res += f'[{self.value}]'
if len(self.children) > 0:
res += '('
sep = ''
for child in self.children:
res += sep + str(child)
sep = ','
res += ')'
return res
class Parser:
def __init__(self, lexer):
self.lexer = lexer
def consume(self, name):
tok = self.lexer.peek()
if tok and tok.is_a(name):
self.lexer.next()
return tok
return None
def ensure(self, name):
tok = self.consume(name)
if tok is None:
raise Exception(f'invalid {name}')
if tok and not tok.is_a(name):
raise Exception(f'expected {name}, got {tok.name}')
return tok
def parse(self):
return self.parse_f()
def parse_f(self):
return self.parse_exists()
def parse_exists(self):
if self.consume('EXISTS'):
tok_var = self.ensure('VAR')
rhs = self.parse_forall()
node = Node('EXISTS')
node.add_child(Node('VAR', tok_var.value))
node.add_child(rhs)
return node
return self.parse_forall()
def parse_forall(self):
if self.consume('FORALL'):
tok_var = self.ensure('VAR')
rhs = self.parse_imp()
node = Node('FORALL')
node.add_child(Node('VAR', tok_var.value))
node.add_child(rhs)
return node
return self.parse_imp()
def parse_imp(self):
lhs = self.parse_or()
if self.consume('IMP'):
rhs = self.parse_or()
node = Node('IMP')
node.add_child(lhs)
node.add_child(rhs)
lhs = node
return lhs
def parse_or(self):
lhs = self.parse_and()
while self.consume('OR'):
rhs = self.parse_and()
node = Node('OR')
node.add_child(lhs)
node.add_child(rhs)
lhs = node
return lhs
def parse_and(self):
lhs = self.parse_not()
while self.consume('AND'):
rhs = self.parse_not()
node = Node('AND')
node.add_child(lhs)
node.add_child(rhs)
lhs = node
return lhs
def parse_not(self):
if self.consume('NOT'):
node = Node('NOT')
node.add_child(self.parse_pred())
return node
return self.parse_group()
def parse_group(self):
if self.consume('OPAR'):
node = self.parse_f()
self.ensure('CPAR')
return node
return self.parse_pred()
def parse_pred(self):
tok = self.ensure('PRED')
self.ensure('OPAR')
node = Node('PRED', tok.value)
node.add_child(self.parse_term())
while self.consume('COMMA'):
node.add_child(self.parse_term())
self.ensure('CPAR')
return node
def parse_term(self):
maybe_var = self.lexer.peek(0)
maybe_opar = self.lexer.peek(1)
if maybe_var and maybe_opar and maybe_var.is_a('VAR') \
and maybe_opar.is_a('OPAR'):
return self.parse_function()
tok = self.lexer.next()
if tok is None:
raise Exception('unexpected end')
if tok.name in ['VAR', 'CONST']:
return Node(tok.name, tok.value)
raise Exception(f'unknown term {tok.value}')
def parse_function(self):
tok = self.ensure('VAR')
self.ensure('OPAR')
node = Node('FUN', tok.value)
node.add_child(self.parse_term())
while self.consume('COMMA'):
node.add_child(self.parse_term())
self.ensure('CPAR')
return node

45
fol/tests/test_parser.py Normal file
View File

@ -0,0 +1,45 @@
import pytest
@pytest.mark.parametrize(['oracle', 'text'], [
('PRED[Happy](VAR[x])', 'Happy(x)'),
('PRED[Sad](VAR[y],VAR[z])', 'Sad(y, z)'),
# EXISTS
('EXISTS(VAR[x],PRED[Happy](VAR[x]))', '\\exists x Happy(x)'),
# FORALL
('FORALL(VAR[z],PRED[Sad](VAR[z]))', '\\forall z Sad(z)'),
# IMP
('IMP(PRED[Rich](VAR[x]),PRED[Happy](VAR[x]))',
'Rich(x) -> Happy(x)'),
# OR
('OR(OR(PRED[Rich](VAR[x]),PRED[Happy](VAR[x])),'
'PRED[Smart](VAR[x]))',
'Rich(x) | Happy(x) | Smart(x)'),
# AND
('AND(AND(PRED[Rich](VAR[x]),PRED[Happy](VAR[x])),'
'PRED[Smart](VAR[x]))',
'Rich(x) & Happy(x) & Smart(x)'),
('OR(AND(PRED[Rich](VAR[x]),'
'PRED[Happy](VAR[x])),PRED[Smart](VAR[x]))',
'Rich(x) & Happy(x) | Smart(x)'),
('AND(PRED[Rich](VAR[x]),OR(PRED[Happy](VAR[x]),'
'PRED[Smart](VAR[x])))',
'Rich(x) & (Happy(x) | Smart(x))'),
# NOT
('FORALL(VAR[x],NOT(PRED[Happy](VAR[x])))',
'\\forall x ~Happy(x)'),
# PRED
('PRED[Rich](CONST[BOB])',
'Rich(BOB)'),
('PRED[Rich](FUN[father](CONST[BOB]))',
'Rich(father(BOB))'),
])
def test_parser(parser, oracle, text):
p = parser(text)
root = p.parse()
assert oracle == str(root)