From 5cecb1f1ed9efa34e060b322388347847386ffb8 Mon Sep 17 00:00:00 2001 From: bog Date: Wed, 24 Apr 2024 10:56:13 +0200 Subject: [PATCH] :tada: :sparkles: lexer for formula. --- .gitignore | 1 + conftest.py | 7 ++ doc/grammar.bnf | 13 ++++ fol/lexer.py | 159 ++++++++++++++++++++++++++++++++++++++++ fol/tests/test_lexer.py | 26 +++++++ requirements.txt | 8 ++ sine_patre.py | 2 + 7 files changed, 216 insertions(+) create mode 100644 .gitignore create mode 100644 conftest.py create mode 100644 doc/grammar.bnf create mode 100644 fol/lexer.py create mode 100644 fol/tests/test_lexer.py create mode 100644 requirements.txt create mode 100644 sine_patre.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ed8ebf5 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ \ No newline at end of file diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..f68b808 --- /dev/null +++ b/conftest.py @@ -0,0 +1,7 @@ +import pytest +from fol.lexer import Lexer + + +@pytest.fixture +def lexer(): + return Lexer() diff --git a/doc/grammar.bnf b/doc/grammar.bnf new file mode 100644 index 0000000..fdf195a --- /dev/null +++ b/doc/grammar.bnf @@ -0,0 +1,13 @@ +F ::= EXISTS +EXISTS ::= exists var FORALL | FORALL +FORALL ::= forall var IMP | IMP +IMP ::= OR imp OR +OR ::= AND (or AND)* +AND ::= NOT (and NOT)* +NOT ::= not? PRED +PRED ::= pred opar TERM (comma TERM)* cpar + +TERM ::= +| var +| const +| var opar TERM (comma TERM)* cpar diff --git a/fol/lexer.py b/fol/lexer.py new file mode 100644 index 0000000..fe87702 --- /dev/null +++ b/fol/lexer.py @@ -0,0 +1,159 @@ +class Token: + def __init__(self, name, value=None): + self.name = name + self.value = value + + def __str__(self): + if self.value is not None: + return f'{self.name}[{self.value}]' + return f'{self.name}' + + +class Lexer: + def __init__(self): + self.text = '' + self.cursor = 0 + + def scan(self, text): + self.text = text + self.cursor = 0 + + def next(self): + self.scan_spaces() + + scanners = [ + self.scan_text('AND', '&'), + self.scan_text('OR', '|'), + self.scan_text('IMP', '->'), + self.scan_text('NOT', '~'), + self.scan_text('OPAR', '('), + self.scan_text('CPAR', ')'), + self.scan_text('COMMA', ','), + self.scan_keyword('exists'), + self.scan_keyword('forall'), + self.scan_var, + self.scan_const, + self.scan_pred, + ] + + for scanner in scanners: + tok = scanner() + if tok is not None: + return tok + + return None + + def scan_spaces(self): + while self.cursor < len(self.text) \ + and self.text[self.cursor].isspace(): + self.cursor += 1 + + def scan_var(self): + cursor = self.cursor + value = '' + + while cursor < len(self.text) and self.text[cursor].islower(): + value += self.text[cursor] + cursor += 1 + + if value != '': + self.cursor = cursor + return Token('VAR', value) + + return None + + def scan_const(self): + cursor = self.cursor + value = '' + + while cursor < len(self.text) and self.text[cursor].isupper(): + value += self.text[cursor] + cursor += 1 + + if value != '' and self.is_sep(cursor): + self.cursor = cursor + return Token('CONST', value) + + return None + + def scan_pred(self): + cursor = self.cursor + value = '' + + while cursor < len(self.text) and not self.is_sep(cursor): + value += self.text[cursor] + cursor += 1 + + format = len(value) > 1 and value[0].isupper() \ + and value[1:].islower() + + if len(value) > 0 and format: + self.cursor = cursor + return Token('PRED', value) + + return None + + def scan_keyword(self, name): + def scanner(): + cursor = self.cursor + value = '' + + if cursor >= len(self.text) or self.text[cursor] != '\\': + return None + + cursor += 1 + i = 0 + + while cursor < len(self.text) and i < len(name) \ + and self.text[cursor] == name[i]: + cursor += 1 + value += name[i] + i += 1 + + if not self.is_sep(cursor): + return None + + if value == name: + self.cursor = cursor + return Token(f'{name.upper()}') + + return None + return scanner + + def scan_text(self, name, text, tok_value=None): + def scanner(): + cursor = self.cursor + value = '' + + i = 0 + + while cursor < len(self.text) and i < len(text) \ + and self.text[cursor] == text[i]: + cursor += 1 + value += text[i] + i += 1 + + if value == text: + self.cursor = cursor + return Token(name, tok_value) + + return None + return scanner + + def is_sep(self, idx): + if idx < 0 or idx >= len(self.text): + return True + + return self.text[idx] in [ + ' ', + '\n', + '\t', + '-', + '\\', + '&', + '|', + '~', + '(', + ')', + ',' + ] diff --git a/fol/tests/test_lexer.py b/fol/tests/test_lexer.py new file mode 100644 index 0000000..d502da5 --- /dev/null +++ b/fol/tests/test_lexer.py @@ -0,0 +1,26 @@ +import pytest + + +@pytest.mark.parametrize(['text', 'oracles'], [ + ('hello world', ['VAR[hello]', 'VAR[world]']), + ('HELLO WORLD', ['CONST[HELLO]', 'CONST[WORLD]']), + ('\\exists \\forall', ['EXISTS', 'FORALL']), + ('\\existsX', [None]), + ('\\exists X', ['EXISTS', 'CONST[X]']), + (' A->B ', ['CONST[A]', 'IMP', 'CONST[B]']), + (' x&y ', ['VAR[x]', 'AND', 'VAR[y]']), + (' a|b ', ['VAR[a]', 'OR', 'VAR[b]']), + (' ~s ', ['NOT', 'VAR[s]']), + (' (k) ', ['OPAR', 'VAR[k]', 'CPAR']), + (' a,b ', ['VAR[a]', 'COMMA', 'VAR[b]']), + (' Pr(x) ', ['PRED[Pr]', 'OPAR', 'VAR[x]', 'CPAR']), +]) +def test_token(lexer, text, oracles): + lexer.scan(text) + + for oracle in oracles: + tok = lexer.next() + if tok is None: + assert oracle is None + else: + assert str(tok) == oracle diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..da2ff94 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +flake8==7.0.0 +iniconfig==2.0.0 +mccabe==0.7.0 +packaging==24.0 +pluggy==1.5.0 +pycodestyle==2.11.1 +pyflakes==3.2.0 +pytest==8.1.1 diff --git a/sine_patre.py b/sine_patre.py new file mode 100644 index 0000000..19fc553 --- /dev/null +++ b/sine_patre.py @@ -0,0 +1,2 @@ +if __name__ == '__main__': + print('Sine Patre')