sine-patre/fol/lexer.py

180 lines
4.2 KiB
Python
Raw Permalink Normal View History

2024-04-24 08:56:13 +00:00
class Token:
def __init__(self, name, value=None):
self.name = name
self.value = value
2024-04-24 12:40:43 +00:00
def is_a(self, name):
return self.name == name
2024-04-24 08:56:13 +00:00
def __str__(self):
if self.value is not None:
return f'{self.name}[{self.value}]'
return f'{self.name}'
class Lexer:
def __init__(self):
self.text = ''
self.cursor = 0
def scan(self, text):
self.text = text
self.cursor = 0
def next(self):
self.scan_spaces()
scanners = [
self.scan_text('AND', '&'),
self.scan_text('OR', '|'),
self.scan_text('IMP', '->'),
self.scan_text('NOT', '~'),
self.scan_text('OPAR', '('),
self.scan_text('CPAR', ')'),
self.scan_text('COMMA', ','),
self.scan_keyword('exists'),
self.scan_keyword('forall'),
self.scan_var,
self.scan_const,
self.scan_pred,
]
for scanner in scanners:
tok = scanner()
if tok is not None:
return tok
return None
2024-04-24 12:40:43 +00:00
def peek(self, lookahead=0):
tok = None
cursor = self.cursor
for i in range(lookahead + 1):
tok = self.next()
self.cursor = cursor
return tok
2024-04-24 08:56:13 +00:00
def scan_spaces(self):
while self.cursor < len(self.text) \
and self.text[self.cursor].isspace():
self.cursor += 1
def scan_var(self):
cursor = self.cursor
value = ''
2024-04-25 16:47:45 +00:00
while cursor < len(self.text) and (
self.text[cursor].islower()
or self.text[cursor].isdigit()
):
2024-04-24 08:56:13 +00:00
value += self.text[cursor]
cursor += 1
if value != '':
self.cursor = cursor
return Token('VAR', value)
return None
def scan_const(self):
cursor = self.cursor
value = ''
2024-04-25 16:47:45 +00:00
while cursor < len(self.text) and (
self.text[cursor].isupper()
or self.text[cursor].isdigit()
):
2024-04-24 08:56:13 +00:00
value += self.text[cursor]
cursor += 1
if value != '' and self.is_sep(cursor):
self.cursor = cursor
return Token('CONST', value)
return None
def scan_pred(self):
cursor = self.cursor
value = ''
while cursor < len(self.text) and not self.is_sep(cursor):
value += self.text[cursor]
cursor += 1
format = len(value) > 1 and value[0].isupper() \
and value[1:].islower()
if len(value) > 0 and format:
self.cursor = cursor
return Token('PRED', value)
return None
def scan_keyword(self, name):
def scanner():
cursor = self.cursor
value = ''
if cursor >= len(self.text) or self.text[cursor] != '\\':
return None
cursor += 1
i = 0
while cursor < len(self.text) and i < len(name) \
and self.text[cursor] == name[i]:
cursor += 1
value += name[i]
i += 1
if not self.is_sep(cursor):
return None
if value == name:
self.cursor = cursor
return Token(f'{name.upper()}')
return None
return scanner
def scan_text(self, name, text, tok_value=None):
def scanner():
cursor = self.cursor
value = ''
i = 0
while cursor < len(self.text) and i < len(text) \
and self.text[cursor] == text[i]:
cursor += 1
value += text[i]
i += 1
if value == text:
self.cursor = cursor
return Token(name, tok_value)
return None
return scanner
def is_sep(self, idx):
if idx < 0 or idx >= len(self.text):
return True
return self.text[idx] in [
' ',
'\n',
'\t',
'-',
'\\',
'&',
'|',
'~',
'(',
')',
','
]