ccm/lib/parser.c

747 lines
16 KiB
C
Raw Normal View History

2024-03-18 17:20:40 +00:00
#include "parser.h"
/* Shorthands for the two backtracking helpers defined in this file.
 * Both expand to a call that uses a variable named `self` taken from
 * the scope where the macro is used. */

/* CCM_TRY(rule): run `rule` through parser_try_new_rule. */
#define CCM_TRY(rule) parser_try_new_rule(self, rule)
/* CCM_TRY_LL1(rule, node): run `rule` through parser_try_new_rule_ll1,
 * forwarding `node` as the rule's extra argument. */
#define CCM_TRY_LL1(rule, node) \
parser_try_new_rule_ll1(self, rule, node)
2024-03-18 17:20:40 +00:00
/**
 * Prepare `self` for parsing the token stream produced by `lexer`.
 *
 * The lexer pointer is stored as-is (no copy is made), the error
 * collector is initialised and the `current` field is cleared.
 * Both arguments must be non-NULL; this is enforced with assert.
 */
void parser_init(parser_t* self, lexer_t* lexer)
{
  assert(self != NULL);
  assert(lexer != NULL);

  self->current = NULL;
  self->lexer = lexer;
  err_init(&self->err);
}
/**
 * Release what parser_init set up inside `self`.
 *
 * Only the embedded error collector is released here; the lexer
 * pointer stored by parser_init is left untouched.
 * `self` must be non-NULL (asserted, like every other parser entry
 * point in this file).
 */
void parser_free(parser_t* self)
{
  assert(self);
  err_free(&self->err);
}
/**
 * Parse the whole lexer source as a module.
 *
 * After the module rule has run, any input left beyond the lexer
 * cursor is reported: the characters up to the next whitespace are
 * collected (advancing the cursor) and pushed as an
 * "unexpected end after '...'" error on `self->err`.
 *
 * @return the node produced by the module rule (possibly NULL). The
 *         node is returned even when an error was pushed, so callers
 *         need to inspect `self->err` as well.
 */
node_t* parser_try_new_parse(parser_t* self)
{
  assert(self);

  node_t* res = CCM_TRY(parser_try_new_module);

  /* The source text does not change in this function, so measure it
   * once instead of on every loop iteration. */
  const char* source = self->lexer->source;
  ssize_t const length = (ssize_t) strlen(source);

  if (self->lexer->cursor < length)
  {
    str_t s;
    str_init(&s);

    /* isspace() is only defined for unsigned char values (or EOF);
     * the cast keeps bytes >= 0x80 from being passed as negatives. */
    while (self->lexer->cursor < length
           && !isspace((unsigned char) source[self->lexer->cursor]))
    {
      str_push(&s, source[self->lexer->cursor]);
      self->lexer->cursor++;
    }

    err_push(&self->err, res ? res->line : 0,
             "unexpected end after '%s'", s.value);
    str_free(&s);
  }

  return res;
}
/**
 * Run a rule that takes an already-parsed node, with backtracking.
 *
 * The lexer state is captured before calling `rule(self, node)`; if
 * the rule yields NULL the lexer is put back into that state.
 * Nothing is attempted when `self->err` already holds an error.
 *
 * NOTE(review): on the early "error already set" return, `node` is
 * neither passed to the rule nor released here - confirm who owns it
 * in that case.
 *
 * @return the rule's node, or NULL on failure / pending error.
 */
node_t* parser_try_new_rule_ll1(parser_t* self,
                                rule_ll1_t rule,
                                node_t* node)
{
  if (err_is_ok(&self->err))
  {
    lexer_state_t saved = lexer_state(self->lexer);
    node_t* parsed = rule(self, node);

    if (!parsed)
    {
      /* Rule failed: rewind so another alternative can be tried. */
      lexer_restore(self->lexer, saved);
    }

    return parsed;
  }

  return NULL;
}
2024-03-18 17:20:40 +00:00
/**
 * Run a grammar rule with backtracking.
 *
 * Does nothing and returns NULL when `self->err` already holds an
 * error. Otherwise the lexer state is saved, `rule(self)` is called,
 * and the saved state is restored if the rule returned NULL.
 *
 * @return the node built by `rule`, or NULL.
 */
node_t* parser_try_new_rule(parser_t* self, rule_t rule)
{
  int const healthy = err_is_ok(&self->err);
  if (!healthy)
  {
    return NULL;
  }

  lexer_state_t checkpoint = lexer_state(self->lexer);
  node_t* produced = rule(self);

  if (produced == NULL)
  {
    lexer_restore(self->lexer, checkpoint);
    return NULL;
  }

  return produced;
}
/**
 * Check that `node` exists and has kind `kind`.
 *
 * On mismatch an error is pushed on `self->err` at the lexer's
 * current line. Kind names are taken from NodeKindStr with the
 * leading "NODE_" characters skipped.
 *
 * @return 1 when `node` is non-NULL and of the expected kind,
 *         0 otherwise (after pushing the error).
 */
int parser_ensure(parser_t* self, node_t* node, NodeKind kind)
{
  assert(self);

  if (node && node->kind == kind)
  {
    return 1;
  }

  if (node)
  {
    err_push(&self->err, self->lexer->line,
             "expected token '%s', got '%s'",
             NodeKindStr[kind] + strlen("NODE_"),
             NodeKindStr[node->kind] + strlen("NODE_"));
  }
  else
  {
    err_push(&self->err, self->lexer->line,
             "expected token '%s', got nothing",
             NodeKindStr[kind] + strlen("NODE_"));
  }

  return 0;
}
/**
 * module := expr+
 *
 * Parses expressions one after another, skipping spaces after each,
 * until the lexer cursor reaches the end of the source. At least one
 * expression is always attempted.
 *
 * @return a NODE_MODULE node holding every parsed expression as a
 *         child, or NULL if an expression fails to parse or the node
 *         cannot be allocated (the partial module is released).
 */
node_t* parser_try_new_module(parser_t* self)
{
  assert(self);

  /* Loop-invariant: the source text is not modified while parsing. */
  ssize_t const length = (ssize_t) strlen(self->lexer->source);

  node_t* module = malloc(sizeof *module);
  if (!module)
  {
    return NULL;
  }
  node_init(module, NODE_MODULE, "", self->lexer->line);

  do
  {
    node_t* node = CCM_TRY(parser_try_new_expr);

    if (!node)
    {
      node_free(module);
      free(module);
      return NULL;
    }

    node_push_new_child(module, node);
    lexer_skip_spaces(self->lexer);
  } while (self->lexer->cursor < length);

  return module;
}
/**
 * expr := decl | assert | or
 *
 * The alternative is chosen from the next token: NODE_VAR selects a
 * declaration, NODE_ASSERT_EQ / NODE_ASSERT_NE select an assertion,
 * anything else is parsed as an `or` expression.
 *
 * @return the node of the chosen rule, or NULL if it failed.
 */
node_t* parser_try_new_expr(parser_t* self)
{
  assert(self);

  rule_t chosen = parser_try_new_or;

  if (lexer_peek_kind(self->lexer, NODE_VAR, 0))
  {
    chosen = parser_try_new_decl;
  }
  else if (lexer_peek_kind(self->lexer, NODE_ASSERT_EQ, 0)
           || lexer_peek_kind(self->lexer, NODE_ASSERT_NE, 0))
  {
    chosen = parser_try_new_assert;
  }

  return CCM_TRY(chosen);
}
2024-03-21 11:00:20 +00:00
/**
 * decl := VAR IDENT ASSIGN expr
 *
 * @return a NODE_VARDECL node whose children are the identifier token
 *         and the initialiser expression (in that order), or NULL if
 *         any part is missing or allocation fails. Everything built
 *         so far is released on failure.
 */
node_t* parser_try_new_decl(parser_t* self)
{
  assert(self);

  if (!lexer_consume_next(self->lexer, NODE_VAR))
  {
    return NULL;
  }

  if (!lexer_peek_kind(self->lexer, NODE_IDENT, 0))
  {
    return NULL;
  }

  node_t* ident = lexer_try_new_next(self->lexer);
  if (!ident)
  {
    return NULL;
  }

  node_t* expr = NULL;
  node_t* node = NULL;

  if (!lexer_consume_next(self->lexer, NODE_ASSIGN))
  {
    goto fail;
  }

  expr = CCM_TRY(parser_try_new_expr);
  if (!expr)
  {
    goto fail;
  }

  node = malloc(sizeof *node);
  if (!node)
  {
    goto fail;
  }

  node_init(node, NODE_VARDECL, "", self->lexer->line);
  node_push_new_child(node, ident);
  node_push_new_child(node, expr);
  return node;

fail:
  /* Neither ident nor expr has been attached to a parent yet. */
  if (expr)
  {
    node_free(expr);
    free(expr);
  }
  node_free(ident);
  free(ident);
  return NULL;
}
2024-03-18 17:20:40 +00:00
/**
 * assert := (ASSERT_EQ | ASSERT_NE) tuple
 *
 * @return a node of kind NODE_ASSERT_EQ or NODE_ASSERT_NE with the
 *         tuple as its only child, or NULL when the next token is
 *         neither keyword, the tuple does not parse, or allocation
 *         fails.
 */
node_t* parser_try_new_assert(parser_t* self)
{
  assert(self);

  /* Decide the kind first so nothing is allocated on a mismatch. */
  NodeKind kind;
  if (lexer_peek_kind(self->lexer, NODE_ASSERT_EQ, 0))
  {
    kind = NODE_ASSERT_EQ;
  }
  else if (lexer_peek_kind(self->lexer, NODE_ASSERT_NE, 0))
  {
    kind = NODE_ASSERT_NE;
  }
  else
  {
    return NULL;
  }

  if (!lexer_consume_next(self->lexer, kind))
  {
    return NULL;
  }

  node_t* node = malloc(sizeof *node);
  if (!node)
  {
    return NULL;
  }
  node_init(node, kind, "", self->lexer->line);

  node_t* tuple = CCM_TRY(parser_try_new_tuple);
  if (!tuple)
  {
    node_free(node);
    free(node);
    return NULL;
  }

  node_push_new_child(node, tuple);
  return node;
}
2024-03-19 06:11:28 +00:00
/**
 * or := and (OR and)*
 *
 * Builds a left-nested chain of NODE_OR nodes, each with two
 * children (left operand, right operand) and the line of its left
 * operand.
 *
 * The operator node is only created once the right operand has
 * parsed. The previous version attached `lhs` to the new node first
 * and, on failure, released both `lhs` and the node that already
 * contained it.
 *
 * @return the resulting expression, or NULL when an operand is
 *         missing or allocation fails (everything parsed so far is
 *         released).
 */
node_t* parser_try_new_or(parser_t* self)
{
  assert(self);

  node_t* lhs = CCM_TRY(parser_try_new_and);
  if (!lhs)
  {
    return NULL;
  }

  while (lexer_peek_kind(self->lexer, NODE_OR, 0))
  {
    lexer_consume_next(self->lexer, NODE_OR);

    node_t* rhs = CCM_TRY(parser_try_new_and);
    node_t* node = NULL;
    if (rhs)
    {
      node = malloc(sizeof *node);
    }

    if (!node)
    {
      /* lhs and rhs are still independent trees here, so each one
       * is released exactly once. */
      if (rhs)
      {
        node_free(rhs);
        free(rhs);
      }
      node_free(lhs);
      free(lhs);
      return NULL;
    }

    node_init(node, NODE_OR, "", lhs->line);
    node_push_new_child(node, lhs);
    node_push_new_child(node, rhs);
    lhs = node;
  }

  return lhs;
}
/**
 * and := eqne (AND eqne)*
 *
 * Builds a left-nested chain of NODE_AND nodes, each with two
 * children (left operand, right operand) and the line of its left
 * operand.
 *
 * The operator node is only created once the right operand has
 * parsed. The previous version attached `lhs` to the new node first
 * and, on failure, released both `lhs` and the node that already
 * contained it.
 *
 * @return the resulting expression, or NULL when an operand is
 *         missing or allocation fails (everything parsed so far is
 *         released).
 */
node_t* parser_try_new_and(parser_t* self)
{
  assert(self);

  node_t* lhs = CCM_TRY(parser_try_new_eqne);
  if (!lhs)
  {
    return NULL;
  }

  while (lexer_peek_kind(self->lexer, NODE_AND, 0))
  {
    lexer_consume_next(self->lexer, NODE_AND);

    node_t* rhs = CCM_TRY(parser_try_new_eqne);
    node_t* node = NULL;
    if (rhs)
    {
      node = malloc(sizeof *node);
    }

    if (!node)
    {
      /* lhs and rhs are still independent trees here, so each one
       * is released exactly once. */
      if (rhs)
      {
        node_free(rhs);
        free(rhs);
      }
      node_free(lhs);
      free(lhs);
      return NULL;
    }

    node_init(node, NODE_AND, "", lhs->line);
    node_push_new_child(node, lhs);
    node_push_new_child(node, rhs);
    lhs = node;
  }

  return lhs;
}
2024-03-19 15:25:02 +00:00
/**
 * eqne := cmp ((EQ | NE) cmp)?
 *
 * At most one equality operator is consumed. The operator token
 * itself becomes the parent node, with the left and right operands
 * as its children.
 *
 * @return the comparison alone, the operator node, or NULL when an
 *         operand fails to parse (operator token and left operand
 *         are released in that case).
 */
node_t* parser_try_new_eqne(parser_t* self)
{
  assert(self);

  node_t* left = CCM_TRY(parser_try_new_cmp);
  if (!left)
  {
    return NULL;
  }

  int const has_operator =
    lexer_peek_kind(self->lexer, NODE_EQ, 0)
    || lexer_peek_kind(self->lexer, NODE_NE, 0);

  if (!has_operator)
  {
    return left;
  }

  node_t* op = lexer_try_new_next(self->lexer);
  node_t* right = CCM_TRY(parser_try_new_cmp);

  if (right)
  {
    node_push_new_child(op, left);
    node_push_new_child(op, right);
    return op;
  }

  node_free(op);
  free(op);
  node_free(left);
  free(left);
  return NULL;
}
/**
 * cmp := term ((LT | LE | GT | GE) term)?
 *
 * At most one relational operator is consumed. The operator token
 * becomes the parent node of the two operands.
 *
 * @return the term alone, the operator node, or NULL when an operand
 *         fails to parse (operator token and left operand are
 *         released in that case).
 */
node_t* parser_try_new_cmp(parser_t* self)
{
  assert(self);

  node_t* left = CCM_TRY(parser_try_new_term);
  if (left == NULL)
  {
    return NULL;
  }

  /* Probed in the same order as the grammar lists them. */
  static const NodeKind relational[] = {
    NODE_LT, NODE_LE, NODE_GT, NODE_GE
  };

  int found = 0;
  for (size_t i = 0;
       !found && i < sizeof relational / sizeof relational[0];
       i++)
  {
    found = lexer_peek_kind(self->lexer, relational[i], 0) ? 1 : 0;
  }

  if (!found)
  {
    return left;
  }

  node_t* op = lexer_try_new_next(self->lexer);
  node_t* right = CCM_TRY(parser_try_new_term);

  if (right == NULL)
  {
    node_free(op);
    free(op);
    node_free(left);
    free(left);
    return NULL;
  }

  node_push_new_child(op, left);
  node_push_new_child(op, right);
  return op;
}
2024-03-18 17:20:40 +00:00
/**
 * term := factor ((ADD | SUB) factor)*
 *
 * Left-associative: every operator token read from the lexer becomes
 * the parent of the expression accumulated so far and of the next
 * factor.
 *
 * @return the resulting expression, or NULL when a right operand is
 *         missing (accumulated expression and operator token are
 *         released).
 */
node_t* parser_try_new_term(parser_t* self)
{
  assert(self);

  node_t* acc = CCM_TRY(parser_try_new_factor);
  if (!acc)
  {
    return NULL;
  }

  for (;;)
  {
    int const additive =
      lexer_peek_kind(self->lexer, NODE_ADD, 0)
      || lexer_peek_kind(self->lexer, NODE_SUB, 0);

    if (!additive)
    {
      break;
    }

    node_t* op = lexer_try_new_next(self->lexer);
    node_t* operand = CCM_TRY(parser_try_new_factor);

    if (!operand)
    {
      node_free(acc);
      free(acc);
      node_free(op);
      free(op);
      return NULL;
    }

    node_push_new_child(op, acc);
    node_push_new_child(op, operand);
    acc = op;
  }

  return acc;
}
/**
 * factor := usub ((MUL | DIV | MOD) usub)*
 *
 * Left-associative: every operator token read from the lexer becomes
 * the parent of the expression accumulated so far and of the next
 * operand.
 *
 * @return the resulting expression, or NULL when a right operand is
 *         missing (accumulated expression and operator token are
 *         released).
 */
node_t* parser_try_new_factor(parser_t* self)
{
  assert(self);

  node_t* acc = CCM_TRY(parser_try_new_usub);
  if (acc == NULL)
  {
    return NULL;
  }

  while (1)
  {
    if (!lexer_peek_kind(self->lexer, NODE_MUL, 0)
        && !lexer_peek_kind(self->lexer, NODE_DIV, 0)
        && !lexer_peek_kind(self->lexer, NODE_MOD, 0))
    {
      return acc;
    }

    node_t* op = lexer_try_new_next(self->lexer);
    node_t* operand = CCM_TRY(parser_try_new_usub);

    if (operand == NULL)
    {
      node_free(acc);
      free(acc);
      node_free(op);
      free(op);
      return NULL;
    }

    node_push_new_child(op, acc);
    node_push_new_child(op, operand);
    acc = op;
  }
}
/**
 * usub := SUB usub | not
 *
 * A leading NODE_SUB token produces a NODE_SUB node with a single
 * child (the negated operand); nested negations recurse.
 *
 * @return the negation node, the result of the `not` rule when there
 *         is no leading NODE_SUB, or NULL on a missing operand or
 *         allocation failure.
 */
node_t* parser_try_new_usub(parser_t* self)
{
  assert(self);

  if (!lexer_peek_kind(self->lexer, NODE_SUB, 0))
  {
    return CCM_TRY(parser_try_new_not);
  }

  lexer_consume_next(self->lexer, NODE_SUB);

  node_t* node = malloc(sizeof *node);
  if (!node)
  {
    return NULL;
  }
  /* The line is read right after the operator, before the operand. */
  node_init(node, NODE_SUB, "", self->lexer->line);

  node_t* rhs = CCM_TRY(parser_try_new_usub);
  if (!rhs)
  {
    node_free(node);
    free(node);
    return NULL;
  }

  node_push_new_child(node, rhs);
  return node;
}
/**
 * not := NOT not | pow
 *
 * A leading NODE_NOT token produces a NODE_NOT node with a single
 * child (the negated operand); nested negations recurse.
 *
 * @return the negation node, the result of the `pow` rule when there
 *         is no leading NODE_NOT, or NULL on a missing operand or
 *         allocation failure.
 */
node_t* parser_try_new_not(parser_t* self)
{
  assert(self);

  if (!lexer_peek_kind(self->lexer, NODE_NOT, 0))
  {
    return CCM_TRY(parser_try_new_pow);
  }

  lexer_consume_next(self->lexer, NODE_NOT);

  node_t* node = malloc(sizeof *node);
  if (!node)
  {
    return NULL;
  }
  /* The line is read right after the operator, before the operand. */
  node_init(node, NODE_NOT, "", self->lexer->line);

  node_t* rhs = CCM_TRY(parser_try_new_not);
  if (!rhs)
  {
    node_free(node);
    free(node);
    return NULL;
  }

  node_push_new_child(node, rhs);
  return node;
}
/**
 * pow := in (POW in)?
 *
 * At most one power operator is consumed. The NODE_POW node takes
 * the lexer's line as it is after the right operand has been read.
 *
 * @return the `in` expression alone, a NODE_POW node with two
 *         children (base, exponent), or NULL when the exponent is
 *         missing or allocation fails (operands are released).
 */
node_t* parser_try_new_pow(parser_t* self)
{
  assert(self);

  node_t* lhs = CCM_TRY(parser_try_new_in);
  if (!lhs)
  {
    return NULL;
  }

  if (!lexer_peek_kind(self->lexer, NODE_POW, 0))
  {
    return lhs;
  }

  node_t* rhs = NULL;
  node_t* node = NULL;

  if (!lexer_consume_next(self->lexer, NODE_POW))
  {
    goto fail;
  }

  rhs = CCM_TRY(parser_try_new_in);
  if (!rhs)
  {
    goto fail;
  }

  node = malloc(sizeof *node);
  if (!node)
  {
    goto fail;
  }

  node_init(node, NODE_POW, "", self->lexer->line);
  node_push_new_child(node, lhs);
  node_push_new_child(node, rhs);
  return node;

fail:
  /* Operands have not been attached to a parent at this point. */
  if (rhs)
  {
    node_free(rhs);
    free(rhs);
  }
  node_free(lhs);
  free(lhs);
  return NULL;
}
2024-03-20 15:26:59 +00:00
/**
 * in := literal (IN literal)?
 *
 * At most one NODE_IN operator is consumed. The NODE_IN node carries
 * the line of its left operand.
 *
 * @return the literal alone, a NODE_IN node with two children
 *         (left, right), or NULL when the right operand is missing
 *         or allocation fails (operands are released).
 */
node_t* parser_try_new_in(parser_t* self)
{
  assert(self);

  node_t* lhs = CCM_TRY(parser_try_new_literal);
  if (!lhs)
  {
    return NULL;
  }

  if (!lexer_peek_kind(self->lexer, NODE_IN, 0))
  {
    return lhs;
  }

  lexer_consume_next(self->lexer, NODE_IN);

  node_t* rhs = CCM_TRY(parser_try_new_literal);
  node_t* node = NULL;
  if (rhs)
  {
    node = malloc(sizeof *node);
  }

  if (!node)
  {
    /* Operands have not been attached to a parent at this point. */
    if (rhs)
    {
      node_free(rhs);
      free(rhs);
    }
    node_free(lhs);
    free(lhs);
    return NULL;
  }

  node_init(node, NODE_IN, "", lhs->line);
  node_push_new_child(node, lhs);
  node_push_new_child(node, rhs);
  return node;
}
2024-03-18 17:20:40 +00:00
/**
 * literal := array index?
 *          | tuple index?
 *          | OPAR expr CPAR
 *          | (STR | IDENT) index      (when followed by OSQUARE)
 *          | builtin
 *
 * The alternative is picked from the next token(s). An index suffix
 * is only attempted on a target that actually parsed: when the array
 * rule fails the lexer is rewound to the opening bracket, so without
 * the NULL check the index rule would be entered with a NULL target.
 *
 * @return the parsed node, or NULL when no alternative matches.
 */
node_t* parser_try_new_literal(parser_t* self)
{
  assert(self);

  if (lexer_peek_kind(self->lexer, NODE_OSQUARE, 0))
  {
    node_t* array = CCM_TRY(parser_try_new_array);

    if (array && lexer_peek_kind(self->lexer, NODE_OSQUARE, 0))
    {
      return CCM_TRY_LL1(parser_try_new_index, array);
    }

    return array;
  }

  if (lexer_peek_kind(self->lexer, NODE_OPAR, 0))
  {
    node_t* tuple = CCM_TRY(parser_try_new_tuple);

    if (tuple)
    {
      if (lexer_peek_kind(self->lexer, NODE_OSQUARE, 0))
      {
        return CCM_TRY_LL1(parser_try_new_index, tuple);
      }

      return tuple;
    }

    /* Not a tuple: fall back to a parenthesised expression. */
    if (!lexer_consume_next(self->lexer, NODE_OPAR))
    {
      return NULL;
    }

    node_t* expr = CCM_TRY(parser_try_new_expr);

    if (!lexer_consume_next(self->lexer, NODE_CPAR))
    {
      if (expr)
      {
        node_free(expr);
        free(expr);
      }
      return NULL;
    }

    return expr;
  }

  if ((lexer_peek_kind(self->lexer, NODE_STR, 0)
       || lexer_peek_kind(self->lexer, NODE_IDENT, 0))
      && lexer_peek_kind(self->lexer, NODE_OSQUARE, 1))
  {
    node_t* target = CCM_TRY(parser_try_new_builtin);

    if (!target)
    {
      return NULL;
    }

    return CCM_TRY_LL1(parser_try_new_index, target);
  }

  return CCM_TRY(parser_try_new_builtin);
}
2024-03-20 15:26:59 +00:00
/**
 * array := OSQUARE (expr (COMMA expr)*)? CSQUARE
 *
 * The separating comma is now mandatory: the previous version
 * ignored the result of consuming it, so elements written without a
 * comma between them were silently accepted.
 *
 * @return a NODE_ARRAY node with one child per element (possibly
 *         none), or NULL on a malformed array or allocation failure
 *         (the partial node is released).
 */
node_t* parser_try_new_array(parser_t* self)
{
  assert(self);

  int first = 1;

  node_t* node = malloc(sizeof *node);
  if (!node)
  {
    return NULL;
  }
  node_init(node, NODE_ARRAY, "", self->lexer->line);

  if (!lexer_consume_next(self->lexer, NODE_OSQUARE))
  {
    goto fail;
  }

  while (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0))
  {
    if (!first && !lexer_consume_next(self->lexer, NODE_COMMA))
    {
      goto fail;
    }

    node_t* expr = CCM_TRY(parser_try_new_expr);
    if (!expr)
    {
      goto fail;
    }

    node_push_new_child(node, expr);
    first = 0;
  }

  if (!lexer_consume_next(self->lexer, NODE_CSQUARE))
  {
    goto fail;
  }

  return node;

fail:
  node_free(node);
  free(node);
  return NULL;
}
/**
 * index := OSQUARE (expr (COMMA expr)* COMMA?)? CSQUARE
 *
 * Builds a NODE_INDEX node whose first child is `target` and whose
 * remaining children are the index expressions. The node carries
 * the target's line.
 *
 * A NULL `target` is rejected up front instead of being
 * dereferenced, and a missing comma between two index expressions is
 * now a failure (its consume result used to be ignored).
 *
 * On failure the partially built node is released; as in the
 * original error path, this is relied upon to release `target` too,
 * since it has been attached as a child by then.
 *
 * @return the NODE_INDEX node, or NULL on failure.
 */
node_t* parser_try_new_index(parser_t* self, node_t* target)
{
  assert(self);

  if (!target)
  {
    return NULL;
  }

  node_t* node = malloc(sizeof *node);
  if (!node)
  {
    /* Keep the "target is gone after a failure" behaviour. */
    node_free(target);
    free(target);
    return NULL;
  }
  node_init(node, NODE_INDEX, "", target->line);
  node_push_new_child(node, target);

  if (!lexer_consume_next(self->lexer, NODE_OSQUARE))
  {
    goto fail;
  }

  while (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0))
  {
    node_t* element = CCM_TRY(parser_try_new_expr);
    if (!element)
    {
      goto fail;
    }

    node_push_new_child(node, element);

    if (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0)
        && !lexer_consume_next(self->lexer, NODE_COMMA))
    {
      goto fail;
    }
  }

  if (!lexer_consume_next(self->lexer, NODE_CSQUARE))
  {
    goto fail;
  }

  return node;

fail:
  node_free(node);
  free(node);
  return NULL;
}
2024-03-18 17:20:40 +00:00
/**
 * tuple := OPAR expr (COMMA expr)+ CPAR
 *
 * A parenthesised single expression is deliberately NOT a tuple:
 * NULL is returned for it so the caller can try another rule.
 * The closing parenthesis is now required; its consume result used
 * to be ignored, which accepted an unterminated tuple.
 *
 * @return a NODE_TUPLE node with one child per element (at least
 *         two), or NULL on failure (the partial node is released).
 */
node_t* parser_try_new_tuple(parser_t* self)
{
  assert(self);

  int contains_more_than_one_expr = 0;

  node_t* node = malloc(sizeof *node);
  if (!node)
  {
    return NULL;
  }
  node_init(node, NODE_TUPLE, "", self->lexer->line);

  if (!lexer_consume_next(self->lexer, NODE_OPAR))
  {
    goto fail;
  }

  node_t* lhs = CCM_TRY(parser_try_new_expr);
  if (!lhs)
  {
    goto fail;
  }
  node_push_new_child(node, lhs);

  while (lexer_peek_kind(self->lexer, NODE_COMMA, 0))
  {
    lexer_consume_next(self->lexer, NODE_COMMA);

    node_t* child = CCM_TRY(parser_try_new_expr);
    if (!child)
    {
      goto fail;
    }

    node_push_new_child(node, child);
    contains_more_than_one_expr = 1;
  }

  if (!lexer_consume_next(self->lexer, NODE_CPAR))
  {
    goto fail;
  }

  if (!contains_more_than_one_expr)
  {
    goto fail;
  }

  return node;

fail:
  node_free(node);
  free(node);
  return NULL;
}
/**
 * builtin := NUM | BOOL | STR | IDENT
 *
 * Reads the next token from the lexer and keeps it only when it is
 * one of the four kinds above; any other token is released.
 *
 * @return the token node, or NULL when there is no token or it has
 *         another kind.
 */
node_t* parser_try_new_builtin(parser_t* self)
{
  assert(self);

  node_t* token = lexer_try_new_next(self->lexer);
  if (!token)
  {
    return NULL;
  }

  switch (token->kind)
  {
    case NODE_NUM:
    case NODE_BOOL:
    case NODE_STR:
    case NODE_IDENT:
      return token;

    default:
      break;
  }

  node_free(token);
  free(token);
  return NULL;
}