ccm/lib/parser.c

1053 lines
22 KiB
C

#include "parser.h"
/* Shorthand for parser_try_new_rule(self, rule). Expands to a call that
 * references a variable named `self`, which must be in scope at the
 * point of use. */
#define CCM_TRY(rule) parser_try_new_rule(self, rule)
/* Shorthand for parser_try_new_rule_ll1(self, rule, node). Like CCM_TRY,
 * it relies on a `self` variable being in scope at the point of use. */
#define CCM_TRY_LL1(rule, node) \
parser_try_new_rule_ll1(self, rule, node)
/**
 * Prepare a parser for use.
 *
 * Stores the given lexer pointer (the lexer itself is not copied),
 * initialises the parser's error state with err_init and clears the
 * `current` field. Both arguments must be non-NULL (asserted).
 */
void parser_init(parser_t* self, lexer_t* lexer)
{
    assert(self != NULL);
    assert(lexer != NULL);

    self->current = NULL;
    self->lexer = lexer;
    err_init(&self->err);
}
/**
 * Release the parser's error state through err_free.
 *
 * Nothing else is released here; in particular the lexer pointer stored
 * by parser_init is left untouched.
 */
void parser_free(parser_t* self)
{
    err_free(&(self->err));
}
/**
 * Parse the lexer's whole source as a module.
 *
 * Runs the module rule, then checks whether the lexer cursor reached the
 * end of the source. When input is left over, the run of non-whitespace
 * characters starting at the cursor is collected (advancing the cursor)
 * and reported through err_push, using the module's line when a module
 * was produced and 0 otherwise.
 *
 * @param self parser to run; must be non-NULL.
 * @return the module node, or NULL when the module rule failed. A
 *         non-NULL node can be returned together with a pushed error, so
 *         callers should also inspect self->err.
 */
node_t* parser_try_new_parse(parser_t* self)
{
    assert(self);
    node_t* res = CCM_TRY(parser_try_new_module);

    /* Nothing below writes to the source text, so measure it once instead
     * of calling strlen on every loop iteration. */
    ssize_t const length = (ssize_t) strlen(self->lexer->source);

    if (self->lexer->cursor < length)
    {
        str_t s;
        str_init(&s);

        /* <ctype.h> classifiers require a value representable as
         * unsigned char (or EOF). A plain char may be negative for
         * non-ASCII bytes, which would be undefined behaviour. */
        while (self->lexer->cursor < length
               && !isspace((unsigned char)
                           self->lexer->source[self->lexer->cursor]))
        {
            str_push(&s, self->lexer->source[self->lexer->cursor]);
            self->lexer->cursor++;
        }

        err_push(&self->err, res ? res->line : 0,
                 "unexpected end after '%s'", s.value);
        str_free(&s);
    }

    return res;
}
/**
 * Run a rule that takes an extra node argument, rewinding the lexer when
 * the rule produces nothing.
 *
 * The rule is not invoked at all when err_is_ok reports false for
 * self->err. Otherwise the lexer state is captured with lexer_state
 * before the call and handed back to lexer_restore if the rule returns
 * NULL.
 *
 * @param self parser whose lexer and error state are used.
 * @param rule rule function called as rule(self, node).
 * @param node value forwarded unchanged to the rule.
 * @return the rule's result, or NULL when the rule was skipped or failed.
 */
node_t* parser_try_new_rule_ll1(parser_t* self,
                                rule_ll1_t rule,
                                node_t* node)
{
    if (err_is_ok(&self->err))
    {
        lexer_state_t saved = lexer_state(self->lexer);
        node_t* parsed = rule(self, node);

        if (parsed != NULL)
        {
            return parsed;
        }

        lexer_restore(self->lexer, saved);
    }

    return NULL;
}
/**
 * Run a grammar rule, rewinding the lexer when the rule produces nothing.
 *
 * The rule is not invoked at all when err_is_ok reports false for
 * self->err. Otherwise the lexer state is captured with lexer_state
 * before the call and handed back to lexer_restore if the rule returns
 * NULL. This is what the CCM_TRY macro expands to.
 *
 * @param self parser whose lexer and error state are used.
 * @param rule rule function called as rule(self).
 * @return the rule's result, or NULL when the rule was skipped or failed.
 */
node_t* parser_try_new_rule(parser_t* self, rule_t rule)
{
    if (err_is_ok(&self->err))
    {
        lexer_state_t saved = lexer_state(self->lexer);
        node_t* parsed = rule(self);

        if (parsed != NULL)
        {
            return parsed;
        }

        lexer_restore(self->lexer, saved);
    }

    return NULL;
}
/**
 * Consume the next token only if it has the requested kind.
 *
 * @param self parser whose lexer is inspected.
 * @param kind token kind to look for at lookahead offset 0.
 * @return 1 when lexer_peek_kind matched and lexer_consume_next was
 *         called, 0 when the peek did not match (nothing is consumed).
 */
int parser_consume(parser_t* self, NodeKind kind)
{
    if (lexer_peek_kind(self->lexer, kind, 0))
    {
        lexer_consume_next(self->lexer, kind);
        return 1;
    }

    return 0;
}
/**
 * Check that a node exists and has the expected kind, recording an error
 * on the parser otherwise.
 *
 * Kind names are taken from NodeKindStr with the leading "NODE_" skipped
 * for display. Errors are attached to the lexer's current line.
 *
 * @param self parser receiving the error; must be non-NULL.
 * @param node node to check; may be NULL.
 * @param kind kind the node is required to have.
 * @return 1 when node is non-NULL and node->kind == kind, 0 otherwise.
 */
int parser_ensure(parser_t* self, node_t* node, NodeKind kind)
{
    assert(self);

    size_t const skip = strlen("NODE_");

    if (node != NULL && node->kind == kind)
    {
        return 1;
    }

    if (node == NULL)
    {
        err_push(&self->err, self->lexer->line,
                 "expected token '%s', got nothing",
                 NodeKindStr[kind] + skip);
    }
    else
    {
        err_push(&self->err, self->lexer->line,
                 "expected token '%s', got '%s'",
                 NodeKindStr[kind] + skip,
                 NodeKindStr[node->kind] + skip);
    }

    return 0;
}
/**
 * Parse a module: one or more expressions up to the end of the source.
 *
 * After each expression lexer_skip_spaces is called and the loop stops
 * once the cursor is no longer before the end of the source text. At
 * least one expression must parse; any failing expression discards the
 * whole module.
 *
 * @return a newly allocated NODE_MODULE node holding the parsed
 *         expressions as children, or NULL on failure.
 */
node_t* parser_try_new_module(parser_t* self)
{
    assert(self);

    node_t* root = malloc(sizeof(node_t));
    node_init(root, NODE_MODULE, "", self->lexer->line);

    for (;;)
    {
        node_t* stmt = CCM_TRY(parser_try_new_expr);

        if (stmt == NULL)
        {
            node_free(root);
            free(root);
            return NULL;
        }

        node_push_new_child(root, stmt);
        lexer_skip_spaces(self->lexer);

        if (self->lexer->cursor >= (ssize_t) strlen(self->lexer->source))
        {
            break;
        }
    }

    return root;
}
/**
 * Parse one expression or statement, dispatching on the next token.
 *
 * Tried in this order: declaration (var/const), assertion
 * (assert_eq/assert_ne), begin block, while, for, continue/break (the
 * token node is returned as-is), if, assignment, and finally the `or`
 * rule.
 *
 * @return the parsed node, or NULL when nothing matched.
 */
node_t* parser_try_new_expr(parser_t* self)
{
    assert(self);

    if (lexer_peek_kind(self->lexer, NODE_VAR, 0)
        || lexer_peek_kind(self->lexer, NODE_CONST, 0))
    {
        return CCM_TRY(parser_try_new_decl);
    }

    if (lexer_peek_kind(self->lexer, NODE_ASSERT_EQ, 0)
        || lexer_peek_kind(self->lexer, NODE_ASSERT_NE, 0))
    {
        return CCM_TRY(parser_try_new_assert);
    }

    if (lexer_peek_kind(self->lexer, NODE_BEGIN, 0))
    {
        return CCM_TRY(parser_try_new_begin);
    }

    if (lexer_peek_kind(self->lexer, NODE_WHILE, 0))
    {
        return CCM_TRY(parser_try_new_while);
    }

    if (lexer_peek_kind(self->lexer, NODE_FOR, 0))
    {
        return CCM_TRY(parser_try_new_for);
    }

    if (lexer_peek_kind(self->lexer, NODE_CONTINUE, 0)
        || lexer_peek_kind(self->lexer, NODE_BREAK, 0))
    {
        return lexer_try_new_next(self->lexer);
    }

    if (lexer_peek_kind(self->lexer, NODE_IF, 0))
    {
        node_t* node = CCM_TRY(parser_try_new_if);
        if (!node) { return NULL; }

        /* The closing `end` is consumed here rather than inside
         * parser_try_new_if, so a chain of `else if` branches shares a
         * single terminator. A missing `end` is a failure, matching the
         * while and for rules; previously the result was ignored and an
         * unterminated `if` was accepted. */
        if (!lexer_consume_next(self->lexer, NODE_END))
        {
            node_free(node); free(node);
            return NULL;
        }

        return node;
    }

    node_t* assign = CCM_TRY(parser_try_new_assign);
    if (assign) { return assign; }

    return CCM_TRY(parser_try_new_or);
}
/**
 * Parse `while <expr> <block> end`.
 *
 * @return a newly allocated NODE_WHILE node whose children are the
 *         condition and the block (in that order), or NULL on failure.
 *         On failure every node built so far is released.
 */
node_t* parser_try_new_while(parser_t* self)
{
    assert(self);

    if (!parser_consume(self, NODE_WHILE))
    {
        return NULL;
    }

    node_t* expr = CCM_TRY(parser_try_new_expr);
    if (!expr) { return NULL; }

    node_t* block = CCM_TRY(parser_try_new_block);
    if (!block || !parser_consume(self, NODE_END))
    {
        /* The block may have parsed even though `end` is missing; it
         * must be released too or it leaks. */
        if (block)
        {
            node_free(block); free(block);
        }
        node_free(expr); free(expr);
        return NULL;
    }

    node_t* node = malloc(sizeof(node_t));
    node_init(node, NODE_WHILE, "", expr->line);
    node_push_new_child(node, expr);
    node_push_new_child(node, block);
    return node;
}
/**
 * Parse `for <ident> in <expr> <block> end`.
 *
 * @return a newly allocated NODE_FOR node whose children are the
 *         identifier, the iterated expression and the block (in that
 *         order), or NULL on failure. On failure every node built so far
 *         is released.
 */
node_t* parser_try_new_for(parser_t* self)
{
    assert(self);

    if (!parser_consume(self, NODE_FOR))
    {
        return NULL;
    }

    node_t* ident = NULL;
    if (lexer_peek_kind(self->lexer, NODE_IDENT, 0))
    {
        ident = lexer_try_new_next(self->lexer);
    }
    if (!ident) { return NULL; }

    if (!parser_consume(self, NODE_IN))
    {
        node_free(ident); free(ident);
        return NULL;
    }

    node_t* target = CCM_TRY(parser_try_new_expr);
    if (!target)
    {
        node_free(ident); free(ident);
        return NULL;
    }

    node_t* block = CCM_TRY(parser_try_new_block);
    if (!block || !parser_consume(self, NODE_END))
    {
        /* Release everything parsed so far: the block (when present) and
         * the target used to leak on this path. */
        if (block)
        {
            node_free(block); free(block);
        }
        node_free(target); free(target);
        node_free(ident); free(ident);
        return NULL;
    }

    node_t* node = malloc(sizeof(node_t));
    node_init(node, NODE_FOR, "", ident->line);
    node_push_new_child(node, ident);
    node_push_new_child(node, target);
    node_push_new_child(node, block);
    return node;
}
/**
 * Parse `if <expr> <block>` with an optional `else` part, which is either
 * another `if` (parsed recursively) or a plain block.
 *
 * The closing `end` is NOT consumed by this rule.
 *
 * @return a newly allocated NODE_IF node whose children are the
 *         condition, the block and, when present, the else part (in that
 *         order); NULL on failure.
 */
node_t* parser_try_new_if(parser_t* self)
{
    assert(self);

    if (!parser_consume(self, NODE_IF))
    {
        return NULL;
    }

    node_t* cond = CCM_TRY(parser_try_new_expr);
    if (!cond) { return NULL; }

    node_t* block = CCM_TRY(parser_try_new_block);
    if (!block)
    {
        node_free(cond); free(cond);
        return NULL;
    }

    /* Parse the optional else part BEFORE building the NODE_IF node.
     * While cond and block are not yet anyone's children, each can be
     * released exactly once on failure. Previously they were attached to
     * the node first and then freed both directly and through the node. */
    node_t* next = NULL;
    if (parser_consume(self, NODE_ELSE))
    {
        if (lexer_peek_kind(self->lexer, NODE_IF, 0))
        {
            next = CCM_TRY(parser_try_new_if);
        }
        else
        {
            next = CCM_TRY(parser_try_new_block);
        }

        if (!next)
        {
            node_free(block); free(block);
            node_free(cond); free(cond);
            return NULL;
        }
    }

    node_t* node = malloc(sizeof(node_t));
    node_init(node, NODE_IF, "", cond->line);
    node_push_new_child(node, cond);
    node_push_new_child(node, block);
    if (next)
    {
        node_push_new_child(node, next);
    }
    return node;
}
/**
 * Parse `begin <block> end`.
 *
 * @return a newly allocated NODE_BEGIN node with the block as its only
 *         child, or NULL on failure.
 */
node_t* parser_try_new_begin(parser_t* self)
{
    assert(self);

    if (!lexer_peek_kind(self->lexer, NODE_BEGIN, 0))
    {
        return NULL;
    }
    lexer_consume_next(self->lexer, NODE_BEGIN);

    node_t* node = malloc(sizeof(node_t));
    node_init(node, NODE_BEGIN, "", self->lexer->line);

    node_t* block = CCM_TRY(parser_try_new_block);

    /* Validate before attaching: CCM_TRY can yield NULL, and a NULL
     * pointer must never be stored as a child. Until it is attached, the
     * block is released on its own. */
    if (!block || !lexer_peek_kind(self->lexer, NODE_END, 0))
    {
        if (block)
        {
            node_free(block); free(block);
        }
        node_free(node); free(node);
        return NULL;
    }

    node_push_new_child(node, block);
    lexer_consume_next(self->lexer, NODE_END);
    return node;
}
/**
 * Parse a block: zero or more expressions, collected until the
 * expression rule stops producing nodes.
 *
 * @return a newly allocated NODE_BLOCK node (possibly without children).
 *         This function itself never returns NULL.
 */
node_t* parser_try_new_block(parser_t* self)
{
    assert(self);

    node_t* body = malloc(sizeof(node_t));
    node_init(body, NODE_BLOCK, "", self->lexer->line);

    node_t* stmt = CCM_TRY(parser_try_new_expr);
    while (stmt != NULL)
    {
        node_push_new_child(body, stmt);
        stmt = CCM_TRY(parser_try_new_expr);
    }

    return body;
}
/**
 * Parse an assignment: an index expression or an identifier, followed by
 * the assign token and an expression.
 *
 * @return a newly allocated NODE_ASSIGN node whose children are the
 *         target and the value (in that order), or NULL when the input
 *         is not an assignment. The target is released on failure.
 */
node_t* parser_try_new_assign(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_index);
    if (lhs == NULL)
    {
        /* Not an index expression: fall back to a bare identifier. */
        if (!lexer_peek_kind(self->lexer, NODE_IDENT, 0))
        {
            return NULL;
        }
        lhs = lexer_try_new_next(self->lexer);
        if (lhs == NULL)
        {
            return NULL;
        }
    }

    if (lexer_peek_kind(self->lexer, NODE_ASSIGN, 0))
    {
        lexer_consume_next(self->lexer, NODE_ASSIGN);

        node_t* value = CCM_TRY(parser_try_new_expr);
        if (value != NULL)
        {
            node_t* assign = malloc(sizeof(node_t));
            node_init(assign, NODE_ASSIGN, "", self->lexer->line);
            node_push_new_child(assign, lhs);
            node_push_new_child(assign, value);
            return assign;
        }
    }

    /* Either the assign token or the right-hand side is missing. */
    node_free(lhs);
    free(lhs);
    return NULL;
}
/**
 * Parse a declaration: `var` or `const`, an identifier, the assign token
 * and an initialiser expression.
 *
 * @return a newly allocated NODE_VARDECL (for `var`) or NODE_CONSTDECL
 *         (for `const`) node whose children are the identifier and the
 *         initialiser (in that order), or NULL on failure.
 */
node_t* parser_try_new_decl(parser_t* self)
{
    assert(self);

    int is_const;
    if (lexer_peek_kind(self->lexer, NODE_VAR, 0))
    {
        lexer_consume_next(self->lexer, NODE_VAR);
        is_const = 0;
    }
    else if (lexer_peek_kind(self->lexer, NODE_CONST, 0))
    {
        lexer_consume_next(self->lexer, NODE_CONST);
        is_const = 1;
    }
    else
    {
        return NULL;
    }

    if (!lexer_peek_kind(self->lexer, NODE_IDENT, 0))
    {
        return NULL;
    }
    node_t* name = lexer_try_new_next(self->lexer);

    /* The initialiser is only attempted once the assign token has been
     * consumed. */
    node_t* value = NULL;
    if (lexer_consume_next(self->lexer, NODE_ASSIGN))
    {
        value = CCM_TRY(parser_try_new_expr);
    }

    if (value == NULL)
    {
        node_free(name);
        free(name);
        return NULL;
    }

    node_t* decl = malloc(sizeof(node_t));
    if (is_const)
    {
        node_init(decl, NODE_CONSTDECL, "", self->lexer->line);
    }
    else
    {
        node_init(decl, NODE_VARDECL, "", self->lexer->line);
    }
    node_push_new_child(decl, name);
    node_push_new_child(decl, value);
    return decl;
}
/**
 * Parse an assertion: the assert_eq or assert_ne token followed by a
 * tuple.
 *
 * @return a newly allocated NODE_ASSERT_EQ or NODE_ASSERT_NE node with
 *         the tuple as its only child, or NULL on failure.
 */
node_t* parser_try_new_assert(parser_t* self)
{
    assert(self);

    int is_eq = lexer_peek_kind(self->lexer, NODE_ASSERT_EQ, 0) ? 1 : 0;
    if (!is_eq && !lexer_peek_kind(self->lexer, NODE_ASSERT_NE, 0))
    {
        return NULL;
    }

    node_t* check = malloc(sizeof(node_t));
    if (is_eq)
    {
        lexer_consume_next(self->lexer, NODE_ASSERT_EQ);
        node_init(check, NODE_ASSERT_EQ, "", self->lexer->line);
    }
    else
    {
        lexer_consume_next(self->lexer, NODE_ASSERT_NE);
        node_init(check, NODE_ASSERT_NE, "", self->lexer->line);
    }

    node_t* args = CCM_TRY(parser_try_new_tuple);
    if (args == NULL)
    {
        node_free(check);
        free(check);
        return NULL;
    }

    node_push_new_child(check, args);
    return check;
}
/**
 * Parse a left-associative chain of `and` expressions joined by the `or`
 * token.
 *
 * @return the single operand when no `or` follows; otherwise a tree of
 *         newly allocated NODE_OR nodes, each with the left and right
 *         operands as children. NULL on failure.
 */
node_t* parser_try_new_or(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_and);
    if (!lhs) { return NULL; }

    while (lexer_peek_kind(self->lexer, NODE_OR, 0))
    {
        lexer_consume_next(self->lexer, NODE_OR);

        /* Parse the right operand BEFORE creating the operator node, so
         * that on failure lhs is still unattached and is released exactly
         * once. Previously lhs was attached first and then freed both
         * directly and through the operator node. */
        node_t* rhs = CCM_TRY(parser_try_new_and);
        if (!rhs)
        {
            node_free(lhs);
            free(lhs);
            return NULL;
        }

        node_t* node = malloc(sizeof(node_t));
        node_init(node, NODE_OR, "", lhs->line);
        node_push_new_child(node, lhs);
        node_push_new_child(node, rhs);
        lhs = node;
    }

    return lhs;
}
/**
 * Parse a left-associative chain of equality expressions joined by the
 * `and` token.
 *
 * @return the single operand when no `and` follows; otherwise a tree of
 *         newly allocated NODE_AND nodes, each with the left and right
 *         operands as children. NULL on failure.
 */
node_t* parser_try_new_and(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_eqne);
    if (!lhs) { return NULL; }

    while (lexer_peek_kind(self->lexer, NODE_AND, 0))
    {
        lexer_consume_next(self->lexer, NODE_AND);

        /* Parse the right operand BEFORE creating the operator node, so
         * that on failure lhs is still unattached and is released exactly
         * once. Previously lhs was attached first and then freed both
         * directly and through the operator node. */
        node_t* rhs = CCM_TRY(parser_try_new_eqne);
        if (!rhs)
        {
            node_free(lhs);
            free(lhs);
            return NULL;
        }

        node_t* node = malloc(sizeof(node_t));
        node_init(node, NODE_AND, "", lhs->line);
        node_push_new_child(node, lhs);
        node_push_new_child(node, rhs);
        lhs = node;
    }

    return lhs;
}
/**
 * Parse a comparison expression optionally followed by one `==`/`!=`
 * style operator (NODE_EQ or NODE_NE) and a second comparison expression.
 *
 * The operator token node returned by the lexer becomes the parent of
 * both operands. At most one operator is handled (no chaining).
 *
 * @return the operand alone, the operator node with two children, or
 *         NULL on failure.
 */
node_t* parser_try_new_eqne(parser_t* self)
{
    assert(self);

    node_t* left = CCM_TRY(parser_try_new_cmp);
    if (left == NULL)
    {
        return NULL;
    }

    if (!lexer_peek_kind(self->lexer, NODE_EQ, 0)
        && !lexer_peek_kind(self->lexer, NODE_NE, 0))
    {
        return left;
    }

    node_t* op = lexer_try_new_next(self->lexer);
    node_t* right = CCM_TRY(parser_try_new_cmp);
    if (right == NULL)
    {
        node_free(op);
        free(op);
        node_free(left);
        free(left);
        return NULL;
    }

    node_push_new_child(op, left);
    node_push_new_child(op, right);
    return op;
}
/**
 * Parse a term optionally followed by one ordering operator (NODE_LT,
 * NODE_LE, NODE_GT or NODE_GE) and a second term.
 *
 * The operator token node returned by the lexer becomes the parent of
 * both operands. At most one operator is handled (no chaining).
 *
 * @return the operand alone, the operator node with two children, or
 *         NULL on failure.
 */
node_t* parser_try_new_cmp(parser_t* self)
{
    assert(self);

    node_t* left = CCM_TRY(parser_try_new_term);
    if (left == NULL)
    {
        return NULL;
    }

    int has_operator = lexer_peek_kind(self->lexer, NODE_LT, 0)
                    || lexer_peek_kind(self->lexer, NODE_LE, 0)
                    || lexer_peek_kind(self->lexer, NODE_GT, 0)
                    || lexer_peek_kind(self->lexer, NODE_GE, 0);
    if (!has_operator)
    {
        return left;
    }

    node_t* op = lexer_try_new_next(self->lexer);
    node_t* right = CCM_TRY(parser_try_new_term);
    if (right == NULL)
    {
        node_free(op);
        free(op);
        node_free(left);
        free(left);
        return NULL;
    }

    node_push_new_child(op, left);
    node_push_new_child(op, right);
    return op;
}
/**
 * Parse a left-associative chain of factors joined by NODE_ADD or
 * NODE_SUB operators.
 *
 * Each operator token node returned by the lexer becomes the parent of
 * the tree built so far and of the next factor.
 *
 * @return the resulting tree, or NULL on failure.
 */
node_t* parser_try_new_term(parser_t* self)
{
    assert(self);

    node_t* acc = CCM_TRY(parser_try_new_factor);

    while (acc != NULL
           && (lexer_peek_kind(self->lexer, NODE_ADD, 0)
               || lexer_peek_kind(self->lexer, NODE_SUB, 0)))
    {
        node_t* op = lexer_try_new_next(self->lexer);
        node_t* operand = CCM_TRY(parser_try_new_factor);

        if (operand == NULL)
        {
            node_free(acc);
            free(acc);
            node_free(op);
            free(op);
            return NULL;
        }

        node_push_new_child(op, acc);
        node_push_new_child(op, operand);
        acc = op;
    }

    return acc;
}
/**
 * Parse a left-associative chain of unary-minus expressions joined by
 * NODE_MUL, NODE_DIV or NODE_MOD operators.
 *
 * Each operator token node returned by the lexer becomes the parent of
 * the tree built so far and of the next operand.
 *
 * @return the resulting tree, or NULL on failure.
 */
node_t* parser_try_new_factor(parser_t* self)
{
    assert(self);

    node_t* acc = CCM_TRY(parser_try_new_usub);
    if (acc == NULL)
    {
        return NULL;
    }

    for (;;)
    {
        if (!lexer_peek_kind(self->lexer, NODE_MUL, 0)
            && !lexer_peek_kind(self->lexer, NODE_DIV, 0)
            && !lexer_peek_kind(self->lexer, NODE_MOD, 0))
        {
            break;
        }

        node_t* op = lexer_try_new_next(self->lexer);
        node_t* operand = CCM_TRY(parser_try_new_usub);

        if (operand == NULL)
        {
            node_free(acc);
            free(acc);
            node_free(op);
            free(op);
            return NULL;
        }

        node_push_new_child(op, acc);
        node_push_new_child(op, operand);
        acc = op;
    }

    return acc;
}
/**
 * Parse a unary minus: a NODE_SUB token followed (recursively) by
 * another unary-minus expression. Without a leading NODE_SUB the `not`
 * rule is tried instead.
 *
 * @return a newly allocated NODE_SUB node with a single child, the
 *         result of the `not` rule, or NULL on failure.
 */
node_t* parser_try_new_usub(parser_t* self)
{
    assert(self);

    if (!lexer_peek_kind(self->lexer, NODE_SUB, 0))
    {
        return CCM_TRY(parser_try_new_not);
    }

    lexer_consume_next(self->lexer, NODE_SUB);

    node_t* negation = malloc(sizeof(node_t));
    node_init(negation, NODE_SUB, "", self->lexer->line);

    node_t* operand = CCM_TRY(parser_try_new_usub);
    if (operand == NULL)
    {
        node_free(negation);
        free(negation);
        return NULL;
    }

    node_push_new_child(negation, operand);
    return negation;
}
/**
 * Parse a logical negation: a NODE_NOT token followed (recursively) by
 * another `not` expression. Without a leading NODE_NOT the power rule is
 * tried instead.
 *
 * @return a newly allocated NODE_NOT node with a single child, the
 *         result of the power rule, or NULL on failure.
 */
node_t* parser_try_new_not(parser_t* self)
{
    assert(self);

    if (!lexer_peek_kind(self->lexer, NODE_NOT, 0))
    {
        return CCM_TRY(parser_try_new_pow);
    }

    lexer_consume_next(self->lexer, NODE_NOT);

    node_t* negation = malloc(sizeof(node_t));
    node_init(negation, NODE_NOT, "", self->lexer->line);

    node_t* operand = CCM_TRY(parser_try_new_not);
    if (operand == NULL)
    {
        node_free(negation);
        free(negation);
        return NULL;
    }

    node_push_new_child(negation, operand);
    return negation;
}
/**
 * Parse an `in` expression optionally followed by one NODE_POW token and
 * a second `in` expression. At most one operator is handled.
 *
 * @return the operand alone, a newly allocated NODE_POW node whose
 *         children are base and exponent (in that order), or NULL on
 *         failure.
 */
node_t* parser_try_new_pow(parser_t* self)
{
    assert(self);

    node_t* base = CCM_TRY(parser_try_new_in);
    if (base == NULL)
    {
        return NULL;
    }

    if (!lexer_peek_kind(self->lexer, NODE_POW, 0))
    {
        return base;
    }

    /* The exponent is only attempted once the operator was consumed. */
    node_t* exponent = NULL;
    if (lexer_consume_next(self->lexer, NODE_POW))
    {
        exponent = CCM_TRY(parser_try_new_in);
    }

    if (exponent == NULL)
    {
        node_free(base);
        free(base);
        return NULL;
    }

    node_t* power = malloc(sizeof(node_t));
    node_init(power, NODE_POW, "", self->lexer->line);
    node_push_new_child(power, base);
    node_push_new_child(power, exponent);
    return power;
}
/**
 * Parse a literal optionally followed by one NODE_IN token and a second
 * literal. At most one operator is handled.
 *
 * @return the literal alone, a newly allocated NODE_IN node whose
 *         children are the left and right literals (in that order), or
 *         NULL on failure.
 */
node_t* parser_try_new_in(parser_t* self)
{
    assert(self);

    node_t* needle = CCM_TRY(parser_try_new_literal);
    if (needle == NULL)
    {
        return NULL;
    }

    if (!lexer_peek_kind(self->lexer, NODE_IN, 0))
    {
        return needle;
    }
    lexer_consume_next(self->lexer, NODE_IN);

    node_t* haystack = CCM_TRY(parser_try_new_literal);
    if (haystack == NULL)
    {
        node_free(needle);
        free(needle);
        return NULL;
    }

    node_t* membership = malloc(sizeof(node_t));
    node_init(membership, NODE_IN, "", needle->line);
    node_push_new_child(membership, needle);
    node_push_new_child(membership, haystack);
    return membership;
}
/**
 * Parse a literal-level expression.
 *
 * Tried in this order: an index expression; an array when the next token
 * is NODE_OSQUARE; when the next token is NODE_OPAR, a tuple and then a
 * parenthesised expression; otherwise a builtin token.
 *
 * For a parenthesised expression the inner expression node itself is
 * returned (no wrapper node is created).
 *
 * @return the parsed node, or NULL on failure.
 */
node_t* parser_try_new_literal(parser_t* self)
{
    assert(self);

    node_t* indexed = CCM_TRY(parser_try_new_index);
    if (indexed != NULL)
    {
        return indexed;
    }

    if (lexer_peek_kind(self->lexer, NODE_OSQUARE, 0))
    {
        return CCM_TRY(parser_try_new_array);
    }

    if (!lexer_peek_kind(self->lexer, NODE_OPAR, 0))
    {
        return CCM_TRY(parser_try_new_builtin);
    }

    node_t* tuple = CCM_TRY(parser_try_new_tuple);
    if (tuple != NULL)
    {
        return tuple;
    }

    /* Not a tuple: treat it as `( expr )`. */
    if (!lexer_consume_next(self->lexer, NODE_OPAR))
    {
        return NULL;
    }

    node_t* inner = CCM_TRY(parser_try_new_expr);
    if (lexer_consume_next(self->lexer, NODE_CPAR))
    {
        return inner;
    }

    if (inner != NULL)
    {
        node_free(inner);
        free(inner);
    }
    return NULL;
}
/**
 * Parse an array literal: NODE_OSQUARE, zero or more expressions, then
 * NODE_CSQUARE.
 *
 * Before every element except the first, a NODE_COMMA consume is
 * attempted; its result is not checked.
 *
 * @return a newly allocated NODE_ARRAY node with one child per element,
 *         or NULL on failure (the partially built node is released).
 */
node_t* parser_try_new_array(parser_t* self)
{
    assert(self);

    node_t* list = malloc(sizeof(node_t));
    node_init(list, NODE_ARRAY, "", self->lexer->line);

    if (lexer_peek_kind(self->lexer, NODE_OSQUARE, 0))
    {
        lexer_consume_next(self->lexer, NODE_OSQUARE);

        for (int is_first = 1;
             !lexer_peek_kind(self->lexer, NODE_CSQUARE, 0);
             is_first = 0)
        {
            if (!is_first)
            {
                lexer_consume_next(self->lexer, NODE_COMMA);
            }

            node_t* item = CCM_TRY(parser_try_new_expr);
            if (item == NULL)
            {
                goto reject;
            }
            node_push_new_child(list, item);
        }

        if (lexer_peek_kind(self->lexer, NODE_CSQUARE, 0))
        {
            lexer_consume_next(self->lexer, NODE_CSQUARE);
            return list;
        }
    }

reject:
    node_free(list);
    free(list);
    return NULL;
}
/**
 * Parse an index expression: a target followed by NODE_OSQUARE, zero or
 * more index expressions, and NODE_CSQUARE.
 *
 * The target is a string or identifier token, or failing that a tuple,
 * or failing that an array. After each index expression that is not
 * directly followed by NODE_CSQUARE, a NODE_COMMA consume is attempted;
 * its result is not checked.
 *
 * Ownership: once the target has been pushed into the NODE_INDEX node it
 * is released through that node only. This relies on node_free releasing
 * pushed children, as the tuple, array and module rules in this file
 * already do.
 *
 * @return a newly allocated NODE_INDEX node whose first child is the
 *         target and whose remaining children are the index expressions,
 *         or NULL on failure.
 */
node_t* parser_try_new_index(parser_t* self)
{
    assert(self);

    node_t* target = NULL;
    if (lexer_peek_kind(self->lexer, NODE_STR, 0)
        || lexer_peek_kind(self->lexer, NODE_IDENT, 0))
    {
        target = lexer_try_new_next(self->lexer);
    }
    if (!target)
    {
        target = CCM_TRY(parser_try_new_tuple);
    }
    if (!target)
    {
        target = CCM_TRY(parser_try_new_array);
    }
    if (target == NULL)
    {
        return NULL;
    }

    if (!lexer_peek_kind(self->lexer, NODE_OSQUARE, 0))
    {
        /* Not attached to anything yet: release it directly. */
        node_free(target); free(target);
        return NULL;
    }
    lexer_consume_next(self->lexer, NODE_OSQUARE);

    node_t* node = malloc(sizeof(node_t));
    node_init(node, NODE_INDEX, "", target->line);
    node_push_new_child(node, target);

    while (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0))
    {
        node_t* element = CCM_TRY(parser_try_new_expr);
        if (!element)
        {
            /* target is a child of node now; freeing it separately as
             * well would release it twice. */
            node_free(node); free(node);
            return NULL;
        }
        node_push_new_child(node, element);

        if (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0))
        {
            lexer_consume_next(self->lexer, NODE_COMMA);
        }
    }

    if (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0))
    {
        node_free(node); free(node);
        return NULL;
    }
    lexer_consume_next(self->lexer, NODE_CSQUARE);

    return node;
}
/**
 * Parse a tuple: NODE_OPAR, two or more comma-separated expressions,
 * then NODE_CPAR.
 *
 * A parenthesised single expression is consumed and then rejected (NULL
 * is returned). When this rule is run through CCM_TRY, the lexer state
 * is handed back to lexer_restore in that case.
 *
 * @return a newly allocated NODE_TUPLE node with one child per element,
 *         or NULL on failure (the partially built node is released).
 */
node_t* parser_try_new_tuple(parser_t* self)
{
    assert(self);

    node_t* tuple = malloc(sizeof(node_t));
    node_init(tuple, NODE_TUPLE, "", self->lexer->line);

    node_t* item = NULL;
    int has_several = 0;

    if (!lexer_peek_kind(self->lexer, NODE_OPAR, 0))
    {
        goto reject;
    }
    lexer_consume_next(self->lexer, NODE_OPAR);

    item = CCM_TRY(parser_try_new_expr);
    if (item == NULL)
    {
        goto reject;
    }
    node_push_new_child(tuple, item);

    while (lexer_peek_kind(self->lexer, NODE_COMMA, 0))
    {
        lexer_consume_next(self->lexer, NODE_COMMA);

        item = CCM_TRY(parser_try_new_expr);
        if (item == NULL)
        {
            goto reject;
        }
        node_push_new_child(tuple, item);
        has_several = 1;
    }

    if (!lexer_peek_kind(self->lexer, NODE_CPAR, 0))
    {
        goto reject;
    }
    lexer_consume_next(self->lexer, NODE_CPAR);

    if (has_several)
    {
        return tuple;
    }

reject:
    node_free(tuple);
    free(tuple);
    return NULL;
}
/**
 * Read the next token and accept it when it is a number, boolean, string
 * or identifier.
 *
 * @return the token node produced by lexer_try_new_next when its kind is
 *         NODE_NUM, NODE_BOOL, NODE_STR or NODE_IDENT; NULL otherwise. A
 *         token of any other kind is released before returning.
 */
node_t* parser_try_new_builtin(parser_t* self)
{
    assert(self);

    node_t* token = lexer_try_new_next(self->lexer);
    if (token == NULL)
    {
        return NULL;
    }

    switch (token->kind)
    {
    case NODE_NUM:
    case NODE_BOOL:
    case NODE_STR:
    case NODE_IDENT:
        return token;
    default:
        break;
    }

    node_free(token);
    free(token);
    return NULL;
}