#include "parser.h"

/* Run a grammar rule through the backtracking wrappers below. Both macros
 * expect a `parser_t* self` to be in scope at the call site. */
#define CCM_TRY(rule) parser_try_new_rule(self, rule)
#define CCM_TRY_LL1(rule, node) \
    parser_try_new_rule_ll1(self, rule, node)

/*
 * Ownership convention used throughout this file: once a node has been
 * handed to node_push_new_child(), it is released through its parent only.
 * NOTE(review): this relies on node_free() releasing a node's children,
 * which is what the cleanup paths of parser_try_new_module,
 * parser_try_new_tuple and parser_try_new_array already depend on --
 * confirm against node_free()'s implementation.
 */

/* Allocate storage for one node; the caller must node_init() it.
 * Aborts on allocation failure instead of handing NULL to node_init(). */
static node_t* parser_alloc_node(void)
{
    node_t* node = malloc(sizeof *node);

    if (!node) {
        abort();
    }

    return node;
}

/* Release a heap-allocated node (node_free + free). NULL is ignored. */
static void parser_drop_node(node_t* node)
{
    if (node) {
        node_free(node);
        free(node);
    }
}

/* Bind the parser to `lexer` and reset its error list and current node. */
void parser_init(parser_t* self, lexer_t* lexer)
{
    assert(self);
    assert(lexer);

    self->lexer = lexer;
    err_init(&self->err);
    self->current = NULL;
}

/* Release the parser's error list. The lexer is not touched. */
void parser_free(parser_t* self)
{
    err_free(&self->err);
}

/*
 * Parse a whole module from the lexer's source.
 *
 * If the cursor has not reached the end of the source afterwards, the
 * characters up to the next whitespace are collected (advancing the cursor)
 * and reported through err_push. Returns whatever the module rule returned,
 * which may be NULL.
 */
node_t* parser_try_new_parse(parser_t* self)
{
    assert(self);

    node_t* res = CCM_TRY(parser_try_new_module);
    const char* source = self->lexer->source;
    /* The source is not modified below, so its length is computed once. */
    ssize_t length = (ssize_t) strlen(source);

    if (self->lexer->cursor < length) {
        str_t s;
        str_init(&s);

        /* <ctype.h> functions require a value representable as
         * unsigned char, hence the cast. */
        while (self->lexer->cursor < length
               && !isspace((unsigned char) source[self->lexer->cursor])) {
            str_push(&s, source[self->lexer->cursor]);
            self->lexer->cursor++;
        }

        err_push(&self->err, res ? res->line : 0,
                 "unexpected end after '%s'", s.value);
        str_free(&s);
    }

    return res;
}

/*
 * Run `rule(self, node)` with backtracking: if the rule returns NULL the
 * lexer is restored to the state it had before the call. Returns NULL
 * without running the rule when an error has already been recorded.
 */
node_t* parser_try_new_rule_ll1(parser_t* self, rule_ll1_t rule, node_t* node)
{
    if (!err_is_ok(&self->err)) {
        return NULL;
    }

    lexer_state_t state = lexer_state(self->lexer);
    node_t* result = rule(self, node);

    if (result) {
        return result;
    }

    lexer_restore(self->lexer, state);
    return NULL;
}

/*
 * Run `rule(self)` with backtracking: if the rule returns NULL the lexer is
 * restored to the state it had before the call. Returns NULL without
 * running the rule when an error has already been recorded.
 */
node_t* parser_try_new_rule(parser_t* self, rule_t rule)
{
    if (!err_is_ok(&self->err)) {
        return NULL;
    }

    lexer_state_t state = lexer_state(self->lexer);
    node_t* result = rule(self);

    if (result) {
        return result;
    }

    lexer_restore(self->lexer, state);
    return NULL;
}

/* Consume the next token if lexer_peek_kind reports it has kind `kind`.
 * Returns 1 when a token was consumed, 0 otherwise. */
int parser_consume(parser_t* self, NodeKind kind)
{
    if (!lexer_peek_kind(self->lexer, kind, 0)) {
        return 0;
    }

    lexer_consume_next(self->lexer, kind);
    return 1;
}

/*
 * Check that `node` exists and has kind `kind`. On mismatch an error is
 * pushed at the lexer's current line and 0 is returned; otherwise 1.
 * Kind names are printed without their "NODE_" prefix.
 */
int parser_ensure(parser_t* self, node_t* node, NodeKind kind)
{
    assert(self);

    if (!node) {
        err_push(&self->err, self->lexer->line,
                 "expected token '%s', got nothing",
                 NodeKindStr[kind] + strlen("NODE_"));
        return 0;
    }

    if (node->kind != kind) {
        err_push(&self->err, self->lexer->line,
                 "expected token '%s', got '%s'",
                 NodeKindStr[kind] + strlen("NODE_"),
                 NodeKindStr[node->kind] + strlen("NODE_"));
        return 0;
    }

    return 1;
}

/*
 * module: one or more expressions, parsed until the cursor reaches the end
 * of the source. Returns a NODE_MODULE node, or NULL as soon as one
 * expression fails to parse.
 */
node_t* parser_try_new_module(parser_t* self)
{
    assert(self);

    node_t* module = parser_alloc_node();
    node_init(module, NODE_MODULE, "", self->lexer->line);

    do {
        node_t* node = CCM_TRY(parser_try_new_expr);

        if (!node) {
            parser_drop_node(module);
            return NULL;
        }

        node_push_new_child(module, node);
        lexer_skip_spaces(self->lexer);
    } while (self->lexer->cursor < (ssize_t) strlen(self->lexer->source));

    return module;
}

/*
 * expr: dispatches on the next token to a declaration, an assertion, a
 * begin block or an if statement; otherwise tries an assignment and falls
 * back to an `or` expression.
 */
node_t* parser_try_new_expr(parser_t* self)
{
    assert(self);

    if (lexer_peek_kind(self->lexer, NODE_VAR, 0)
        || lexer_peek_kind(self->lexer, NODE_CONST, 0)) {
        return CCM_TRY(parser_try_new_decl);
    }

    if (lexer_peek_kind(self->lexer, NODE_ASSERT_EQ, 0)
        || lexer_peek_kind(self->lexer, NODE_ASSERT_NE, 0)) {
        return CCM_TRY(parser_try_new_assert);
    }

    if (lexer_peek_kind(self->lexer, NODE_BEGIN, 0)) {
        return CCM_TRY(parser_try_new_begin);
    }

    if (lexer_peek_kind(self->lexer, NODE_IF, 0)) {
        node_t* node = CCM_TRY(parser_try_new_if);

        if (!node) {
            return NULL;
        }

        /* The END token closing the whole if / else-if chain is consumed
         * here, not in parser_try_new_if. A missing END makes the rule
         * fail, the same way parser_try_new_begin treats it. */
        if (!parser_consume(self, NODE_END)) {
            parser_drop_node(node);
            return NULL;
        }

        return node;
    }

    node_t* assign = CCM_TRY(parser_try_new_assign);

    if (assign) {
        return assign;
    }

    return CCM_TRY(parser_try_new_or);
}

/*
 * if: IF expr block [ELSE (if | block)].
 *
 * Returns a NODE_IF node whose children are the condition, the block and,
 * when present, the else branch. The closing END is left to the caller.
 */
node_t* parser_try_new_if(parser_t* self)
{
    assert(self);

    if (!parser_consume(self, NODE_IF)) {
        return NULL;
    }

    node_t* cond = CCM_TRY(parser_try_new_expr);

    if (!cond) {
        return NULL;
    }

    node_t* block = CCM_TRY(parser_try_new_block);

    if (!block) {
        parser_drop_node(cond);
        return NULL;
    }

    node_t* node = parser_alloc_node();
    node_init(node, NODE_IF, "", cond->line);
    node_push_new_child(node, cond);
    node_push_new_child(node, block);

    if (parser_consume(self, NODE_ELSE)) {
        node_t* next = lexer_peek_kind(self->lexer, NODE_IF, 0)
            ? CCM_TRY(parser_try_new_if)
            : CCM_TRY(parser_try_new_block);

        if (!next) {
            /* cond and block were already pushed into node, so they are
             * released through it and must not be released again here. */
            parser_drop_node(node);
            return NULL;
        }

        node_push_new_child(node, next);
    }

    return node;
}

/* begin: BEGIN block END. Returns a NODE_BEGIN node holding the block. */
node_t* parser_try_new_begin(parser_t* self)
{
    assert(self);

    if (!parser_consume(self, NODE_BEGIN)) {
        return NULL;
    }

    node_t* node = parser_alloc_node();
    node_init(node, NODE_BEGIN, "", self->lexer->line);

    node_t* block = CCM_TRY(parser_try_new_block);

    /* CCM_TRY yields NULL once an error has been recorded; check before
     * pushing so a NULL child never ends up in the tree. */
    if (!block) {
        parser_drop_node(node);
        return NULL;
    }

    node_push_new_child(node, block);

    if (!parser_consume(self, NODE_END)) {
        parser_drop_node(node);
        return NULL;
    }

    return node;
}

/*
 * block: zero or more expressions, collected until one fails to parse.
 * Always returns a NODE_BLOCK node (possibly without children).
 */
node_t* parser_try_new_block(parser_t* self)
{
    assert(self);

    node_t* node = parser_alloc_node();
    node_init(node, NODE_BLOCK, "", self->lexer->line);

    for (;;) {
        node_t* expr = CCM_TRY(parser_try_new_expr);

        if (!expr) {
            break;
        }

        node_push_new_child(node, expr);
    }

    return node;
}

/*
 * assign: (index | IDENT) ASSIGN expr.
 * Returns a NODE_ASSIGN node with the target and the value as children.
 */
node_t* parser_try_new_assign(parser_t* self)
{
    assert(self);

    node_t* target = CCM_TRY(parser_try_new_index);

    if (!target && lexer_peek_kind(self->lexer, NODE_IDENT, 0)) {
        target = lexer_try_new_next(self->lexer);
    }

    if (!target) {
        return NULL;
    }

    if (!parser_consume(self, NODE_ASSIGN)) {
        parser_drop_node(target);
        return NULL;
    }

    node_t* expr = CCM_TRY(parser_try_new_expr);

    if (!expr) {
        parser_drop_node(target);
        return NULL;
    }

    node_t* node = parser_alloc_node();
    node_init(node, NODE_ASSIGN, "", self->lexer->line);
    node_push_new_child(node, target);
    node_push_new_child(node, expr);

    return node;
}

/*
 * decl: (VAR | CONST) IDENT ASSIGN expr.
 * Returns a NODE_VARDECL or NODE_CONSTDECL node with the identifier and the
 * initial value as children.
 */
node_t* parser_try_new_decl(parser_t* self)
{
    assert(self);

    int is_const = 0;

    if (parser_consume(self, NODE_VAR)) {
        is_const = 0;
    } else if (parser_consume(self, NODE_CONST)) {
        is_const = 1;
    } else {
        return NULL;
    }

    if (!lexer_peek_kind(self->lexer, NODE_IDENT, 0)) {
        return NULL;
    }

    node_t* ident = lexer_try_new_next(self->lexer);

    if (!lexer_consume_next(self->lexer, NODE_ASSIGN)) {
        parser_drop_node(ident);
        return NULL;
    }

    node_t* expr = CCM_TRY(parser_try_new_expr);

    if (!expr) {
        parser_drop_node(ident);
        return NULL;
    }

    node_t* node = parser_alloc_node();
    node_init(node, is_const ? NODE_CONSTDECL : NODE_VARDECL, "",
              self->lexer->line);
    node_push_new_child(node, ident);
    node_push_new_child(node, expr);

    return node;
}

/*
 * assert: (ASSERT_EQ | ASSERT_NE) tuple.
 * Returns a node of the matching assert kind with the tuple as only child.
 */
node_t* parser_try_new_assert(parser_t* self)
{
    assert(self);

    NodeKind kind;

    if (parser_consume(self, NODE_ASSERT_EQ)) {
        kind = NODE_ASSERT_EQ;
    } else if (parser_consume(self, NODE_ASSERT_NE)) {
        kind = NODE_ASSERT_NE;
    } else {
        return NULL;
    }

    node_t* node = parser_alloc_node();
    node_init(node, kind, "", self->lexer->line);

    node_t* tuple = CCM_TRY(parser_try_new_tuple);

    if (!tuple) {
        parser_drop_node(node);
        return NULL;
    }

    node_push_new_child(node, tuple);
    return node;
}

/* or: and (OR and)*, folded to the left into NODE_OR nodes. */
node_t* parser_try_new_or(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_and);

    if (!lhs) {
        return NULL;
    }

    while (parser_consume(self, NODE_OR)) {
        node_t* rhs = CCM_TRY(parser_try_new_and);

        /* The operator node is only built once both operands exist, so on
         * failure lhs is still unowned and is released exactly once. */
        if (!rhs) {
            parser_drop_node(lhs);
            return NULL;
        }

        node_t* node = parser_alloc_node();
        node_init(node, NODE_OR, "", lhs->line);
        node_push_new_child(node, lhs);
        node_push_new_child(node, rhs);
        lhs = node;
    }

    return lhs;
}

/* and: eqne (AND eqne)*, folded to the left into NODE_AND nodes. */
node_t* parser_try_new_and(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_eqne);

    if (!lhs) {
        return NULL;
    }

    while (parser_consume(self, NODE_AND)) {
        node_t* rhs = CCM_TRY(parser_try_new_eqne);

        /* The operator node is only built once both operands exist, so on
         * failure lhs is still unowned and is released exactly once. */
        if (!rhs) {
            parser_drop_node(lhs);
            return NULL;
        }

        node_t* node = parser_alloc_node();
        node_init(node, NODE_AND, "", lhs->line);
        node_push_new_child(node, lhs);
        node_push_new_child(node, rhs);
        lhs = node;
    }

    return lhs;
}

/*
 * eqne: cmp [(EQ | NE) cmp]. At most one operator is accepted; the
 * operator token returned by the lexer becomes the parent node.
 */
node_t* parser_try_new_eqne(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_cmp);

    if (!lhs) {
        return NULL;
    }

    if (lexer_peek_kind(self->lexer, NODE_EQ, 0)
        || lexer_peek_kind(self->lexer, NODE_NE, 0)) {
        node_t* node = lexer_try_new_next(self->lexer);
        node_t* rhs = CCM_TRY(parser_try_new_cmp);

        if (!rhs) {
            parser_drop_node(node);
            parser_drop_node(lhs);
            return NULL;
        }

        node_push_new_child(node, lhs);
        node_push_new_child(node, rhs);
        lhs = node;
    }

    return lhs;
}

/*
 * cmp: term [(LT | LE | GT | GE) term]. At most one operator is accepted;
 * the operator token returned by the lexer becomes the parent node.
 */
node_t* parser_try_new_cmp(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_term);

    if (!lhs) {
        return NULL;
    }

    if (lexer_peek_kind(self->lexer, NODE_LT, 0)
        || lexer_peek_kind(self->lexer, NODE_LE, 0)
        || lexer_peek_kind(self->lexer, NODE_GT, 0)
        || lexer_peek_kind(self->lexer, NODE_GE, 0)) {
        node_t* node = lexer_try_new_next(self->lexer);
        node_t* rhs = CCM_TRY(parser_try_new_term);

        if (!rhs) {
            parser_drop_node(node);
            parser_drop_node(lhs);
            return NULL;
        }

        node_push_new_child(node, lhs);
        node_push_new_child(node, rhs);
        lhs = node;
    }

    return lhs;
}

/* term: factor ((ADD | SUB) factor)*, folded to the left. */
node_t* parser_try_new_term(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_factor);

    if (!lhs) {
        return NULL;
    }

    while (lexer_peek_kind(self->lexer, NODE_ADD, 0)
           || lexer_peek_kind(self->lexer, NODE_SUB, 0)) {
        node_t* node = lexer_try_new_next(self->lexer);
        node_t* rhs = CCM_TRY(parser_try_new_factor);

        if (!rhs) {
            parser_drop_node(lhs);
            parser_drop_node(node);
            return NULL;
        }

        node_push_new_child(node, lhs);
        node_push_new_child(node, rhs);
        lhs = node;
    }

    return lhs;
}

/* factor: usub ((MUL | DIV | MOD) usub)*, folded to the left. */
node_t* parser_try_new_factor(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_usub);

    if (!lhs) {
        return NULL;
    }

    while (lexer_peek_kind(self->lexer, NODE_MUL, 0)
           || lexer_peek_kind(self->lexer, NODE_DIV, 0)
           || lexer_peek_kind(self->lexer, NODE_MOD, 0)) {
        node_t* node = lexer_try_new_next(self->lexer);
        node_t* rhs = CCM_TRY(parser_try_new_usub);

        if (!rhs) {
            parser_drop_node(lhs);
            parser_drop_node(node);
            return NULL;
        }

        node_push_new_child(node, lhs);
        node_push_new_child(node, rhs);
        lhs = node;
    }

    return lhs;
}

/* usub: SUB usub | not. A leading SUB yields a one-child NODE_SUB node. */
node_t* parser_try_new_usub(parser_t* self)
{
    assert(self);

    if (!parser_consume(self, NODE_SUB)) {
        return CCM_TRY(parser_try_new_not);
    }

    node_t* node = parser_alloc_node();
    node_init(node, NODE_SUB, "", self->lexer->line);

    node_t* rhs = CCM_TRY(parser_try_new_usub);

    if (!rhs) {
        parser_drop_node(node);
        return NULL;
    }

    node_push_new_child(node, rhs);
    return node;
}

/* not: NOT not | pow. A leading NOT yields a one-child NODE_NOT node. */
node_t* parser_try_new_not(parser_t* self)
{
    assert(self);

    if (!parser_consume(self, NODE_NOT)) {
        return CCM_TRY(parser_try_new_pow);
    }

    node_t* node = parser_alloc_node();
    node_init(node, NODE_NOT, "", self->lexer->line);

    node_t* rhs = CCM_TRY(parser_try_new_not);

    if (!rhs) {
        parser_drop_node(node);
        return NULL;
    }

    node_push_new_child(node, rhs);
    return node;
}

/* pow: in [POW in]. At most one POW operator is accepted. */
node_t* parser_try_new_pow(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_in);

    if (!lhs) {
        return NULL;
    }

    if (lexer_peek_kind(self->lexer, NODE_POW, 0)) {
        if (!lexer_consume_next(self->lexer, NODE_POW)) {
            parser_drop_node(lhs);
            return NULL;
        }

        node_t* rhs = CCM_TRY(parser_try_new_in);

        if (!rhs) {
            parser_drop_node(lhs);
            return NULL;
        }

        node_t* node = parser_alloc_node();
        node_init(node, NODE_POW, "", self->lexer->line);
        node_push_new_child(node, lhs);
        node_push_new_child(node, rhs);
        lhs = node;
    }

    return lhs;
}

/* in: literal [IN literal]. At most one IN operator is accepted. */
node_t* parser_try_new_in(parser_t* self)
{
    assert(self);

    node_t* lhs = CCM_TRY(parser_try_new_literal);

    if (!lhs) {
        return NULL;
    }

    if (parser_consume(self, NODE_IN)) {
        node_t* rhs = CCM_TRY(parser_try_new_literal);

        if (!rhs) {
            parser_drop_node(lhs);
            return NULL;
        }

        node_t* node = parser_alloc_node();
        node_init(node, NODE_IN, "", lhs->line);
        node_push_new_child(node, lhs);
        node_push_new_child(node, rhs);
        lhs = node;
    }

    return lhs;
}

/*
 * literal: index | array | tuple | OPAR expr CPAR | builtin.
 *
 * An opening parenthesis is first tried as a tuple; if that fails it is
 * parsed as a parenthesised expression, whose node is returned directly.
 */
node_t* parser_try_new_literal(parser_t* self)
{
    assert(self);

    node_t* index = CCM_TRY(parser_try_new_index);

    if (index) {
        return index;
    }

    if (lexer_peek_kind(self->lexer, NODE_OSQUARE, 0)) {
        return CCM_TRY(parser_try_new_array);
    }

    if (lexer_peek_kind(self->lexer, NODE_OPAR, 0)) {
        node_t* tuple = CCM_TRY(parser_try_new_tuple);

        if (tuple) {
            return tuple;
        }

        if (!lexer_consume_next(self->lexer, NODE_OPAR)) {
            return NULL;
        }

        node_t* expr = CCM_TRY(parser_try_new_expr);

        if (!lexer_consume_next(self->lexer, NODE_CPAR)) {
            parser_drop_node(expr);
            return NULL;
        }

        return expr;
    }

    return CCM_TRY(parser_try_new_builtin);
}

/*
 * array: OSQUARE [expr (COMMA expr)*] CSQUARE.
 * Returns a NODE_ARRAY node with one child per element.
 */
node_t* parser_try_new_array(parser_t* self)
{
    assert(self);

    node_t* node = parser_alloc_node();
    node_init(node, NODE_ARRAY, "", self->lexer->line);

    if (!parser_consume(self, NODE_OSQUARE)) {
        parser_drop_node(node);
        return NULL;
    }

    int first = 1;

    while (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0)) {
        if (!first) {
            /* The result is not checked: a missing separator between two
             * elements is tolerated. */
            lexer_consume_next(self->lexer, NODE_COMMA);
        }

        node_t* expr = CCM_TRY(parser_try_new_expr);

        if (!expr) {
            parser_drop_node(node);
            return NULL;
        }

        node_push_new_child(node, expr);
        first = 0;
    }

    /* The loop only ends when the next token is CSQUARE. */
    lexer_consume_next(self->lexer, NODE_CSQUARE);
    return node;
}

/*
 * index: (STR | IDENT | tuple | array) OSQUARE [expr (COMMA expr)*] CSQUARE.
 * Returns a NODE_INDEX node whose first child is the indexed target and
 * whose remaining children are the index expressions.
 */
node_t* parser_try_new_index(parser_t* self)
{
    assert(self);

    node_t* target = NULL;

    if (lexer_peek_kind(self->lexer, NODE_STR, 0)
        || lexer_peek_kind(self->lexer, NODE_IDENT, 0)) {
        target = lexer_try_new_next(self->lexer);
    }

    if (!target) {
        target = CCM_TRY(parser_try_new_tuple);
    }

    if (!target) {
        target = CCM_TRY(parser_try_new_array);
    }

    if (!target) {
        return NULL;
    }

    if (!parser_consume(self, NODE_OSQUARE)) {
        parser_drop_node(target);
        return NULL;
    }

    node_t* node = parser_alloc_node();
    node_init(node, NODE_INDEX, "", target->line);
    node_push_new_child(node, target);

    while (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0)) {
        node_t* element = CCM_TRY(parser_try_new_expr);

        if (!element) {
            /* target was already pushed into node, so it is released
             * through it and must not be released again here. */
            parser_drop_node(node);
            return NULL;
        }

        node_push_new_child(node, element);

        if (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0)) {
            /* The result is not checked: a missing separator between two
             * index expressions is tolerated. */
            lexer_consume_next(self->lexer, NODE_COMMA);
        }
    }

    /* The loop only ends when the next token is CSQUARE. */
    lexer_consume_next(self->lexer, NODE_CSQUARE);
    return node;
}

/*
 * tuple: OPAR expr (COMMA expr)+ CPAR.
 * A parenthesised group with a single expression is rejected (NULL) so the
 * caller can parse it as a plain parenthesised expression instead.
 */
node_t* parser_try_new_tuple(parser_t* self)
{
    assert(self);

    node_t* node = parser_alloc_node();
    node_init(node, NODE_TUPLE, "", self->lexer->line);

    if (!parser_consume(self, NODE_OPAR)) {
        parser_drop_node(node);
        return NULL;
    }

    node_t* lhs = CCM_TRY(parser_try_new_expr);

    if (!lhs) {
        parser_drop_node(node);
        return NULL;
    }

    node_push_new_child(node, lhs);

    int contains_more_than_one_expr = 0;

    while (parser_consume(self, NODE_COMMA)) {
        node_t* child = CCM_TRY(parser_try_new_expr);

        if (!child) {
            parser_drop_node(node);
            return NULL;
        }

        node_push_new_child(node, child);
        contains_more_than_one_expr = 1;
    }

    if (!parser_consume(self, NODE_CPAR)) {
        parser_drop_node(node);
        return NULL;
    }

    if (!contains_more_than_one_expr) {
        parser_drop_node(node);
        return NULL;
    }

    return node;
}

/*
 * builtin: NUM | BOOL | STR | IDENT.
 * Takes the next token from the lexer and returns it when it has one of
 * these kinds; any other token is released and NULL is returned.
 */
node_t* parser_try_new_builtin(parser_t* self)
{
    assert(self);

    node_t* node = lexer_try_new_next(self->lexer);

    if (node
        && (node->kind == NODE_NUM
            || node->kind == NODE_BOOL
            || node->kind == NODE_STR
            || node->kind == NODE_IDENT)) {
        return node;
    }

    parser_drop_node(node);
    return NULL;
}