From fe0767bfe576fa7f031fff3285282be54b456c98 Mon Sep 17 00:00:00 2001 From: bog Date: Tue, 19 Mar 2024 08:56:41 +0100 Subject: [PATCH] :sparkles: tuples in and index operations. --- doc/grammar.bnf | 6 ++- lib/bytecode.h | 2 +- lib/ccm.c | 75 +++++++++++++++++++++++++++++++++++++ lib/ccm.h | 3 ++ lib/commons.h | 3 ++ lib/compiler.c | 21 ++++++++++- lib/exec.c | 44 +++++++++++++++++++++- lib/lexer.c | 3 ++ lib/module.c | 5 +++ lib/node.h | 3 +- lib/parser.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++- lib/parser.h | 5 +++ tests/tuple.ccm | 25 +++++++++++++ 13 files changed, 284 insertions(+), 9 deletions(-) create mode 100644 tests/tuple.ccm diff --git a/doc/grammar.bnf b/doc/grammar.bnf index 1b4ce13..c80f9bf 100644 --- a/doc/grammar.bnf +++ b/doc/grammar.bnf @@ -1,8 +1,9 @@ MODULE ::= EXPR* EXPR ::= -| OR +| IN | ASSERT ASSERT ::= (assert_eq|assert_ne) tuple +IN ::= OR (in OR)? OR ::= AND (or AND)* AND ::= TERM (and TERM)* TERM ::= FACTOR ((add|sub) FACTOR)* @@ -13,7 +14,10 @@ POW ::= LITERAL (pow LITERAL)? LITERAL ::= | BUILTIN | TUPLE +| INDEX | opar EXPR cpar +INDEX ::= +| TUPLE osquare (EXPR (comma EXPR)*)? csquare TUPLE ::= | opar EXPR+ cpar BUILTIN ::= num | bool diff --git a/lib/bytecode.h b/lib/bytecode.h index 4c64b26..23ae1f1 100644 --- a/lib/bytecode.h +++ b/lib/bytecode.h @@ -8,7 +8,7 @@ G(OP_PUSH), G(OP_POP), \ G(OP_ADD), G(OP_SUB), G(OP_USUB), G(OP_MUL), \ G(OP_DIV), G(OP_POW), G(OP_MOD), G(OP_MK_TUPLE), \ G(OP_ASSERT_EQ), G(OP_ASSERT_NE), G(OP_BRF), G(OP_BR), \ -G(OP_NOT), G(OP_NOP) +G(OP_NOT), G(OP_IN), G(OP_INDEX) CCM_ENUM_H(Opcode, OPCODES); diff --git a/lib/ccm.c b/lib/ccm.c index 6976743..5a0012f 100644 --- a/lib/ccm.c +++ b/lib/ccm.c @@ -34,6 +34,51 @@ size_t ccm_str(ccm_t* self, char* buffer, size_t size) return sz; } +CCM ccm_from_value(ccm_t* self, value_t* value) +{ + assert(self); + assert(value); + + switch (value->type) + { + case TYPE_NUM: { + return ccm_to_num(self, value->data.num, value->line); + } break; + + case TYPE_BOOLEAN: { + return ccm_to_boolean( + self, + value->data.boolean, + value->line + ); + } break; + + case TYPE_TUPLE: { + vec_t * vec = malloc(sizeof(vec_t)); + vec_init(vec); + for (size_t i=0; idata.tuple->size; i++) + { + vec_push(vec, value_new_clone( + value->data.tuple->data[i] + )); + } + + return ccm_to_tuple( + self, + vec, + value->line + ); + } break; + + default: { + fprintf(stderr, + "cannot convert value of type <%s> to CCM\n", + TypeStr[value->type]); + abort(); + } break; + } +} + int ccm_is_num(ccm_t* self, CCM value) { value_t const* val = self->values.data[value]; @@ -141,6 +186,36 @@ CCM ccm_top(ccm_t* self, int depth) return (CCM) self->stack.data[self->stack.size - 1 - depth]; } +void ccm_in(ccm_t* self) +{ + assert(self); + CCM ccm_rhs = ccm_pop(self); + CCM ccm_lhs = ccm_pop(self); + + int line = ((value_t*) self->values.data[ccm_lhs])->line; + + if (!ccm_is_tuple(self, ccm_rhs)) + { + err_push(&self->err, line, "cannot test membership"); + return; + } + + + vec_t* rhs = ccm_from_tuple(self, ccm_rhs); + value_t* val = ((value_t*)self->values.data[ccm_lhs]); + + for (size_t i=0; isize; i++) + { + if (value_equals(val, rhs->data[i])) + { + ccm_push(self, ccm_to_boolean(self, 1, line)); + return; + } + } + + ccm_push(self, ccm_to_boolean(self, 0, line)); +} + void ccm_add(ccm_t* self) { assert(self); diff --git a/lib/ccm.h b/lib/ccm.h index 632e55b..f33c7c9 100644 --- a/lib/ccm.h +++ b/lib/ccm.h @@ -19,6 +19,8 @@ void ccm_free(ccm_t* self); size_t ccm_str(ccm_t* self, char* buffer, size_t size); +CCM ccm_from_value(ccm_t* self, value_t* value); + int ccm_is_num(ccm_t* self, CCM value); double ccm_from_num(ccm_t* self, CCM value); CCM ccm_to_num(ccm_t* self, double value, int line); @@ -35,6 +37,7 @@ void ccm_push(ccm_t* self, CCM value); CCM ccm_pop(ccm_t* self); CCM ccm_top(ccm_t* self, int depth); +void ccm_in(ccm_t* self); void ccm_add(ccm_t* self); void ccm_sub(ccm_t* self); void ccm_usub(ccm_t* self); diff --git a/lib/commons.h b/lib/commons.h index d28c60f..08a67a7 100644 --- a/lib/commons.h +++ b/lib/commons.h @@ -13,6 +13,9 @@ #define CCM_ENUM_ENUM(X) X #define CCM_ENUM_STR(X) #X +#define CCM_DEBUG(TARGET, FUN) \ +{char m[CCM_STRLEN]; FUN(TARGET, m, CCM_STRLEN), printf("%s\n", m);} + #define CCM_ENUM_H(PREFIX, DEF) \ typedef enum { DEF(CCM_ENUM_ENUM) } PREFIX ; \ extern char const* PREFIX ## Str [] diff --git a/lib/compiler.c b/lib/compiler.c index dbca146..142bd6f 100644 --- a/lib/compiler.c +++ b/lib/compiler.c @@ -30,6 +30,23 @@ void compiler_compile(compiler_t* self, switch (node->kind) { + case NODE_INDEX: { + for (size_t i=0; ichildren.size; i++) + { + size_t k = node->children.size - 1 - i; + compiler_compile(self, node->children.data[k], prog); + } + + prog_add_instr(prog, OP_INDEX, node->children.size - 1); + } break; + + case NODE_IN: { + compiler_compile(self, node->children.data[0], prog); + compiler_compile(self, node->children.data[1], prog); + + prog_add_instr(prog, OP_IN, CCM_NO_PARAM); + } break; + case NODE_AND: { compiler_compile_and(self, node, prog); } break; @@ -203,8 +220,8 @@ void compiler_compile_and(compiler_t* self, // FALSE size_t false_point = prog_add_instr( - prog, - OP_PUSH, + prog, + OP_PUSH, prog_add_new_constant( prog, ccm_to_boolean(ccm, 0, node->line) diff --git a/lib/exec.c b/lib/exec.c index 9227af4..a918c43 100644 --- a/lib/exec.c +++ b/lib/exec.c @@ -44,12 +44,52 @@ void exec_instr(exec_t* self, switch (op) { + case OP_INDEX: { + CCM ccm_target = ccm_pop(ccm); + vec_t* target = ccm_from_tuple(ccm, ccm_target); + value_t* result = NULL; + + for (int i=0; isize + idx; + } + + if (idx < 0 || idx >= (ssize_t) target->size) + { + assert(target->size > 0); + err_push(&self->err, ((value_t*)target->data[0])->line, + "index out of bounds"); + return; + } + + value_t* val = target->data[idx]; + + if (i == param - 1) + { + result = val; + } + else + { + target = val->data.tuple; + } + } + + ccm_push(ccm, ccm_from_value(ccm, result)); + self->pc++; + } break; + case OP_NOT: { ccm_not(ccm); self->pc++; } break; - - case OP_NOP: { + + case OP_IN: { + ccm_in(ccm); self->pc++; } break; diff --git a/lib/lexer.c b/lib/lexer.c index 0495664..91cede6 100644 --- a/lib/lexer.c +++ b/lib/lexer.c @@ -24,6 +24,8 @@ void lexer_init(lexer_t* self) lexer_add_text(self, "/", NODE_DIV); lexer_add_text(self, "^", NODE_POW); lexer_add_text(self, "%", NODE_MOD); + lexer_add_text(self, "[", NODE_OSQUARE); + lexer_add_text(self, "]", NODE_CSQUARE); } void lexer_free(lexer_t* self) @@ -164,6 +166,7 @@ node_t* lexer_try_new_next(lexer_t* self) CCM_KEYWORD("and", NODE_AND, 0); CCM_KEYWORD("or", NODE_OR, 0); CCM_KEYWORD("not", NODE_NOT, 0); + CCM_KEYWORD("in", NODE_IN, 0); if (self->cursor < (ssize_t) strlen(self->source)) { diff --git a/lib/module.c b/lib/module.c index 2e8578f..f85e42e 100644 --- a/lib/module.c +++ b/lib/module.c @@ -46,6 +46,11 @@ int module_load(module_t* self, char const* path) node_t* ast = parser_try_new_parse(&parser); + if (!ast) + { + goto free_parser; + } + if (!err_is_ok(&lexer.err) || !err_is_ok(&parser.err)) { err_print_stack_trace(&lexer.err); diff --git a/lib/node.h b/lib/node.h index a47030d..abb679d 100644 --- a/lib/node.h +++ b/lib/node.h @@ -10,7 +10,8 @@ G(NODE_NUM), G(NODE_OPAR), G(NODE_CPAR), \ G(NODE_POW), G(NODE_ADD), G(NODE_SUB), G(NODE_MUL), \ G(NODE_DIV), G(NODE_MOD), G(NODE_COMMA), G(NODE_TUPLE), \ G(NODE_ASSERT_EQ), G(NODE_ASSERT_NE), G(NODE_BOOL), \ -G(NODE_AND), G(NODE_OR), G(NODE_NOT) +G(NODE_AND), G(NODE_OR), G(NODE_NOT), G(NODE_IN), \ +G(NODE_OSQUARE), G(NODE_CSQUARE), G(NODE_INDEX) CCM_ENUM_H(NodeKind, NODE_KIND); diff --git a/lib/parser.c b/lib/parser.c index c98ede6..186cfab 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -1,6 +1,8 @@ #include "parser.h" #define CCM_TRY(rule) parser_try_new_rule(self, rule) +#define CCM_TRY_LL1(rule, node) \ +parser_try_new_rule_ll1(self, rule, node) void parser_init(parser_t* self, lexer_t* lexer) { @@ -23,6 +25,25 @@ node_t* parser_try_new_parse(parser_t* self) return CCM_TRY(parser_try_new_module); } +node_t* parser_try_new_rule_ll1(parser_t* self, + rule_ll1_t rule, + node_t* node) +{ + if (!err_is_ok(&self->err)) + { + return NULL; + } + + lexer_state_t state = lexer_state(self->lexer); + + node_t* result = rule(self, node); + if (result) { return result; } + + lexer_restore(self->lexer, state); + + return (void*) NULL; +} + node_t* parser_try_new_rule(parser_t* self, rule_t rule) { if (!err_is_ok(&self->err)) @@ -99,7 +120,7 @@ node_t* parser_try_new_expr(parser_t* self) return CCM_TRY(parser_try_new_assert); } - return CCM_TRY(parser_try_new_or); + return CCM_TRY(parser_try_new_in); } node_t* parser_try_new_assert(parser_t* self) @@ -138,6 +159,36 @@ node_t* parser_try_new_assert(parser_t* self) return node; } +node_t* parser_try_new_in(parser_t* self) +{ + assert(self); + node_t* lhs = CCM_TRY(parser_try_new_or); + if (!lhs) { return NULL; } + + if (lexer_peek_kind(self->lexer, NODE_IN, 0)) + { + lexer_consume_next(self->lexer, NODE_IN); + node_t* node = malloc(sizeof(node_t)); + node_init(node, NODE_IN, "", lhs->line); + + node_t* rhs = CCM_TRY(parser_try_new_or); + + if (!rhs) + { + node_free(lhs); free(lhs); + node_free(node); free(node); + + return NULL; + } + + node_push_new_child(node, lhs); + node_push_new_child(node, rhs); + lhs = node; + } + + return lhs; +} + node_t* parser_try_new_or(parser_t* self) { assert(self); @@ -368,6 +419,11 @@ node_t* parser_try_new_literal(parser_t* self) if (tuple) { + if (lexer_peek_kind(self->lexer, NODE_OSQUARE, 0)) + { + return CCM_TRY_LL1(parser_try_new_index, tuple); + } + return tuple; } @@ -395,6 +451,44 @@ node_t* parser_try_new_literal(parser_t* self) return CCM_TRY(parser_try_new_builtin); } +node_t* parser_try_new_index(parser_t* self, node_t* target) +{ + assert(self); + + lexer_consume_next(self->lexer, NODE_OSQUARE); + + node_t* node = malloc(sizeof(node_t)); + node_init(node, NODE_INDEX, "", target->line); + + node_push_new_child(node, target); + + while (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0)) + { + node_t* element = CCM_TRY(parser_try_new_expr); + if (!element) + { + node_free(node); free(node); + return NULL; + } + + node_push_new_child(node, element); + + if (!lexer_peek_kind(self->lexer, NODE_CSQUARE, 0)) + { + lexer_consume_next(self->lexer, NODE_COMMA); + } + } + + lexer_consume_next(self->lexer, NODE_CSQUARE); + + return node; +} + +node_t* parser_try_new_expr_lst(parser_t* self) +{ + assert(self); +} + node_t* parser_try_new_tuple(parser_t* self) { assert(self); @@ -456,7 +550,7 @@ node_t* parser_try_new_builtin(parser_t* self) node_t* node = lexer_try_new_next(self->lexer); - if (node && + if (node && ( node->kind == NODE_NUM || node->kind == NODE_BOOL diff --git a/lib/parser.h b/lib/parser.h index 036791a..f1b0e17 100644 --- a/lib/parser.h +++ b/lib/parser.h @@ -13,17 +13,20 @@ typedef struct { } parser_t; typedef node_t* (*rule_t)(parser_t*); +typedef node_t* (*rule_ll1_t)(parser_t*, node_t*); void parser_init(parser_t* self, lexer_t* lexer); void parser_free(parser_t* self); node_t* parser_try_new_parse(parser_t* self); node_t* parser_try_new_rule(parser_t* self, rule_t rule); +node_t* parser_try_new_rule_ll1(parser_t* self, rule_ll1_t rule, node_t* node); int parser_ensure(parser_t* self, node_t* node, NodeKind kind); node_t* parser_try_new_module(parser_t* self); node_t* parser_try_new_expr(parser_t* self); node_t* parser_try_new_assert(parser_t* self); +node_t* parser_try_new_in(parser_t* self); node_t* parser_try_new_or(parser_t* self); node_t* parser_try_new_and(parser_t* self); node_t* parser_try_new_term(parser_t* self); @@ -32,6 +35,8 @@ node_t* parser_try_new_usub(parser_t* self); node_t* parser_try_new_not(parser_t* self); node_t* parser_try_new_pow(parser_t* self); node_t* parser_try_new_literal(parser_t* self); +node_t* parser_try_new_index(parser_t* self, node_t* target); +node_t* parser_try_new_expr_lst(parser_t* self); node_t* parser_try_new_tuple(parser_t* self); node_t* parser_try_new_builtin(parser_t* self); diff --git a/tests/tuple.ccm b/tests/tuple.ccm new file mode 100644 index 0000000..3110461 --- /dev/null +++ b/tests/tuple.ccm @@ -0,0 +1,25 @@ +assert_eq ((3, 2), (3, 2)) +assert_ne ((3, 2), (3, 4)) + +# IN KEYWORD +# ========== +assert_eq (true, 7 in (2, 7, 6)) +assert_eq (false, 7 in (2, 2, 6)) + +assert_eq (true, (2, 3) in (1, (2, 3))) +assert_eq (false, (2, 3) in (1, (2, 4))) + +# INDEX +# ===== +assert_eq (2, (2, 7)[0]) +assert_eq (7, (2, 7)[1]) +assert_eq (9, (2, 7, 9)[-1]) +assert_eq (7, (2, 7, 9)[-2]) +assert_eq (2, (2, 7, 9)[-3]) +assert_eq (1, ((1, 2), (3, 4))[0, 0]) +assert_eq (2, ((1, 2), (3, 4))[0, 1]) +assert_eq (3, ((1, 2), (3, 4))[1, 0]) +assert_eq (4, ((1, 2), (3, 4))[1, 1]) + +assert_eq (false, 23 in (2, (7, 23, 2), 9)) +assert_eq (true, 23 in (2, (7, 23, 2), 9)[1])