From a19071bf1d9c3bf55949b4a53aebd424d016cf61 Mon Sep 17 00:00:00 2001 From: bog Date: Thu, 21 Mar 2024 12:00:20 +0100 Subject: [PATCH] :sparkles: vars declaration. --- doc/grammar.bnf | 6 +- lib/CMakeLists.txt | 1 + lib/bytecode.h | 3 +- lib/ccm.c | 24 ++++++++ lib/ccm.h | 6 +- lib/compiler.c | 21 +++++++ lib/compiler.h | 1 + lib/exec.c | 34 +++++++++-- lib/lexer.c | 52 ++++++++++++++++ lib/lexer.h | 4 ++ lib/module.c | 5 +- lib/module.h | 2 + lib/node.h | 4 +- lib/parser.c | 62 ++++++++++++++++++- lib/parser.h | 1 + lib/sym.c | 146 +++++++++++++++++++++++++++++++++++++++++++++ lib/sym.h | 42 +++++++++++++ tests/str.ccm | 2 +- tests/var.ccm | 19 ++++++ 19 files changed, 420 insertions(+), 15 deletions(-) create mode 100644 lib/sym.c create mode 100644 lib/sym.h create mode 100644 tests/var.ccm diff --git a/doc/grammar.bnf b/doc/grammar.bnf index 182ac89..89533b3 100644 --- a/doc/grammar.bnf +++ b/doc/grammar.bnf @@ -2,6 +2,8 @@ MODULE ::= EXPR* EXPR ::= | OR | ASSERT +| DECL +DECL ::= var ident assign EXPR ASSERT ::= (assert_eq|assert_ne) tuple OR ::= AND (or AND)* AND ::= EQNE (and EQNE)* @@ -21,7 +23,7 @@ LITERAL ::= | opar EXPR cpar ARRAY ::= osquare (EXPR (comma EXPR)*)? csquare INDEX ::= -| (TUPLE|str|ARRAY) osquare (EXPR (comma EXPR)*)? csquare +| (TUPLE|str|ident|ARRAY) osquare (EXPR (comma EXPR)*)? csquare TUPLE ::= | opar EXPR+ cpar -BUILTIN ::= num | bool | str +BUILTIN ::= num | bool | str | ident diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index f48f33b..4e4cf15 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -17,6 +17,7 @@ add_library(ccm_lib prog.c compiler.c exec.c + sym.c ) set_property(TARGET ccm_lib PROPERTY C_STANDARD 99) diff --git a/lib/bytecode.h b/lib/bytecode.h index fc3904d..5b401b4 100644 --- a/lib/bytecode.h +++ b/lib/bytecode.h @@ -9,7 +9,8 @@ G(OP_ADD), G(OP_SUB), G(OP_USUB), G(OP_MUL), \ G(OP_DIV), G(OP_POW), G(OP_MOD), G(OP_MK_TUPLE), \ G(OP_ASSERT_EQ), G(OP_ASSERT_NE), G(OP_BRF), G(OP_BR), \ G(OP_NOT), G(OP_IN), G(OP_INDEX), G(OP_EQ), G(OP_LT), \ -G(OP_GT), G(OP_MK_ARRAY) +G(OP_GT), G(OP_MK_ARRAY), G(OP_LOCAL_STORE), \ +G(OP_LOCAL_LOAD) CCM_ENUM_H(Opcode, OPCODES); diff --git a/lib/ccm.c b/lib/ccm.c index 0b05175..bf31fbe 100644 --- a/lib/ccm.c +++ b/lib/ccm.c @@ -4,11 +4,14 @@ void ccm_init(ccm_t* self) { assert(self); + sym_init(&self->sym); vec_init(&self->values); vec_init(&self->stack); vec_init(&self->globals); err_init(&self->err); self->id_counter = 0; + + sym_open_scope(&self->sym); } void ccm_entry_free(ccm_entry_t* self) @@ -20,6 +23,7 @@ void ccm_entry_free(ccm_entry_t* self) void ccm_free(ccm_t* self) { assert(self); + sym_free(&self->sym); err_free(&self->err); vec_free_elements(&self->values, (void*) ccm_entry_free); vec_free(&self->values); @@ -332,6 +336,26 @@ CCM ccm_top(ccm_t* self, int depth) return (CCM) self->stack.data[self->stack.size - 1 - depth]; } +void ccm_set(ccm_t* self, CCM value, int addr) +{ + assert(self); + while (addr >= (ssize_t) self->stack.size) + { + ccm_push(self, 0); + } + + self->stack.data[addr] = (void*) value; +} + +CCM ccm_get(ccm_t* self, int addr) +{ + assert(self); + assert(addr < (ssize_t) self->stack.size); + assert(addr >= 0); + + return (CCM) self->stack.data[addr]; +} + void ccm_in(ccm_t* self) { assert(self); diff --git a/lib/ccm.h b/lib/ccm.h index f28c778..702101c 100644 --- a/lib/ccm.h +++ b/lib/ccm.h @@ -5,6 +5,7 @@ #include "vec.h" #include "value.h" #include "err.h" +#include "sym.h" typedef size_t CCM; @@ -14,6 +15,7 @@ typedef struct { } ccm_entry_t; typedef struct { + sym_t sym; err_t err; vec_t values; vec_t stack; @@ -62,6 +64,8 @@ CCM ccm_to_ref(ccm_t* self, size_t value, int line); void ccm_push(ccm_t* self, CCM value); CCM ccm_pop(ccm_t* self); CCM ccm_top(ccm_t* self, int depth); +void ccm_set(ccm_t* self, CCM value, int addr); +CCM ccm_get(ccm_t* self, int addr); void ccm_in(ccm_t* self); void ccm_add(ccm_t* self); @@ -80,4 +84,4 @@ void ccm_le(ccm_t* self); void ccm_gt(ccm_t* self); void ccm_ge(ccm_t* self); -#endif +#endif diff --git a/lib/compiler.c b/lib/compiler.c index 208bb2c..2a092c4 100644 --- a/lib/compiler.c +++ b/lib/compiler.c @@ -5,6 +5,7 @@ void compiler_init(compiler_t* self, module_t* module) assert(self); err_init(&self->err); self->module = module; + self->loc_counter = 0; } void compiler_free(compiler_t* self) @@ -27,9 +28,29 @@ void compiler_compile(compiler_t* self, } ccm_t* ccm = &self->module->ccm; + sym_t* sym = &self->module->sym; switch (node->kind) { + case NODE_VARDECL: { + node_t const* ident = node->children.data[0]; + node_t* expr = node->children.data[1]; + + compiler_compile(self, expr, prog); + int status = + sym_declare(sym, ident->value, self->loc_counter); + assert(status); + prog_add_instr(prog, OP_LOCAL_STORE, self->loc_counter); + self->loc_counter++; + } break; + + case NODE_IDENT: { + char const* name = node->value; + sym_entry_t* entry = sym_try_get_value(sym, name); + assert(entry); + prog_add_instr(prog, OP_LOCAL_LOAD, entry->local_addr); + } break; + case NODE_ARRAY: { for (size_t i=0; ichildren.size; i++) { diff --git a/lib/compiler.h b/lib/compiler.h index c0c306d..e3176c5 100644 --- a/lib/compiler.h +++ b/lib/compiler.h @@ -11,6 +11,7 @@ typedef struct { module_t* module; err_t err; + int loc_counter; } compiler_t; void compiler_init(compiler_t* self, module_t* module); diff --git a/lib/exec.c b/lib/exec.c index 5ba24d4..41c3448 100644 --- a/lib/exec.c +++ b/lib/exec.c @@ -44,6 +44,22 @@ void exec_instr(exec_t* self, switch (op) { + case OP_LOCAL_STORE: { + CCM value = ccm_pop(ccm); + int addr = param; + + ccm_set(ccm, value, addr); + ccm_push(ccm, value); + self->pc++; + } break; + + case OP_LOCAL_LOAD: { + int addr = param; + CCM value = ccm_get(ccm, addr); + ccm_push(ccm, value); + self->pc++; + } break; + case OP_MK_ARRAY: { vec_t* vec = malloc(sizeof(vec_t)); vec_init(vec); @@ -165,9 +181,11 @@ void exec_instr(exec_t* self, CCM ccm_result = ccm_from_value(ccm, result); if (ccm_is_array(ccm, ccm_result)) { - CCM ref = ccm_to_ref(ccm, - ccm_store_global(ccm, ccm_result), - result->line); + CCM ref = ccm_to_ref( + ccm, + ccm_store_global(ccm, ccm_result), + result->line + ); ccm_push(ccm, ref); } else { ccm_push(ccm, ccm_result); @@ -210,9 +228,6 @@ void exec_instr(exec_t* self, case OP_ASSERT_NE: case OP_ASSERT_EQ: { - // TODO: bug here - // [1] == [1] -> OK - // assert_eq ([1], [1]) -> Failed CCM val = ccm_pop(ccm); vec_t* values = ccm_from_tuple(ccm, val); assert(values->size == 2); @@ -255,6 +270,13 @@ void exec_instr(exec_t* self, "assertion failed: <%s> %s <%s>", lhs, operator, rhs ); + } else { + ccm_push(ccm, + ccm_to_boolean( + ccm, 1, + ((value_t*) values->data[0])->line + ) + ); } self->pc++; diff --git a/lib/lexer.c b/lib/lexer.c index d730d3e..641e661 100644 --- a/lib/lexer.c +++ b/lib/lexer.c @@ -32,6 +32,7 @@ void lexer_init(lexer_t* self) lexer_add_text(self, "%", NODE_MOD); lexer_add_text(self, "[", NODE_OSQUARE); lexer_add_text(self, "]", NODE_CSQUARE); + lexer_add_text(self, "=", NODE_ASSIGN); } void lexer_free(lexer_t* self) @@ -170,6 +171,7 @@ node_t* lexer_try_new_next(lexer_t* self) } } + CCM_KEYWORD("var", NODE_VAR, 0); CCM_KEYWORD("assert_eq", NODE_ASSERT_EQ, 0); CCM_KEYWORD("assert_ne", NODE_ASSERT_NE, 0); CCM_KEYWORD("true", NODE_BOOL, 1); @@ -179,7 +181,13 @@ node_t* lexer_try_new_next(lexer_t* self) CCM_KEYWORD("not", NODE_NOT, 0); CCM_KEYWORD("in", NODE_IN, 0); + if ( (node = lexer_try_new_ident(self)) ) + { + return node; + } + if (self->cursor < (ssize_t) strlen(self->source)) + { str_t s; str_init(&s); @@ -441,3 +449,47 @@ node_t* lexer_try_new_str(lexer_t* self) return node; } +node_t* lexer_try_new_ident(lexer_t* self) +{ + int cursor = self->cursor; + str_t value; + str_init(&value); + + if (cursor >= (ssize_t) strlen(self->source) + || !lexer_is_ident_start(self, self->source[cursor])) + { + return NULL; + } + + str_push(&value, self->source[cursor]); + cursor++; + + while (cursor < (ssize_t) strlen(self->source) + && lexer_is_ident(self, self->source[cursor])) + { + str_push(&value, self->source[cursor]); + cursor++; + } + + node_t* node = malloc(sizeof(node_t)); + node_init(node, NODE_IDENT, value.value, self->line); + str_free(&value); + self->cursor = cursor; + return node; +} + +int lexer_is_ident_start(lexer_t* lexer, char c) +{ + assert(lexer); + + return isalpha(c) + || c == '_' + || c == '?' + || c == '!'; +} + +int lexer_is_ident(lexer_t* lexer, char c) +{ + return isdigit(c) + || lexer_is_ident_start(lexer, c); +} diff --git a/lib/lexer.h b/lib/lexer.h index eee9c75..e265ad7 100644 --- a/lib/lexer.h +++ b/lib/lexer.h @@ -54,4 +54,8 @@ node_t* lexer_try_new_text(lexer_t* self, node_t* lexer_try_new_num(lexer_t* self); node_t* lexer_try_new_str(lexer_t* self); +node_t* lexer_try_new_ident(lexer_t* self); +int lexer_is_ident_start(lexer_t* lexer, char c); +int lexer_is_ident(lexer_t* lexer, char c); + #endif diff --git a/lib/module.c b/lib/module.c index 42218e4..70dff67 100644 --- a/lib/module.c +++ b/lib/module.c @@ -10,11 +10,15 @@ void module_init(module_t* self) prog_init(&self->prog); err_init(&self->err); ccm_init(&self->ccm); + sym_init(&self->sym); + sym_open_scope(&self->sym); } void module_free(module_t* self) { assert(self); + + sym_free(&self->sym); if (self->source) { @@ -45,7 +49,6 @@ int module_load(module_t* self, char const* path) parser_init(&parser, &lexer); node_t* ast = parser_try_new_parse(&parser); - if (!err_is_ok(&lexer.err) || !err_is_ok(&parser.err)) { err_print_stack_trace(&lexer.err); diff --git a/lib/module.h b/lib/module.h index d5ba22e..5d5f11b 100644 --- a/lib/module.h +++ b/lib/module.h @@ -5,12 +5,14 @@ #include "prog.h" #include "err.h" #include "ccm.h" +#include "sym.h" typedef struct { err_t err; char* source; prog_t prog; ccm_t ccm; + sym_t sym; } module_t; void module_init(module_t* self); diff --git a/lib/node.h b/lib/node.h index 88e8655..136398f 100644 --- a/lib/node.h +++ b/lib/node.h @@ -13,7 +13,9 @@ G(NODE_ASSERT_EQ), G(NODE_ASSERT_NE), G(NODE_BOOL), \ G(NODE_AND), G(NODE_OR), G(NODE_NOT), G(NODE_IN), \ G(NODE_OSQUARE), G(NODE_CSQUARE), G(NODE_INDEX), \ G(NODE_STR), G(NODE_LT), G(NODE_LE), G(NODE_GT), \ -G(NODE_GE), G(NODE_EQ), G(NODE_NE), G(NODE_ARRAY) +G(NODE_GE), G(NODE_EQ), G(NODE_NE), G(NODE_ARRAY), \ +G(NODE_VAR), G(NODE_IDENT), G(NODE_ASSIGN), \ +G(NODE_VARDECL) CCM_ENUM_H(NodeKind, NODE_KIND); diff --git a/lib/parser.c b/lib/parser.c index 95a3152..971c8a0 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -26,7 +26,21 @@ node_t* parser_try_new_parse(parser_t* self) if (self->lexer->cursor < (ssize_t)strlen(self->lexer->source)) { - err_push(&self->err, res ? res->line : 0, "unexpected end"); + str_t s; + str_init(&s); + + while (self->lexer->cursor + < (ssize_t)strlen(self->lexer->source) + && !isspace(self->lexer->source[self->lexer->cursor])) + { + str_push(&s, self->lexer->source[self->lexer->cursor]); + self->lexer->cursor++; + } + + err_push(&self->err, res ? res->line : 0, + "unexpected end after '%s'", s.value); + str_free(&s); + } return res; @@ -121,6 +135,11 @@ node_t* parser_try_new_expr(parser_t* self) { assert(self); + if (lexer_peek_kind(self->lexer, NODE_VAR, 0)) + { + return CCM_TRY(parser_try_new_decl); + } + if (lexer_peek_kind(self->lexer, NODE_ASSERT_EQ, 0) || lexer_peek_kind(self->lexer, NODE_ASSERT_NE, 0)) { @@ -130,6 +149,43 @@ node_t* parser_try_new_expr(parser_t* self) return CCM_TRY(parser_try_new_or); } +node_t* parser_try_new_decl(parser_t* self) +{ + assert(self); + if (!lexer_consume_next(self->lexer, NODE_VAR)) + { + return NULL; + } + + if (!lexer_peek_kind(self->lexer, NODE_IDENT, 0)) + { + return NULL; + } + + node_t* ident = lexer_try_new_next(self->lexer); + + if (!lexer_consume_next(self->lexer, NODE_ASSIGN)) + { + node_free(ident); free(ident); + return NULL; + } + + node_t* expr = CCM_TRY(parser_try_new_expr); + if (!expr) + { + node_free(ident); free(ident); + return NULL; + } + + node_t* node = malloc(sizeof(node_t)); + node_init(node, NODE_VARDECL, "", self->lexer->line); + + node_push_new_child(node, ident); + node_push_new_child(node, expr); + + return node; +} + node_t* parser_try_new_assert(parser_t* self) { assert(self); @@ -526,7 +582,8 @@ node_t* parser_try_new_literal(parser_t* self) return expr; } - if (lexer_peek_kind(self->lexer, NODE_STR, 0) + if ((lexer_peek_kind(self->lexer, NODE_STR, 0) + || lexer_peek_kind(self->lexer, NODE_IDENT, 0)) && lexer_peek_kind(self->lexer, NODE_OSQUARE, 1)) { node_t* target = CCM_TRY(parser_try_new_builtin); @@ -671,6 +728,7 @@ node_t* parser_try_new_builtin(parser_t* self) node->kind == NODE_NUM || node->kind == NODE_BOOL || node->kind == NODE_STR + || node->kind == NODE_IDENT ) ) { diff --git a/lib/parser.h b/lib/parser.h index f6a156e..9fb50ba 100644 --- a/lib/parser.h +++ b/lib/parser.h @@ -25,6 +25,7 @@ int parser_ensure(parser_t* self, node_t* node, NodeKind kind); node_t* parser_try_new_module(parser_t* self); node_t* parser_try_new_expr(parser_t* self); +node_t* parser_try_new_decl(parser_t* self); node_t* parser_try_new_assert(parser_t* self); node_t* parser_try_new_or(parser_t* self); node_t* parser_try_new_and(parser_t* self); diff --git a/lib/sym.c b/lib/sym.c new file mode 100644 index 0000000..6958007 --- /dev/null +++ b/lib/sym.c @@ -0,0 +1,146 @@ +#include "sym.h" + +void sym_init(sym_t* self) +{ + assert(self); + self->env = NULL; +} + +void sym_free(sym_t* self) +{ + assert(self); + + if (self->env) + { + sym_free_env(self, self->env); + free(self->env); + } +} + +void sym_free_env(sym_t* self, env_t* env) +{ + assert(self); + assert(env); + + if (env->parent) + { + sym_free_env(self, env->parent); + free(env->parent); + } + + for (size_t i=0; ientries.size; i++) + { + sym_entry_t* entry = env->entries.data[i]; + free(entry->name); + free(entry); + } + + vec_free(&env->entries); +} + +void sym_open_scope(sym_t* self) +{ + assert(self); + env_t* env = malloc(sizeof(env_t)); + env->parent = NULL; + vec_init(&env->entries); + + if (self->env == NULL) { + self->env = env; + } else { + env->parent = self->env; + self->env = env; + } +} + +void sym_close_scope(sym_t* self) +{ + assert(self); + assert(self->env); + + vec_free(&self->env->entries); + self->env = self->env->parent; +} + + +int sym_declare(sym_t* self, + char const* name, + int addr) +{ + assert(self); + assert(self->env); + assert(name); + + if (sym_try_get_value(self, name) != NULL) + { + return 0; + } + + sym_entry_t* entry = malloc(sizeof(sym_entry_t)); + entry->name = strdup(name); + entry->local_addr = addr; + + vec_push(&self->env->entries, entry); + return 1; +} + +int sym_try_assign(sym_t* self, + char const* name, + int addr) +{ + assert(self); + assert(name); + sym_entry_t* entry = sym_try_get_value(self, name); + + if (!entry) { return 0; } + + entry->local_addr = addr; + + return 1; +} + +sym_entry_t* sym_try_get_value(sym_t* self, char const* name) +{ + assert(self); + assert(name); + + env_t* env = self->env; + + while (env) + { + sym_entry_t* entry = sym_try_get_env_value( + self, + env, + name + ); + + if (entry) + { + return entry; + } + + env = env->parent; + } + + return NULL; +} + +sym_entry_t* sym_try_get_env_value(sym_t* self, + env_t* env, + char const* name) +{ + assert(self); assert(env); assert(name); + + for (size_t i=0; ientries.size; i++) + { + sym_entry_t const* entry = + ((sym_entry_t*) env->entries.data[i]); + + if (strcmp(entry->name, name) == 0) + { + return env->entries.data[i]; + } + } + + return NULL; +} diff --git a/lib/sym.h b/lib/sym.h new file mode 100644 index 0000000..69b793b --- /dev/null +++ b/lib/sym.h @@ -0,0 +1,42 @@ +#ifndef CCM_SYM_H +#define CCM_SYM_H + +#include "commons.h" +#include "vec.h" +#include "value.h" + +typedef struct { + char* name; + int local_addr; +} sym_entry_t; + +typedef struct env { + vec_t entries; + struct env* parent; +} env_t; + +typedef struct { + env_t* env; +} sym_t; + +void sym_init(sym_t* self); +void sym_free(sym_t* self); +void sym_free_env(sym_t* self, env_t* env); + +void sym_open_scope(sym_t* self); +void sym_close_scope(sym_t* self); + +int sym_declare(sym_t* self, + char const* name, + int addr); + +int sym_try_assign(sym_t* self, + char const* name, + int addr); + +sym_entry_t* sym_try_get_value(sym_t* self, char const* name); +sym_entry_t* sym_try_get_env_value(sym_t* self, + env_t* env, + char const* name); + +#endif diff --git a/tests/str.ccm b/tests/str.ccm index 6f5fc09..87e3694 100644 --- a/tests/str.ccm +++ b/tests/str.ccm @@ -18,7 +18,7 @@ assert_eq ("hello"[-1], "o") # BINOPS # ====== assert_eq ( - "hello world", "hello " + "hello world", "hello " + "world" ) diff --git a/tests/var.ccm b/tests/var.ccm new file mode 100644 index 0000000..9eb1265 --- /dev/null +++ b/tests/var.ccm @@ -0,0 +1,19 @@ +# DECLARATION +# =========== +var a = 32 +assert_eq (32, a) + +var b = a + 1 +assert_eq(33, b) + +var c = (var d = 3) +assert_eq(3, c) +assert_eq(3, d) + +var e = [1, 2] +assert_eq (2, e[-1]) + +var f = [2, 3] +var g = 1 + +assert_eq (3, f[g])