From 12fd626cd502ff51bbadf43a7368ff929bccb62f Mon Sep 17 00:00:00 2001 From: bog Date: Tue, 26 Mar 2024 19:31:33 +0100 Subject: [PATCH] :sparkles: builtin types. --- doc/grammar.bnf | 7 + lib/CMakeLists.txt | 11 +- lib/commons.h | 1 + lib/compiler.c | 74 ++++++++ lib/compiler.h | 21 +++ lib/exec.c | 82 +++++++++ lib/exec.h | 24 +++ lib/lexer.c | 427 +++++++++++++++++++++++++++++++++++++++++++++ lib/lexer.h | 57 ++++++ lib/moka.c | 216 +++++++++++++++++++++++ lib/moka.h | 51 ++++++ lib/node.c | 69 ++++++++ lib/node.h | 35 ++++ lib/parser.c | 118 +++++++++++++ lib/parser.h | 24 +++ lib/prog.c | 60 +++++++ lib/prog.h | 37 ++++ lib/status.c | 10 +- lib/status.h | 11 +- lib/token.c | 17 ++ lib/token.h | 25 +++ lib/value.c | 61 +++++++ lib/value.h | 35 ++++ lib/vec.c | 1 - src/main.c | 88 +++++++++- tests/lexer.h | 85 +++++++++ tests/main.c | 5 +- tests/parser.h | 70 ++++++++ 28 files changed, 1710 insertions(+), 12 deletions(-) create mode 100644 doc/grammar.bnf create mode 100644 lib/compiler.c create mode 100644 lib/compiler.h create mode 100644 lib/exec.c create mode 100644 lib/exec.h create mode 100644 lib/lexer.c create mode 100644 lib/lexer.h create mode 100644 lib/moka.c create mode 100644 lib/moka.h create mode 100644 lib/node.c create mode 100644 lib/node.h create mode 100644 lib/parser.c create mode 100644 lib/parser.h create mode 100644 lib/prog.c create mode 100644 lib/prog.h create mode 100644 lib/token.c create mode 100644 lib/token.h create mode 100644 lib/value.c create mode 100644 lib/value.h create mode 100644 tests/lexer.h create mode 100644 tests/parser.h diff --git a/doc/grammar.bnf b/doc/grammar.bnf new file mode 100644 index 0000000..afbfe21 --- /dev/null +++ b/doc/grammar.bnf @@ -0,0 +1,7 @@ +ROOT ::= ATOM* +ATOM ::= +| int +| float +| bool +| string +| symbol diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 6abfbf1..df906d0 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -7,10 +7,19 @@ add_library(moka-core status.c vec.c 
str.c + token.c + lexer.c + node.c + parser.c + prog.c + compiler.c + value.c + exec.c + moka.c ) target_compile_options(moka-core - PUBLIC -Wall -Wextra + PUBLIC -Wall -Wextra -g ) target_include_directories(moka-core diff --git a/lib/commons.h b/lib/commons.h index ac475e3..4250fbc 100644 --- a/lib/commons.h +++ b/lib/commons.h @@ -7,6 +7,7 @@ #include #include #include +#include #define MK_STRLEN 4096 #define MK_ENUM_ENUM(X) X diff --git a/lib/compiler.c b/lib/compiler.c new file mode 100644 index 0000000..342eda6 --- /dev/null +++ b/lib/compiler.c @@ -0,0 +1,74 @@ +#include "compiler.h" + +void compiler_init(struct compiler* self, + struct status* status) +{ + assert(self); + self->status = status; +} + +void compiler_free(struct compiler* self) +{ + assert(self); +} + +void compiler_compile(struct compiler* self, + struct node* node, + struct prog* prog) +{ + assert(self); assert(node); assert(prog); + + switch (node->kind) + { + case NODE_ROOT: { + for (size_t i=0; ichildren.size; i++) + { + struct node* child = node->children.data[i]; + compiler_compile(self, child, prog); + } + } break; + + case NODE_INT: { + struct value* value = malloc(sizeof(struct value)); + int val = atoi(node->token->value); + value_init_int(value, val, node->line); + ssize_t addr = prog_add_new_value(prog, value); + prog_add_instruction(prog, OP_PUSH, addr); + } break; + + case NODE_FLOAT: { + struct value* value = malloc(sizeof(struct value)); + float val = atof(node->token->value); + value_init_float(value, val, node->line); + ssize_t addr = prog_add_new_value(prog, value); + prog_add_instruction(prog, OP_PUSH, addr); + } break; + + case NODE_BOOL: { + struct value* value = malloc(sizeof(struct value)); + bool val = strcmp("true", node->token->value) == 0; + value_init_bool(value, val, node->line); + ssize_t addr = prog_add_new_value(prog, value); + prog_add_instruction(prog, OP_PUSH, addr); + } break; + + case NODE_STRING: { + struct value* value = malloc(sizeof(struct value)); + 
value_init_string(value, node->token->value, node->line); + ssize_t addr = prog_add_new_value(prog, value); + prog_add_instruction(prog, OP_PUSH, addr); + } break; + + case NODE_SYMBOL: { + struct value* value = malloc(sizeof(struct value)); + value_init_symbol(value, node->token->value, node->line); + ssize_t addr = prog_add_new_value(prog, value); + prog_add_instruction(prog, OP_PUSH, addr); + } break; + default: { + fprintf(stderr, "cannot compile node %s\n", + NodeKindStr[node->kind]); + abort(); + } break; + } +} diff --git a/lib/compiler.h b/lib/compiler.h new file mode 100644 index 0000000..f2bf6f4 --- /dev/null +++ b/lib/compiler.h @@ -0,0 +1,21 @@ +#ifndef MK_COMPILER_H +#define MK_COMPILER_H + +#include "commons.h" +#include "status.h" +#include "prog.h" +#include "node.h" + +struct compiler +{ + struct status* status; +}; + +void compiler_init(struct compiler* self, + struct status* status); +void compiler_free(struct compiler* self); + +void compiler_compile(struct compiler* self, + struct node* node, + struct prog* prog); +#endif diff --git a/lib/exec.c b/lib/exec.c new file mode 100644 index 0000000..c3bb43f --- /dev/null +++ b/lib/exec.c @@ -0,0 +1,82 @@ +#include "exec.h" + +void exec_init(struct exec* self) +{ + assert(self); + self->pc = 0; +} + +void exec_free(struct exec* self) +{ + assert(self); +} + +void exec_prog(struct exec* self, + struct moka* moka, + struct prog* prog) +{ + assert(self); assert(moka); assert(prog); + self->pc = 0; + + while (self->pc < prog->instructions.size) + { + exec_instr(self, + moka, + prog, + prog->instructions.data[self->pc]); + } +} + +void exec_instr(struct exec* self, + struct moka* moka, + struct prog* prog, + struct instruction* instr) +{ + assert(self); assert(moka); assert(instr); + int param = instr->param; + + switch (instr->opcode) + { + case OP_PUSH: { + struct value* value = prog->values.data[param]; + switch (value->type) + { + case TY_INT: { + moka_push_int(moka, value->data.integer, value->line); 
+ } break; + + case TY_FLOAT: { + moka_push_float(moka, value->data.real, value->line); + } break; + + case TY_BOOL: { + moka_push_bool(moka, value->data.boolean, value->line); + } break; + + case TY_STRING: { + moka_push_string(moka, value->data.str, value->line); + } break; + + case TY_SYMBOL: { + moka_push_symbol(moka, value->data.sym, value->line); + } break; + + default: { + fprintf(stderr, + "cannot push value of type <%s>\n", + TypeKindStr[value->type] + ); + + abort(); + } break; + } + self->pc++; + } break; + + default: { + fprintf(stderr, "cannot execute opcode <%s>\n", + OpcodeKindStr[instr->opcode]); + abort(); + } break; + } +} diff --git a/lib/exec.h b/lib/exec.h new file mode 100644 index 0000000..4e8b1cf --- /dev/null +++ b/lib/exec.h @@ -0,0 +1,24 @@ +#ifndef MK_EXEC_H +#define MK_EXEC_H + +#include "commons.h" +#include "moka.h" +#include "prog.h" + +struct exec +{ + size_t pc; +}; + +void exec_init(struct exec* self); +void exec_free(struct exec* self); + +void exec_prog(struct exec* self, + struct moka* moka, + struct prog* prog); + +void exec_instr(struct exec* self, + struct moka* moka, + struct prog* prog, + struct instruction* instr); +#endif diff --git a/lib/lexer.c b/lib/lexer.c new file mode 100644 index 0000000..4ba70f7 --- /dev/null +++ b/lib/lexer.c @@ -0,0 +1,427 @@ +#include "lexer.h" +#include "str.h" + +void lexer_init(struct lexer* self, + char const* source, + struct status* status) +{ + assert(self); + self->status = status; + self->source = NULL; + self->len = 0; + if (source) + { + self->source = strdup(source); + self->len = strlen(self->source); + } + self->context.line = 1; + self->context.cursor = 0; +} + +void lexer_free(struct lexer* self) +{ + assert(self); + free(self->source); +} + +struct token* lexer_try_new_next(struct lexer* self) +{ + assert(self); + struct token* tok = NULL; + + lexer_skip_spaces(self); + + if ( (tok=lexer_try_new_float(self)) ) + { + return tok; + } + + if ( (tok=lexer_try_new_int(self)) ) + { 
+ return tok; + } + + if ( (tok=lexer_try_new_string(self)) ) + { + return tok; + } + + if ( (tok=lexer_try_new_symbol(self)) ) + { + return tok; + } + + if ( (tok=lexer_try_new_keyword(self, TOKEN_BOOL, "true", "true")) ) + { + return tok; + } + + if ( (tok=lexer_try_new_keyword(self, TOKEN_BOOL, "false", "false")) ) + { + return tok; + } + + if (self->context.cursor < self->len) + { + struct str str; + str_init(&str); + size_t cursor = self->context.cursor; + + while (cursor < self->len + && !isspace(self->source[cursor])) + { + str_push(&str, self->source[cursor]); + cursor++; + } + + status_push( + self->status, + STATUS_ERROR, + self->context.line, + "unknown literal <%s>", + str.value + ); + + str_free(&str); + } + + return tok; +} + +void lexer_skip_spaces(struct lexer* self) +{ + assert(self); + + while (self->context.cursor < self->len + && isspace(self->source[self->context.cursor])) + { + if (self->source[self->context.cursor] == '\n') + { + self->context.line++; + } + + self->context.cursor++; + } +} + +struct token* lexer_try_new_int(struct lexer* self) +{ + assert(self); + size_t cursor = self->context.cursor; + struct str str; + str_init(&str); + + if (cursor < self->len + && self->source[cursor] == '-') + { + str_push(&str, '-'); + cursor++; + } + + while (cursor < self->len + && isdigit(self->source[cursor])) + { + str_push(&str, self->source[cursor]); + cursor++; + } + + if (str.size > 0 + && (str.value[0] != '-' || str.size > 1)) + { + self->context.cursor = cursor; + struct token* tok = malloc(sizeof(struct token)); + token_init(tok, TOKEN_INT, str.value); + str_free(&str); + return tok; + } + + str_free(&str); + + return NULL; +} + +struct token* lexer_try_new_float(struct lexer* self) +{ + assert(self); + size_t cursor = self->context.cursor; + struct str str; + str_init(&str); + + if (cursor < self->len + && self->source[cursor] == '-') + { + str_push(&str, '-'); + cursor++; + } + + while (cursor < self->len + && 
isdigit(self->source[cursor])) + { + str_push(&str, self->source[cursor]); + cursor++; + } + + if (cursor >= self->len + || self->source[cursor] != '.') + { + str_free(&str); + return NULL; + } + + str_push(&str, '.'); + cursor++; + + while (cursor < self->len + && isdigit(self->source[cursor])) + { + str_push(&str, self->source[cursor]); + cursor++; + } + + if (str.size > 0 + && (str.value[0] != '-' || str.size > 1)) + { + self->context.cursor = cursor; + struct token* tok = malloc(sizeof(struct token)); + token_init(tok, TOKEN_FLOAT, str.value); + str_free(&str); + return tok; + } + + str_free(&str); + + return NULL; +} + +struct token* lexer_try_new_string(struct lexer* self) +{ + assert(self); + size_t cursor = self->context.cursor; + + if (cursor >= self->len + || self->source[cursor] != '"') + { + return NULL; + } + cursor++; + + struct str value; + str_init(&value); + + while (cursor < self->len + && self->source[cursor] != '"') + { + char c = self->source[cursor]; + + if (c == '\\') + { + char c_next = self->source[cursor + 1]; + + switch (c_next) + { + case '"': + case '\\': + str_push(&value, c_next); + break; + case 'n': str_push(&value, '\n'); break; + case 't': str_push(&value, '\t'); break; + case 'r': str_push(&value, '\r'); break; + case 'e': str_push(&value, '\e'); break; + default: { + fprintf(stderr, "unknown escaped char %c\n", c_next); + abort(); + } break; + } + + cursor += 2; + continue; + } + + str_push(&value, c); + cursor++; + } + + if (cursor >= self->len) + { + str_free(&value); + return NULL; + } + + cursor++; + + struct token* tok = malloc(sizeof(struct token)); + token_init(tok, TOKEN_STRING, value.value); + str_free(&value); + + self->context.cursor = cursor; + return tok; +} + +struct token* lexer_try_new_symbol(struct lexer* self) +{ + assert(self); + size_t cursor = self->context.cursor; + + if (cursor >= self->len + || self->source[cursor] != '\'') + { + return NULL; + } + cursor++; + + struct str value; + str_init(&value); + + 
while (cursor < self->len + && !lexer_is_sep(self, cursor)) + { + char c = self->source[cursor]; + str_push(&value, c); + cursor++; + } + + struct token* tok = malloc(sizeof(struct token)); + token_init(tok, TOKEN_SYMBOL, value.value); + str_free(&value); + + self->context.cursor = cursor; + return tok; +} + +bool lexer_is_sep(struct lexer* self, size_t index) +{ + assert(self); + + if (index >= self->len) + { + return true; + } + + char c = self->source[index]; + return isspace(c); +} + +struct token* lexer_try_new_text(struct lexer* self, + TokenKind kind, + char const* text) +{ + size_t cursor = self->context.cursor; + + if (strlen(text) + cursor > self->len) + { + return NULL; + } + + size_t text_len = strlen(text); + + for (size_t i=0; isource[cursor + i]) + { + return NULL; + } + } + + struct token* token = malloc(sizeof(struct token)); + token_init(token, kind, text); + self->context.cursor += strlen(text); + + return token; +} + +struct token* lexer_try_new_keyword(struct lexer* self, + TokenKind kind, + char const* keyword, + char const* value) +{ + size_t cursor = self->context.cursor; + + if (strlen(keyword) + cursor > self->len) + { + return NULL; + } + + size_t kw_len = strlen(keyword); + + for (size_t i=0; isource[cursor + i]) + { + return NULL; + } + } + + if ((cursor == 0 || lexer_is_sep(self, cursor - 1)) + && (cursor + kw_len == self->len + || lexer_is_sep(self, cursor + kw_len))) + + { + struct token* token = malloc(sizeof(struct token)); + token_init(token, kind, value); + self->context.cursor += strlen(keyword); + + return token; + } + + return NULL; +} + +struct lex_context lexer_state(struct lexer* self) +{ + assert(self); + return self->context; +} + +void lexer_restore(struct lexer* self, + struct lex_context context) +{ + assert(self); + self->context = context; +} + +bool lexer_next_is(struct lexer* self, + TokenKind kind, + int lookahead) +{ + assert(self); + struct lex_context ctx = lexer_state(self); + + for (int i=0; ikind == kind; + 
+ if(tok) + { + token_free(tok); + free(tok); + } + else + { + lexer_restore(self, ctx); + return false; + } + + lexer_restore(self, ctx); + return res; +} + +bool lexer_end(struct lexer* self) +{ + assert(self); + lexer_skip_spaces(self); + return self->context.cursor >= self->len; +} diff --git a/lib/lexer.h b/lib/lexer.h new file mode 100644 index 0000000..f891efd --- /dev/null +++ b/lib/lexer.h @@ -0,0 +1,57 @@ +#ifndef MK_LEXER_H +#define MK_LEXER_H + +#include "token.h" +#include "commons.h" +#include "status.h" +#include "str.h" + +struct lex_context +{ + size_t cursor; + int line; +}; + +struct lexer +{ + struct status* status; + char* source; + size_t len; + struct lex_context context; + struct str separators; +}; + +void lexer_init(struct lexer* self, + char const* source, + struct status* status); + +void lexer_free(struct lexer* self); + +struct token* lexer_try_new_next(struct lexer* self); +void lexer_skip_spaces(struct lexer* self); + +struct token* lexer_try_new_int(struct lexer* self); +struct token* lexer_try_new_float(struct lexer* self); +struct token* lexer_try_new_string(struct lexer* self); +struct token* lexer_try_new_symbol(struct lexer* self); + +bool lexer_is_sep(struct lexer* self, size_t index); +struct token* lexer_try_new_text(struct lexer* self, + TokenKind kind, + char const* text); +struct token* lexer_try_new_keyword(struct lexer* self, + TokenKind kind, + char const* keyword, + char const* value); + +struct lex_context lexer_state(struct lexer* self); +void lexer_restore(struct lexer* self, + struct lex_context context); + +bool lexer_next_is(struct lexer* self, + TokenKind kind, + int lookahead); + +bool lexer_end(struct lexer* self); +#endif + diff --git a/lib/moka.c b/lib/moka.c new file mode 100644 index 0000000..4912bcd --- /dev/null +++ b/lib/moka.c @@ -0,0 +1,216 @@ +#include "moka.h" + +void moka_init(struct moka* self) +{ + assert(self); + vec_init(&self->frame_stack); + vec_init(&self->global_values); + + struct frame* 
frame = malloc(sizeof(struct frame)); + frame_init(frame); + vec_push(&self->frame_stack, frame); +} + +void frame_init(struct frame* self) +{ + vec_init(&self->local_values); + vec_init(&self->stack); +} + +void frame_free(struct frame* self) +{ + assert(self); + vec_free_elements(&self->local_values, (void*)value_free); + vec_free(&self->local_values); + vec_free(&self->stack); +} + +void moka_free(struct moka* self) +{ + assert(self); + vec_free_elements(&self->frame_stack, (void*) frame_free); + vec_free(&self->frame_stack); + vec_free(&self->global_values); +} + +struct frame* moka_frame(struct moka* self) +{ + assert(self); + assert(self->frame_stack.size > 0); + return self->frame_stack.data[ + self->frame_stack.size - 1 + ]; +} + +bool moka_has_top(struct moka* self) +{ + assert(self); + struct frame* frame = moka_frame(self); + return frame->stack.size > 0; +} + +MOKA moka_top(struct moka* self) +{ + assert(self); + struct frame* frame = moka_frame(self); + assert(frame->stack.size > 0); + MOKA val = (MOKA) frame->stack.data[frame->stack.size - 1]; + return val; +} + +bool moka_is(struct moka* self, MOKA value, TypeKind type) +{ + return moka_type_of(self, value) == type; +} + +TypeKind moka_type_of(struct moka* self, MOKA value) +{ + assert(self); + struct frame* frame = moka_frame(self); + struct value const* val = frame->local_values.data[value]; + return val->type; +} + +void moka_dump(struct moka* self, MOKA value) +{ + if (moka_is(self, value, TY_INT)) + { + printf("%d", moka_get_int(self, value)); + return; + } + + if (moka_is(self, value, TY_FLOAT)) + { + printf("%f", moka_get_float(self, value)); + return; + } + + if (moka_is(self, value, TY_BOOL)) + { + printf("%s", moka_get_bool(self, value) ? 
"true" : "false"); + return; + } + + if (moka_is(self, value, TY_STRING)) + { + printf("%s", moka_get_string(self, value)); + return; + } + + if (moka_is(self, value, TY_SYMBOL)) + { + printf("'%s", moka_get_symbol(self, value)); + return; + } + + fprintf(stderr, "cannot dump value of type <%s>\n", + TypeKindStr[moka_type_of(self, value)] + ); + + abort(); +} + +MOKA moka_push_int(struct moka* self, int value, int line) +{ + assert(self); + struct frame* frame = moka_frame(self); + struct value* val = malloc(sizeof(struct value)); + value_init_int(val, value, line); + vec_push(&frame->local_values, val); + size_t addr = frame->local_values.size - 1; + vec_push(&frame->stack, (void*) addr); + return addr; +} + +int moka_get_int(struct moka* self, MOKA value) +{ + assert(self); + struct frame* frame = moka_frame(self); + struct value* val = frame->local_values.data[value]; + assert(val->type == TY_INT); + return val->data.integer; +} + +MOKA moka_push_float(struct moka* self, float value, int line) +{ + assert(self); + struct frame* frame = moka_frame(self); + struct value* val = malloc(sizeof(struct value)); + value_init_float(val, value, line); + vec_push(&frame->local_values, val); + size_t addr = frame->local_values.size - 1; + vec_push(&frame->stack, (void*) addr); + return addr; +} + +float moka_get_float(struct moka* self, MOKA value) +{ + assert(self); + struct frame* frame = moka_frame(self); + struct value* val = frame->local_values.data[value]; + assert(val->type == TY_FLOAT); + return val->data.real; +} + +MOKA moka_push_bool(struct moka* self, bool value, int line) +{ + assert(self); + struct frame* frame = moka_frame(self); + struct value* val = malloc(sizeof(struct value)); + value_init_bool(val, value, line); + vec_push(&frame->local_values, val); + size_t addr = frame->local_values.size - 1; + vec_push(&frame->stack, (void*) addr); + return addr; +} + +float moka_get_bool(struct moka* self, MOKA value) +{ + assert(self); + struct frame* frame = 
moka_frame(self); + struct value* val = frame->local_values.data[value]; + assert(val->type == TY_BOOL); + return val->data.boolean; +} + +MOKA moka_push_string(struct moka* self, char const* value, int line) +{ + assert(self); + struct frame* frame = moka_frame(self); + struct value* val = malloc(sizeof(struct value)); + value_init_string(val, value, line); + vec_push(&frame->local_values, val); + size_t addr = frame->local_values.size - 1; + vec_push(&frame->stack, (void*) addr); + return addr; +} + +char* moka_get_string(struct moka* self, MOKA value) +{ + assert(self); + struct frame* frame = moka_frame(self); + struct value* val = frame->local_values.data[value]; + assert(val->type == TY_STRING); + return val->data.str; +} + +MOKA moka_push_symbol(struct moka* self, char const* value, int line) +{ + assert(self); + struct frame* frame = moka_frame(self); + struct value* val = malloc(sizeof(struct value)); + value_init_symbol(val, value, line); + vec_push(&frame->local_values, val); + size_t addr = frame->local_values.size - 1; + vec_push(&frame->stack, (void*) addr); + return addr; +} + +char* moka_get_symbol(struct moka* self, MOKA value) +{ + assert(self); + struct frame* frame = moka_frame(self); + struct value* val = frame->local_values.data[value]; + assert(val->type == TY_SYMBOL); + return val->data.sym; +} diff --git a/lib/moka.h b/lib/moka.h new file mode 100644 index 0000000..fd80f77 --- /dev/null +++ b/lib/moka.h @@ -0,0 +1,51 @@ +#ifndef MK_MOKA_H +#define MK_MOKA_H + +#include "commons.h" +#include "vec.h" +#include "value.h" + +typedef size_t MOKA; + +struct frame +{ + struct vec stack; + struct vec local_values; +}; + +struct moka +{ + struct vec frame_stack; + struct vec global_values; +}; + +void moka_init(struct moka* self); +void frame_init(struct frame* self); +void frame_free(struct frame* self); +void moka_free(struct moka* self); + +struct frame* moka_frame(struct moka* self); +bool moka_has_top(struct moka* self); +MOKA moka_top(struct 
moka* self); + +bool moka_is(struct moka* self, MOKA value, TypeKind type); +TypeKind moka_type_of(struct moka* self, MOKA value); + +void moka_dump(struct moka* self, MOKA value); + +MOKA moka_push_int(struct moka* self, int value, int line); +int moka_get_int(struct moka* self, MOKA value); + +MOKA moka_push_float(struct moka* self, float value, int line); +float moka_get_float(struct moka* self, MOKA value); + +MOKA moka_push_bool(struct moka* self, bool value, int line); +float moka_get_bool(struct moka* self, MOKA value); + +MOKA moka_push_string(struct moka* self, char const* value, int line); +char* moka_get_string(struct moka* self, MOKA value); + +MOKA moka_push_symbol(struct moka* self, char const* value, int line); +char* moka_get_symbol(struct moka* self, MOKA value); + +#endif diff --git a/lib/node.c b/lib/node.c new file mode 100644 index 0000000..2bc67d2 --- /dev/null +++ b/lib/node.c @@ -0,0 +1,69 @@ +#include "node.h" + +MK_ENUM_C(NodeKind, NODE_KIND); + +void node_init(struct node* self, + NodeKind kind, + struct token* token, + int line) +{ + assert(self); + self->kind = kind; + self->token = token; + vec_init(&self->children); + self->line = line; +} + +void node_free(struct node* self) +{ + if (self->token) + { + token_free(self->token); + free(self->token); + } + + vec_free_elements(&self->children, (void*) node_free); + vec_free(&self->children); +} + +void node_str(struct node* self, struct str* str) +{ + assert(self); + assert(str); + + str_format(str, "%s", NodeKindStr[self->kind] + strlen("NODE_")); + + if (self->token && self->token->value) + { + str_format(str, "[%s]", self->token->value); + } + + if (self->children.size > 0) + { + str_push(str, '('); + + for (size_t i=0; ichildren.size; i++) + { + if (i > 0) + { + str_push(str, ','); + } + + struct str child_str; + str_init(&child_str); + + node_str(self->children.data[i], &child_str); + str_extend(str, child_str.value); + str_free(&child_str); + } + + str_push(str, ')'); + } +} + 
+void node_add_new_child(struct node* self, struct node* child) +{ + assert(self); + assert(child); + vec_push(&self->children, child); +} diff --git a/lib/node.h b/lib/node.h new file mode 100644 index 0000000..2f7137c --- /dev/null +++ b/lib/node.h @@ -0,0 +1,35 @@ +#ifndef MK_NODE_H +#define MK_NODE_H + +#include "commons.h" +#include "token.h" +#include "vec.h" +#include "str.h" + +#define NODE_KIND(G) \ +G(NODE_ROOT), \ +G(NODE_INT), G(NODE_FLOAT), G(NODE_BOOL), \ +G(NODE_STRING), G(NODE_SYMBOL) + +MK_ENUM_H(NodeKind, NODE_KIND); + +struct node +{ + NodeKind kind; + struct token* token; + struct vec children; + int line; +}; + +void node_init(struct node* self, + NodeKind kind, + struct token* token, + int line); + +void node_free(struct node* self); + +void node_str(struct node* self, struct str* str); + +void node_add_new_child(struct node* self, struct node* child); + +#endif diff --git a/lib/parser.c b/lib/parser.c new file mode 100644 index 0000000..32645b9 --- /dev/null +++ b/lib/parser.c @@ -0,0 +1,118 @@ +#include "parser.h" + + +#define MK_TRY(func) parser_try(self, func) + +void parser_init(struct parser* self, struct lexer* lexer) +{ + assert(self); + assert(lexer); + self->lexer = lexer; +} + +void parser_free(struct parser* self) +{ + assert(self); +} + +struct node* parser_try_new_parse(struct parser* self) +{ + assert(self); + return MK_TRY(parser_try_new_root); +} + +struct node* parser_try(struct parser* self, + struct node* (*rule)(struct parser*)) +{ + assert(self); + assert(rule); + struct lex_context ctx = lexer_state(self->lexer); + + struct node* node = rule(self); + + if (node == NULL) + { + lexer_restore(self->lexer, ctx); + } + + return node; +} + +struct node* parser_try_new_root(struct parser* self) +{ + assert(self); + struct node* root = malloc(sizeof(struct node)); + node_init(root, NODE_ROOT, NULL, self->lexer->context.line); + + while (!lexer_end(self->lexer)) + { + struct node* atom = MK_TRY(parser_try_new_atom); + if (!atom 
&& !lexer_end(self->lexer)) + { + node_free(root); + free(root); + return NULL; + } + + node_add_new_child(root, atom); + } + + return root; +} + +struct node* parser_try_new_atom(struct parser* self) +{ + assert(self); + + if (lexer_next_is(self->lexer, TOKEN_FLOAT, 0)) + { + struct node* node = malloc(sizeof(struct node)); + struct token* tok = lexer_try_new_next(self->lexer); + assert(tok); + node_init(node, NODE_FLOAT, tok, + self->lexer->context.line); + return node; + } + + if (lexer_next_is(self->lexer, TOKEN_INT, 0)) + { + struct node* node = malloc(sizeof(struct node)); + struct token* tok = lexer_try_new_next(self->lexer); + assert(tok); + node_init(node, NODE_INT, tok, + self->lexer->context.line); + return node; + } + + if (lexer_next_is(self->lexer, TOKEN_BOOL, 0)) + { + struct node* node = malloc(sizeof(struct node)); + struct token* tok = lexer_try_new_next(self->lexer); + assert(tok); + node_init(node, NODE_BOOL, tok, + self->lexer->context.line); + return node; + } + + if (lexer_next_is(self->lexer, TOKEN_STRING, 0)) + { + struct node* node = malloc(sizeof(struct node)); + struct token* tok = lexer_try_new_next(self->lexer); + assert(tok); + node_init(node, NODE_STRING, tok, + self->lexer->context.line); + return node; + } + + if (lexer_next_is(self->lexer, TOKEN_SYMBOL, 0)) + { + struct node* node = malloc(sizeof(struct node)); + struct token* tok = lexer_try_new_next(self->lexer); + assert(tok); + node_init(node, NODE_SYMBOL, tok, + self->lexer->context.line); + return node; + } + return NULL; +} + diff --git a/lib/parser.h b/lib/parser.h new file mode 100644 index 0000000..1754a19 --- /dev/null +++ b/lib/parser.h @@ -0,0 +1,24 @@ +#ifndef MK_PARSER_H +#define MK_PARSER_H + +#include "commons.h" +#include "node.h" +#include "lexer.h" + +struct parser +{ + struct lexer* lexer; +}; + +void parser_init(struct parser* self, struct lexer* lexer); +void parser_free(struct parser* self); + +struct node* parser_try_new_parse(struct parser* self); + 
+struct node* parser_try(struct parser* self, + struct node* (*rule)(struct parser*)); + +struct node* parser_try_new_root(struct parser* self); +struct node* parser_try_new_atom(struct parser* self); + +#endif diff --git a/lib/prog.c b/lib/prog.c new file mode 100644 index 0000000..aeab237 --- /dev/null +++ b/lib/prog.c @@ -0,0 +1,60 @@ +#include "prog.h" + +MK_ENUM_C(OpcodeKind, OPCODE_KIND); + +void prog_init(struct prog* self) +{ + assert(self); + vec_init(&self->instructions); + vec_init(&self->values); +} + +void prog_free(struct prog* self) +{ + assert(self); + vec_free_elements(&self->instructions, NULL); + vec_free(&self->instructions); + + vec_free_elements(&self->values, (void*) value_free); + vec_free(&self->values); +} + +size_t prog_add_instruction(struct prog* self, + OpcodeKind opcode, + ssize_t param) +{ + assert(self); + struct instruction* instr = malloc(sizeof(struct instruction)); + instr->opcode = opcode; + instr->param = param; + + vec_push(&self->instructions, instr); + + return self->instructions.size - 1; +} + +size_t prog_add_new_value(struct prog* self, + struct value* value) +{ + assert(self); + assert(value); + + vec_push(&self->values, value); + + return self->values.size - 1; +} + +void prog_dump(struct prog* self) +{ + assert(self); + + printf("--- PROG ---\n"); + for (size_t i=0; iinstructions.size; i++) + { + struct instruction const* instr = self->instructions.data[i]; + printf("%zu\t%s %zd\n", + i, + OpcodeKindStr[instr->opcode], + instr->param); + } +} diff --git a/lib/prog.h b/lib/prog.h new file mode 100644 index 0000000..7ff0dfe --- /dev/null +++ b/lib/prog.h @@ -0,0 +1,37 @@ +#ifndef MK_PROG_H +#define MK_PROG_H + +#include "commons.h" +#include "vec.h" +#include "value.h" + +#define OPCODE_KIND(G) \ +G(OP_PUSH) + +MK_ENUM_H(OpcodeKind, OPCODE_KIND); + +struct instruction +{ + OpcodeKind opcode; + ssize_t param; +}; + +struct prog +{ + struct vec instructions; + struct vec values; +}; + +void prog_init(struct prog* self); 
+void prog_free(struct prog* self); + +size_t prog_add_instruction(struct prog* self, + OpcodeKind opcode, + ssize_t param); + +size_t prog_add_new_value(struct prog* self, + struct value* value); + +void prog_dump(struct prog* self); + +#endif diff --git a/lib/status.c b/lib/status.c index b1739a6..d07dfee 100644 --- a/lib/status.c +++ b/lib/status.c @@ -4,13 +4,15 @@ MK_ENUM_C(Status, STATUS_KIND); void message_init(struct message* self, Status kind, - char const* what) + char const* what, + int where) { assert(self); assert(what); self->kind = kind; self->what = strdup(what); + self->where = where; } void message_free(struct message* self) @@ -33,6 +35,7 @@ void status_free(struct status* self) } void status_push(struct status* self, Status kind, + int where, char const* format, ...) { @@ -43,7 +46,7 @@ void status_push(struct status* self, vsnprintf(msg, MK_STRLEN, format, lst); struct message* message = malloc(sizeof(struct message)); - message_init(message, kind, msg); + message_init(message, kind, msg, where); vec_push(&self->messages, message); va_end(lst); @@ -57,8 +60,9 @@ void status_dump(struct status* self) { struct message const* msg = self->messages.data[i]; - printf("%s| %s\n", + printf("[%s:%d] %s\n", StatusStr[msg->kind] + strlen("STATUS_"), + msg->where, msg->what ); } diff --git a/lib/status.h b/lib/status.h index e7de7c7..6db7d46 100644 --- a/lib/status.h +++ b/lib/status.h @@ -14,6 +14,7 @@ struct message { Status kind; char* what; + int where; }; struct status @@ -21,16 +22,18 @@ struct status struct vec messages; }; -void message_init(struct message* self, - Status kind, - char const* what); +void message_init(struct message* self, + Status kind, + char const* what, + int where); void message_free(struct message* self); void status_init(struct status* self); void status_free(struct status* self); -void status_push(struct status* self, +void status_push(struct status* self, Status kind, + int where, char const* format, ...); diff --git 
a/lib/token.c b/lib/token.c
new file mode 100644
index 0000000..abdacbf
--- /dev/null
+++ b/lib/token.c
@@ -0,0 +1,17 @@
+#include "token.h"
+
+MK_ENUM_C(TokenKind, TOKEN_KIND);
+
+void token_init(struct token* self, TokenKind kind, char const* value) /* sets kind and value only; self->line is left untouched */
+{
+    assert(self);
+    self->kind = kind;
+    self->value = strdup(value);
+}
+
+void token_free(struct token* self) /* releases the copy made by token_init; does not free self */
+{
+    assert(self);
+    free(self->value);
+    self->value = NULL;
+}
diff --git a/lib/token.h b/lib/token.h
new file mode 100644
index 0000000..2135ba4
--- /dev/null
+++ b/lib/token.h
@@ -0,0 +1,25 @@
+#ifndef MK_TOKEN_H
+#define MK_TOKEN_H
+
+#include "commons.h"
+
+#define TOKEN_KIND(G) \
+G(TOKEN_INT), G(TOKEN_FLOAT), \
+G(TOKEN_BOOL), G(TOKEN_STRING), \
+G(TOKEN_SYMBOL)
+
+MK_ENUM_H(TokenKind, TOKEN_KIND);
+
+struct token
+{
+    TokenKind kind;
+    char* value; /* heap copy created by token_init, released by token_free */
+    int line; /* not written by token_init */
+};
+
+void token_init(struct token* self,
+                TokenKind kind,
+                char const* value);
+void token_free(struct token* self);
+
+#endif
diff --git a/lib/value.c b/lib/value.c
new file mode 100644
index 0000000..25b0675
--- /dev/null
+++ b/lib/value.c
@@ -0,0 +1,61 @@
+#include "value.h"
+
+MK_ENUM_C(TypeKind, TYPE_KIND);
+
+void value_init_int(struct value* self, int value, int line)
+{
+    assert(self);
+    self->data.integer = value;
+    self->type = TY_INT;
+    self->line = line;
+}
+
+void value_init_float(struct value* self, float value, int line)
+{
+    assert(self);
+    self->data.real = value;
+    self->type = TY_FLOAT;
+    self->line = line;
+}
+
+void value_init_bool(struct value* self, bool value, int line)
+{
+    assert(self);
+    self->data.boolean = value;
+    self->type = TY_BOOL;
+    self->line = line;
+}
+
+void value_init_string(struct value* self, char const* value, int line)
+{
+    assert(self);
+    assert(value);
+    self->data.str = strdup(value); /* the value owns its own copy */
+    self->type = TY_STRING;
+    self->line = line;
+}
+
+void value_init_symbol(struct value* self, char const* value, int line)
+{
+    assert(self);
+    assert(value);
+    self->data.sym = strdup(value); /* the value owns its own copy */
+    self->type = TY_SYMBOL;
+    self->line = line;
+}
+
+void value_free(struct value* self) /* frees only the strdup'ed payload of strings and symbols, never self */
+{
+    assert(self);
+
+    if (self->type == TY_STRING)
+    {
+        free(self->data.str);
+    }
+
+    if (self->type == TY_SYMBOL)
+    {
+        free(self->data.sym);
+    }
+
+}
diff --git a/lib/value.h b/lib/value.h
new file mode 100644
index 0000000..78bd79a
--- /dev/null
+++ b/lib/value.h
@@ -0,0 +1,35 @@
+#ifndef MK_VALUE_H
+#define MK_VALUE_H
+
+#include "commons.h"
+
+#define TYPE_KIND(G) \
+G(TY_INT), G(TY_FLOAT), G(TY_BOOL), \
+G(TY_STRING), G(TY_SYMBOL)
+
+MK_ENUM_H(TypeKind, TYPE_KIND);
+
+union value_data
+{
+    int integer;
+    float real;
+    bool boolean;
+    char* str; /* heap copy, valid when type is TY_STRING */
+    char* sym; /* heap copy, valid when type is TY_SYMBOL */
+};
+
+struct value
+{
+    union value_data data;
+    TypeKind type; /* selects the active member of data */
+    int line; /* whatever the initialiser was given; compiler.c passes node->line */
+};
+
+void value_init_int(struct value* self, int value, int line);
+void value_init_float(struct value* self, float value, int line);
+void value_init_bool(struct value* self, bool value, int line);
+void value_init_string(struct value* self, char const* value, int line);
+void value_init_symbol(struct value* self, char const* value, int line);
+void value_free(struct value* self);
+
+#endif
diff --git a/lib/vec.c b/lib/vec.c
index cbb78ad..402d64e 100644
--- a/lib/vec.c
+++ b/lib/vec.c
@@ -35,7 +35,6 @@ void vec_free(struct vec* self)
 void vec_push(struct vec* self, void* element)
 {
     assert(self);
-    assert(element);
 
     if (self->capacity == 0)
     {
diff --git a/src/main.c b/src/main.c
index 905869d..b86237f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,4 +1,105 @@
-int main()
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Run the Moka source file named by argv[1]: lex, parse, compile and
+ * execute it, then print the value left on top of the stack, if any.
+ * Returns EXIT_FAILURE when no file is given, when the file cannot be
+ * opened, or when the status object recorded a problem. */
+int main(int argc, char** argv)
 {
-    return 0;
+    if (argc <= 1) { return EXIT_FAILURE; }
+
+    struct status status;
+    status_init(&status);
+
+    struct str source;
+    str_init(&source);
+
+    {
+        /* The file is only read; "r+" would needlessly reject read-only files. */
+        FILE* file = fopen(argv[1], "r");
+        if (!file)
+        {
+            perror(argv[1]);
+            str_free(&source);
+            status_free(&status);
+            return EXIT_FAILURE;
+        }
+
+        char buf;
+        while (fread(&buf, sizeof(char), 1, file) == 1)
+        {
+            str_push(&source, buf);
+        }
+        fclose(file);
+    }
+
+    struct lexer lex;
+    lexer_init(&lex, source.value, &status);
+
+    struct parser parser;
+    parser_init(&parser, &lex);
+    struct node* root = parser_try_new_root(&parser);
+
+    if (!root || !status_is_ok(&status))
+    {
+        status_dump(&status);
+        goto free_root;
+    }
+
+    struct compiler compiler;
+    compiler_init(&compiler, &status);
+
+    struct prog prog;
+    prog_init(&prog);
+
+    compiler_compile(&compiler, root, &prog);
+
+    if (!status_is_ok(&status))
+    {
+        status_dump(&status);
+        goto free_prog;
+    }
+
+    struct moka moka;
+    moka_init(&moka);
+
+    struct exec exec;
+    exec_init(&exec);
+
+    exec_prog(&exec, &moka, &prog);
+
+    if (moka_has_top(&moka))
+    {
+        MOKA value = moka_top(&moka);
+        moka_dump(&moka, value);
+        printf("\n");
+    }
+
+    exec_free(&exec);
+    moka_free(&moka);
+free_prog:
+    /* Also reached after a compile error, so the program is released on that path too. */
+    prog_free(&prog);
+    compiler_free(&compiler);
+free_root:
+    /* root can be non-NULL here when only the status check failed. */
+    if (root)
+    {
+        node_free(root);
+        free(root);
+    }
+    parser_free(&parser);
+    lexer_free(&lex);
+    str_free(&source);
+
+    int ret = status_is_ok(&status) ? EXIT_SUCCESS : EXIT_FAILURE;
+    status_free(&status);
+
+    return ret;
 }
diff --git a/tests/lexer.h b/tests/lexer.h
new file mode 100644
index 0000000..32fa986
--- /dev/null
+++ b/tests/lexer.h
@@ -0,0 +1,85 @@
+#ifndef MK_TEST_LEXER_H
+#define MK_TEST_LEXER_H
+#include
+#include
+#include
+
+static void test_lexer(char const* source, int n, ...)
+{
+    va_list lst;
+    va_start(lst, n);
+
+    struct status status;
+    status_init(&status);
+
+    struct lexer lex;
+    lexer_init(&lex, source, &status);
+
+    for (int i=0; ivalue);
+        ck_assert_int_eq(kind, tok->kind);
+        token_free(tok);
+        free(tok);
+    }
+
+    status_free(&status);
+
+    lexer_free(&lex);
+    va_end(lst);
+}
+
+START_TEST(lexer_atom)
+{
+    test_lexer(" 34 -2 0 ", 3,
+               TOKEN_INT, "34",
+               TOKEN_INT, "-2",
+               TOKEN_INT, "0"
+    );
+
+    test_lexer(" 3.4 -2.2 .6 7. ", 4,
+               TOKEN_FLOAT, "3.4",
+               TOKEN_FLOAT, "-2.2",
+               TOKEN_FLOAT, ".6",
+               TOKEN_FLOAT, "7."
+    );
+
+    test_lexer(" true false ", 2,
+               TOKEN_BOOL, "true",
+               TOKEN_BOOL, "false"
+    );
+
+    test_lexer(" \"\\\\hel\\rlo\" \"wo\\trld\\n\" \" \\\"bim\\\" \" ", 3,
+               TOKEN_STRING, "\\hel\rlo",
+               TOKEN_STRING, "wo\trld\n",
+               TOKEN_STRING, " \"bim\" "
+    );
+
+    test_lexer(" 'hello ", 1,
+               TOKEN_SYMBOL, "hello"
+    );
+}
+END_TEST
+
+void register_lexer(Suite* suite)
+{
+    TCase* tcase = tcase_create("Lexer");
+    tcase_add_test(tcase, lexer_atom);
+    suite_add_tcase(suite, tcase);
+}
+
+#endif
diff --git a/tests/main.c b/tests/main.c
index b7e2071..2fd6297 100644
--- a/tests/main.c
+++ b/tests/main.c
@@ -1,11 +1,14 @@
 #include
 #include
-
+#include "lexer.h"
+#include "parser.h"
 
 int main()
 {
     Suite* s = suite_create("Moka Frontend");
 
+    register_lexer(s);
+    register_parser(s);
 
     SRunner* runner = srunner_create(s);
     srunner_run_all(runner, CK_VERBOSE);
diff --git a/tests/parser.h b/tests/parser.h
new file mode 100644
index 0000000..94eff23
--- /dev/null
+++ b/tests/parser.h
@@ -0,0 +1,70 @@
+#ifndef MK_TEST_PARSER_H
+#define MK_TEST_PARSER_H
+#include
+#include
+#include
+#include
+
+void test_parser(char const* oracle, char const* source) /* compares node_str() of the parsed source against oracle */
+{
+    struct status status;
+    status_init(&status);
+
+    struct lexer lex;
+    lexer_init(&lex, source, &status);
+
+    struct parser parser;
+    parser_init(&parser, &lex);
+
+    struct node* ast = parser_try_new_parse(&parser);
+    ck_assert(ast);
+
+    struct str my_node_str;
+    str_init(&my_node_str);
+    node_str(ast, &my_node_str);
+
+    ck_assert_str_eq(oracle, my_node_str.value);
+
+    bool ok = status_is_ok(&status);
+    if (!ok)
+    {
+        status_dump(&status);
+    }
+    ck_assert(ok);
+
+    str_free(&my_node_str);
+    node_free(ast);
+    free(ast);
+    parser_free(&parser);
+    lexer_free(&lex);
+    status_free(&status);
+}
+
+START_TEST(parser_atom)
+{
+    test_parser("ROOT(INT[34])",
+                " 34 ");
+
+    test_parser("ROOT(FLOAT[0.8])",
+                " 0.8 ");
+
+    test_parser("ROOT(BOOL[true],BOOL[false])",
+                " true false ");
+
+    test_parser("ROOT(STRING[pizza!])",
+                " \"pizza!\" ");
+
+    test_parser("ROOT(SYMBOL[tea])",
+                " 'tea ");
+}
+END_TEST
+
+void register_parser(Suite* suite)
+{
+    TCase* tcase = tcase_create("Parser");
+    tcase_add_test(tcase, parser_atom);
+
+    suite_add_tcase(suite, tcase);
+}
+
+#endif