ccm/lib/compiler.c

641 lines
19 KiB
C

#include "compiler.h"
#include "prog.h"
void compiler_init(compiler_t* self, module_t* module)
{
assert(self);
err_init(&self->err);
self->module = module;
self->loc_counter = 0;
vec_init(&self->loop_infos);
}
void compiler_free(compiler_t* self)
{
assert(self);
err_free(&self->err);
vec_free_elements(&self->loop_infos, NULL);
vec_free(&self->loop_infos);
}
void compiler_compile(compiler_t* self,
node_t* node,
prog_t* prog)
{
assert(self);
assert(node);
assert(prog);
if (!err_is_ok(&self->err))
{
return;
}
ccm_t* ccm = &self->module->ccm;
sym_t* sym = &self->module->sym;
switch (node->kind)
{
case NODE_CONTINUE: {
assert(self->loop_infos.size > 0);
loop_info_t const* info
= self->loop_infos.data[
self->loop_infos.size - 1
];
prog_add_instr(prog, OP_BR, info->start_point);
} break;
case NODE_BREAK: {
assert(self->loop_infos.size > 0);
loop_info_t* info
= self->loop_infos.data[
self->loop_infos.size - 1
];
size_t point = prog_add_instr(prog, OP_BR, 0);
vec_push(&info->to_end, (void*) point);
} break;
case NODE_FOR: {
node_t const* ident = node->children.data[0];
node_t* expr = node->children.data[1];
node_t* block = node->children.data[2];
sym_open_scope(sym);
sym_declare(sym, "__iter",
self->loc_counter++, 0);
sym_declare(sym, ident->value,
self->loc_counter++, 0);
int var_id =
sym_try_get_value(sym, ident->value)->id;
int iter_id =
sym_try_get_value(sym, "__iter")->id;
// initialize iteration counter
value_t* value = malloc(sizeof(value_t));
value_init_num(value, -1, node->line);
prog_add_instr(prog, OP_PUSH, prog_add_new_constant(
prog,
ccm_from_value(ccm, value)
));
prog_add_instr(prog, OP_LOCAL_STORE, iter_id);
int start = prog->instrs.size;
loop_info_t* info = malloc(sizeof(loop_info_t));
info->start_point = start;
vec_init(&info->to_end);
vec_push(&self->loop_infos, info);
// incr iteration counter
prog_add_instr(prog, OP_LOCAL_LOAD, iter_id);
value_free(value); free(value);
value = malloc(sizeof(value_t));
value_init_num(value, 1, node->line);
prog_add_instr(prog, OP_PUSH, prog_add_new_constant(
prog,
ccm_from_value(ccm, value)
));
prog_add_instr(prog, OP_ADD, CCM_NO_PARAM);
prog_add_instr(prog, OP_LOCAL_STORE, iter_id);
// test if end is reached
prog_add_instr(prog, OP_LOCAL_LOAD, iter_id);
compiler_compile(self, expr, prog);
prog_add_instr(prog, OP_LEN, CCM_NO_PARAM);
prog_add_instr(prog, OP_LT, CCM_NO_PARAM);
int cond_point = prog_add_instr(prog, OP_BRF, 0);
// get current element
prog_add_instr(prog, OP_LOCAL_LOAD, iter_id);
compiler_compile(self, expr, prog);
prog_add_instr(prog, OP_INDEX, 1);
prog_add_instr(prog, OP_LOCAL_STORE, var_id);
// compile body
compiler_compile(self, block, prog);
// next iteration
prog_add_instr(prog, OP_BR, start);
value_free(value); free(value);
int end_point = prog->instrs.size;
((instr_t*)prog->instrs.data[cond_point])->param
= end_point;
for (size_t i=0; i<info->to_end.size; i++)
{
int addr = (size_t)info->to_end.data[i];
instr_t* instr = prog->instrs.data[addr];
instr->param = end_point;
}
sym_close_scope(sym);
info = vec_pop(&self->loop_infos);
vec_free(&info->to_end);
free(info);
} break;
case NODE_WHILE: {
node_t* expr = node->children.data[0];
node_t* block = node->children.data[1];
int start = prog->instrs.size;
loop_info_t* info = malloc(sizeof(loop_info_t));
info->start_point = start;
vec_init(&info->to_end);
vec_push(&self->loop_infos, info);
compiler_compile(self, expr, prog);
int cond_point = prog_add_instr(prog, OP_BRF, 0);
compiler_compile(self, block, prog);
prog_add_instr(prog, OP_BR, start);
int end_point = prog->instrs.size;
((instr_t*)prog->instrs.data[cond_point])->param
= end_point;
for (size_t i=0; i<info->to_end.size; i++)
{
int addr = (size_t)info->to_end.data[i];
instr_t* instr = prog->instrs.data[addr];
instr->param = end_point;
}
info = vec_pop(&self->loop_infos);
vec_free(&info->to_end);
free(info);
} break;
case NODE_IF: {
vec_t to_end;
vec_init(&to_end);
compiler_compile_if(self, node, prog, &to_end);
size_t end_point = prog->instrs.size;
for (size_t i=0; i<to_end.size; i++)
{
int const* addr = to_end.data[i];
((instr_t*) prog->instrs.data[*addr])
->param = end_point;
}
vec_free_elements(&to_end, NULL);
vec_free(&to_end);
} break;
case NODE_BLOCK: {
sym_open_scope(sym);
for (size_t i=0; i<node->children.size; i++)
{
compiler_compile(
self,
node->children.data[i],
prog
);
}
sym_close_scope(sym);
} break;
case NODE_BEGIN: {
compiler_compile(self, node->children.data[0], prog);
} break;
case NODE_ASSIGN: {
node_t* target = node->children.data[0];
node_t* expr = node->children.data[1];
if (target->kind == NODE_INDEX) {
node_t* ident = target->children.data[0];
sym_entry_t const* entry = sym_try_get_value(
sym,
ident->value
);
if (!entry) {
err_push(&self->err, ident->line,
"undefined array '%s'", ident->value);
return;
}
if (entry->is_const) {
err_push(&self->err, ident->line,
"cannot assign value to constant '%s'",
ident->value);
return;
}
compiler_compile(self, expr, prog);
size_t indexes_count = target->children.size - 1;
for (size_t i=1; i<target->children.size; i++) {
compiler_compile(
self,
target->children.data[i],
prog
);
}
prog_add_instr(prog, OP_PUSH, entry->local_addr);
prog_add_instr(prog, OP_ASTORE, indexes_count);
} else {
compiler_compile(self, expr, prog);
sym_entry_t* entry = sym_try_get_value(
sym,
target->value
);
int status =
sym_try_assign(
sym,
target->value, self->loc_counter
);
assert(entry);
if (entry->is_const) {
err_push(&self->err, target->line,
"cannot assign value to constant '%s'",
target->value);
return;
}
if (!status)
{
err_push(&self->err, target->line,
"cannot assign value to '%s'",
target->value);
return;
}
prog_add_instr(
prog,
OP_LOCAL_STORE,
entry->id
);
}
} break;
case NODE_CONSTDECL:
case NODE_VARDECL: {
node_t const* ident = node->children.data[0];
node_t* expr = node->children.data[1];
compiler_compile(self, expr, prog);
int status =
sym_declare(
sym, ident->value,
self->loc_counter,
node->kind == NODE_CONSTDECL
);
assert(status);
prog_add_instr(prog, OP_LOCAL_STORE, self->loc_counter);
self->loc_counter++;
} break;
case NODE_IDENT: {
char const* name = node->value;
sym_entry_t const* entry = sym_try_get_value(
sym,
name
);
if (!entry) {
err_push(&self->err, node->line,
"undefined '%s'", name);
return;
}
prog_add_instr(prog, OP_LOCAL_LOAD, entry->id);
} break;
case NODE_ARRAY: {
for (size_t i=0; i<node->children.size; i++)
{
size_t k = node->children.size - 1 - i;
compiler_compile(self, node->children.data[k], prog);
}
prog_add_instr(prog, OP_MK_ARRAY, node->children.size);
} break;
case NODE_LT: {
compiler_compile(self, node->children.data[0], prog);
compiler_compile(self, node->children.data[1], prog);
prog_add_instr(prog, OP_LT, CCM_NO_PARAM);
} break;
case NODE_LE: {
compiler_compile(self, node->children.data[0], prog);
compiler_compile(self, node->children.data[1], prog);
prog_add_instr(prog, OP_GT, CCM_NO_PARAM);
prog_add_instr(prog, OP_NOT, CCM_NO_PARAM);
} break;
case NODE_GT: {
compiler_compile(self, node->children.data[0], prog);
compiler_compile(self, node->children.data[1], prog);
prog_add_instr(prog, OP_GT, CCM_NO_PARAM);
} break;
case NODE_GE: {
compiler_compile(self, node->children.data[0], prog);
compiler_compile(self, node->children.data[1], prog);
prog_add_instr(prog, OP_LT, CCM_NO_PARAM);
prog_add_instr(prog, OP_NOT, CCM_NO_PARAM);
} break;
case NODE_EQ: {
compiler_compile(self, node->children.data[0], prog);
compiler_compile(self, node->children.data[1], prog);
prog_add_instr(prog, OP_EQ, CCM_NO_PARAM);
} break;
case NODE_NE: {
compiler_compile(self, node->children.data[0], prog);
compiler_compile(self, node->children.data[1], prog);
prog_add_instr(prog, OP_EQ, CCM_NO_PARAM);
prog_add_instr(prog, OP_NOT, CCM_NO_PARAM);
} break;
case NODE_INDEX: {
for (size_t i=0; i<node->children.size; i++)
{
size_t k = node->children.size - 1 - i;
compiler_compile(self, node->children.data[k], prog);
}
prog_add_instr(prog, OP_INDEX, node->children.size - 1);
} break;
case NODE_IN: {
compiler_compile(self, node->children.data[0], prog);
compiler_compile(self, node->children.data[1], prog);
prog_add_instr(prog, OP_IN, CCM_NO_PARAM);
} break;
case NODE_AND: {
compiler_compile_and(self, node, prog);
} break;
case NODE_OR: {
compiler_compile_or(self, node, prog);
} break;
case NODE_NOT: {
compiler_compile(self, node->children.data[0], prog);
prog_add_instr(prog, OP_NOT, CCM_NO_PARAM);
} break;
case NODE_ASSERT_EQ: {
compiler_compile(self, node->children.data[0], prog);
prog_add_instr(prog, OP_ASSERT_EQ, CCM_NO_PARAM);
} break;
case NODE_ASSERT_NE: {
compiler_compile(self, node->children.data[0], prog);
prog_add_instr(prog, OP_ASSERT_NE, CCM_NO_PARAM);
} break;
case NODE_MODULE: {
for (size_t i=0; i<node->children.size; i++)
{
compiler_compile(self,
node->children.data[i],
prog);
}
} break;
case NODE_STR: {
size_t id = prog_add_new_constant(
prog,
ccm_to_str(&self->module->ccm,
node->value,
node->line)
);
prog_add_instr(prog, OP_PUSH, id);
} break;
case NODE_NUM: {
size_t id = prog_add_new_constant(
prog,
ccm_to_num(&self->module->ccm,
atof(node->value),
node->line)
);
prog_add_instr(prog, OP_PUSH, id);
} break;
case NODE_TUPLE: {
for (size_t i=0; i<node->children.size; i++)
{
size_t k = node->children.size - 1 - i;
compiler_compile(self,
node->children.data[k],
prog);
}
prog_add_instr(prog, OP_MK_TUPLE, node->children.size);
} break;
case NODE_BOOL: {
int val = strcmp(node->value, "true") == 0;
CCM value = ccm_to_boolean(ccm, val, node->line);
size_t id = prog_add_new_constant(prog, value);
prog_add_instr(prog, OP_PUSH, id);
} break;
case NODE_SUB: {
compiler_compile(self, node->children.data[0], prog);
if (node->children.size == 2)
{
compiler_compile(self, node->children.data[1], prog);
prog_add_instr(prog, OP_SUB, CCM_NO_PARAM);
}
else
{
prog_add_instr(prog, OP_USUB, CCM_NO_PARAM);
}
} break;
case NODE_ADD:
case NODE_MUL:
case NODE_DIV:
case NODE_MOD:
case NODE_POW: {
compiler_compile(self, node->children.data[0], prog);
compiler_compile(self, node->children.data[1], prog);
Opcode op;
switch (node->kind)
{
case NODE_ADD: op = OP_ADD; break;
case NODE_MUL: op = OP_MUL; break;
case NODE_DIV: op = OP_DIV; break;
case NODE_MOD: op = OP_MOD; break;
case NODE_POW: op = OP_POW; break;
default: abort();
}
prog_add_instr(prog, op, CCM_NO_PARAM);
} break;
default: {
fprintf(stderr, "cannot compile node %s\n",
NodeKindStr[node->kind]);
abort();
}
}
}
void compiler_compile_or(compiler_t* self,
node_t* node,
prog_t* prog)
{
ccm_t* ccm = &self->module->ccm;
vec_t to_true;
vec_init(&to_true);
for (size_t i=0; i<node->children.size; i++)
{
compiler_compile(self, node->children.data[i], prog);
prog_add_instr(prog, OP_NOT, 0);
size_t addr = prog_add_instr(prog, OP_BRF, 0);
vec_push(&to_true, (void*) addr);
}
// FALSE
prog_add_instr(prog, OP_PUSH, prog_add_new_constant(
prog,
ccm_to_boolean(ccm, 0, node->line)
));
size_t to_end = prog_add_instr(prog, OP_BR, 0);
// TRUE
size_t true_point = prog->instrs.size;
prog_add_instr(prog, OP_PUSH, prog_add_new_constant(
prog,
ccm_to_boolean(ccm, 1, node->line)
));
for (size_t i=0; i<to_true.size; i++)
{
size_t k = ((size_t) to_true.data[i]);
((instr_t*) prog->instrs.data[k])->param = true_point;
}
size_t end_point = prog->instrs.size;
((instr_t*) prog->instrs.data[to_end])->param = end_point;
vec_free(&to_true);
}
void compiler_compile_and(compiler_t* self,
node_t* node,
prog_t* prog)
{
ccm_t* ccm = &self->module->ccm;
vec_t to_false;
vec_init(&to_false);
for (size_t i=0; i<node->children.size; i++)
{
compiler_compile(self, node->children.data[i], prog);
size_t addr = prog_add_instr(prog, OP_BRF, 0);
vec_push(&to_false, (void*) addr);
}
// TRUE
prog_add_instr(prog, OP_PUSH, prog_add_new_constant(
prog,
ccm_to_boolean(ccm, 1, node->line)
));
size_t to_end = prog_add_instr(prog, OP_BR, 0);
// FALSE
size_t false_point = prog_add_instr(
prog,
OP_PUSH,
prog_add_new_constant(
prog,
ccm_to_boolean(ccm, 0, node->line)
)
);
for (size_t i=0; i<to_false.size; i++)
{
size_t k = ((size_t) to_false.data[i]);
((instr_t*) prog->instrs.data[k])->param = false_point;
}
size_t end_point = prog->instrs.size;
((instr_t*) prog->instrs.data[to_end])->param = end_point;
vec_free(&to_false);
}
void compiler_compile_if(compiler_t* self,
node_t* node,
prog_t* prog,
vec_t* to_end)
{
assert(self); assert(node); assert(prog);
if (node->kind != NODE_IF)
{
compiler_compile(self, node, prog);
return;
}
node_t* cond = node->children.data[0];
node_t* block = node->children.data[1];
node_t* next = node->children.size == 3 ?
node->children.data[2] :
NULL;
compiler_compile_if(self, cond, prog, to_end);
int cond_point = prog_add_instr(prog, OP_BRF, 0);
compiler_compile_if(self, block, prog, to_end);
int end_point = prog_add_instr(prog, OP_BR, 0); // to end
int* addr = malloc(sizeof(int));
*addr = end_point;
vec_push(to_end, (void*) addr);
int next_pos = prog->instrs.size;
((instr_t*) prog->instrs.data[cond_point])->param
= next_pos;
if (next)
{
compiler_compile_if(self, next, prog, to_end);
}
}