roza/lib/compiler.c

522 lines
15 KiB
C

#include "compiler.h"
#include "lib/commons.h"
#include "lib/mod.h"
#include "lib/prepass.h"
#include "lib/program.h"
#include "lib/sym.h"
#include "lib/tysy.h"
#include "node.h"
#include "fun.h"
#include "locals.h"
/*
 * Prepare a compiler instance for use.
 *
 * compiler  instance to initialise (must not be NULL)
 * id        pointer to the shared id counter; stored as-is and only
 *           dereferenced later, when functions and parameters are declared
 * sym       symbol table used for name lookups (must not be NULL)
 * tysy      type system used to create constant values (must not be NULL)
 * err       error reporter (must not be NULL)
 *
 * The new compiler has no parent and starts at scope depth 0. None of the
 * pointers are copied or owned: the caller keeps them alive.
 */
void compiler_init(compiler_t* compiler,
                   int* id,
                   sym_t* sym,
                   tysy_t* tysy,
                   err_t* err)
{
    assert(compiler != NULL);
    assert(sym != NULL);
    assert(tysy != NULL);
    assert(err != NULL);

    // Borrowed collaborators.
    compiler->id = id;
    compiler->sym = sym;
    compiler->tysy = tysy;
    compiler->err = err;

    // Own state: top-level compiler, outermost scope.
    compiler->parent = NULL;
    compiler->scope = 0;
}
/*
 * Counterpart of compiler_init.
 *
 * The compiler only borrows the pointers it was initialised with, so there
 * is nothing to release here; the function just validates its argument.
 */
void compiler_free(compiler_t* compiler)
{
    assert(compiler != NULL);
}
// Defined after compiler_run because it recurses into it.
static int compiler_emit_short_circuit(compiler_t* compiler, node_t* node,
                                       program_t* program, int is_and);

/*
 * Report an out-of-memory failure through compiler->err.
 * Always returns -1 so callers can write `return compiler_fail_oom(...)`.
 */
static int compiler_fail_oom(compiler_t* compiler, node_t* node)
{
    char msg[RZ_STR_LIMIT];
    snprintf(msg, RZ_STR_LIMIT, "out of memory");
    err_fatal(compiler->err, msg, node->line);
    return -1;
}

/*
 * Count the NODE_IF nodes of an if / else-if chain starting at `node`.
 *
 * This mirrors the traversal of compiler_run_if, which records exactly one
 * end point per NODE_IF it visits and only follows the third child when that
 * child is itself a NODE_IF. The result is therefore the number of end-point
 * slots compiler_run_if needs. Always at least 1.
 */
static size_t compiler_if_chain_length(node_t* node)
{
    size_t length = 1;

    while (node->children.size >= 3)
    {
        node_t* next = (node_t*) node->children.data[2];

        if (next->type != NODE_IF)
        {
            break;
        }

        length++;
        node = next;
    }

    return length;
}

/*
 * Compile the AST rooted at `node` into `program`.
 *
 * compiler  compiler state (symbol table, type system, error reporter,
 *           current scope depth); must not be NULL
 * node      AST node to compile; must not be NULL
 * program   program receiving the emitted instructions and constants
 *
 * Children are compiled recursively. Errors are reported through
 * compiler->err; the return values of recursive calls are ignored, so a
 * failure inside a child is NOT reflected in the value returned here.
 *
 * Returns 0 on normal completion and -1 when compilation of this very node
 * stops early: an undefined identifier (NODE_IDENT), an unknown declaration
 * entry (NODE_VARDECL) or an allocation failure. An undefined assignment
 * target (NODE_VARSET) or callee (NODE_FUNCALL) is reported but still
 * returns 0.
 *
 * An unknown node type prints a message on stderr and aborts the process.
 */
int compiler_run(compiler_t* compiler, node_t* node, program_t* program)
{
    assert(compiler);
    assert(node);

    switch (node->type)
    {
    case NODE_MOD: {
        for (size_t i=0; i<node->children.size; i++)
        {
            compiler_run(compiler, (node_t*) node->children.data[i], program);
        }
    } break;

    // Literals: register the constant in the program, then push it.
    case NODE_NUM: {
        double value = atof(node->value.data);
        value_t* val = tysy_new_num(compiler->tysy, value, node->line);
        param_t param = (param_t) program_push_new_value(program,
                                                         (struct value*) val);
        program_push_instr(program, OP_PUSH, param);
    } break;

    case NODE_BOOL: {
        // Anything other than the exact text "true" compiles to false.
        int value = strcmp(node->value.data, "true") == 0;
        value_t* val = tysy_new_bool(compiler->tysy, value, node->line);
        param_t param = (param_t) program_push_new_value(program,
                                                         (struct value*) val);
        program_push_instr(program, OP_PUSH, param);
    } break;

    case NODE_STR: {
        char* value = node->value.data;
        value_t* val = tysy_new_str(compiler->tysy, value, node->line);
        param_t param = (param_t) program_push_new_value(program,
                                                         (struct value*) val);
        program_push_instr(program, OP_PUSH, param);
    } break;

    case NODE_ASSERT: {
        assert(node->children.size == 1);
        compiler_run(compiler, node_child(node, 0), program);
        program_push_instr(program, OP_ASSERT, RZ_NO_PARAM);
    } break;

    case NODE_NE:
    case NODE_EQ: {
        assert(node->children.size == 2);

        for (size_t i=0; i<node->children.size; i++)
        {
            compiler_run(compiler, node_child(node, i), program);
        }

        program_push_instr(program,
                           node->type == NODE_EQ ? OP_EQ : OP_NE,
                           RZ_NO_PARAM);
    } break;

    case NODE_LT:
    case NODE_LE:
    case NODE_GT:
    case NODE_GE: {
        assert(node->children.size == 2);

        for (size_t i=0; i<node->children.size; i++)
        {
            compiler_run(compiler, node_child(node, i), program);
        }

        Opcode op;

        switch (node->type)
        {
        case NODE_LT: op = OP_LT; break;
        case NODE_LE: op = OP_LE; break;
        case NODE_GT: op = OP_GT; break;
        case NODE_GE: op = OP_GE; break;
        // Unreachable given the outer case labels; the return keeps `op`
        // from being read uninitialised when asserts are compiled out.
        default: assert(0); return -1;
        }

        program_push_instr(program, op, RZ_NO_PARAM);
    } break;

    case NODE_ADD:
    case NODE_SUB:
    case NODE_MUL:
    case NODE_DIV:
    case NODE_MODULO:
    case NODE_POW: {
        for (size_t i=0; i<node->children.size; i++)
        {
            compiler_run(compiler, node_child(node, i), program);
        }

        Opcode op;

        if (node->type == NODE_SUB
            && node->children.size == 1)
        {
            // Single operand: unary minus.
            op = OP_USUB;
        }
        else if (node->type == NODE_ADD
                 && node_find_first(node, NODE_STR))
        {
            // NOTE(review): node_find_first presumably searches below `node`
            // for a node of the given type - confirm against node.h.
            op = OP_STRCAT;
        }
        else if (node->type == NODE_MUL
                 && node_find_first(node, NODE_STR)
                 && node_find_first(node, NODE_NUM))
        {
            op = OP_STRDUP;
        }
        else
        {
            switch (node->type)
            {
            case NODE_ADD: op = OP_ADD; break;
            case NODE_SUB: op = OP_SUB; break;
            case NODE_MUL: op = OP_MUL; break;
            case NODE_DIV: op = OP_DIV; break;
            case NODE_MODULO: op = OP_MODULO; break;
            case NODE_POW: op = OP_POW; break;
            // Unreachable; see the comparison operators above.
            default: assert(0); return -1;
            }
        }

        program_push_instr(program, op, RZ_NO_PARAM);
    } break;

    case NODE_AND:
    case NODE_OR: {
        if (compiler_emit_short_circuit(compiler, node, program,
                                        node->type == NODE_AND) != 0)
        {
            return -1;
        }
    } break;

    case NODE_NOT: {
        assert(node->children.size > 0);
        compiler_run(compiler, (node_t*) node->children.data[0], program);
        program_push_instr(program, OP_NOT, RZ_NO_PARAM);
    } break;

    case NODE_VARDECL: {
        char* name = ((node_t*) node->children.data[0])->value.data;

        // Look for an entry in SYM_PRE state first, then for one that is
        // already in SYM_DECL state.
        sym_entry_t* entry = sym_try_find_by_name(compiler->sym, name,
                                                  compiler->scope,
                                                  SYM_PRE,
                                                  node);

        if (!entry)
        {
            entry = sym_try_find_by_name(compiler->sym, name,
                                         compiler->scope,
                                         SYM_DECL,
                                         node);
        }

        if (!entry)
        {
            char msg[RZ_STR_LIMIT];
            snprintf(msg, RZ_STR_LIMIT, "unknown entry %s\n", name);
            err_fatal(compiler->err, msg, node->line);
            err_dump(compiler->err);
            // Bail out instead of dereferencing a NULL entry below.
            return -1;
        }

        entry->state = SYM_DECL;
        int id = entry->id;

        compiler_run(compiler, (node_t*) node->children.data[1], program);
        program_push_instr(program, OP_STORE, id);
    } break;

    case NODE_VARSET: {
        char* name = ((node_t*) node->children.data[0])->value.data;
        sym_entry_t* entry = sym_try_find_by_name(compiler->sym, name,
                                                  compiler->scope,
                                                  SYM_DECL,
                                                  node);

        if (!entry)
        {
            char msg[RZ_STR_LIMIT];
            snprintf(msg, RZ_STR_LIMIT,
                     "cannot assign value"
                     " to undefined variable '%s'.", name);
            err_fatal(compiler->err, msg, node->line);
        }
        else
        {
            int id = entry->id;
            compiler_run(compiler, (node_t*) node->children.data[1], program);
            program_push_instr(program, OP_STORE, id);
        }
    } break;

    case NODE_IDENT: {
        char* name = node->value.data;
        sym_entry_t* entry = sym_try_find_by_name(compiler->sym, name,
                                                  compiler->scope,
                                                  SYM_DECL,
                                                  node);

        if (!entry)
        {
            char msg[RZ_STR_LIMIT];
            snprintf(msg, RZ_STR_LIMIT, "undefined variable '%s'.",
                     name);
            err_fatal(compiler->err, msg, node->line);
            return -1;
        }

        program_push_instr(program, OP_LOAD, entry->id);
    } break;

    case NODE_SCOPE: {
        compiler_run(compiler, (node_t*) node->children.data[0], program);
    } break;

    case NODE_BLOCK: {
        // Children are compiled one scope level deeper.
        compiler->scope++;

        for (size_t i=0; i<node->children.size; i++)
        {
            compiler_run(compiler, (node_t*) node->children.data[i], program);
        }

        compiler->scope--;
    } break;

    case NODE_IF: {
        // compiler_run_if stores one end point per NODE_IF of the chain and
        // has no way to know the buffer size, so size the buffer up front.
        size_t const capacity = compiler_if_chain_length(node);
        int* end_points = calloc(capacity, sizeof *end_points);

        if (!end_points)
        {
            return compiler_fail_oom(compiler, node);
        }

        size_t size = 0;
        compiler_run_if(compiler, node, program, end_points, &size);
        assert(size <= capacity);

        // Every branch of the chain jumps to the first instruction after it.
        for (size_t i=0; i<size; i++)
        {
            program->params[end_points[i]] = program->size;
        }

        free(end_points);
    } break;

    case NODE_FUNDECL: {
        char* fun_name = ((node_t*) node->children.data[0])->value.data;

        fun_t* fun = malloc(sizeof *fun);

        if (!fun)
        {
            return compiler_fail_oom(compiler, node);
        }

        fun_init(fun, (struct tysy*) compiler->tysy, compiler->err);
        value_t* value = tysy_new_fun(compiler->tysy, fun, node->line);

        sym_entry_t* entry = sym_try_find_by_name(compiler->sym,
                                                  fun_name,
                                                  compiler->scope,
                                                  SYM_PRE,
                                                  node);
        assert(entry);
        entry->state = SYM_DECL;
        entry->id = *compiler->id;

        // Compile the function body into fun->program with a child compiler
        // working on the function's own symbol table.
        {
            compiler_t comp;
            compiler_init(&comp, compiler->id, (sym_t*) fun->sym,
                          compiler->tysy, compiler->err);
            comp.parent = compiler;
            comp.sym->parent = compiler->sym;

            tysolver_t tysolver;
            tysolver_init(&tysolver, (sym_t*) fun->sym, compiler->tysy);

            prepass_t pre;
            prepass_init(&pre, compiler->id, (sym_t*) fun->sym, compiler->tysy,
                         &tysolver, compiler->err);

            // self: the function is declared under its own name inside its
            // own symbol table.
            int id = sym_declare((sym_t*) fun->sym,
                                 (*compiler->id)++,
                                 fun_name,
                                 tysy_try_find_type(comp.tysy, "fun"),
                                 comp.scope,
                                 SYM_DECL,
                                 node);
            fun->base = id;

            prepass_run(&pre, (node_t*) node->children.data[2]);

            // Parameters (child 1) are declared starting from this id.
            fun->arg_base = *compiler->id;

            compiler_run(&comp, (node_t*) node->children.data[1], &fun->program);
            compiler_run(&comp, (node_t*) node->children.data[2], &fun->program);

            // Trailing OP_RET emitted unconditionally after the body.
            program_push_instr(&fun->program, OP_RET, RZ_NO_PARAM);

            prepass_free(&pre);
            tysolver_free(&tysolver);
            compiler_free(&comp);
        }

        param_t param = program_push_new_value(program, (struct value*) value);
        program_push_instr(program, OP_PUSH, param);
        program_push_instr(program, OP_MKFUN, RZ_NO_PARAM);
        program_push_instr(program, OP_STORE, entry->id);
    } break;

    case NODE_PARAMS: {
        // Each parameter gets a fresh id from the shared counter.
        for (size_t i=0; i<node->children.size; i++)
        {
            node_t* child = (node_t*) node->children.data[i];
            char* name = child->value.data;

            sym_declare(compiler->sym, (*compiler->id)++, name, NULL,
                        compiler->scope, SYM_DECL,
                        child);
        }
    } break;

    case NODE_RETURN: {
        compiler_run(compiler, (node_t*) node->children.data[0], program);
        program_push_instr(program, OP_RET, RZ_NO_PARAM);
    } break;

    case NODE_FUNCALL: {
        char* name = ((node_t*) node->children.data[0])->value.data;
        node_t* args = (node_t*) node->children.data[1];

        // Arguments are compiled first, in source order, then the callee.
        for (size_t i=0; i<args->children.size; i++)
        {
            compiler_run(compiler, (node_t*)args->children.data[i], program);
        }

        sym_entry_t* entry = sym_try_find_by_name(compiler->sym,
                                                  name,
                                                  compiler->scope,
                                                  SYM_DECL,
                                                  node);

        if (!entry)
        {
            char msg[RZ_STR_LIMIT];
            snprintf(msg, RZ_STR_LIMIT, "undefined function '%s'", name);
            err_fatal(compiler->err, msg, node->line);
        }
        else
        {
            int id = entry->id;
            program_push_instr(program, OP_LOAD, id);
            program_push_instr(program, OP_CALL, args->children.size);
        }
    } break;

    default: {
        fprintf(stderr, "Cannot compile unknown node '%s'",
                NodeTypeStr[node->type]);
        abort();
    } break;
    }

    return 0;
}

/*
 * Emit a short-circuit evaluation of the operands of a NODE_AND
 * (is_and != 0) or NODE_OR (is_and == 0) node.
 *
 * Layout of the emitted code:
 *
 *     <operand 0>  BRF/BRT exit
 *     <operand 1>  BRF/BRT exit
 *     ...
 *     PUSH <result when no operand branched>   (true for and, false for or)
 *     BR end
 *   exit:
 *     PUSH <result when an operand branched>   (false for and, true for or)
 *   end:
 *
 * The pending branch addresses live in a heap buffer sized from the operand
 * count, so the number of operands is not limited by a fixed-size array.
 *
 * Returns 0 on success, -1 if the buffer cannot be allocated (reported
 * through compiler->err; nothing has been emitted in that case).
 */
static int compiler_emit_short_circuit(compiler_t* compiler, node_t* node,
                                       program_t* program, int is_and)
{
    size_t const count = node->children.size;
    Opcode const branch_op = is_and ? OP_BRF : OP_BRT;
    size_t* jumps = NULL;

    if (count > 0)
    {
        // calloc also rejects a count whose byte size would overflow.
        jumps = calloc(count, sizeof *jumps);

        if (!jumps)
        {
            return compiler_fail_oom(compiler, node);
        }
    }

    for (size_t i=0; i<count; i++)
    {
        compiler_run(compiler, node_child(node, i), program);
        jumps[i] = program_push_instr(program, branch_op, RZ_NO_PARAM);
    }

    // No operand branched away.
    value_t* through = tysy_new_bool(compiler->tysy, is_and ? 1 : 0,
                                     node->line);
    param_t through_param = program_push_new_value(program,
                                                   (struct value*) through);
    program_push_instr(program, OP_PUSH, through_param);
    size_t to_end = program_push_instr(program, OP_BR, RZ_NO_PARAM);

    // Some operand branched away: all pending branches land here.
    size_t const exit_addr = program->size;

    for (size_t i=0; i<count; i++)
    {
        program->params[jumps[i]] = exit_addr;
    }

    free(jumps);

    value_t* shorted = tysy_new_bool(compiler->tysy, is_and ? 0 : 1,
                                     node->line);
    param_t shorted_param = program_push_new_value(program,
                                                   (struct value*) shorted);
    program_push_instr(program, OP_PUSH, shorted_param);

    program->params[to_end] = program->size;

    return 0;
}
/*
 * Compile one link of an if / else-if / else chain.
 *
 * node        NODE_IF node: child 0 is the condition, child 1 the "then"
 *             branch, optional child 2 the "else" part
 * program     program receiving the instructions
 * end_points  caller-provided buffer; the address of the OP_BR emitted after
 *             each "then" branch is appended to it
 * sz          in/out number of used slots in end_points
 *
 * The OP_BR instructions are left unpatched: the caller is expected to point
 * them at the end of the whole chain. One slot is consumed per NODE_IF
 * visited and no bound is checked here, so the buffer must be large enough
 * for the chain.
 *
 * An "else" child that is a NODE_BLOCK is compiled in place, a NODE_IF
 * continues the chain recursively, and any other node type is ignored.
 */
void compiler_run_if(compiler_t* compiler, node_t* node, program_t* program,
                     int* end_points, size_t* sz)
{
    assert(compiler);
    assert(node);

    // Condition, followed by a branch-if-false that skips the "then" part.
    compiler_run(compiler, (node_t*) node->children.data[0], program);
    int skip_then = program_push_instr(program, OP_BRF, RZ_NO_PARAM);

    // "then" part, followed by a jump whose target the caller fills in.
    compiler_run(compiler, (node_t*) node->children.data[1], program);
    int leave_chain = program_push_instr(program, OP_BR, RZ_NO_PARAM);
    end_points[(*sz)++] = leave_chain;

    // A false condition resumes right after the "then" part.
    program->params[skip_then] = program->size;

    if (node->children.size < 3)
    {
        return;
    }

    node_t* tail = (node_t*) node->children.data[2];

    switch (tail->type)
    {
    case NODE_BLOCK:
        compiler_run(compiler, tail, program);
        break;

    case NODE_IF:
        compiler_run_if(compiler, tail, program, end_points, sz);
        break;

    default:
        break;
    }
}