diff --git a/doc/grammar.bnf b/doc/grammar.bnf index c80f9bf..5024392 100644 --- a/doc/grammar.bnf +++ b/doc/grammar.bnf @@ -17,7 +17,7 @@ LITERAL ::= | INDEX | opar EXPR cpar INDEX ::= -| TUPLE osquare (EXPR (comma EXPR)*)? csquare +| (TUPLE|str) osquare (EXPR (comma EXPR)*)? csquare TUPLE ::= | opar EXPR+ cpar -BUILTIN ::= num | bool +BUILTIN ::= num | bool | str diff --git a/lib/ccm.c b/lib/ccm.c index 5a0012f..692272c 100644 --- a/lib/ccm.c +++ b/lib/ccm.c @@ -1,4 +1,5 @@ #include "ccm.h" +#include "str.h" void ccm_init(ccm_t* self) { @@ -45,10 +46,14 @@ CCM ccm_from_value(ccm_t* self, value_t* value) return ccm_to_num(self, value->data.num, value->line); } break; + case TYPE_STR: { + return ccm_to_str(self, value->data.str, value->line); + } break; + case TYPE_BOOLEAN: { return ccm_to_boolean( - self, - value->data.boolean, + self, + value->data.boolean, value->line ); } break; @@ -64,14 +69,14 @@ CCM ccm_from_value(ccm_t* self, value_t* value) } return ccm_to_tuple( - self, - vec, + self, + vec, value->line ); } break; default: { - fprintf(stderr, + fprintf(stderr, "cannot convert value of type <%s> to CCM\n", TypeStr[value->type]); abort(); @@ -165,6 +170,31 @@ CCM ccm_to_boolean(ccm_t* self, int value, int line) return self->values.size - 1; } +int ccm_is_str(ccm_t* self, CCM value) +{ + assert(self); + return ((value_t*) self->values.data[value])->type == TYPE_STR; +} + +char* ccm_from_str(ccm_t* self, CCM value) +{ + assert(self); + return ((value_t*) self->values.data[value])->data.str; +} + +CCM ccm_to_str(ccm_t* self, char* value, int line) +{ + assert(self); + assert(value); + + value_t* val = malloc(sizeof(value_t)); + value_init_str(val, value, line); + + vec_push(&self->values, val); + + return self->values.size - 1; +} + void ccm_push(ccm_t* self, CCM value) { assert(self); @@ -191,7 +221,7 @@ void ccm_in(ccm_t* self) assert(self); CCM ccm_rhs = ccm_pop(self); CCM ccm_lhs = ccm_pop(self); - + int line = ((value_t*) self->values.data[ccm_lhs])->line; if (!ccm_is_tuple(self, ccm_rhs)) @@ -200,7 +230,7 @@ void ccm_in(ccm_t* self) return; } - + vec_t* rhs = ccm_from_tuple(self, ccm_rhs); value_t* val = ((value_t*)self->values.data[ccm_lhs]); @@ -223,6 +253,21 @@ void ccm_add(ccm_t* self) CCM ccm_lhs = ccm_pop(self); int line = ((value_t*) self->values.data[ccm_lhs])->line; + if (ccm_is_str(self, ccm_rhs) + && ccm_is_str(self, ccm_lhs)) + { + char const* lhs = ccm_from_str(self, ccm_lhs); + char const* rhs = ccm_from_str(self, ccm_rhs); + + str_t s; + str_init(&s); + str_push_cstr(&s, lhs); + str_push_cstr(&s, rhs); + ccm_push(self, ccm_to_str(self, s.value, line)); + str_free(&s); + return; + } + double rhs = ccm_from_num(self, ccm_rhs); double lhs = ccm_from_num(self, ccm_lhs); @@ -260,6 +305,32 @@ void ccm_mul(ccm_t* self) CCM ccm_lhs = ccm_pop(self); int line = ((value_t*) self->values.data[ccm_lhs])->line; + if (ccm_is_str(self, ccm_rhs) + && ccm_is_num(self, ccm_lhs)) + { + CCM tmp = ccm_lhs; + ccm_lhs = ccm_rhs; + ccm_rhs = tmp; + } + + if (ccm_is_str(self, ccm_lhs) + && ccm_is_num(self, ccm_rhs)) + { + int count = ccm_from_num(self, ccm_rhs); + char const* val = ccm_from_str(self, ccm_lhs); + str_t s; + str_init(&s); + + for (int i=0; imodule->ccm, + node->value, + node->line) + ); + + prog_add_instr(prog, OP_PUSH, id); + } break; + case NODE_NUM: { size_t id = prog_add_new_constant( prog, diff --git a/lib/exec.c b/lib/exec.c index a918c43..4ebc617 100644 --- a/lib/exec.c +++ b/lib/exec.c @@ -46,40 +46,81 @@ void exec_instr(exec_t* self, { case OP_INDEX: { CCM ccm_target = ccm_pop(ccm); - vec_t* target = ccm_from_tuple(ccm, ccm_target); + value_t* result = NULL; - for (int i=0; ivalues.data[ccm_target])->line + ; + + if (param != 1) + { + err_push(&self->err, line, + "index out of bounds"); + return; + } + + char const* target = + ccm_from_str(ccm, ccm_target); + + size_t size = strlen(target); + CCM ccm_idx = ccm_pop(ccm); int idx = ccm_from_num(ccm, ccm_idx); if (idx < 0) { - idx = target->size + idx; + idx = size + idx; } - if (idx < 0 || idx >= (ssize_t) target->size) + if (idx < 0 || idx >= (ssize_t) size) { - assert(target->size > 0); - err_push(&self->err, ((value_t*)target->data[0])->line, + assert(size > 0); + err_push(&self->err, line, "index out of bounds"); return; } - value_t* val = target->data[idx]; + char buf[2] = {target[idx], '\0'}; + ccm_push(ccm, ccm_to_str(ccm, buf, line)); + } + else if (ccm_is_tuple(ccm, ccm_target)) + { + vec_t* target = ccm_from_tuple(ccm, ccm_target); - if (i == param - 1) + for (int i=0; idata.tuple; + CCM ccm_idx = ccm_pop(ccm); + int idx = ccm_from_num(ccm, ccm_idx); + + if (idx < 0) + { + idx = target->size + idx; + } + + if (idx < 0 || idx >= (ssize_t) target->size) + { + assert(target->size > 0); + err_push(&self->err, ((value_t*)target->data[0])->line, + "index out of bounds"); + return; + } + + value_t* val = target->data[idx]; + + if (i == param - 1) { + result = val; + } else { + target = val->data.tuple; + } } + ccm_push(ccm, ccm_from_value(ccm, result)); + } else { + assert(0); } - ccm_push(ccm, ccm_from_value(ccm, result)); self->pc++; } break; @@ -106,7 +147,7 @@ void exec_instr(exec_t* self, self->pc++; } else - { + { self->pc = param; } } break; @@ -124,7 +165,7 @@ void exec_instr(exec_t* self, } if (value_equals(values->data[0], - values->data[1]) == !oracle) + values->data[1]) == !oracle) { char lhs[CCM_STRLEN]; value_str(values->data[0], lhs, CCM_STRLEN); diff --git a/lib/lexer.c b/lib/lexer.c index 91cede6..dca1c3c 100644 --- a/lib/lexer.c +++ b/lib/lexer.c @@ -144,6 +144,11 @@ node_t* lexer_try_new_next(lexer_t* self) node_t* node = NULL; + if ( (node = lexer_try_new_str(self)) ) + { + return node; + } + if ( (node = lexer_try_new_num(self)) ) { return node; @@ -360,3 +365,73 @@ node_t* lexer_try_new_num(lexer_t* self) return node; } + +node_t* lexer_try_new_str(lexer_t* self) +{ + assert(self); + size_t cursor = self->cursor; + str_t value; + str_init(&value); + + if (cursor >= strlen(self->source) + || self->source[cursor] != '"') + { + str_free(&value); + return NULL; + } + + cursor++; + + while (cursor < strlen(self->source) + && self->source[cursor] != '"') + { + if (self->source[cursor] == '\\' + && cursor + 1 < strlen(self->source)) + { + switch (self->source[cursor + 1]) + { + case '\\': { + str_push(&value, '\\'); + } break; + case 'n': { + str_push(&value, '\n'); + } break; + case 'r': { + str_push(&value, '\r'); + } break; + case 't': { + str_push(&value, '\t'); + } break; + case 'e': { + str_push(&value, '\e'); + } break; + case '"': { + str_push(&value, '"'); + } break; + } + + cursor += 2; + } + else { + str_push(&value, self->source[cursor]); + cursor++; + } + } + + if (cursor >= strlen(self->source) + || self->source[cursor] != '"') + { + str_free(&value); + return NULL; + } + + cursor++; + self->cursor = cursor; + node_t* node = malloc(sizeof(node_t)); + node_init(node, NODE_STR, value.value, self->line); + + str_free(&value); + + return node; +} + diff --git a/lib/lexer.h b/lib/lexer.h index 0f43bd9..eee9c75 100644 --- a/lib/lexer.h +++ b/lib/lexer.h @@ -53,4 +53,5 @@ node_t* lexer_try_new_text(lexer_t* self, int has_value); node_t* lexer_try_new_num(lexer_t* self); +node_t* lexer_try_new_str(lexer_t* self); #endif diff --git a/lib/module.c b/lib/module.c index f85e42e..42218e4 100644 --- a/lib/module.c +++ b/lib/module.c @@ -45,12 +45,7 @@ int module_load(module_t* self, char const* path) parser_init(&parser, &lexer); node_t* ast = parser_try_new_parse(&parser); - - if (!ast) - { - goto free_parser; - } - + if (!err_is_ok(&lexer.err) || !err_is_ok(&parser.err)) { err_print_stack_trace(&lexer.err); @@ -58,6 +53,12 @@ int module_load(module_t* self, char const* path) err_push(&self->err, lexer.line, "invalid module"); goto free_parser; } + + if (!ast) + { + goto free_parser; + } + compiler_t compiler; compiler_init(&compiler, self); diff --git a/lib/node.h b/lib/node.h index abb679d..dbba48c 100644 --- a/lib/node.h +++ b/lib/node.h @@ -11,7 +11,8 @@ G(NODE_POW), G(NODE_ADD), G(NODE_SUB), G(NODE_MUL), \ G(NODE_DIV), G(NODE_MOD), G(NODE_COMMA), G(NODE_TUPLE), \ G(NODE_ASSERT_EQ), G(NODE_ASSERT_NE), G(NODE_BOOL), \ G(NODE_AND), G(NODE_OR), G(NODE_NOT), G(NODE_IN), \ -G(NODE_OSQUARE), G(NODE_CSQUARE), G(NODE_INDEX) +G(NODE_OSQUARE), G(NODE_CSQUARE), G(NODE_INDEX), \ +G(NODE_STR) CCM_ENUM_H(NodeKind, NODE_KIND); diff --git a/lib/parser.c b/lib/parser.c index 186cfab..0903fe1 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -22,7 +22,14 @@ void parser_free(parser_t* self) node_t* parser_try_new_parse(parser_t* self) { assert(self); - return CCM_TRY(parser_try_new_module); + node_t* res = CCM_TRY(parser_try_new_module); + + if (self->lexer->cursor < (ssize_t)strlen(self->lexer->source)) + { + err_push(&self->err, res ? res->line : 0, "unexpected end"); + } + + return res; } node_t* parser_try_new_rule_ll1(parser_t* self, @@ -427,6 +434,7 @@ node_t* parser_try_new_literal(parser_t* self) return tuple; } + if (!lexer_consume_next(self->lexer, NODE_OPAR)) { return NULL; @@ -448,6 +456,13 @@ node_t* parser_try_new_literal(parser_t* self) return expr; } + if (lexer_peek_kind(self->lexer, NODE_STR, 0) + && lexer_peek_kind(self->lexer, NODE_OSQUARE, 1)) + { + node_t* target = CCM_TRY(parser_try_new_builtin); + return CCM_TRY_LL1(parser_try_new_index, target); + } + return CCM_TRY(parser_try_new_builtin); } @@ -484,11 +499,6 @@ node_t* parser_try_new_index(parser_t* self, node_t* target) return node; } -node_t* parser_try_new_expr_lst(parser_t* self) -{ - assert(self); -} - node_t* parser_try_new_tuple(parser_t* self) { assert(self); @@ -554,6 +564,7 @@ node_t* parser_try_new_builtin(parser_t* self) ( node->kind == NODE_NUM || node->kind == NODE_BOOL + || node->kind == NODE_STR ) ) { diff --git a/lib/parser.h b/lib/parser.h index f1b0e17..d75b796 100644 --- a/lib/parser.h +++ b/lib/parser.h @@ -36,7 +36,6 @@ node_t* parser_try_new_not(parser_t* self); node_t* parser_try_new_pow(parser_t* self); node_t* parser_try_new_literal(parser_t* self); node_t* parser_try_new_index(parser_t* self, node_t* target); -node_t* parser_try_new_expr_lst(parser_t* self); node_t* parser_try_new_tuple(parser_t* self); node_t* parser_try_new_builtin(parser_t* self); diff --git a/lib/type.h b/lib/type.h index e9fb731..9f1a0eb 100644 --- a/lib/type.h +++ b/lib/type.h @@ -6,7 +6,8 @@ #define TYPES(G) \ G(TYPE_NUM), \ G(TYPE_TUPLE), \ -G(TYPE_BOOLEAN) +G(TYPE_BOOLEAN), \ +G(TYPE_STR) CCM_ENUM_H(Type, TYPES); diff --git a/lib/value.c b/lib/value.c index fb9af44..480007c 100644 --- a/lib/value.c +++ b/lib/value.c @@ -25,6 +25,16 @@ void value_init_boolean(value_t* self, int boolean, int line) self->line = line; } +void value_init_str(value_t* self, char const* value, int line) +{ + assert(self); + assert(value); + + self->data.str = strdup(value); + self->type = TYPE_STR; + self->line = line; +} + value_t* value_new_clone(value_t* self) { assert(self); @@ -33,6 +43,9 @@ value_t* value_new_clone(value_t* self) switch (self->type) { + case TYPE_STR: { + value_init_str(value, self->data.str, self->line); + } break; case TYPE_NUM: { value_init_num(value, self->data.num, self->line); } break; @@ -72,6 +85,11 @@ void value_free(value_t* self) vec_free(self->data.tuple); free(self->data.tuple); } + + if (self->type == TYPE_STR) + { + free(self->data.str); + } } size_t value_str(value_t* self, char* buffer, size_t size) @@ -82,6 +100,11 @@ size_t value_str(value_t* self, char* buffer, size_t size) switch (self->type) { + case TYPE_STR: { + sz += snprintf(buffer + sz, size - sz, "%s", + self->data.str); + } break; + case TYPE_NUM: { sz += snprintf(buffer + sz, size - sz, "%lf", self->data.num); @@ -129,6 +152,10 @@ int value_equals(value_t* self, value_t* rhs) switch (self->type) { + case TYPE_STR: { + return strcmp(self->data.str, rhs->data.str) == 0; + } break; + case TYPE_NUM: { return self->data.num == rhs->data.num; } break; diff --git a/lib/value.h b/lib/value.h index c150d53..931d117 100644 --- a/lib/value.h +++ b/lib/value.h @@ -10,6 +10,7 @@ typedef struct { double num; vec_t* tuple; int boolean; + char* str; } data; Type type; @@ -19,6 +20,7 @@ typedef struct { void value_init_num(value_t* self, double num, int line); void value_init_new_tuple(value_t* self, vec_t* values, int line); void value_init_boolean(value_t* self, int boolean, int line); +void value_init_str(value_t* self, char const* value, int line); value_t* value_new_clone(value_t* self); void value_free(value_t* self);