From e7e0789cbb98fa793da8af13b19ea1feda33e594 Mon Sep 17 00:00:00 2001 From: bog Date: Mon, 1 Apr 2024 21:42:36 +0200 Subject: [PATCH] :sparkles: strings arithmetic. --- doc/grammar.bnf | 1 + features/string.sk | 8 +++++ lib/include/lexer.h | 1 + lib/include/node.h | 2 +- lib/include/state.h | 2 ++ lib/include/token.h | 2 +- lib/include/value.h | 4 ++- lib/src/compiler.c | 7 ++++ lib/src/exec.c | 8 +++++ lib/src/lexer.c | 88 ++++++++++++++++++++++++++++++++++++++++++++- lib/src/parser.c | 11 ++++++ lib/src/state.c | 62 ++++++++++++++++++++++++++++++-- lib/src/value.c | 8 +++++ tests/lexer.h | 16 +++++++++ tests/parser.h | 8 +++++ 15 files changed, 222 insertions(+), 6 deletions(-) create mode 100644 features/string.sk diff --git a/doc/grammar.bnf b/doc/grammar.bnf index 6224dc6..97d2fee 100644 --- a/doc/grammar.bnf +++ b/doc/grammar.bnf @@ -17,3 +17,4 @@ BUILTIN ::= | int | bool | float +| string diff --git a/features/string.sk b/features/string.sk new file mode 100644 index 0000000..69a2bcc --- /dev/null +++ b/features/string.sk @@ -0,0 +1,8 @@ +assert "hello" eq "hello" + +assert "hello " + "world" eq "hello world" +assert "a" + "b" + "c" eq "abc" + +assert "a" * 3 eq "aaa" +assert 4 * "b" eq "bbbb" +assert ("a" + "b") * 2 eq "abab" diff --git a/lib/include/lexer.h b/lib/include/lexer.h index b478b5e..03500ff 100644 --- a/lib/include/lexer.h +++ b/lib/include/lexer.h @@ -31,6 +31,7 @@ void lexer_consume_next(struct lexer* self); struct token* lexer_try_new_next(struct lexer* self); struct token* lexer_try_scan_int(struct lexer* self); struct token* lexer_try_scan_float(struct lexer* self); +struct token* lexer_try_scan_string(struct lexer* self); struct token* lexer_try_scan_text(struct lexer* self, char const* text, TokenKind kind); diff --git a/lib/include/node.h b/lib/include/node.h index 96ccd12..26b8904 100644 --- a/lib/include/node.h +++ b/lib/include/node.h @@ -10,7 +10,7 @@ G(NODE_ADD), G(NODE_SUB), G(NODE_MUL),\ G(NODE_DIV), G(NODE_POW), G(NODE_MOD),\ 
G(NODE_USUB), G(NODE_ASSERT_EQ), \ G(NODE_BOOL), G(NODE_AND), G(NODE_OR), \ -G(NODE_NOT), G(NODE_FLOAT) +G(NODE_NOT), G(NODE_FLOAT), G(NODE_STRING) SK_ENUM_H(NodeKind, NODE_KIND); diff --git a/lib/include/state.h b/lib/include/state.h index 0bf9949..6edaf48 100644 --- a/lib/include/state.h +++ b/lib/include/state.h @@ -52,8 +52,10 @@ SK state_push(struct state* self, SK state_push_int(struct state* self, int integer, int line); SK state_push_bool(struct state* self, bool boolean, int line); SK state_push_float(struct state* self, double real, int line); +SK state_push_string(struct state* self, char const* str, int line); TypeKind state_common_num_type(struct state* self, SK lhs, SK rhs); +TypeKind state_type(struct state* self, SK value); double state_as_real(struct state* self, SK lhs); int state_line(struct state* self, SK lhs); diff --git a/lib/include/token.h b/lib/include/token.h index 9923658..fe762c5 100644 --- a/lib/include/token.h +++ b/lib/include/token.h @@ -11,7 +11,7 @@ G(TOKEN_MUL), G(TOKEN_DIV), G(TOKEN_MOD), \ G(TOKEN_POW), G(TOKEN_OPAR), G(TOKEN_CPAR), \ G(TOKEN_ASSERT), G(TOKEN_ASSERT_EQ), \ G(TOKEN_BOOL), G(TOKEN_AND), G(TOKEN_OR), \ -G(TOKEN_NOT), G(TOKEN_FLOAT) +G(TOKEN_NOT), G(TOKEN_FLOAT), G(TOKEN_STRING) SK_ENUM_H(TokenKind, TOKEN_KIND); diff --git a/lib/include/value.h b/lib/include/value.h index 322f248..50794f8 100644 --- a/lib/include/value.h +++ b/lib/include/value.h @@ -5,7 +5,8 @@ #include "node.h" #define TYPE_KIND(G) \ -G(TYPE_INT), G(TYPE_BOOL), G(TYPE_FLOAT) +G(TYPE_INT), G(TYPE_BOOL), G(TYPE_FLOAT), \ +G(TYPE_STRING) SK_ENUM_H(TypeKind, TYPE_KIND); @@ -14,6 +15,7 @@ union val int integer; double real; bool boolean; + char* str; }; struct value diff --git a/lib/src/compiler.c b/lib/src/compiler.c index 9a05d63..932f18b 100644 --- a/lib/src/compiler.c +++ b/lib/src/compiler.c @@ -69,6 +69,13 @@ void compiler_compile(struct compiler* self, val); } break; + case NODE_STRING: { + union val val; + val.str = strdup(node->token->value); 
+ compiler_compile_value(self, node, prog, TYPE_STRING, + val); + } break; + case NODE_ADD: { compiler_compile_children(self, node, prog); prog_add_instr(prog, OP_ADD, SK_NO_PARAM); diff --git a/lib/src/exec.c b/lib/src/exec.c index d4342e7..8060ef0 100644 --- a/lib/src/exec.c +++ b/lib/src/exec.c @@ -82,6 +82,14 @@ void exec_execute(struct exec* self, ); } break; + case TYPE_STRING: { + state_push_string( + state, + constant->val.str, + constant->line + ); + } break; + case TYPE_BOOL: { state_push_bool( state, diff --git a/lib/src/lexer.c b/lib/src/lexer.c index 73fe99c..b02c327 100644 --- a/lib/src/lexer.c +++ b/lib/src/lexer.c @@ -143,6 +143,10 @@ struct token* lexer_try_new_next(struct lexer* self) return tok; } + if ( (tok=lexer_try_scan_string(self)) ) + { + return tok; + } SK_SCAN_TEXT("+", TOKEN_ADD); SK_SCAN_TEXT("-", TOKEN_SUB); SK_SCAN_TEXT("*", TOKEN_MUL); @@ -254,7 +258,7 @@ struct token* lexer_try_scan_float(struct lexer* self) str_push(&value, self->source[cursor]); cursor++; } - + struct token* tok = NULL; if (value.size > 0 @@ -271,6 +275,88 @@ struct token* lexer_try_scan_float(struct lexer* self) return tok; } +struct token* lexer_try_scan_string(struct lexer* self) +{ + assert(self); + size_t cursor = self->context.cursor; + + if (cursor >= self->len + || self->source[cursor] != '"') + { + return NULL; + } + + cursor++; + + struct str value; + str_init(&value); + bool escaped = false; + + while (cursor < self->len + && self->source[cursor] != '"') + { + if (self->source[cursor] == '\\') + { + escaped = true; + } + + if (escaped) + { + cursor++; + + switch (self->source[cursor]) + { + case '\\': { + str_push(&value, '\\'); + } break; + case 'n': { + str_push(&value, '\n'); + } break; + case 'r': { + str_push(&value, '\r'); + } break; + case 't': { + str_push(&value, '\t'); + } break; + case 'e': { + str_push(&value, '\e'); + } break; + case '"': { + str_push(&value, '"'); + } break; + default: { + errors_push(self->context.line, + "unknown escape 
symbol"); + str_free(&value); + return NULL; + } break; + } + + escaped = false; + cursor++; + continue; + } + + str_push(&value, self->source[cursor]); + cursor++; + } + + if (cursor >= self->len + || self->source[cursor] != '"') + { + str_free(&value); + return NULL; + } + + cursor++; + + struct token* tok = malloc(sizeof(struct token)); + token_init(tok, TOKEN_STRING, value.value, self->context.line); + self->context.cursor = cursor; + str_free(&value); + return tok; +} + struct token* lexer_try_scan_text(struct lexer* self, char const* text, TokenKind kind) diff --git a/lib/src/parser.c b/lib/src/parser.c index 37fd1a0..9736933 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -440,5 +440,16 @@ struct node* parser_try_builtin(struct parser* self) return node; } + if (lexer_next_is(&self->lexer, TOKEN_STRING)) + { + struct node* node = malloc(sizeof(struct node)); + + node_init(node, + NODE_STRING, + lexer_try_new_next(&self->lexer)); + + return node; + } + return NULL; } diff --git a/lib/src/state.c b/lib/src/state.c index 662e280..4dd2646 100644 --- a/lib/src/state.c +++ b/lib/src/state.c @@ -154,6 +154,14 @@ SK state_push_float(struct state* self, double real, int line) return state_push(self, TYPE_FLOAT, val ,line); } +SK state_push_string(struct state* self, char const* str, int line) +{ + assert(self); + union val val; + val.str = strdup(str); + return state_push(self, TYPE_STRING, val ,line); +} + TypeKind state_common_num_type(struct state* self, SK lhs, SK rhs) { assert(self); @@ -170,6 +178,13 @@ TypeKind state_common_num_type(struct state* self, SK lhs, SK rhs) return TYPE_INT; } +TypeKind state_type(struct state* self, SK value) +{ + assert(self); + struct value const* val = state_try_get_value(self, value); + return val->type; +} + double state_as_real(struct state* self, SK lhs) { assert(self); @@ -204,7 +219,20 @@ SK state_add(struct state* self) TypeKind type = state_common_num_type(self, lhs, rhs); - if (type == TYPE_INT) + 
if(state_type(self, lhs) == TYPE_STRING + && state_type(self, rhs) == TYPE_STRING) + { + struct value* left = state_try_get_value(self, lhs); + struct value* right = state_try_get_value(self, rhs); + struct str val; + str_init(&val); + str_extend(&val, left->val.str); + str_extend(&val, right->val.str); + SK res = state_push_string(self, val.value, left->line); + str_free(&val); + return res; + } + else if (type == TYPE_INT) { return state_push_int( self, @@ -296,7 +324,37 @@ SK state_mul(struct state* self) TypeKind type = state_common_num_type(self, lhs, rhs); - if (type == TYPE_INT) + if(state_type(self, lhs) == TYPE_STRING + && state_type(self, rhs) == TYPE_INT) + { + struct value* left = state_try_get_value(self, lhs); + struct value* right = state_try_get_value(self, rhs); + struct str val; + str_init(&val); + for (int i=0; i<right->val.integer; i++) + { + str_extend(&val, left->val.str); + } + SK res = state_push_string(self, val.value, left->line); + str_free(&val); + return res; + } + else if(state_type(self, lhs) == TYPE_INT + && state_type(self, rhs) == TYPE_STRING) + { + struct value* left = state_try_get_value(self, lhs); + struct value* right = state_try_get_value(self, rhs); + struct str val; + str_init(&val); + for (int i=0; i<left->val.integer; i++) + { + str_extend(&val, right->val.str); + } + SK res = state_push_string(self, val.value, left->line); + str_free(&val); + return res; + } + else if (type == TYPE_INT) { return state_push_int( self, diff --git a/lib/src/value.c b/lib/src/value.c index 5c9af91..93cb9e9 100644 --- a/lib/src/value.c +++ b/lib/src/value.c @@ -16,6 +16,10 @@ void value_init(struct value* self, void value_free(struct value* self) { assert(self); + if (self->type == TYPE_STRING) + { + free(self->val.str); + } } bool value_equals(struct value* self, struct value* rhs) @@ -43,6 +47,10 @@ void value_str(struct value* self, struct str* dest) switch (self->type) { + case TYPE_STRING: { + str_format(dest, "%s", self->val.str); + } break; + case
TYPE_INT: { str_format(dest, "%d", self->val.integer); } break; diff --git a/tests/lexer.h b/tests/lexer.h index 9eb6709..38dec7c 100644 --- a/tests/lexer.h +++ b/tests/lexer.h @@ -16,6 +16,10 @@ static void test_lexer(char const* source, int count, ...) for (int i=0; i