#include "lexer.h"
#include "lib/commons.h"

/*
 * Token-matching shortcuts used inside lexer_try_new_next().
 *
 * Both expect a variable named `lexer` in the enclosing scope. When the
 * literal KW is found at the cursor they skip trailing whitespace and
 * RETURN the new node from the enclosing function; otherwise they fall
 * through to the next statement.
 *
 * RZ_KEYWORD passes is_kw = 1 (the match must be followed by a separator,
 * see lexer_is_sep); RZ_TEXT passes is_kw = 0 (no such requirement).
 * VAL is forwarded as has_value: non-zero stores KW as the node's value.
 */
#define RZ_KEYWORD(KW, NODE, VAL) \
    do { \
        node_t* kw = lexer_try_new_keyword(lexer, KW, NODE, VAL, 1); \
        if (kw) { lexer_skip_spaces(lexer); return kw; } \
    } while (0)

#define RZ_TEXT(KW, NODE, VAL) \
    do { \
        node_t* kw = lexer_try_new_keyword(lexer, KW, NODE, VAL, 0); \
        if (kw) { lexer_skip_spaces(lexer); return kw; } \
    } while (0)

/*
 * Allocate a node and initialise it with node_init().
 * Returns NULL when the allocation fails; the caller owns the result and
 * releases it with node_free() followed by free().
 */
static node_t* lexer_alloc_node(NodeType type, char* value, int line)
{
    node_t* node = malloc(sizeof *node);

    if (node) {
        node_init(node, type, value, line);
    }

    return node;
}

/*
 * Try to read a number at the cursor: an optional '-', digits, and an
 * optional '.' followed by digits. `len` is the length of lexer->source.
 *
 * The number must contain at least one digit and must not be immediately
 * followed by an alphanumeric character or another '.'.
 *
 * On success the cursor is moved past the number and a NODE_NUM node is
 * returned. On failure the cursor is left untouched and NULL is returned.
 */
static node_t* lexer_scan_num(lexer_t* lexer, size_t len)
{
    char const* src = lexer->source;
    size_t cursor = lexer->cursor;
    size_t digits = 0;
    node_t* tok = NULL;
    str_t res_str;

    str_init(&res_str);

    if (cursor < len && src[cursor] == '-') {
        str_push(&res_str, src[cursor]);
        cursor++;
    }

    // <ctype.h> functions require a value representable as unsigned char.
    while (cursor < len && isdigit((unsigned char) src[cursor])) {
        str_push(&res_str, src[cursor]);
        cursor++;
        digits++;
    }

    if (cursor < len && src[cursor] == '.') {
        str_push(&res_str, src[cursor]);
        cursor++;

        while (cursor < len && isdigit((unsigned char) src[cursor])) {
            str_push(&res_str, src[cursor]);
            cursor++;
            digits++;
        }
    }

    int at_boundary = cursor >= len
        || !(isalnum((unsigned char) src[cursor]) || src[cursor] == '.');

    // Requiring a digit rejects a lone "-", "." or "-." as a number.
    if (digits > 0 && at_boundary) {
        tok = lexer_alloc_node(NODE_NUM, res_str.data, lexer->line);
        if (tok) {
            lexer->cursor = cursor;
        }
    }

    str_free(&res_str);
    return tok;
}

/*
 * Prepare `lexer` to scan `source`.
 *
 * The lexer keeps its own copy of `source` (released by lexer_free), starts
 * at offset 0 on line 1, and remembers `err` for reporting; `err` may be
 * NULL, in which case lexer_try_new_next() reports nothing.
 */
void lexer_init(lexer_t* lexer, char const* source, err_t* err)
{
    assert(lexer);
    assert(source);

    // NOTE(review): strdup() returns NULL on allocation failure and the
    // other lexer functions call strlen() on this pointer without checking.
    lexer->source = strdup(source);
    lexer->cursor = 0;
    lexer->err = err;
    lexer->line = 1;
}

/*
 * Release the source copy made by lexer_init(). The lexer struct itself is
 * not freed.
 */
void lexer_free(lexer_t* lexer)
{
    assert(lexer);

    free(lexer->source);
    lexer->source = NULL;
}

/*
 * Scan the next token and advance the cursor past it and any whitespace
 * that follows.
 *
 * Leading whitespace and '#' line comments are skipped first. Candidates
 * are then tried in order: comparison operators, keywords, string literal,
 * number.
 *
 * Returns a newly allocated node owned by the caller, or NULL when nothing
 * matched. If nothing matched, input remains and lexer->err is set, an
 * "unexpected symbol" error is reported through err_fatal().
 */
node_t* lexer_try_new_next(lexer_t* lexer)
{
    assert(lexer);

    size_t len = strlen(lexer->source);

    lexer_skip_spaces(lexer);

    // Comments
    // ========
    while (lexer->cursor < len && lexer->source[lexer->cursor] == '#') {
        while (lexer->cursor < len && lexer->source[lexer->cursor] != '\n') {
            lexer->cursor++;
        }

        lexer_skip_spaces(lexer);
    }

    // Text
    // ====
    // Two-character operators come first so "<=" is not read as "<".
    RZ_TEXT("==", NODE_EQ, 0);
    RZ_TEXT("!=", NODE_NE, 0);
    RZ_TEXT("<=", NODE_LE, 0);
    RZ_TEXT(">=", NODE_GE, 0);
    RZ_TEXT("<", NODE_LT, 0);
    RZ_TEXT(">", NODE_GT, 0);

    // Keywords
    // ========
    RZ_KEYWORD("true", NODE_BOOL, 1);
    RZ_KEYWORD("false", NODE_BOOL, 1);
    RZ_KEYWORD("assert", NODE_ASSERT, 0);

    // scan str
    {
        node_t* node = lexer_try_new_str(lexer);

        if (node) {
            lexer_skip_spaces(lexer);
            return node;
        }
    }

    // scan num
    {
        node_t* node = lexer_scan_num(lexer, len);

        if (node) {
            lexer_skip_spaces(lexer);
            return node;
        }
    }

    if (lexer->cursor < len && lexer->err) {
        char msg[RZ_STR_LIMIT];

        snprintf(msg, sizeof msg, "unexpected symbol '%c'",
                 lexer->source[lexer->cursor]);
        err_fatal(lexer->err, msg, lexer->line);
    }

    return NULL;
}

/*
 * Look ahead without consuming input.
 *
 * Scans up to `lookahead` tokens, restores the cursor and line counter, and
 * returns the type of the last token that was scanned successfully, or -1
 * when none was.
 *
 * NOTE(review): the loop header and the `type = node->type` assignment were
 * reconstructed from a damaged copy of this file -- confirm against the
 * project history.
 */
NodeType lexer_peek(lexer_t* lexer, int lookahead)
{
    assert(lexer);

    size_t cursor = lexer->cursor;
    int line = lexer->line;
    NodeType type = -1;

    for (int i = 0; i < lookahead; i++) {
        node_t* node = lexer_try_new_next(lexer);

        if (node) {
            type = node->type;
            node_free(node);
            free(node);
        } else {
            break;
        }
    }

    lexer->cursor = cursor;
    lexer->line = line;

    return type;
}

/*
 * Advance the cursor over whitespace, incrementing lexer->line for every
 * newline passed.
 */
void lexer_skip_spaces(lexer_t* lexer)
{
    assert(lexer);

    size_t len = strlen(lexer->source);

    while (lexer->cursor < len
           && isspace((unsigned char) lexer->source[lexer->cursor])) {
        if (lexer->source[lexer->cursor] == '\n') {
            lexer->line++;
        }

        lexer->cursor++;
    }
}

/*
 * Try to match the literal `kw` at the cursor.
 *
 * When `is_kw` is non-zero the match only counts if the character right
 * after it is a separator (see lexer_is_sep) or the end of the source.
 * On a match the cursor moves past `kw` and a node of `type` is returned
 * whose value is `kw` when `has_value` is non-zero and "" otherwise.
 *
 * Returns NULL, leaving the cursor untouched, when there is no match.
 */
node_t* lexer_try_new_keyword(lexer_t* lexer, char* kw, NodeType type, int has_value, int is_kw)
{
    assert(lexer);
    assert(kw);

    size_t kw_len = strlen(kw);
    size_t src_len = strlen(lexer->source);
    size_t cursor = lexer->cursor;

    // Written as a subtraction so the bounds check cannot overflow.
    if (cursor > src_len || kw_len > src_len - cursor) {
        return NULL;
    }

    if (strncmp(lexer->source + cursor, kw, kw_len) != 0) {
        return NULL;
    }

    size_t next_idx = cursor + kw_len;

    if (is_kw && next_idx < src_len && !lexer_is_sep(lexer, next_idx)) {
        return NULL;
    }

    node_t* node = lexer_alloc_node(type, has_value ? kw : "", lexer->line);

    if (node) {
        lexer->cursor = next_idx;
    }

    return node;
}

/*
 * Try to read a double-quoted string literal at the cursor.
 *
 * Recognised escapes: \" \\ \n \t \r. For any other escape the backslash
 * is dropped and the following character is kept as-is.
 *
 * On success the cursor moves past the closing quote (trailing whitespace
 * is NOT skipped) and a NODE_STR node holding the unescaped text is
 * returned. Returns NULL, leaving the cursor untouched, when the cursor is
 * not on a '"' or the literal is not terminated.
 */
node_t* lexer_try_new_str(lexer_t* lexer)
{
    assert(lexer);

    char const* src = lexer->source;
    size_t len = strlen(src);
    size_t cursor = lexer->cursor;

    if (cursor >= len || src[cursor] != '"') {
        return NULL;
    }

    cursor++;

    str_t res_str;
    str_init(&res_str);

    while (cursor < len && src[cursor] != '"') {
        if (src[cursor] != '\\') {
            str_push(&res_str, src[cursor]);
            cursor++;
            continue;
        }

        // A backslash as the very last character: step over it so the loop
        // terminates; the literal is then reported as unterminated below.
        if (cursor + 1 >= len) {
            cursor++;
            break;
        }

        switch (src[cursor + 1]) {
            case '"':
                str_push(&res_str, '"');
                cursor += 2;
                break;
            case '\\':
                str_push(&res_str, '\\');
                cursor += 2;
                break;
            case 'n':
                str_push(&res_str, '\n');
                cursor += 2;
                break;
            case 't':
                str_push(&res_str, '\t');
                cursor += 2;
                break;
            case 'r':
                str_push(&res_str, '\r');
                cursor += 2;
                break;
            default:
                // Unknown escape: drop the backslash only; the next
                // iteration copies the character that followed it.
                cursor++;
                break;
        }
    }

    node_t* tok = NULL;

    if (cursor < len && src[cursor] == '"') {
        tok = lexer_alloc_node(NODE_STR, res_str.data, lexer->line);
        if (tok) {
            lexer->cursor = cursor + 1;
        }
    }

    str_free(&res_str);
    return tok;
}

/*
 * Tell whether the character at `idx` may follow a keyword.
 *
 * Returns 1 for the end of the source, any whitespace character, '=' or
 * '!'; 0 otherwise.
 */
int lexer_is_sep(lexer_t* lexer, size_t idx)
{
    assert(lexer);

    if (idx >= strlen(lexer->source)) {
        return 1;
    }

    char c = lexer->source[idx];

    if (isspace((unsigned char) c)) {
        return 1;
    }

    return c == '=' || c == '!';
}