#include "lexer.h"
#include "commons.h"

/*
 * Lexer implementation.
 *
 * The lexer walks `self->source` from `self->cursor`, producing one
 * `struct node` per lexeme. Fixed lexemes (keywords and punctuation) are
 * registered in `self->toks` by lexer_init(); numbers, strings, booleans,
 * `assert` and identifiers are recognised by dedicated scanners.
 */

/* Report an allocation failure and terminate; the lexer cannot continue. */
static void lexer_fatal_oom(void)
{
    fprintf(stderr, "lexer: out of memory\n");
    abort();
}

/*
 * Append `c` to the lexeme buffer `buf` (capacity GUX_STR_SIZE), always
 * keeping one byte free for the terminating NUL.
 * Returns 1 on success, 0 when the buffer is full.
 */
static int lexer_buf_put(char* buf, size_t* sz, char c)
{
    if (*sz + 1 >= (size_t) GUX_STR_SIZE) {
        return 0;
    }

    buf[(*sz)++] = c;
    return 1;
}

/*
 * Allocate a node of the given type/value for the current line and move
 * the cursor to `end` (the index just past the consumed lexeme).
 * The node is allocated with malloc(); the caller is responsible for
 * releasing it.
 */
static struct node* lexer_emit(struct lexer* self,
                               enum NodeType type,
                               char* value,
                               size_t end)
{
    struct node* node = malloc(sizeof *node);

    if (!node) {
        lexer_fatal_oom();
    }

    node_init(node, type, value, self->line);
    self->cursor = end;

    return node;
}

/*
 * Initialise `self` to lex a private copy of `source`, starting at
 * offset 0 on line 1, and register every keyword and symbol token.
 */
void lexer_init(struct lexer* self, char const* source)
{
    assert(self);
    assert(source);

    self->source = strdup(source);

    if (!self->source) {
        lexer_fatal_oom();
    }

    self->cursor = 0;
    self->line = 1;

    vec_init(&self->toks, 1);

    // Keywords (matched only on word boundaries).
    lexer_add_tok(self, "if", NODE_IF, "", 1);
    lexer_add_tok(self, "else", NODE_ELSE, "", 1);
    lexer_add_tok(self, "cond", NODE_COND, "", 1);
    lexer_add_tok(self, "while", NODE_WHILE, "", 1);
    lexer_add_tok(self, "for", NODE_FOR, "", 1);
    lexer_add_tok(self, "var", NODE_VAR, "", 1);
    lexer_add_tok(self, "int", NODE_TYPE, "int", 1);
    lexer_add_tok(self, "float", NODE_TYPE, "float", 1);
    lexer_add_tok(self, "bool", NODE_TYPE, "bool", 1);
    lexer_add_tok(self, "string", NODE_TYPE, "string", 1);

    // Symbols (matched as plain text; the longest match wins).
    lexer_add_tok(self, "{", NODE_OBRACE, "", 0);
    lexer_add_tok(self, "}", NODE_CBRACE, "", 0);
    lexer_add_tok(self, ":", NODE_COLON, "", 0);
    lexer_add_tok(self, "=", NODE_ASSIGN, "", 0);
    lexer_add_tok(self, "<", NODE_LT, "", 0);
    lexer_add_tok(self, "<=", NODE_LE, "", 0);
    lexer_add_tok(self, ">", NODE_GT, "", 0);
    lexer_add_tok(self, ">=", NODE_GE, "", 0);
    lexer_add_tok(self, "+", NODE_ADD, "", 0);
    lexer_add_tok(self, "-", NODE_SUB, "", 0);
    lexer_add_tok(self, "*", NODE_MUL, "", 0);
    lexer_add_tok(self, "/", NODE_DIV, "", 0);
    lexer_add_tok(self, "%", NODE_MOD, "", 0);
    lexer_add_tok(self, "**", NODE_POW, "", 0);
    lexer_add_tok(self, "==", NODE_EQ, "", 0);
    lexer_add_tok(self, "!=", NODE_NE, "", 0);
    lexer_add_tok(self, "&&", NODE_AND, "", 0);
    lexer_add_tok(self, "||", NODE_OR, "", 0);
    lexer_add_tok(self, "!", NODE_NOT, "", 0);
    lexer_add_tok(self, "(", NODE_OPAR, "", 0);
    lexer_add_tok(self, ")", NODE_CPAR, "", 0);
    lexer_add_tok(self, ";", NODE_SEMICOLON, "", 0);
}

/* Release the source copy and the registered token table. */
void lexer_free(struct lexer* self)
{
    assert(self);

    free(self->source);
    self->source = NULL;

    vec_free_elements(&self->toks);
    vec_free(&self->toks);
}

/*
 * Lex the remaining input, pushing every produced node onto `buffer`.
 *
 * Returns 0 when the whole source was consumed. Returns 1 when lexing
 * stopped before the end of the source; `self->error_msg` then names the
 * offending character.
 */
int lexer_extract(struct lexer* self, struct vec* buffer)
{
    assert(self);
    assert(buffer);

    struct node* node;

    while ((node = lexer_next_new(self))) {
        vec_push(buffer, node);
    }

    if (self->cursor < strlen(self->source)) {
        snprintf(self->error_msg, GUX_STR_SIZE,
                 "unexpected symbol '%c'",
                 self->source[self->cursor]);
        return 1;
    }

    return 0;
}

/*
 * Produce the next node, or NULL when no lexeme can be recognised at the
 * cursor (either end of input or an unexpected character).
 *
 * Scanners are tried in this order: float, int, registered tokens
 * (longest match), `true`/`false`, `assert`, string, identifier.
 * The returned node is allocated with malloc().
 */
struct node* lexer_next_new(struct lexer* self)
{
    assert(self);

    size_t const len = strlen(self->source);

    lexer_skip_spaces(self);

    // Comments: '#' up to (not including) the end of the line. The
    // newline itself is consumed by lexer_skip_spaces so that the line
    // counter stays accurate.
    while (self->cursor < len && self->source[self->cursor] == '#') {
        while (self->cursor < len && self->source[self->cursor] != '\n') {
            self->cursor++;
        }

        lexer_skip_spaces(self);
    }

    struct token_info info;

    // Floats must be tried before ints, otherwise "1.5" would lex as 1.
    if (lexer_scan_float(self, &info) || lexer_scan_int(self, &info)) {
        return lexer_emit(self, info.type, info.value, info.position);
    }

    // Registered tokens: keep the match that consumes the most input so
    // that e.g. "<=" wins over "<". On ties the first registered wins.
    struct tok* best = NULL;
    size_t best_end = 0;

    for (size_t i = 0; i < self->toks.size; i++) {
        struct tok* tok = self->toks.data[i];

        int const matched = tok->is_keyword
            ? lexer_scan_keyword(self, tok->sym, &info)
            : lexer_scan_text(self, tok->sym, &info);

        if (matched && (best == NULL || info.position > best_end)) {
            best = tok;
            best_end = info.position;
        }
    }

    if (best) {
        return lexer_emit(self, best->type, best->value, best_end);
    }

    if (lexer_scan_keyword(self, "true", &info)
        || lexer_scan_keyword(self, "false", &info)) {
        return lexer_emit(self, NODE_BOOL, info.value, info.position);
    }

    if (lexer_scan_keyword(self, "assert", &info)) {
        return lexer_emit(self, NODE_ASSERT, "", info.position);
    }

    if (lexer_scan_string(self, &info) || lexer_scan_ident(self, &info)) {
        return lexer_emit(self, info.type, info.value, info.position);
    }

    return NULL;
}

/*
 * Tell whether the character at `index` separates words: whitespace, or
 * the first character of a registered symbol token.
 *
 * Keyword tokens are deliberately ignored here; counting their initial
 * letters as separators would split identifiers such as "variable" into
 * the keyword "var" followed by "iable".
 */
int lexer_is_sep(struct lexer* self, size_t index)
{
    assert(self);
    assert(index < strlen(self->source));

    char const c = self->source[index];

    for (size_t i = 0; i < self->toks.size; i++) {
        struct tok* tok = self->toks.data[i];

        if (!tok->is_keyword && c == tok->sym[0]) {
            return 1;
        }
    }

    // <ctype.h> functions require a value representable as unsigned char.
    return isspace((unsigned char) c);
}

/* Advance the cursor past whitespace, counting newlines in self->line. */
void lexer_skip_spaces(struct lexer* self)
{
    assert(self);

    size_t const len = strlen(self->source);

    while (self->cursor < len
           && isspace((unsigned char) self->source[self->cursor])) {
        if (self->source[self->cursor] == '\n') {
            self->line++;
        }

        self->cursor++;
    }
}

/*
 * Try to match `keyword` at the cursor as a whole word: the characters
 * just before and just after it must be separators (see lexer_is_sep) or
 * the boundaries of the source.
 *
 * On success returns 1 and fills info->position (index past the keyword)
 * and info->value (the keyword text). Returns 0 otherwise. The cursor is
 * never moved.
 */
int lexer_scan_keyword(struct lexer* self,
                       char* keyword,
                       struct token_info* info)
{
    assert(self);
    assert(keyword);
    assert(info);

    size_t const src_len = strlen(self->source);
    size_t const kw_len = strlen(keyword);
    size_t const start = self->cursor;

    if (start > src_len
        || kw_len > src_len - start
        || memcmp(self->source + start, keyword, kw_len) != 0) {
        return 0;
    }

    size_t const end = start + kw_len;

    if (start > 0 && !lexer_is_sep(self, start - 1)) {
        return 0;
    }

    if (end < src_len && !lexer_is_sep(self, end)) {
        return 0;
    }

    info->position = end;
    snprintf(info->value, GUX_STR_SIZE, "%s", keyword);

    return 1;
}

/*
 * Try to match `text` verbatim at the cursor, with no word-boundary
 * requirement.
 *
 * On success returns 1 and fills info->position and info->value.
 * Returns 0 otherwise. The cursor is never moved.
 */
int lexer_scan_text(struct lexer* self,
                    char* text,
                    struct token_info* info)
{
    assert(self);
    assert(text);
    assert(info);

    size_t const src_len = strlen(self->source);
    size_t const text_len = strlen(text);
    size_t const start = self->cursor;

    if (start > src_len
        || text_len > src_len - start
        || memcmp(self->source + start, text, text_len) != 0) {
        return 0;
    }

    info->position = start + text_len;
    snprintf(info->value, GUX_STR_SIZE, "%s", text);

    return 1;
}

/*
 * Try to scan an integer literal: an optional '-' followed by one or more
 * decimal digits.
 *
 * On success returns 1 and fills info (type NODE_INT). Returns 0 when no
 * integer starts at the cursor or when the literal does not fit in
 * GUX_STR_SIZE - 1 characters. The cursor is never moved.
 */
int lexer_scan_int(struct lexer* self, struct token_info* info)
{
    assert(self);
    assert(info);

    size_t const len = strlen(self->source);
    size_t cursor = self->cursor;
    size_t sz = 0;
    size_t digits = 0;

    char value[GUX_STR_SIZE];
    memset(value, 0, GUX_STR_SIZE);

    if (cursor < len && self->source[cursor] == '-') {
        if (!lexer_buf_put(value, &sz, '-')) {
            return 0;
        }

        cursor++;
    }

    while (cursor < len && isdigit((unsigned char) self->source[cursor])) {
        if (!lexer_buf_put(value, &sz, self->source[cursor])) {
            return 0;
        }

        cursor++;
        digits++;
    }

    // A lone '-' is the subtraction operator, not a number.
    if (digits == 0) {
        return 0;
    }

    info->position = cursor;
    info->type = NODE_INT;
    memcpy(info->value, value, GUX_STR_SIZE * sizeof(char));

    return 1;
}

/*
 * Try to scan a float literal: an optional '-', optional digits, a
 * mandatory '.', then optional digits. At least one digit must appear on
 * either side of the dot, so "1.", ".5" and "-0.25" match but "." does
 * not.
 *
 * On success returns 1 and fills info (type NODE_FLOAT). Returns 0 when
 * no float starts at the cursor or when the literal does not fit in
 * GUX_STR_SIZE - 1 characters. The cursor is never moved.
 */
int lexer_scan_float(struct lexer* self, struct token_info* info)
{
    assert(self);
    assert(info);

    size_t const len = strlen(self->source);
    size_t cursor = self->cursor;
    size_t sz = 0;
    size_t digits = 0;

    char value[GUX_STR_SIZE];
    memset(value, 0, GUX_STR_SIZE);

    if (cursor < len && self->source[cursor] == '-') {
        if (!lexer_buf_put(value, &sz, '-')) {
            return 0;
        }

        cursor++;
    }

    while (cursor < len && isdigit((unsigned char) self->source[cursor])) {
        if (!lexer_buf_put(value, &sz, self->source[cursor])) {
            return 0;
        }

        cursor++;
        digits++;
    }

    // Without a dot this is at best an integer; let lexer_scan_int decide.
    if (cursor >= len || self->source[cursor] != '.') {
        return 0;
    }

    if (!lexer_buf_put(value, &sz, '.')) {
        return 0;
    }

    cursor++;

    while (cursor < len && isdigit((unsigned char) self->source[cursor])) {
        if (!lexer_buf_put(value, &sz, self->source[cursor])) {
            return 0;
        }

        cursor++;
        digits++;
    }

    if (digits == 0) {
        return 0;
    }

    info->position = cursor;
    info->type = NODE_FLOAT;
    memcpy(info->value, value, GUX_STR_SIZE * sizeof(char));

    return 1;
}

/*
 * Try to scan a string literal delimited by matching single or double
 * quotes. A delimiter immediately preceded by a backslash does not close
 * the string. The stored value is the raw text between the delimiters,
 * backslashes included.
 *
 * NOTE(review): a string ending in an escaped backslash (e.g. "a\\") is
 * not recognised as closed, because only the single preceding character
 * is inspected.
 *
 * On success returns 1 and fills info (type NODE_STRING). Returns 0 when
 * no string starts at the cursor, when it is unterminated, or when its
 * content does not fit in GUX_STR_SIZE - 1 characters. The cursor is
 * never moved.
 */
int lexer_scan_string(struct lexer* self, struct token_info* info)
{
    assert(self);
    assert(info);

    size_t const len = strlen(self->source);
    size_t cursor = self->cursor;
    size_t sz = 0;

    char value[GUX_STR_SIZE];
    memset(value, 0, GUX_STR_SIZE);

    if (cursor >= len
        || (self->source[cursor] != '\'' && self->source[cursor] != '"')) {
        return 0;
    }

    char const delim = self->source[cursor];
    cursor++;

    int found = 0;

    while (cursor < len) {
        // cursor >= 1 here, so looking one character back is always valid.
        if (self->source[cursor] == delim
            && self->source[cursor - 1] != '\\') {
            found = 1;
            cursor++;
            break;
        }

        if (!lexer_buf_put(value, &sz, self->source[cursor])) {
            return 0;
        }

        cursor++;
    }

    if (!found) {
        return 0;
    }

    info->position = cursor;
    info->type = NODE_STRING;
    memcpy(info->value, value, GUX_STR_SIZE * sizeof(char));

    return 1;
}

/*
 * Try to scan an identifier: letters, '_', '!' and '?', plus digits
 * anywhere but in first position.
 *
 * On success returns 1 and fills info (type NODE_IDENT). Returns 0 when
 * no identifier starts at the cursor or when it does not fit in
 * GUX_STR_SIZE - 1 characters. The cursor is never moved.
 */
int lexer_scan_ident(struct lexer* self, struct token_info* info)
{
    assert(self);
    assert(info);

    size_t const len = strlen(self->source);
    size_t cursor = self->cursor;
    size_t size = 0;

    char value[GUX_STR_SIZE];

    while (cursor < len) {
        char const c = self->source[cursor];
        unsigned char const uc = (unsigned char) c;

        int const accepted = isalpha(uc)
            || c == '_'
            || c == '!'
            || c == '?'
            || (size > 0 && isdigit(uc));

        if (!accepted) {
            break;
        }

        if (!lexer_buf_put(value, &size, c)) {
            return 0;
        }

        cursor++;
    }

    if (size == 0) {
        return 0;
    }

    info->position = cursor;
    memcpy(info->value, value, size);
    info->value[size] = '\0';
    info->type = NODE_IDENT;

    return 1;
}

/*
 * Register a fixed lexeme.
 *
 * `sym` is the text to match; the pointer itself is stored, so it must
 * outlive the lexer (string literals are used throughout this file).
 * `type` is the node type to produce and `value` the node value, copied
 * into the token (truncated to GUX_STR_SIZE - 1 characters).
 * `is_keyword` selects whole-word matching (lexer_scan_keyword) instead
 * of plain text matching (lexer_scan_text).
 */
void lexer_add_tok(struct lexer* self,
                   char* sym,
                   enum NodeType type,
                   char* value,
                   int is_keyword)
{
    assert(self);
    assert(sym);
    assert(value);

    struct tok* tok = malloc(sizeof *tok);

    if (!tok) {
        lexer_fatal_oom();
    }

    tok->sym = sym;
    tok->type = type;
    // snprintf stops at the terminating NUL; copying a fixed
    // GUX_STR_SIZE bytes would read past the end of short literals.
    snprintf(tok->value, GUX_STR_SIZE, "%s", value);
    tok->is_keyword = is_keyword;

    vec_push(&self->toks, tok);
}