/* gux/lang/src/lexer.c -- 2024-02-10 15:16:00 +00:00 */
#include "lexer.h"
/**
 * Initialise a lexer over a private copy of `source`.
 *
 * The cursor starts at offset 0 and the line counter at 1. The fixed set of
 * operator/punctuation symbols is registered through lexer_add_tok().
 *
 * @param self   lexer to initialise (must not be NULL)
 * @param source NUL-terminated text to tokenize (must not be NULL); it is
 *               duplicated, so the caller keeps ownership of its own buffer
 *
 * The copy is released by lexer_free(). If the copy cannot be allocated the
 * process is aborted: the function has no way to report failure and every
 * other lexer routine dereferences self->source unconditionally.
 */
void lexer_init(struct lexer* self, char const* source)
{
  assert(self);
  assert(source);

  self->source = strdup(source);
  if (self->source == NULL)
  {
    fprintf(stderr, "lexer_init: out of memory\n");
    abort();
  }

  self->cursor = 0;
  self->line = 1;

  vec_init(&self->toks, 1);

  // Symbols recognised by lexer_next_new(); none of them is a keyword.
  lexer_add_tok(self, "&&", NODE_AND, 0);
  lexer_add_tok(self, "||", NODE_OR, 0);
  lexer_add_tok(self, "!", NODE_NOT, 0);
  lexer_add_tok(self, "(", NODE_OPAR, 0);
  lexer_add_tok(self, ")", NODE_CPAR, 0);
  lexer_add_tok(self, ";", NODE_SEMICOLON, 0);
}
/**
 * Release the resources held by a lexer set up with lexer_init().
 *
 * @param self lexer to tear down (must not be NULL)
 *
 * The struct itself is not freed here.
 */
void lexer_free(struct lexer* self)
{
  assert(self);

  // Registered token descriptors first, then the vector that stored them.
  vec_free_elements(&self->toks);
  vec_free(&self->toks);

  // The private copy of the source text made by lexer_init().
  free(self->source);
}
/**
 * Tokenize the remaining source and append every produced node to `buffer`.
 *
 * @param self   lexer to read from (must not be NULL)
 * @param buffer vector receiving the nodes, in source order (must not be NULL)
 * @return 0 when the whole source was consumed; 1 when lexing stopped before
 *         the end, in which case self->error_msg names the offending
 *         character. Nodes pushed before the failure stay in `buffer`.
 */
int lexer_extract(struct lexer* self, struct vec* buffer)
{
  assert(self);
  assert(buffer);

  for (;;)
  {
    struct node* produced = lexer_next_new(self);
    if (produced == NULL)
    {
      break;
    }
    vec_push(buffer, produced);
  }

  // No token left and nothing unread: the input was lexed completely.
  if (self->cursor >= strlen(self->source))
  {
    return 0;
  }

  snprintf(self->error_msg, GUX_STR_SIZE,
           "unexpected symbol '%c'", self->source[self->cursor]);
  return 1;
}
/**
 * Read the next token at the cursor and return it as a newly allocated node.
 *
 * Whitespace and '#' comments (running to the end of the line) are skipped
 * first. The registered symbols are then tried, followed by the keywords
 * `true` / `false` (NODE_BOOL, carrying the keyword text as value) and
 * `assert` (NODE_ASSERT). On success the cursor is moved just past the token.
 *
 * @param self lexer to read from (must not be NULL)
 * @return a malloc'ed, initialised node, or NULL when nothing matches at the
 *         cursor (end of input or unknown character) or when the node could
 *         not be allocated.
 */
struct node* lexer_next_new(struct lexer* self)
{
  assert(self);

  lexer_skip_spaces(self);

  // Comments
  size_t const len = strlen(self->source);
  while (self->cursor < len && self->source[self->cursor] == '#')
  {
    while (self->cursor < len && self->source[self->cursor] != '\n')
    {
      self->cursor++;
    }
    lexer_skip_spaces(self);
  }

  struct token_info info;

  // Symbols: every candidate is scanned from the same cursor position (the
  // cursor is only moved once a winner is known) and the longest match is
  // kept, so a symbol that is a prefix of another cannot shadow it. On a
  // tie the symbol registered first wins.
  struct tok* best = NULL;
  size_t best_end = 0;
  for (size_t i = 0; i < self->toks.size; i++)
  {
    struct tok* tok = self->toks.data[i];
    if (lexer_scan_text(self, tok->sym, &info)
        && (best == NULL || info.position > best_end))
    {
      best = tok;
      best_end = info.position;
    }
  }

  if (best)
  {
    // Allocate a single node, only for the winning symbol.
    struct node* node = malloc(sizeof *node);
    if (node == NULL)
    {
      return NULL;
    }
    node_init(node, best->type, "", self->line);
    self->cursor = best_end;
    return node;
  }

  // Boolean literals
  if (lexer_scan_keyword(self, "true", &info)
      || lexer_scan_keyword(self, "false", &info))
  {
    struct node* node = malloc(sizeof *node);
    if (node == NULL)
    {
      return NULL;
    }
    node_init(node, NODE_BOOL, info.value, self->line);
    self->cursor = info.position;
    return node;
  }

  if (lexer_scan_keyword(self, "assert", &info))
  {
    struct node* node = malloc(sizeof *node);
    if (node == NULL)
    {
      return NULL;
    }
    node_init(node, NODE_ASSERT, "", self->line);
    self->cursor = info.position;
    return node;
  }

  return NULL;
}
int lexer_is_sep(struct lexer* self, size_t index)
{
assert(self);
assert(index < strlen(self->source));
char c = self->source[index];
for (size_t i=0; i<self->toks.size; i++)
{
if (c == ((struct tok*)self->toks.data[i])->sym[0])
{
return 1;
}
}
return isspace(c);
}
/**
 * Advance the cursor past any run of whitespace, counting newlines.
 *
 * @param self lexer to advance (must not be NULL)
 *
 * self->line is incremented once for every '\n' skipped. The cursor stops at
 * the first non-whitespace character or at the end of the source.
 */
void lexer_skip_spaces(struct lexer* self)
{
  assert(self);

  // The source is not modified here, so its length is computed once.
  size_t const len = strlen(self->source);

  // Cast before isspace(): passing a negative plain char is undefined.
  while (self->cursor < len
         && isspace((unsigned char) self->source[self->cursor]))
  {
    if (self->source[self->cursor] == '\n')
    {
      self->line++;
    }
    self->cursor++;
  }
}
/**
 * Test whether `keyword` appears at the cursor as a whole word.
 *
 * The keyword must match the source text at the cursor and be delimited on
 * both sides: preceded by the start of the source or a separator, and
 * followed by the end of the source or a separator (see lexer_is_sep()).
 * The cursor itself is not moved.
 *
 * @param self    lexer to inspect (must not be NULL)
 * @param keyword NUL-terminated word to look for (must not be NULL)
 * @param info    on success, receives the offset just past the keyword in
 *                `position` and the `keyword` pointer itself in `value`
 * @return 1 on a match, 0 otherwise (`info` is left untouched)
 */
int lexer_scan_keyword(struct lexer* self,
                       char* keyword,
                       struct token_info* info)
{
  assert(self);
  assert(keyword);
  assert(info);

  size_t const src_len = strlen(self->source);
  size_t const kw_len = strlen(keyword);
  size_t const start = self->cursor;

  for (size_t i = 0; i < kw_len; i++)
  {
    if (start + i >= src_len
        || keyword[i] != self->source[start + i])
    {
      return 0;
    }
  }

  size_t const end = start + kw_len;

  // Left boundary: start of input, or a separator right before the keyword.
  if (start > 0 && !lexer_is_sep(self, start - 1))
  {
    return 0;
  }

  // Right boundary: end of input, or a separator right after the keyword.
  // The character at `end` must be checked whenever it exists, including
  // when it is the very last character of the source (e.g. "truex").
  if (end < src_len && !lexer_is_sep(self, end))
  {
    return 0;
  }

  info->position = end;
  info->value = keyword;
  return 1;
}
/**
 * Test whether the source contains `text` verbatim at the cursor.
 *
 * Unlike lexer_scan_keyword(), no word-boundary check is performed. The
 * cursor is not moved.
 *
 * @param self lexer to inspect (must not be NULL)
 * @param text NUL-terminated text to compare (must not be NULL)
 * @param info on success, receives the offset just past the match in
 *             `position` and the `text` pointer itself in `value`
 * @return 1 on a match, 0 otherwise (`info` is left untouched)
 */
int lexer_scan_text(struct lexer* self,
                    char* text,
                    struct token_info* info)
{
  assert(self);
  assert(text);
  assert(info);

  size_t const src_len = strlen(self->source);
  size_t const text_len = strlen(text);
  size_t const start = self->cursor;

  size_t matched = 0;
  while (matched < text_len)
  {
    size_t const at = start + matched;

    // Ran off the end of the source, or the characters differ.
    if (at >= src_len || self->source[at] != text[matched])
    {
      return 0;
    }
    matched++;
  }

  info->position = start + text_len;
  info->value = text;
  return 1;
}
/**
 * Register a symbol with the lexer.
 *
 * @param self       lexer receiving the symbol (must not be NULL)
 * @param sym        NUL-terminated symbol text (must not be NULL); only the
 *                   pointer is stored, the text is not copied, so it has to
 *                   stay valid for as long as the lexer is in use
 * @param type       node type recorded for this symbol
 * @param is_keyword flag stored as-is in the descriptor
 *
 * A descriptor is heap-allocated and appended to self->toks. If it cannot be
 * allocated the process is aborted, since the function cannot report failure
 * and a lexer with a missing symbol would misread its input.
 */
void lexer_add_tok(struct lexer* self,
                   char* sym,
                   enum NodeType type,
                   int is_keyword)
{
  assert(self);
  assert(sym);

  struct tok* tok = malloc(sizeof *tok);
  if (tok == NULL)
  {
    fprintf(stderr, "lexer_add_tok: out of memory\n");
    abort();
  }

  tok->sym = sym;
  tok->type = type;
  tok->is_keyword = is_keyword;

  vec_push(&self->toks, tok);
}