roza/lib/lexer.c

174 lines
3.4 KiB
C
Raw Normal View History

2023-12-09 17:24:41 +00:00
#include "lexer.h"
#include "lib/commons.h"
/**
 * Prepare `lexer` to scan `source` from its first character.
 *
 * The lexer stores its own heap copy of `source` (made with strdup and
 * released by lexer_free), so the caller keeps ownership of the buffer it
 * passed in. `err` is stored as-is; it may be NULL, in which case
 * lexer_try_new_next does not report unexpected symbols.
 *
 * The cursor starts at offset 0 and the line counter at 1.
 */
void lexer_init(lexer_t* lexer, char const* source, err_t* err)
{
  assert(lexer);
  // strdup(NULL) is undefined behaviour, so reject it up front.
  assert(source);

  lexer->source = strdup(source);
  lexer->cursor = 0;
  lexer->err = err;
  lexer->line = 1;
}
/**
 * Release the source copy held by `lexer`.
 *
 * The stored pointer is reset to NULL after being freed. The lexer
 * structure itself is not freed here.
 */
void lexer_free(lexer_t* lexer)
{
  assert(lexer != NULL);

  free(lexer->source);
  lexer->source = NULL;
}
/**
 * Scan the next token starting at the lexer's cursor.
 *
 * Steps, in order:
 *   1. skip whitespace, incrementing lexer->line on every '\n';
 *   2. try the keywords "true" and "false" (both NODE_BOOL);
 *   3. try a number (NODE_NUM): optional '-', digits, optional '.' followed
 *      by digits. At least one digit is required, and the number must be
 *      followed by whitespace or the end of the source.
 *
 * On success the cursor is moved past the token and a heap-allocated node
 * is returned; the caller releases it with node_free() followed by free().
 *
 * Returns NULL when no token could be produced. If characters remain and
 * lexer->err is non-NULL, an "unexpected symbol" error is recorded for the
 * current line. At the end of the source NULL is returned without an error.
 */
node_t* lexer_try_new_next(lexer_t* lexer)
{
  assert(lexer);
  size_t len = strlen(lexer->source);

  // skip spaces
  // <ctype.h> functions need an unsigned char value: a negative plain char
  // is undefined behaviour.
  while (lexer->cursor < len
         && isspace((unsigned char) lexer->source[lexer->cursor]))
  {
    if (lexer->source[lexer->cursor] == '\n')
    {
      lexer->line++;
    }

    lexer->cursor++;
  }

  // keywords
  {
    node_t* kw = lexer_try_new_keyword(lexer, "true", NODE_BOOL, 1);
    if (kw) { return kw; }
  }

  {
    node_t* kw = lexer_try_new_keyword(lexer, "false", NODE_BOOL, 1);
    if (kw) { return kw; }
  }

  // scan num
  {
    // Work on a local cursor so a failed match leaves the lexer untouched.
    size_t cursor = lexer->cursor;
    // Count digits separately from the text: a lone "-", "." or "-." fills
    // res_str but is not a number.
    size_t digits = 0;
    str_t res_str;
    str_init(&res_str);

    if (cursor < len && lexer->source[cursor] == '-')
    {
      str_push(&res_str, lexer->source[cursor]);
      cursor++;
    }

    while (cursor < len
           && isdigit((unsigned char) lexer->source[cursor]))
    {
      str_push(&res_str, lexer->source[cursor]);
      cursor++;
      digits++;
    }

    if (cursor < len && lexer->source[cursor] == '.')
    {
      str_push(&res_str, lexer->source[cursor]);
      cursor++;

      while (cursor < len
             && isdigit((unsigned char) lexer->source[cursor]))
      {
        str_push(&res_str, lexer->source[cursor]);
        cursor++;
        digits++;
      }
    }

    if (digits > 0
        && (cursor >= len
            || isspace((unsigned char) lexer->source[cursor])))
    {
      node_t* tok = malloc(sizeof(node_t));

      if (!tok)
      {
        str_free(&res_str);

        if (lexer->err)
        {
          err_error(lexer->err, "out of memory", lexer->line);
        }

        return NULL;
      }

      node_init(tok, NODE_NUM, res_str.data, lexer->line);
      str_free(&res_str);
      lexer->cursor = cursor;
      return tok;
    }

    str_free(&res_str);
  }

  // Nothing matched: report the offending character unless we are simply
  // at the end of the source.
  if (lexer->cursor < len && lexer->err)
  {
    size_t const SZ = RZ_STR_LIMIT;
    char msg[SZ];
    snprintf(msg, SZ, "unexpected symbol '%c'", lexer->source[lexer->cursor]);
    err_error(lexer->err, msg, lexer->line);
  }

  return NULL;
}
2023-12-09 21:59:24 +00:00
/**
 * Report the type of the token `lookahead` positions ahead without
 * consuming anything.
 *
 * The lexer's cursor and line counter are saved before scanning and
 * restored afterwards. Every scanned node is released immediately.
 *
 * Returns the type of the `lookahead`-th upcoming token (1 = the next
 * token). Returns 0 when `lookahead` is not positive or when fewer than
 * `lookahead` tokens can be scanned.
 *
 * NOTE(review): scanning goes through lexer_try_new_next, which records an
 * error on lexer->err when it meets an unexpected symbol, so a peek over
 * invalid input can still register an error. Confirm this is intended.
 */
NodeType lexer_peek(lexer_t* lexer, int lookahead)
{
  assert(lexer);
  size_t cursor = lexer->cursor;
  int line = lexer->line;
  NodeType type = 0;

  for (int i = 0; i < lookahead; i++)
  {
    node_t* node = lexer_try_new_next(lexer);

    if (!node)
    {
      // Ran out of tokens before reaching the requested position: do not
      // report the type of an earlier token as if it were the answer.
      type = 0;
      break;
    }

    type = node->type;
    node_free(node);
    free(node);
  }

  lexer->cursor = cursor;
  lexer->line = line;
  return type;
}
/**
 * Try to match the keyword `kw` at the lexer's current cursor.
 *
 * The match succeeds only when the source text at the cursor equals `kw`
 * and the keyword is followed by whitespace or the end of the source, so
 * "true" does not match the start of "truex".
 *
 * lexer     - lexer to read from; its cursor advances past the keyword on
 *             success and is left untouched otherwise.
 * kw        - NUL-terminated keyword text.
 * type      - node type given to the created node.
 * has_value - when non-zero the node is initialised with `kw` as its value,
 *             otherwise with the empty string.
 *
 * Returns a heap-allocated node (release with node_free() then free()), or
 * NULL when the keyword does not match or the allocation fails.
 */
node_t* lexer_try_new_keyword(lexer_t* lexer, char* kw,
                              NodeType type, int has_value)
{
  assert(lexer);
  assert(kw);

  size_t kw_len = strlen(kw);
  size_t src_len = strlen(lexer->source);
  size_t start = lexer->cursor;

  // Not enough characters left for the keyword (written so that the
  // arithmetic cannot wrap around).
  if (start > src_len || kw_len > src_len - start)
  {
    return NULL;
  }

  if (strncmp(lexer->source + start, kw, kw_len) != 0)
  {
    return NULL;
  }

  // The keyword must end at a word boundary. The cast is required because
  // passing a negative plain char to isspace is undefined behaviour.
  size_t next_idx = start + kw_len;

  if (next_idx < src_len
      && !isspace((unsigned char) lexer->source[next_idx]))
  {
    return NULL;
  }

  node_t* node = malloc(sizeof(node_t));

  if (!node)
  {
    return NULL;
  }

  node_init(node, type, has_value ? (char*) kw : "", lexer->line);
  lexer->cursor = next_idx;
  return node;
}