2023-12-09 17:24:41 +00:00
|
|
|
#include "lexer.h"
|
|
|
|
#include "lib/commons.h"
|
|
|
|
|
2023-12-15 18:30:20 +00:00
|
|
|
// Tries to lex the keyword KW at the current cursor (word-boundary checked,
// is_kw = 1). On a match, skips trailing whitespace and returns the new node
// from the *enclosing* function.
//
// Requirements at the use site: a `lexer_t* lexer` variable must be in scope
// and the enclosing function must return `node_t*`.
//
// Wrapped in do { } while (0) so the macro behaves as a single statement
// (safe inside an unbraced if/else); use it with a trailing semicolon.
#define RZ_KEYWORD(KW, NODE, VAL) \
  do { \
    node_t* rz_kw_node_ = lexer_try_new_keyword(lexer, (KW), (NODE), (VAL), 1); \
    if (rz_kw_node_) { lexer_skip_spaces(lexer); return rz_kw_node_; } \
  } while (0)
|
|
|
|
|
|
|
|
// Tries to lex the literal text KW at the current cursor without any
// word-boundary check (is_kw = 0). On a match, skips trailing whitespace and
// returns the new node from the *enclosing* function.
//
// Requirements at the use site: a `lexer_t* lexer` variable must be in scope
// and the enclosing function must return `node_t*`.
//
// Wrapped in do { } while (0) so the macro behaves as a single statement
// (safe inside an unbraced if/else); use it with a trailing semicolon.
#define RZ_TEXT(KW, NODE, VAL) \
  do { \
    node_t* rz_text_node_ = lexer_try_new_keyword(lexer, (KW), (NODE), (VAL), 0); \
    if (rz_text_node_) { lexer_skip_spaces(lexer); return rz_text_node_; } \
  } while (0)
|
|
|
|
|
2023-12-09 17:24:41 +00:00
|
|
|
// Initialises `lexer` to read from a private copy of `source`.
//
// lexer:  lexer to initialise; must not be NULL.
// source: NUL-terminated text to lex; must not be NULL. It is duplicated with
//         strdup, so the caller keeps ownership of its own buffer.
// err:    error sink stored in the lexer; may be NULL (lexer_try_new_next
//         only reports when it is non-NULL).
//
// The cursor starts at offset 0 and the line counter at 1. Release the copy
// with lexer_free.
void lexer_init(lexer_t* lexer, char const* source, err_t* err)
{
  assert(lexer);
  // strdup(NULL) is undefined behaviour; make the precondition explicit.
  assert(source);

  lexer->source = strdup(source);
  // Every other lexer function calls strlen(lexer->source), so catch a failed
  // allocation here rather than crashing later in an unrelated place.
  assert(lexer->source);

  lexer->cursor = 0;
  lexer->err = err;
  lexer->line = 1;
}
|
|
|
|
|
|
|
|
// Releases the source buffer owned by `lexer` and clears the pointer so a
// stale reference cannot be used afterwards. The lexer struct itself is not
// freed. `lexer` must not be NULL.
void lexer_free(lexer_t* lexer)
{
  assert(lexer != NULL);

  char* owned_source = lexer->source;
  lexer->source = NULL;
  free(owned_source);
}
|
|
|
|
|
|
|
|
// Scans a numeric literal at the lexer cursor: an optional leading '-', a run
// of digits, then optionally a '.' followed by another run of digits.
//
// The literal must contain at least one digit and must be followed by
// whitespace or the end of the source (`len` is strlen(lexer->source)).
//
// On success the cursor is moved past the literal and any trailing
// whitespace, and a newly malloc'ed NODE_NUM node initialised with the
// literal text is returned. Otherwise the lexer is left untouched and NULL is
// returned (also when the node allocation fails).
static node_t* lexer_scan_num_literal(lexer_t* lexer, size_t len)
{
  char const* src = lexer->source;
  size_t cursor = lexer->cursor;
  size_t digit_count = 0;

  str_t res_str;
  str_init(&res_str);

  if (cursor < len && src[cursor] == '-')
  {
    str_push(&res_str, src[cursor]);
    cursor++;
  }

  // <ctype.h> functions require a value representable as unsigned char.
  while (cursor < len && isdigit((unsigned char) src[cursor]))
  {
    str_push(&res_str, src[cursor]);
    cursor++;
    digit_count++;
  }

  if (cursor < len && src[cursor] == '.')
  {
    str_push(&res_str, src[cursor]);
    cursor++;

    while (cursor < len && isdigit((unsigned char) src[cursor]))
    {
      str_push(&res_str, src[cursor]);
      cursor++;
      digit_count++;
    }
  }

  node_t* tok = NULL;

  // Requiring a digit rejects a lone "-", "." or "-." which would otherwise
  // be accepted as a number.
  if (digit_count > 0
      && (cursor >= len || isspace((unsigned char) src[cursor])))
  {
    tok = malloc(sizeof *tok);

    if (tok)
    {
      // The node records the line the literal starts on, so initialise it
      // before trailing whitespace (and newlines) are skipped.
      node_init(tok, NODE_NUM, res_str.data, lexer->line);
      lexer->cursor = cursor;
      lexer_skip_spaces(lexer);
    }
  }

  str_free(&res_str);
  return tok;
}

// Reads the next token from the lexer.
//
// Leading whitespace is skipped, then the scanners are tried in order:
// the "==" / "!=" texts, the "true" / "false" / "assert" keywords, a string
// literal, and finally a numeric literal. The first scanner that matches
// wins; the cursor is left after the token and any whitespace following it.
//
// Returns a newly malloc'ed node on success. Returns NULL when nothing
// matches; in that case, if input remains and lexer->err is set, an
// "unexpected symbol" error is reported through err_fatal.
node_t* lexer_try_new_next(lexer_t* lexer)
{
  assert(lexer);
  size_t len = strlen(lexer->source);

  lexer_skip_spaces(lexer);

  // Text
  // ====
  RZ_TEXT("==", NODE_EQ, 0);
  RZ_TEXT("!=", NODE_NE, 0);

  // Keywords
  // ========
  RZ_KEYWORD("true", NODE_BOOL, 1);
  RZ_KEYWORD("false", NODE_BOOL, 1);
  RZ_KEYWORD("assert", NODE_ASSERT, 0);

  // scan str
  {
    node_t* node = lexer_try_new_str(lexer);

    if (node)
    {
      lexer_skip_spaces(lexer);
      return node;
    }
  }

  // scan num
  {
    node_t* node = lexer_scan_num_literal(lexer, len);

    if (node)
    {
      return node;
    }
  }

  // Nothing matched: report the offending character unless we are simply at
  // the end of the input or no error sink was provided.
  if (lexer->cursor < len && lexer->err)
  {
    char msg[RZ_STR_LIMIT];
    snprintf(msg, sizeof msg, "unexpected symbol '%c'",
             lexer->source[lexer->cursor]);
    err_fatal(lexer->err, msg, lexer->line);
  }

  return NULL;
}
|
2023-12-09 21:59:24 +00:00
|
|
|
|
|
|
|
// Returns the type of the `lookahead`-th upcoming token (1 = the next token)
// without consuming any input: the cursor and line counter are saved before
// scanning and restored afterwards.
//
// Returns (NodeType) -1 when `lookahead` is less than 1 or when fewer than
// `lookahead` tokens can be read. Previously a failed read kept the type of
// the last token that *was* read, so peeking past the end of the input
// reported an earlier token's type.
//
// Tokens are produced with lexer_try_new_next, so any error it reports
// through lexer->err is also reported while peeking.
NodeType lexer_peek(lexer_t* lexer, int lookahead)
{
  assert(lexer);

  size_t const saved_cursor = lexer->cursor;
  int const saved_line = lexer->line;

  NodeType type = (NodeType) -1;

  for (int i = 0; i < lookahead; i++)
  {
    node_t* node = lexer_try_new_next(lexer);

    if (!node)
    {
      // The requested token does not exist: do not leak the type of an
      // earlier token to the caller.
      type = (NodeType) -1;
      break;
    }

    type = node->type;
    node_free(node);
    free(node);
  }

  lexer->cursor = saved_cursor;
  lexer->line = saved_line;

  return type;
}
|
|
|
|
|
2023-12-11 17:01:22 +00:00
|
|
|
// Advances the cursor past any run of whitespace (as defined by isspace),
// incrementing lexer->line once for every '\n' skipped. Stops at the first
// non-whitespace character or at the end of the source. `lexer` must not be
// NULL.
void lexer_skip_spaces(lexer_t* lexer)
{
  assert(lexer);
  size_t const len = strlen(lexer->source);

  while (lexer->cursor < len)
  {
    char const c = lexer->source[lexer->cursor];

    // <ctype.h> functions require a value representable as unsigned char;
    // a plain (possibly signed) char holding a byte >= 0x80 would be
    // undefined behaviour.
    if (!isspace((unsigned char) c))
    {
      break;
    }

    if (c == '\n')
    {
      lexer->line++;
    }

    lexer->cursor++;
  }
}
|
|
|
|
|
2023-12-09 21:59:24 +00:00
|
|
|
node_t* lexer_try_new_keyword(lexer_t* lexer, char* kw,
|
2023-12-15 18:30:20 +00:00
|
|
|
NodeType type, int has_value,
|
|
|
|
int is_kw)
|
2023-12-09 21:59:24 +00:00
|
|
|
{
|
|
|
|
assert(lexer);
|
|
|
|
assert(kw);
|
|
|
|
|
|
|
|
size_t len = strlen(kw);
|
|
|
|
size_t cursor = lexer->cursor;
|
|
|
|
|
|
|
|
if (cursor + len <= strlen(lexer->source))
|
|
|
|
{
|
|
|
|
int ok = 1;
|
|
|
|
|
|
|
|
for (size_t i=cursor; i<cursor + len; i++)
|
|
|
|
{
|
|
|
|
if (lexer->source[i] != kw[i - cursor])
|
|
|
|
{
|
|
|
|
ok = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ok)
|
|
|
|
{
|
|
|
|
int next_idx = lexer->cursor + len;
|
|
|
|
|
|
|
|
if (next_idx < strlen(lexer->source)
|
2023-12-15 18:30:20 +00:00
|
|
|
&& (is_kw && !lexer_is_sep(lexer, next_idx)))
|
2023-12-09 21:59:24 +00:00
|
|
|
{
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
node_t* node = malloc(sizeof(node_t));
|
|
|
|
node_init(node, type, has_value ? (char*) kw : "", lexer->line);
|
|
|
|
|
|
|
|
lexer->cursor += len;
|
|
|
|
return node;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
2023-12-10 03:49:28 +00:00
|
|
|
|
|
|
|
// Tries to lex a double-quoted string literal at the current cursor.
//
// Recognised escapes inside the literal: \" (quote), \\ (backslash),
// \n (newline), \t (tab) and \r (carriage return). For any other escape the
// backslash is dropped and the following character is taken literally.
//
// On success, advances the cursor just past the closing quote (trailing
// whitespace is NOT skipped) and returns a newly malloc'ed NODE_STR node
// initialised with the unescaped contents. Returns NULL, leaving the lexer
// untouched, when the cursor is not on a '"', the literal is unterminated,
// or the allocation fails.
node_t* lexer_try_new_str(lexer_t* lexer)
{
  assert(lexer);

  char const* src = lexer->source;
  size_t cursor = lexer->cursor;
  size_t const len = strlen(src);

  str_t res_str;
  str_init(&res_str);

  if (cursor >= len || src[cursor] != '"')
  {
    str_free(&res_str);
    return NULL;
  }

  cursor++;

  while (cursor < len && src[cursor] != '"')
  {
    char const c = src[cursor];

    if (c != '\\')
    {
      str_push(&res_str, c);
      cursor++;
      continue;
    }

    if (cursor + 1 >= len)
    {
      // A backslash as the very last character of the source: consume it so
      // the loop terminates (it used to spin forever here). The literal is
      // then rejected below as unterminated.
      cursor++;
      break;
    }

    switch (src[cursor + 1])
    {
      case '"':
        str_push(&res_str, '"');
        cursor += 2;
        break;

      case '\\':
        // Without this case a literal backslash could never be written and
        // "\\" was mis-read as an escaped closing quote.
        str_push(&res_str, '\\');
        cursor += 2;
        break;

      case 'n':
        str_push(&res_str, '\n');
        cursor += 2;
        break;

      case 't':
        str_push(&res_str, '\t');
        cursor += 2;
        break;

      case 'r':
        str_push(&res_str, '\r');
        cursor += 2;
        break;

      default:
        // Unknown escape: drop the backslash; the next iteration pushes the
        // following character as-is.
        cursor++;
        break;
    }
  }

  if (cursor >= len || src[cursor] != '"')
  {
    str_free(&res_str);
    return NULL;
  }

  cursor++;

  node_t* tok = malloc(sizeof *tok);

  if (!tok)
  {
    str_free(&res_str);
    return NULL;
  }

  node_init(tok, NODE_STR, res_str.data, lexer->line);
  str_free(&res_str);

  lexer->cursor = cursor;

  return tok;
}
|
2023-12-15 18:30:20 +00:00
|
|
|
|
|
|
|
// Tells whether the character at offset `idx` of the lexer source terminates
// a keyword.
//
// Returns 1 when `idx` is at or past the end of the source, or when the
// character there is whitespace, '=' or '!'; returns 0 otherwise.
// `lexer` must not be NULL.
int lexer_is_sep(lexer_t* lexer, size_t idx)
{
  assert(lexer);

  if (idx >= strlen(lexer->source))
  {
    return 1;
  }

  char const c = lexer->source[idx];

  // <ctype.h> functions require a value representable as unsigned char;
  // passing a negative plain char is undefined behaviour.
  return isspace((unsigned char) c)
      || c == '='
      || c == '!';
}
|