From bd819bfc542ab14b9581186045e53141bcfa69a3 Mon Sep 17 00:00:00 2001 From: bog Date: Thu, 28 Sep 2023 21:37:10 +0200 Subject: [PATCH] ADD: basic function call and int literal. --- doc/grammar.bnf | 13 ++++ lib/Compiler.cpp | 161 ++++++++++++++++++++++++++++++++++++++++++++--- lib/Compiler.hpp | 3 + lib/Lexer.cpp | 63 +++++++++++++++++++ lib/Lexer.hpp | 8 +++ lib/Loc.hpp | 8 +-- lib/Node.cpp | 4 +- lib/Node.hpp | 24 ++++--- lib/Parser.cpp | 150 ++++++++++++++++++++++++++++++++++++++++++- lib/Parser.hpp | 9 +++ lib/SymTable.cpp | 90 ++++++++++++++++++++++++++ lib/SymTable.hpp | 39 ++++++++++++ meson.build | 1 + tests/Lexer.cpp | 10 +++ tests/Parser.cpp | 35 +++++++++++ 15 files changed, 594 insertions(+), 24 deletions(-) create mode 100644 lib/SymTable.cpp create mode 100644 lib/SymTable.hpp diff --git a/doc/grammar.bnf b/doc/grammar.bnf index fce4538..7c9ee9c 100644 --- a/doc/grammar.bnf +++ b/doc/grammar.bnf @@ -2,15 +2,28 @@ PROG ::= INSTR* INSTR ::= | DIR | EXPR semicolon +| FUNDECL +| return EXPR semicolon +| EXTERN semicolon +EXTERN ::= extern fun ident opar PARAMS cpar RET +FUNDECL ::= fun ident opar PARAMS cpar RET BLOCK +PARAMS ::= (ident type? (comma ident type?)*) +RET ::= type? +BLOCK ::= obrace INSTR* cbrace DIR ::= hash ident EXPR EXPR ::= | ADDSUB + ADDSUB ::= MULDIVMOD ((add|sub) MULDIVMOD)* MULDIVMOD ::= LITERAL ((mul|div|mod) LITERAL)* LITERAL ::= | ident | int +| CALL + +CALL ::= ident opar ARGS cpar +ARGS ::= (EXPR (comma EXPR)*)? diff --git a/lib/Compiler.cpp b/lib/Compiler.cpp index b0724bf..6d49034 100644 --- a/lib/Compiler.cpp +++ b/lib/Compiler.cpp @@ -1,5 +1,7 @@ #include "Compiler.hpp" #include +#include +#include #include #include #include @@ -12,6 +14,7 @@ namespace wg { /*explicit*/ Compiler::Compiler() { + } /*virtual*/ Compiler::~Compiler() @@ -64,10 +67,13 @@ namespace wg llvm::errs() << "Target machine cannot emit a file of this type"; } + llvm::verifyModule(*m_module); pass.run(*m_module); - dest.flush(); - m_module->print(llvm::outs(), nullptr); + dest.flush(); + m_module->print(llvm::errs(), nullptr); + + } llvm::Value* Compiler::compile(std::shared_ptr node) @@ -75,16 +81,151 @@ namespace wg switch (node->type()) { case NODE_PROG: { - auto* block = llvm::BasicBlock::Create(*m_context, - "entry"); - m_builder->SetInsertPoint(block); - for (size_t i=0; isize(); i++) { compile(node->child(i)); } - return block; + return nullptr; + } break; + + case NODE_BLOCK: { + for (size_t i=0; isize(); i++) + { + compile(node->child(i)); + } + return nullptr; + } break; + + case NODE_RETURN: { + return m_builder->CreateRet(compile(node->child(0))); + } break; + + case NODE_EXTERN: { + auto ident = node->child(0)->repr(); + auto params = node->child(1); + auto ret = node->child(2); + + std::vector names; + std::vector types; + + for (size_t i=0; isize(); i++) + { + auto param = params->child(i); + + if (param->type() == NODE_IDENT) + { + names.push_back(param->repr()); + } + else if (param->type() == NODE_TYPE) + { + auto ty = llvm::Type::getInt32Ty(*m_context); + + for (auto name: names) + { + m_sym->declare(name, ty, node->loc()); + types.push_back(ty); + } + + names.clear(); + } + } + + llvm::Type* ret_type = llvm::Type::getVoidTy(*m_context); + + if (ret->size() > 0) + { + ret_type = llvm::Type::getInt32Ty(*m_context); + } + + auto fun_type = llvm::FunctionType::get(ret_type, types, false); + auto fun = llvm::Function::Create(fun_type, + llvm::Function::ExternalLinkage, + ident, + *m_module); + + m_sym->declare_prototype(ident, fun_type, node->loc()); + return fun; + } break; + + case NODE_FUNDECL: { + auto ident = node->child(0)->repr(); + auto params = node->child(1); + auto ret = node->child(2); + + std::vector names; + std::vector types; + + for (size_t i=0; isize(); i++) + { + auto param = params->child(i); + + if (param->type() == NODE_IDENT) + { + names.push_back(param->repr()); + } + else if (param->type() == NODE_TYPE) + { + auto ty = llvm::Type::getInt32Ty(*m_context); + + for (auto name: names) + { + m_sym->declare(name, ty, node->loc()); + types.push_back(ty); + } + + names.clear(); + } + } + + llvm::Type* ret_type = llvm::Type::getVoidTy(*m_context); + auto body = node->child(3); + + if (ret->size() > 0) + { + ret_type = llvm::Type::getInt32Ty(*m_context); + } + + auto fun_type = llvm::FunctionType::get(ret_type, types, false); + auto fun = llvm::Function::Create(fun_type, + llvm::Function::ExternalLinkage, + ident, + *m_module); + + m_sym->declare(ident, fun_type, node->loc()); + + llvm::BasicBlock* old_bb = m_builder->GetInsertBlock(); + + auto* bb = llvm::BasicBlock::Create(*m_context, + "entry", + fun); + m_builder->SetInsertPoint(bb); + + compile(body); + + m_builder->SetInsertPoint(old_bb); + + llvm::verifyFunction(*fun); + + return fun; + } break; + + case NODE_CALL: { + std::string ident = node->child(0)->repr(); + + auto fun = m_module->getFunction(ident); + WG_ASSERT(fun, "cannot call unknown function '" + ident + "'"); + + std::vector values; + + for (size_t i=0; ichild(1)->size(); i++) + { + auto arg = node->child(1)->child(i); + auto val = compile(arg); + values.push_back(val); + } + + return m_builder->CreateCall(fun, values); } break; case NODE_ADD: { @@ -118,7 +259,11 @@ namespace wg } break; case NODE_INT: { - return llvm::ConstantInt::get(*m_context, llvm::APInt(32, 0, true)); + + return llvm::ConstantInt::get(*m_context, + llvm::APInt(32, + std::stoi(node->repr()), + true)); } break; default: diff --git a/lib/Compiler.hpp b/lib/Compiler.hpp index 8804ad3..398d9a2 100644 --- a/lib/Compiler.hpp +++ b/lib/Compiler.hpp @@ -7,6 +7,7 @@ #include "commons.hpp" #include "Node.hpp" +#include "SymTable.hpp" namespace wg { @@ -28,6 +29,8 @@ namespace wg std::unique_ptr m_module = std::make_unique("my module", *m_context); + + std::unique_ptr m_sym = std::make_unique(); }; } diff --git a/lib/Lexer.cpp b/lib/Lexer.cpp index 64df3a0..1860876 100644 --- a/lib/Lexer.cpp +++ b/lib/Lexer.cpp @@ -5,6 +5,14 @@ namespace wg { /*explicit*/ Lexer::Lexer() { + add_keyword("int", NODE_TYPE, true); + add_keyword("fun", NODE_FUN); + add_keyword("return", NODE_RETURN); + add_keyword("extern", NODE_EXTERN); + + add_text("{", NODE_OBRACE); + add_text("}", NODE_CBRACE); + add_text(",", NODE_COMMA); add_text("#", NODE_HASH); add_text("+", NODE_ADD); add_text("-", NODE_SUB); @@ -35,6 +43,18 @@ namespace wg skip_spaces(); + while (m_cursor + 1 < m_source.size() + && m_source[m_cursor] == ':' + && m_source[m_cursor + 1] == ':') + { + while (m_source[m_cursor] != '\n') + { + m_cursor++; + } + + skip_spaces(); + } + for (auto scanner: m_scanners) { auto info = scanner(); @@ -87,6 +107,20 @@ namespace wg node, has_value)); } + void Lexer::add_keyword(std::string const& text, + NodeType node, + bool has_value) + { + if (text.size() == 1) + { + m_seps.push_back(text[0]); + } + + m_scanners.push_back(std::bind(&Lexer::scan_keyword, + this, text, + node, has_value)); + } + bool Lexer::is_sep(size_t index) const { WG_ASSERT(index < m_source.size(), "cannot find separator"); @@ -141,6 +175,35 @@ namespace wg }; } + std::optional Lexer::scan_keyword(std::string const& text, + NodeType type, + bool has_value) const + { + if (m_cursor + text.size() > m_source.size()) + { + return std::nullopt; + } + + for (size_t i=0; i Lexer::scan_ident() const { size_t cursor = m_cursor; diff --git a/lib/Lexer.hpp b/lib/Lexer.hpp index 7c45513..a336063 100644 --- a/lib/Lexer.hpp +++ b/lib/Lexer.hpp @@ -36,6 +36,10 @@ namespace wg NodeType node, bool has_value=false); + void add_keyword(std::string const& text, + NodeType node, + bool has_value=false); + bool is_sep(size_t index) const; void skip_spaces(); @@ -44,6 +48,10 @@ namespace wg NodeType type, bool has_value) const; + std::optional scan_keyword(std::string const& text, + NodeType type, + bool has_value) const; + std::optional scan_ident() const; std::optional scan_int() const; }; diff --git a/lib/Loc.hpp b/lib/Loc.hpp index 7b4f3b4..961b176 100644 --- a/lib/Loc.hpp +++ b/lib/Loc.hpp @@ -16,10 +16,10 @@ namespace wg int line() const { return m_line; } template - void error(std::string const& what); + void error(std::string const& what) const; template - void error(std::stringstream const& what); + void error(std::stringstream const& what) const; private: std::filesystem::path m_origin; @@ -27,7 +27,7 @@ namespace wg }; template - void Loc::error(std::string const& what) + void Loc::error(std::string const& what) const { std::stringstream ss; ss << m_origin.string() << ": ERROR " << what; @@ -36,7 +36,7 @@ namespace wg } template - void Loc::error(std::stringstream const& what) + void Loc::error(std::stringstream const& what) const { error(what.str()); } diff --git a/lib/Node.cpp b/lib/Node.cpp index fb5699e..6a74b2f 100644 --- a/lib/Node.cpp +++ b/lib/Node.cpp @@ -20,7 +20,9 @@ namespace wg std::shared_ptr Node::child(size_t index) const { - WG_ASSERT(index < size(), "aze"); + WG_ASSERT(index < size(), "Cannot get child node of '" + + string() + + "'"); return m_children.at(index); } diff --git a/lib/Node.hpp b/lib/Node.hpp index 837b879..f0d33bf 100644 --- a/lib/Node.hpp +++ b/lib/Node.hpp @@ -4,16 +4,20 @@ #include "commons.hpp" #include "Loc.hpp" -#define NODE_TYPES(G) \ - G(NODE_PROG), \ - G(NODE_IDENT), \ - G(NODE_HASH), \ - G(NODE_DIR), \ - G(NODE_INT), \ - G(NODE_ADD), G(NODE_SUB), \ - G(NODE_MUL),G(NODE_DIV), \ - G(NODE_MOD), G(NODE_OPAR), G(NODE_CPAR), \ - G(NODE_SEMICOLON) +#define NODE_TYPES(G) \ + G(NODE_PROG), \ + G(NODE_IDENT), \ + G(NODE_HASH), \ + G(NODE_DIR), \ + G(NODE_INT), \ + G(NODE_ADD), G(NODE_SUB), \ + G(NODE_MUL),G(NODE_DIV), \ + G(NODE_MOD), G(NODE_OPAR), G(NODE_CPAR), \ + G(NODE_SEMICOLON), G(NODE_COMMA), G(NODE_CALL), \ + G(NODE_ARGS), G(NODE_TYPE), G(NODE_RETURN), \ + G(NODE_FUN), G(NODE_PARAMS), G(NODE_BLOCK), \ + G(NODE_OBRACE), G(NODE_CBRACE), G(NODE_FUNDECL), \ + G(NODE_EXTERN), G(NODE_RET) namespace wg { diff --git a/lib/Parser.cpp b/lib/Parser.cpp index 30332aa..77b9d0e 100644 --- a/lib/Parser.cpp +++ b/lib/Parser.cpp @@ -1,4 +1,5 @@ #include "Parser.hpp" +#include "lib/Node.hpp" namespace wg { @@ -23,7 +24,7 @@ namespace wg { if (m_cursor >= m_tokens.size()) { - return Loc {}; + return m_tokens.back()->loc(); } return m_tokens[m_cursor]->loc(); @@ -110,11 +111,120 @@ namespace wg return parse_dir(); } + if (type_is(NODE_FUN)) + { + return parse_fundecl(); + } + + if (type_is(NODE_RETURN)) + { + auto node = consume(); + node->add_child(parse_expr()); + consume(NODE_SEMICOLON); + return node; + } + + if (type_is(NODE_EXTERN)) + { + auto node = parse_extern(); + consume(NODE_SEMICOLON); + return node; + } + auto expr = parse_expr(); consume(NODE_SEMICOLON); return expr; } + std::shared_ptr Parser::parse_extern() + { + auto node = consume(NODE_EXTERN); + consume(NODE_FUN); + node->add_child(consume(NODE_IDENT)); + + consume(NODE_OPAR); + node->add_child(parse_params()); + consume(NODE_CPAR); + + node->add_child(parse_ret()); + + return node; + } + + std::shared_ptr Parser::parse_fundecl() + { + auto node = make_node(NODE_FUNDECL); + consume(NODE_FUN); + node->add_child(consume(NODE_IDENT)); + consume(NODE_OPAR); + node->add_child(parse_params()); + consume(NODE_CPAR); + + node->add_child(parse_ret()); + + node->add_child(parse_block()); + + return node; + } + + std::shared_ptr Parser::parse_params() + { + auto node = make_node(NODE_PARAMS); + + if (type_is(NODE_CPAR)) + { + return node; + } + + node->add_child(consume(NODE_IDENT)); + + if (type_is(NODE_TYPE)) + { + node->add_child(consume()); + } + + while (type_is(NODE_COMMA)) + { + consume(); + + node->add_child(consume(NODE_IDENT)); + + if (type_is(NODE_TYPE)) + { + node->add_child(consume()); + } + } + + return node; + } + + std::shared_ptr Parser::parse_ret() + { + auto node = make_node(NODE_RET); + + if (type_is(NODE_TYPE)) + { + node->add_child(consume()); + } + + return node; + } + + std::shared_ptr Parser::parse_block() + { + auto node = make_node(NODE_BLOCK); + consume(NODE_OBRACE); + + while (type_isnt(NODE_CBRACE)) + { + node->add_child(parse_instr()); + } + + consume(NODE_CBRACE); + + return node; + } + std::shared_ptr Parser::parse_dir() { auto node = make_node(NODE_DIR); @@ -164,6 +274,12 @@ namespace wg std::shared_ptr Parser::parse_literal() { + if (type_is(NODE_IDENT) + && type_is(NODE_OPAR, 1)) + { + return parse_call(); + } + if (type_is(NODE_INT) || type_is(NODE_IDENT)) { @@ -189,4 +305,36 @@ namespace wg + "'"); return nullptr; } + + std::shared_ptr Parser::parse_call() + { + auto node = make_node(NODE_CALL); + node->add_child(consume(NODE_IDENT)); + + consume(NODE_OPAR); + node->add_child(parse_args()); + consume(NODE_CPAR); + + return node; + } + + std::shared_ptr Parser::parse_args() + { + auto node = make_node(NODE_ARGS); + + if (type_is(NODE_CPAR)) + { + return node; + } + + node->add_child(parse_expr()); + + while (type_is(NODE_COMMA)) + { + consume(); + node->add_child(parse_expr()); + } + + return node; + } } diff --git a/lib/Parser.hpp b/lib/Parser.hpp index 9d796b7..f7c8da2 100644 --- a/lib/Parser.hpp +++ b/lib/Parser.hpp @@ -30,12 +30,21 @@ namespace wg std::shared_ptr parse_prog(); std::shared_ptr parse_instr(); + std::shared_ptr parse_extern(); + std::shared_ptr parse_fundecl(); + std::shared_ptr parse_params(); + std::shared_ptr parse_ret(); + std::shared_ptr parse_block(); std::shared_ptr parse_dir(); + std::shared_ptr parse_expr(); std::shared_ptr parse_addsub(); std::shared_ptr parse_muldivmod(); std::shared_ptr parse_literal(); + std::shared_ptr parse_call(); + std::shared_ptr parse_args(); + }; } diff --git a/lib/SymTable.cpp b/lib/SymTable.cpp new file mode 100644 index 0000000..99e6630 --- /dev/null +++ b/lib/SymTable.cpp @@ -0,0 +1,90 @@ +#include "SymTable.hpp" +#include "commons.hpp" + +namespace wg +{ + /*explicit*/ SymTable::SymTable() + { + } + + /*virtual*/ SymTable::~SymTable() + { + } + + bool SymTable::exists(std::string const& name) const + { + return m_entries.find(name) != std::end(m_entries); + } + + void SymTable::declare_prototype(std::string const& name, + llvm::Type* type, + Loc const& loc) + { + if (auto itr=m_entries.find(name); + itr != std::end(m_entries)) + { + loc.error("cannot declare existing symbol '" + + name + + "'"); + } + + SymEntry entry; + entry.name = name; + entry.type = type; + entry.prototype = true; + m_entries[name] = entry; + } + + void SymTable::declare(std::string const& name, + llvm::Type* type, + Loc const& loc) + { + if (auto itr=m_entries.find(name); + itr != std::end(m_entries) + && itr->second.prototype == false) + { + loc.error("cannot declare existing symbol '" + + name + + "'"); + } + + SymEntry entry; + entry.name = name; + entry.type = type; + entry.prototype = false; + m_entries[name] = entry; + } + + void SymTable::set(std::string const& name, + llvm::Type* type, + Loc const& loc) + { + if (auto itr=m_entries.find(name); + itr == std::end(m_entries)) + { + loc.error("cannot set inexisting symbol '" + + name + + "'"); + } + + SymEntry entry; + entry.name = name; + entry.type = type; + m_entries[name] = entry; + } + + SymEntry& SymTable::get(std::string const& name, Loc const& loc) + { + if (auto itr=m_entries.find(name); + itr != std::end(m_entries)) + { + return itr->second; + } + else + { + loc.error("cannot find symbol '" + name + "'"); + } + + abort(); + } +} diff --git a/lib/SymTable.hpp b/lib/SymTable.hpp new file mode 100644 index 0000000..43d9a8d --- /dev/null +++ b/lib/SymTable.hpp @@ -0,0 +1,39 @@ +#ifndef wg_SYMTABLE_HPP +#define wg_SYMTABLE_HPP + +#include +#include "commons.hpp" +#include "Loc.hpp" + +namespace wg +{ + WG_ERROR(symbol_error); + + struct SymEntry { + std::string name; + llvm::Type* type; + bool prototype = false; + }; + + class SymTable + { + public: + explicit SymTable(); + virtual ~SymTable(); + + bool exists(std::string const& name) const; + + void declare_prototype(std::string const& name, + llvm::Type* type, + Loc const& loc); + + void declare(std::string const& name, llvm::Type* type, Loc const& loc); + void set(std::string const& name, llvm::Type* type, Loc const& loc); + SymEntry& get(std::string const& name, Loc const& loc); + + private: + std::unordered_map m_entries; + }; +} + +#endif diff --git a/meson.build b/meson.build index 283c87d..71608b3 100644 --- a/meson.build +++ b/meson.build @@ -15,6 +15,7 @@ wongola_lib = static_library( 'lib/Parser.cpp', 'lib/Compiler.cpp', 'lib/Loc.cpp', + 'lib/SymTable.cpp', ], dependencies: [ dependency('LLVM') diff --git a/tests/Lexer.cpp b/tests/Lexer.cpp index 585308b..8ec4ef5 100644 --- a/tests/Lexer.cpp +++ b/tests/Lexer.cpp @@ -58,3 +58,13 @@ TEST_CASE_METHOD(LexerTest, "Lexer_int_arith") test_next(lex, "CPAR"); test_end(lex); } + +TEST_CASE_METHOD(LexerTest, "Lexer_fun_call") +{ + wg::Lexer lex; + lex.scan(" , int extern "); + test_next(lex, "COMMA"); + test_next(lex, "TYPE[int]"); + test_next(lex, "EXTERN"); + test_end(lex); +} diff --git a/tests/Parser.cpp b/tests/Parser.cpp index 79f48cf..6a29d64 100644 --- a/tests/Parser.cpp +++ b/tests/Parser.cpp @@ -55,3 +55,38 @@ TEST_CASE_METHOD(ParserTest, "Parser_int") test_parse("PROG(MOD(ADD(INT[1],INT[2]),INT[3]))", " (1 + 2) % 3; "); } + +TEST_CASE_METHOD(ParserTest, "Parser_call") +{ + test_parse("PROG(CALL(IDENT[hello],ARGS))", + " hello(); "); + + test_parse("PROG(CALL(IDENT[hello_world],ARGS(IDENT[x])))", + " hello_world(x); "); + + test_parse("PROG(CALL(IDENT[hello_world],ARGS(IDENT[x],INT[78])))", + " hello_world(x, 78); "); +} + +TEST_CASE_METHOD(ParserTest, "Parser_fundecl") +{ + test_parse("PROG(EXTERN(IDENT[hello]," + "PARAMS(IDENT[x],IDENT[y],TYPE[int]),RET(TYPE[int])))", + " extern fun hello(x, y int) int; "); + + test_parse("PROG(FUNDECL(IDENT[couc],PARAMS,RET,BLOCK))", + " fun couc() {} "); + + test_parse("PROG(FUNDECL(IDENT[couc],PARAMS(" + "IDENT[x],IDENT[y],TYPE[int]" + "),RET,BLOCK(RETURN(INT[4]))))", + " fun couc(x, y int) { return 4; } "); + + test_parse("PROG(FUNDECL(IDENT[couc],PARAMS(" + "IDENT[x],IDENT[y],TYPE[int]" + "),RET(TYPE[int]),BLOCK(RETURN(INT[4]))))", + " fun couc(x, y int) int { return 4; } "); + + test_parse("PROG(RETURN(ADD(CALL(IDENT[a],ARGS),INT[1])))", + " return a() + 1; "); +}