From f264450ea4bfef8e8981a3cec2fab689bc3e7e3e Mon Sep 17 00:00:00 2001 From: jeaye Date: Sun, 6 Oct 2024 18:27:11 -0700 Subject: [PATCH 001/101] Add initial LLVM IR codegen --- compiler+runtime/CMakeLists.txt | 2 + compiler+runtime/include/cpp/jank/c_api.h | 52 ++ .../cpp/jank/codegen/llvm_processor.hpp | 91 ++++ compiler+runtime/include/cpp/jank/prelude.hpp | 2 + .../include/cpp/jank/runtime/context.hpp | 1 + .../include/cpp/jank/runtime/core.hpp | 2 + compiler+runtime/src/cpp/jank/c_api.cpp | 141 ++++++ .../src/cpp/jank/codegen/llvm_processor.cpp | 470 ++++++++++++++++++ .../src/cpp/jank/codegen/processor.cpp | 2 - .../src/cpp/jank/runtime/context.cpp | 23 +- .../src/cpp/jank/runtime/core.cpp | 9 + 11 files changed, 791 insertions(+), 4 deletions(-) create mode 100644 compiler+runtime/include/cpp/jank/c_api.h create mode 100644 compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp create mode 100644 compiler+runtime/src/cpp/jank/c_api.cpp create mode 100644 compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp diff --git a/compiler+runtime/CMakeLists.txt b/compiler+runtime/CMakeLists.txt index 6bb45828..aae32492 100644 --- a/compiler+runtime/CMakeLists.txt +++ b/compiler+runtime/CMakeLists.txt @@ -132,6 +132,7 @@ endfunction() add_library( jank_lib STATIC + src/cpp/jank/c_api.cpp src/cpp/jank/hash.cpp src/cpp/jank/util/cli.cpp src/cpp/jank/util/sha256.cpp @@ -199,6 +200,7 @@ add_library( src/cpp/jank/analyze/step/force_boxed.cpp src/cpp/jank/evaluate.cpp src/cpp/jank/codegen/processor.cpp + src/cpp/jank/codegen/llvm_processor.cpp src/cpp/jank/jit/processor.cpp ) diff --git a/compiler+runtime/include/cpp/jank/c_api.h b/compiler+runtime/include/cpp/jank/c_api.h new file mode 100644 index 00000000..a354c72b --- /dev/null +++ b/compiler+runtime/include/cpp/jank/c_api.h @@ -0,0 +1,52 @@ +#pragma once + +#ifdef __cplusplus +extern "C" +{ +#endif + + /* NOLINTNEXTLINE(modernize-use-using) */ + typedef void *jank_object_ptr; + /* 
NOLINTNEXTLINE(modernize-use-using) */ + typedef long long jank_native_integer; + /* NOLINTNEXTLINE(modernize-use-using) */ + typedef long double jank_native_real; + /* NOLINTNEXTLINE(modernize-use-using) */ + typedef char jank_native_bool; + /* NOLINTNEXTLINE(modernize-use-using) */ + /* NOTE(review): uint32_t is used here but c_api.h as added by this patch contains no include of stdint.h; a C translation unit that includes this header first may not compile -- TODO confirm and add the include. */ typedef uint32_t jank_native_hash; + + /* Evaluation and reading entry points; every jank_object_ptr is the void pointer typedef declared at the top of this header. */ jank_object_ptr jank_eval(jank_object_ptr s); + jank_object_ptr jank_read_string(jank_object_ptr s); + + jank_object_ptr jank_var_intern(jank_object_ptr ns, jank_object_ptr name); + jank_object_ptr jank_var_bind_root(jank_object_ptr var, jank_object_ptr val); + + jank_object_ptr jank_deref(jank_object_ptr o); + + /* Fixed-arity call entry points; this header declares arities 0 through 3 only. */ jank_object_ptr jank_call0(jank_object_ptr f); + jank_object_ptr jank_call1(jank_object_ptr f, jank_object_ptr a1); + jank_object_ptr jank_call2(jank_object_ptr f, jank_object_ptr a1, jank_object_ptr a2); + jank_object_ptr + jank_call3(jank_object_ptr f, jank_object_ptr a1, jank_object_ptr a2, jank_object_ptr a3); + + /* NOTE(review): in C before C23 an empty parameter list leaves the parameters unspecified; writing (void) would make these proper prototypes for C callers. */ jank_object_ptr jank_nil(); + jank_object_ptr jank_true(); + jank_object_ptr jank_false(); + jank_object_ptr jank_create_integer(jank_native_integer i); + jank_object_ptr jank_create_real(jank_native_real r); + jank_object_ptr jank_create_string(char const *s); + + jank_object_ptr jank_create_function0(jank_object_ptr (*f)()); + jank_object_ptr jank_create_function1(jank_object_ptr (*f)(jank_object_ptr)); + jank_object_ptr jank_create_function2(jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr)); + jank_object_ptr + jank_create_function3(jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr)); + + /* Predicates return jank_native_bool, which is the char typedef above. */ jank_native_bool jank_truthy(jank_object_ptr o); + jank_native_bool jank_equal(jank_object_ptr l, jank_object_ptr r); + jank_native_hash jank_to_hash(jank_object_ptr o); + +#ifdef __cplusplus +} +#endif diff --git a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp new file mode 100644 index 00000000..771b9f51 --- /dev/null +++ 
b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp @@ -0,0 +1,91 @@ +#pragma once + +#include +#include +#include + +#include +#include + +namespace jank::codegen +{ + /* NOTE(review): this using-directive sits at namespace scope in a header, so every file that includes llvm_processor.hpp gets the jank::runtime names visible inside jank::codegen. */ using namespace jank::runtime; + + /* Code generator that walks analyzed expressions and produces llvm::Value results; it declares one gen overload per analyze::expr node type, each also taking the enclosing function arity. */ struct llvm_processor + { + llvm_processor() = delete; + llvm_processor(analyze::expression_ptr const &expr, + native_persistent_string const &module, + compilation_target target); + llvm_processor(analyze::expr::function const &expr, + native_persistent_string const &module, + compilation_target target); + llvm_processor(llvm_processor const &) = delete; + llvm_processor(llvm_processor &&) noexcept = default; + + void gen(); + llvm::Value *gen(analyze::expression_ptr const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::def const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::var_deref const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::var_ref const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::call const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::primitive_literal const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::vector const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::map const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::set const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::local_reference const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::function const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::recur const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::let const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::do_ const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::if_ const &, + 
analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::throw_ const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::try_ const &, + analyze::expr::function_arity const &); + llvm::Value *gen(analyze::expr::native_raw const &, + analyze::expr::function_arity const &); + + llvm::Value *gen_var(obj::symbol_ptr qualified_name); + + native_persistent_string to_string(); + + void create_function(); + void install_global_ctors(); + llvm::Value *nil_global(); + llvm::Value *string_global(obj::persistent_string_ptr const s); + + /* This is stored just to keep the expression alive. */ + /* NOTE(review): neither constructor defined in llvm_processor.cpp in this patch initializes root_expr, so it stays empty -- TODO confirm the expression is kept alive some other way. */ analyze::expression_ptr root_expr{}; + /* Reference member: the referenced function expression has to outlive this processor. */ analyze::expr::function const &root_fn; + native_persistent_string module_name; + compilation_target target{}; + native_persistent_string struct_name; + native_persistent_string expression_fn_name; + + std::unique_ptr context; + std::unique_ptr module; + std::unique_ptr> builder; + llvm::Function *fn{}; + llvm::Value *nil{}; + /* Caches consulted by nil_global, string_global and gen_var before a new module global is created. */ native_unordered_map literal_globals; + native_unordered_map var_globals; + /* NOTE(review): global_ctors is never populated in this patch; every emplace_back on it in llvm_processor.cpp is commented out. */ native_vector global_ctors; + /* Block that receives the initialization code for globals; created detached in the constructor and inserted into jank_global_init by install_global_ctors. */ llvm::BasicBlock *global_ctor_block{}; + }; +} diff --git a/compiler+runtime/include/cpp/jank/prelude.hpp b/compiler+runtime/include/cpp/jank/prelude.hpp index 565a4966..b7e46393 100644 --- a/compiler+runtime/include/cpp/jank/prelude.hpp +++ b/compiler+runtime/include/cpp/jank/prelude.hpp @@ -40,3 +40,5 @@ #include #include #include + +#include diff --git a/compiler+runtime/include/cpp/jank/runtime/context.hpp b/compiler+runtime/include/cpp/jank/runtime/context.hpp index 078c9244..e557ff62 100644 --- a/compiler+runtime/include/cpp/jank/runtime/context.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/context.hpp @@ -76,6 +76,7 @@ namespace jank::runtime object_ptr eval_file(native_persistent_string_view const &path); object_ptr eval_string(native_persistent_string_view const &code); + object_ptr read_string(native_persistent_string_view const &code); native_vector 
analyze_string(native_persistent_string_view const &code, native_bool const eval = true); diff --git a/compiler+runtime/include/cpp/jank/runtime/core.hpp b/compiler+runtime/include/cpp/jank/runtime/core.hpp index 8475ab30..dc584df0 100644 --- a/compiler+runtime/include/cpp/jank/runtime/core.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/core.hpp @@ -27,4 +27,6 @@ namespace jank::runtime native_persistent_string namespace_(object_ptr o); native_bool is_callable(object_ptr o); + + native_hash to_hash(object_ptr o); } diff --git a/compiler+runtime/src/cpp/jank/c_api.cpp b/compiler+runtime/src/cpp/jank/c_api.cpp new file mode 100644 index 00000000..2b44c064 --- /dev/null +++ b/compiler+runtime/src/cpp/jank/c_api.cpp @@ -0,0 +1,141 @@ +#include + +using namespace jank; +using namespace jank::runtime; + +extern "C" +{ + /* Evaluates the code held in the data member of the object s through the runtime context and returns the result. */ jank_object_ptr jank_eval(jank_object_ptr s) + { + auto const s_obj(try_object(reinterpret_cast(s))); + return __rt_ctx->eval_string(s_obj->data); + } + + /* Same shape as jank_eval, but forwards to read_string on the runtime context. */ jank_object_ptr jank_read_string(jank_object_ptr s) + { + auto const s_obj(try_object(reinterpret_cast(s))); + return __rt_ctx->read_string(s_obj->data); + } + + /* Interns a var; both arguments are treated as runtime objects whose data member supplies the namespace and the name. NOTE(review): gen_var in llvm_processor.cpp passes the results of CreateGlobalStringPtr, i.e. raw C strings, to jank_var_intern, which does not match this object-based handling -- TODO confirm. */ jank_object_ptr jank_var_intern(jank_object_ptr ns, jank_object_ptr name) + { + auto const ns_obj(try_object(reinterpret_cast(ns))); + auto const name_obj(try_object(reinterpret_cast(name))); + return erase(__rt_ctx->intern_var(ns_obj->data, name_obj->data).expect_ok()); + } + + /* Binds val as the root of var and returns whatever bind_root returns, type-erased. */ jank_object_ptr jank_var_bind_root(jank_object_ptr var, jank_object_ptr val) + { + auto const var_obj(try_object(reinterpret_cast(var))); + auto const val_obj(reinterpret_cast(val)); + return erase(var_obj->bind_root(val_obj)); + } + + jank_object_ptr jank_deref(jank_object_ptr o) + { + auto const o_obj(reinterpret_cast(o)); + return behavior::deref(o_obj); + } + + /* The jank_callN functions cast each opaque pointer back to a runtime object and forward to dynamic_call. */ jank_object_ptr jank_call0(jank_object_ptr f) + { + auto const f_obj(reinterpret_cast(f)); + return dynamic_call(f_obj); + } + + jank_object_ptr jank_call1(jank_object_ptr f, jank_object_ptr a1) + { + 
auto const f_obj(reinterpret_cast(f)); + auto const a1_obj(reinterpret_cast(a1)); + return dynamic_call(f_obj, a1_obj); + } + + jank_object_ptr jank_call2(jank_object_ptr f, jank_object_ptr a1, jank_object_ptr a2) + { + auto const f_obj(reinterpret_cast(f)); + auto const a1_obj(reinterpret_cast(a1)); + auto const a2_obj(reinterpret_cast(a2)); + return dynamic_call(f_obj, a1_obj, a2_obj); + } + + jank_object_ptr + jank_call3(jank_object_ptr f, jank_object_ptr a1, jank_object_ptr a2, jank_object_ptr a3) + { + auto const f_obj(reinterpret_cast(f)); + auto const a1_obj(reinterpret_cast(a1)); + auto const a2_obj(reinterpret_cast(a2)); + auto const a3_obj(reinterpret_cast(a3)); + return dynamic_call(f_obj, a1_obj, a2_obj, a3_obj); + } + + /* Accessors that return the runtime nil, true and false constants, type-erased. */ jank_object_ptr jank_nil() + { + return erase(obj::nil::nil_const()); + } + + jank_object_ptr jank_true() + { + return erase(obj::boolean::true_const()); + } + + jank_object_ptr jank_false() + { + return erase(obj::boolean::false_const()); + } + + /* The jank_create functions box a native value with make_box and return it type-erased. */ jank_object_ptr jank_create_integer(jank_native_integer i) + { + return erase(make_box(i)); + } + + jank_object_ptr jank_create_real(jank_native_real r) + { + return erase(make_box(r)); + } + + jank_object_ptr jank_create_string(char const *s) + { + /* A null s is only rejected in builds where assert is active. */ assert(s); + return erase(make_box(s)); + } + + jank_object_ptr jank_create_function0(jank_object_ptr (*f)()) + { + return erase(make_box(f)); + } + + jank_object_ptr jank_create_function1(jank_object_ptr (*f)(jank_object_ptr)) + { + return erase(make_box(f)); + } + + jank_object_ptr jank_create_function2(jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr)) + { + return erase(make_box(f)); + } + + jank_object_ptr + jank_create_function3(jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr)) + { + return erase(make_box(f)); + } + + /* Returns the result of the runtime truthy check on o, converted with static_cast. */ jank_native_bool jank_truthy(jank_object_ptr o) + { + auto const o_obj(reinterpret_cast(o)); + return static_cast(truthy(o_obj)); + } + + jank_native_bool jank_equal(jank_object_ptr l, jank_object_ptr 
r) + { + auto const l_obj(reinterpret_cast(l)); + auto const r_obj(reinterpret_cast(r)); + return static_cast(equal(l_obj, r_obj)); + } + + jank_native_hash jank_to_hash(jank_object_ptr o) + { + auto const o_obj(reinterpret_cast(o)); + return to_hash(o_obj); + } +} diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp new file mode 100644 index 00000000..d17e7c4b --- /dev/null +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -0,0 +1,470 @@ +#include +#include + +#include + +/* https://www.youtube.com/watch?v=Nw9YmNuJhJ4 */ + +namespace jank::codegen +{ + llvm_processor::llvm_processor(analyze::expression_ptr const &expr, + native_persistent_string const &module_name, + compilation_target const target) + /* Delegates with the function stored in expr->data; expr itself is not saved into root_expr here. */ : llvm_processor{ boost::get>(expr->data), + module_name, + target } + { + } + + llvm_processor::llvm_processor(analyze::expr::function const &expr, + native_persistent_string const &module_name, + compilation_target const target) + : root_fn{ expr } + , module_name{ module_name } + , target{ target } + , struct_name{ root_fn.unique_name } + , context{ std::make_unique() } + , module{ std::make_unique(module_name.c_str(), *context) } + , builder{ std::make_unique>(*context) } + , global_ctor_block{ llvm::BasicBlock::Create(*context, "entry") } + { + assert(root_fn.frame.data); + + /* Wraps global_ctor_block in the jank_global_init function, then creates the function for root_fn, then emits IR for its body. */ install_global_ctors(); + create_function(); + gen(); + } + + /* Creates the externally linked, zero-argument, pointer-returning function named after the munged struct_name and points the builder at its entry block. */ void llvm_processor::create_function() + { + auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), false)); + auto const munged_name(runtime::munge(struct_name)); + fn = llvm::Function::Create(fn_type, + llvm::Function::ExternalLinkage, + munged_name.c_str(), + *module); + /* NOTE(review): verification runs before any basic block has been added to fn, and its result is discarded. */ llvm::verifyFunction(*fn); + + auto const entry(llvm::BasicBlock::Create(*context, "entry", fn)); + builder->SetInsertPoint(entry); + } + + void llvm_processor::gen() + { + /* Only the first arity of root_fn is generated. */ auto const &arity(root_fn.arities[0]); + for(auto const &form : arity.body.values) + { 
+ gen(form, arity); + } + } + + llvm::Value * + llvm_processor::gen(analyze::expression_ptr const &ex, + analyze::expr::function_arity const &fn_arity) + { + llvm::Value *ret{}; + /* Dispatches on the concrete expression type held in ex->data. NOTE(review): the visitor lambda captures fn_arity by value, so the arity is copied on each dispatch; capturing it by reference would avoid that. */ boost::apply_visitor( + [this, fn_arity, &ret](auto const &typed_ex) { ret = gen(typed_ex, fn_arity); }, + ex->data); + return ret; + } + + llvm::Value *llvm_processor::gen(analyze::expr::def const &expr, + analyze::expr::function_arity const &arity) + { + /* ref is the module global returned by gen_var; it is passed to jank_var_bind_root below as-is, with no load emitted here -- TODO confirm that is intended. */ auto const ref(gen_var(expr.name)); + + /* TODO: Move vars into globals. */ + if(expr.value.is_some()) + { + auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), + { builder->getPtrTy(), builder->getPtrTy() }, + false)); + auto const fn(module->getOrInsertFunction("jank_var_bind_root", fn_type)); + + llvm::SmallVector args{ ref, gen(expr.value.unwrap(), arity) }; + builder->CreateCall(fn, args); + } + + return ref; + } + + llvm::Value *llvm_processor::gen(analyze::expr::var_deref const &expr, + analyze::expr::function_arity const &) + { + /* Emits a call to jank_deref on the global for the var and returns the call result. */ auto const ref(gen_var(expr.qualified_name)); + auto const fn_type( + llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); + auto const fn(module->getOrInsertFunction("jank_deref", fn_type)); + + llvm::SmallVector args{ ref }; + auto const call(builder->CreateCall(fn, args)); + return call; + } + + llvm::Value *llvm_processor::gen(analyze::expr::var_ref const &expr, + analyze::expr::function_arity const &) + { + return gen_var(expr.qualified_name); + } + + /* Maps an argument count to the name of the matching jank_callN runtime function; throws std::runtime_error for counts outside the accepted range. */ static native_persistent_string arity_to_call_fn(size_t const arity) + { + /* NOTE(review): the case range below is a GNU extension, and it accepts counts up to 10 while c_api.h in this patch declares only jank_call0 through jank_call3. */ switch(arity) + { + case 0 ... 
10: + return fmt::format("jank_call{}", arity); + default: + throw std::runtime_error{ fmt::format("invalid fn arity: {}", arity) }; + } + } + + llvm::Value *llvm_processor::gen(analyze::expr::call const &expr, + analyze::expr::function_arity const &arity) + { + /* Emits a call to the jank_callN runtime function chosen by argument count, passing the generated callee followed by each generated argument, all typed as pointers. */ auto const callee(gen(expr.source_expr, arity)); + + llvm::SmallVector arg_handles; + llvm::SmallVector arg_types; + arg_handles.reserve(expr.arg_exprs.size() + 1); + arg_types.reserve(expr.arg_exprs.size() + 1); + + arg_handles.emplace_back(callee); + arg_types.emplace_back(builder->getPtrTy()); + + for(auto const &arg_expr : expr.arg_exprs) + { + arg_handles.emplace_back(gen(arg_expr, arity)); + arg_types.emplace_back(builder->getPtrTy()); + } + + auto const call_fn_name(arity_to_call_fn(expr.arg_exprs.size())); + + auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); + auto const fn(module->getOrInsertFunction(call_fn_name.c_str(), fn_type)); + auto const call(builder->CreateCall(fn, arg_handles)); + + return call; + } + + llvm::Value * + llvm_processor::gen(analyze::expr::primitive_literal const &expr, + analyze::expr::function_arity const &) + { + /* Only two branches below produce a value, through nil_global and string_global; the other matched branches are empty and reach the nullptr return at the end of the lambda. */ return runtime::visit_object( + [&](auto const typed_o) -> llvm::Value * { + using T = typename decltype(typed_o)::value_type; + + if constexpr(std::same_as) + { + return nil_global(); + } + else if constexpr(std::same_as) + { + } + else if constexpr(std::same_as) + { + } + else if constexpr(std::same_as) + { + } + else if constexpr(std::same_as) + { + } + else if constexpr(std::same_as) + { + } + else if constexpr(std::same_as) + { + } + else if constexpr(std::same_as) + { + return string_global(typed_o); + } + else if constexpr(std::same_as) + { + } + else if constexpr(std::same_as) + { + } + else if constexpr(std::same_as) + { + } + else if constexpr(std::same_as) + { + } + else if constexpr(std::same_as) + { + } + /* Cons, etc. 
*/ + else if constexpr(runtime::behavior::seqable) + { + } + else + { + throw std::runtime_error{ fmt::format("unimplemented constant codegen: {}\n", + typed_o->to_string()) }; + } + return nullptr; + }, + expr.data); + } + + /* The overloads from here through let are placeholders in this patch: each one ignores its arguments and returns nullptr. */ llvm::Value *llvm_processor::gen(analyze::expr::vector const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value *llvm_processor::gen(analyze::expr::map const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value *llvm_processor::gen(analyze::expr::set const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value *llvm_processor::gen(analyze::expr::local_reference const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value *llvm_processor::gen(analyze::expr::function const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value *llvm_processor::gen(analyze::expr::recur const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value *llvm_processor::gen(analyze::expr::let const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value *llvm_processor::gen(analyze::expr::do_ const &expr, + analyze::expr::function_arity const &arity) + { + /* Generates every form in order and keeps the value of the last one; last stays null when there are no forms or when the last form generated nullptr. */ llvm::Value *last{}; + for(auto const &form : expr.values) + { + last = gen(form, arity); + } + + /* NOTE(review): there is no default case and no return after the switch, so control can flow off the end of this non-void function if expr_type holds any other value. */ switch(expr.expr_type) + { + case analyze::expression_type::statement: + case analyze::expression_type::nested: + { + return last; + } + case analyze::expression_type::return_statement: + { + /* In return position a ret instruction is emitted, falling back to the nil global when no value was produced. */ if(!last) + { + builder->CreateRet(nil_global()); + } + else + { + builder->CreateRet(last); + } + return nullptr; + } + } + } + + llvm::Value *llvm_processor::gen(analyze::expr::if_ const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value *llvm_processor::gen(analyze::expr::throw_ const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value 
*llvm_processor::gen(analyze::expr::try_ const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value *llvm_processor::gen(analyze::expr::native_raw const &, + analyze::expr::function_arity const &) + { + return nullptr; + } + + llvm::Value *llvm_processor::gen_var(obj::symbol_ptr const qualified_name) + { + /* Returns the cached module global for this var when one exists; otherwise creates it, records it in var_globals, and emits a jank_var_intern call plus a store of its result into global_ctor_block. */ auto const found(var_globals.find(qualified_name)); + if(found != var_globals.end()) + { + return found->second; + } + + auto const name(fmt::format("var_{}", munge(qualified_name->to_string()))); + auto const global(module->getOrInsertGlobal(name, builder->getPtrTy())); + var_globals[qualified_name] = global; + + //llvm::IRBuilder<> builder{ *context }; + //auto const init_type(llvm::FunctionType::get(builder.getVoidTy(), false)); + //auto const init(llvm::Function::Create(init_type, + // llvm::Function::InternalLinkage, + // "jank_global_var_init", + // *module)); + //auto const entry(llvm::BasicBlock::Create(*context, "entry", init)); + //builder.SetInsertPoint(entry); + + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); + auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), + { builder->getPtrTy(), builder->getPtrTy() }, + false)); + auto const fn(module->getOrInsertFunction("jank_var_intern", fn_type)); + + /* NOTE(review): these arguments are raw C string globals, while jank_var_intern in c_api.cpp casts its arguments to runtime objects and reads their data member -- TODO confirm. */ llvm::SmallVector args{ + builder->CreateGlobalStringPtr(qualified_name->ns.c_str()), + builder->CreateGlobalStringPtr(qualified_name->name.c_str()) + }; + auto const call(builder->CreateCall(fn, args)); + builder->CreateStore(call, global); + } + + //builder.CreateRet(call); + + //llvm::verifyFunction(*init); + + //global_ctors.emplace_back(init); + + return global; + } + + llvm::Value *llvm_processor::nil_global() + { + /* Looks up the nil constant in literal_globals first so the global and its initialization are emitted once per processor. */ auto const found(literal_globals.find(obj::nil::nil_const())); + if(found != literal_globals.end()) + { + return found->second; + } + + auto &global(literal_globals[obj::nil::nil_const()]); + global = module->getOrInsertGlobal("nil", 
builder->getPtrTy()); + + //llvm::IRBuilder<> builder{ *context }; + //auto const init_type(llvm::FunctionType::get(builder.getVoidTy(), false)); + //auto const init(llvm::Function::Create(init_type, + // llvm::Function::InternalLinkage, + // "jank_global_nil_init", + // *module)); + //auto const entry(llvm::BasicBlock::Create(*context, "entry", init)); + //builder.SetInsertPoint(entry); + + auto const create_fn_type(llvm::FunctionType::get(builder->getPtrTy(), false)); + auto const create_fn(module->getOrInsertFunction("jank_nil", create_fn_type)); + + { + /* The guard is scoped to this block; presumably it restores the previous insert point on scope exit so that emitting into global_ctor_block does not disturb the function being generated -- TODO confirm against the LLVM docs. */ llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); + auto const call(builder->CreateCall(create_fn)); + builder->CreateStore(call, global); + } + + //builder.CreateRetVoid(); + + //llvm::verifyFunction(*init); + + //global_ctors.emplace_back(init); + + return global; + } + + llvm::Value *llvm_processor::string_global(obj::persistent_string_ptr const s) + { + /* Returns the cached global for s when present; otherwise creates one and emits a jank_create_string call plus a store into global_ctor_block. */ auto const found(literal_globals.find(s)); + if(found != literal_globals.end()) + { + return found->second; + } + + auto &global(literal_globals[s]); + /* NOTE(review): the global is named from the string hash alone, so two distinct strings with equal hashes would resolve to the same global through getOrInsertGlobal -- TODO confirm. */ auto const name(fmt::format("string_{}", s->to_hash())); + global = module->getOrInsertGlobal(name, builder->getPtrTy()); + + //llvm::IRBuilder<> builder{ *context }; + //auto const init_type(llvm::FunctionType::get(builder.getVoidTy(), false)); + //auto const init(llvm::Function::Create(init_type, + // llvm::Function::InternalLinkage, + // "jank_global_string_init", + // *module)); + //auto const entry(llvm::BasicBlock::Create(*context, "entry", init)); + //builder.SetInsertPoint(entry); + + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); + + auto const create_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); + auto const create_fn(module->getOrInsertFunction("jank_create_string", create_fn_type)); + + llvm::SmallVector args{ 
builder->CreateGlobalStringPtr(s->data.c_str()) }; + auto const call(builder->CreateCall(create_fn, args)); + builder->CreateStore(call, global); + } + //builder.CreateRetVoid(); + + //llvm::verifyFunction(*init); + + //global_ctors.emplace_back(init); + + return global; + } + + /* Creates the internal jank_global_init function, moves global_ctor_block into it and registers it as a module constructor. */ void llvm_processor::install_global_ctors() + { + //if(global_ctors.empty()) + //{ + // return; + //} + + //return; + + /* NOTE(review): this local shadows the builder member of the class. */ llvm::IRBuilder<> builder{ *context }; + auto const init_type(llvm::FunctionType::get(builder.getVoidTy(), false)); + auto const init(llvm::Function::Create(init_type, + llvm::Function::InternalLinkage, + "jank_global_init", + *module)); + //auto const entry(llvm::BasicBlock::Create(*context, "entry", init)); + //builder.SetInsertPoint(entry); + global_ctor_block->insertInto(init); + + //for(auto const ctor : global_ctors) + //{ + // builder.CreateCall(ctor); + //} + /* Registers init with priority 65535. NOTE(review): nothing in this patch adds a terminator to global_ctor_block, since every CreateRetVoid call is commented out, and the verifyFunction result below is discarded. */ llvm::appendToGlobalCtors(*module, init, 65535); + + llvm::verifyFunction(*init); + } + + native_persistent_string llvm_processor::to_string() + { + /* NOTE(review): the module IR is printed to llvm::outs() and an empty string is returned, so callers receive no text. */ module->print(llvm::outs(), nullptr); + return ""; + } +} diff --git a/compiler+runtime/src/cpp/jank/codegen/processor.cpp b/compiler+runtime/src/cpp/jank/codegen/processor.cpp index 2cb950d8..d63bf8f3 100644 --- a/compiler+runtime/src/cpp/jank/codegen/processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/processor.cpp @@ -1,5 +1,3 @@ -#include - #include #include #include diff --git a/compiler+runtime/src/cpp/jank/runtime/context.cpp b/compiler+runtime/src/cpp/jank/runtime/context.cpp index d53244e8..f89ebcfb 100644 --- a/compiler+runtime/src/cpp/jank/runtime/context.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/context.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace jank::runtime { @@ -218,6 +219,21 @@ namespace jank::runtime return ret; } + object_ptr context::read_string(native_persistent_string_view const &code) + { + profile::timer timer{ "rt read_string" }; + read::lex::processor l_prc{ code }; + read::parse::processor p_prc{ 
l_prc.begin(), l_prc.end() }; + + /* Parses every form in the input and returns the last one, or nil when there are none; expect_ok is applied to each parse result. */ object_ptr ret{ obj::nil::nil_const() }; + for(auto const &form : p_prc) + { + ret = form.expect_ok().unwrap().ptr; + } + + return ret; + } + native_vector context::analyze_string(native_persistent_string_view const &code, native_bool const eval) { @@ -554,9 +570,12 @@ namespace jank::runtime auto const &module( expect_object(intern_var("clojure.core", "*ns*").expect_ok()->deref()) ->to_string()); - codegen::processor cg_prc{ *this, wrapped_expr, module, codegen::compilation_target::repl }; - return make_box(util::format_cpp_source(cg_prc.declaration_str()).expect_ok()); + codegen::llvm_processor cg_prc{ wrapped_expr, module, codegen::compilation_target::repl }; + /* NOTE(review): llvm_processor::to_string in this patch prints the IR to llvm::outs() and returns an empty string, so the boxed value here is empty. */ return make_box(cg_prc.to_string()); + + //codegen::processor cg_prc{ *this, wrapped_expr, module, codegen::compilation_target::repl }; + //return make_box(util::format_cpp_source(cg_prc.declaration_str()).expect_ok()); } object_ptr context::print(object_ptr const o) diff --git a/compiler+runtime/src/cpp/jank/runtime/core.cpp b/compiler+runtime/src/cpp/jank/runtime/core.cpp index 5aef335c..c7510a9e 100644 --- a/compiler+runtime/src/cpp/jank/runtime/core.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/core.cpp @@ -234,4 +234,13 @@ namespace jank::runtime }, o); } + + native_hash to_hash(object_ptr const o) + { + /* Visits o with its concrete type and forwards to the to_hash member of that type. */ return visit_object( + [=](auto const typed_o) -> native_hash { + return typed_o->to_hash(); + }, + o); + } } From a593d664dbfe81c929090773f5ccf124b2a08462 Mon Sep 17 00:00:00 2001 From: jeaye Date: Sun, 6 Oct 2024 21:49:36 -0700 Subject: [PATCH 002/101] Add LLVM IR codegen for remaining literals --- .../include/cpp/jank/analyze/expr/map.hpp | 1 + .../include/cpp/jank/analyze/expr/set.hpp | 1 + .../include/cpp/jank/analyze/expr/vector.hpp | 1 + compiler+runtime/include/cpp/jank/c_api.h | 4 + .../cpp/jank/codegen/llvm_processor.hpp | 17 +- .../src/cpp/jank/analyze/processor.cpp | 18 +- .../src/cpp/jank/codegen/llvm_processor.cpp | 352 ++++++++++++------ 7 files 
changed, 265 insertions(+), 129 deletions(-) diff --git a/compiler+runtime/include/cpp/jank/analyze/expr/map.hpp b/compiler+runtime/include/cpp/jank/analyze/expr/map.hpp index 97ad1509..acc05995 100644 --- a/compiler+runtime/include/cpp/jank/analyze/expr/map.hpp +++ b/compiler+runtime/include/cpp/jank/analyze/expr/map.hpp @@ -11,6 +11,7 @@ namespace jank::analyze::expr { native_vector, native_box>> data_exprs; option meta; + object_ptr data{}; object_ptr to_runtime_data() const { diff --git a/compiler+runtime/include/cpp/jank/analyze/expr/set.hpp b/compiler+runtime/include/cpp/jank/analyze/expr/set.hpp index 4e8fe840..fddbe7a2 100644 --- a/compiler+runtime/include/cpp/jank/analyze/expr/set.hpp +++ b/compiler+runtime/include/cpp/jank/analyze/expr/set.hpp @@ -11,6 +11,7 @@ namespace jank::analyze::expr { native_vector> data_exprs; option meta; + obj::persistent_hash_set_ptr data{}; object_ptr to_runtime_data() const { diff --git a/compiler+runtime/include/cpp/jank/analyze/expr/vector.hpp b/compiler+runtime/include/cpp/jank/analyze/expr/vector.hpp index a92fc228..9e18097b 100644 --- a/compiler+runtime/include/cpp/jank/analyze/expr/vector.hpp +++ b/compiler+runtime/include/cpp/jank/analyze/expr/vector.hpp @@ -11,6 +11,7 @@ namespace jank::analyze::expr { native_vector> data_exprs; option meta; + obj::persistent_vector_ptr data{}; object_ptr to_runtime_data() const { diff --git a/compiler+runtime/include/cpp/jank/c_api.h b/compiler+runtime/include/cpp/jank/c_api.h index a354c72b..bf8a8d46 100644 --- a/compiler+runtime/include/cpp/jank/c_api.h +++ b/compiler+runtime/include/cpp/jank/c_api.h @@ -22,6 +22,8 @@ extern "C" jank_object_ptr jank_var_intern(jank_object_ptr ns, jank_object_ptr name); jank_object_ptr jank_var_bind_root(jank_object_ptr var, jank_object_ptr val); + jank_object_ptr jank_keyword_intern(jank_object_ptr ns, jank_object_ptr name); + jank_object_ptr jank_deref(jank_object_ptr o); jank_object_ptr jank_call0(jank_object_ptr f); @@ -36,6 +38,8 @@ extern "C" 
jank_object_ptr jank_create_integer(jank_native_integer i); jank_object_ptr jank_create_real(jank_native_real r); jank_object_ptr jank_create_string(char const *s); + jank_object_ptr jank_create_symbol(jank_object_ptr ns, jank_object_ptr name); + jank_object_ptr jank_create_character(char const *s); jank_object_ptr jank_create_function0(jank_object_ptr (*f)()); jank_object_ptr jank_create_function1(jank_object_ptr (*f)(jank_object_ptr)); diff --git a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp index 771b9f51..a8bc34f7 100644 --- a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp +++ b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp @@ -62,13 +62,22 @@ namespace jank::codegen analyze::expr::function_arity const &); llvm::Value *gen_var(obj::symbol_ptr qualified_name); + llvm::Value *gen_c_string(native_persistent_string const &s); native_persistent_string to_string(); void create_function(); - void install_global_ctors(); - llvm::Value *nil_global(); - llvm::Value *string_global(obj::persistent_string_ptr const s); + void create_global_ctor(); + + llvm::Value *gen_global(obj::nil_ptr); + llvm::Value *gen_global(obj::boolean_ptr b); + llvm::Value *gen_global(obj::integer_ptr i); + llvm::Value *gen_global(obj::real_ptr r); + llvm::Value *gen_global(obj::persistent_string_ptr s); + llvm::Value *gen_global(obj::symbol_ptr s); + llvm::Value *gen_global(obj::keyword_ptr k); + llvm::Value *gen_global(obj::character_ptr c); + llvm::Value *gen_global_from_read_string(object_ptr o); /* This is stored just to keep the expression alive. 
*/ analyze::expression_ptr root_expr{}; @@ -85,7 +94,7 @@ namespace jank::codegen llvm::Value *nil{}; native_unordered_map literal_globals; native_unordered_map var_globals; - native_vector global_ctors; + native_unordered_map c_string_globals; llvm::BasicBlock *global_ctor_block{}; }; } diff --git a/compiler+runtime/src/cpp/jank/analyze/processor.cpp b/compiler+runtime/src/cpp/jank/analyze/processor.cpp index 339b6233..5772bba7 100644 --- a/compiler+runtime/src/cpp/jank/analyze/processor.cpp +++ b/compiler+runtime/src/cpp/jank/analyze/processor.cpp @@ -837,8 +837,7 @@ namespace jank::analyze } auto const condition(o->data.rest().first().unwrap()); - auto condition_expr( - analyze(condition, current_frame, expression_type::nested, fn_ctx, false)); + auto condition_expr(analyze(condition, current_frame, expression_type::nested, fn_ctx, false)); if(condition_expr.is_err()) { return condition_expr.expect_err_move(); @@ -1282,7 +1281,8 @@ namespace jank::analyze return make_box(expr::vector{ expression_base{ {}, expr_type, current_frame, true }, std::move(exprs), - o->meta + o->meta, + o }); } @@ -1315,7 +1315,8 @@ namespace jank::analyze return make_box(expr::map{ expression_base{ {}, expr_type, current_frame, true }, std::move(exprs), - o->meta + o->meta, + o }); } @@ -1365,7 +1366,8 @@ namespace jank::analyze return make_box(expr::set{ expression_base{ {}, expr_type, current_frame, true }, std::move(exprs), - o->meta + o->meta, + o }); } @@ -1402,8 +1404,7 @@ namespace jank::analyze return found_special->second(o, current_frame, expr_type, fn_ctx, needs_box); } - auto sym_result( - analyze_symbol(sym, current_frame, expression_type::nested, fn_ctx, true)); + auto sym_result(analyze_symbol(sym, current_frame, expression_type::nested, fn_ctx, true)); if(sym_result.is_err()) { return sym_result; @@ -1466,8 +1467,7 @@ namespace jank::analyze } else { - auto callable_expr( - analyze(first, current_frame, expression_type::nested, fn_ctx, needs_box)); + auto 
callable_expr(analyze(first, current_frame, expression_type::nested, fn_ctx, needs_box)); if(callable_expr.is_err()) { return callable_expr; diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index d17e7c4b..91d8017d 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -30,9 +30,12 @@ namespace jank::codegen { assert(root_fn.frame.data); - install_global_ctors(); + create_global_ctor(); create_function(); gen(); + + llvm::verifyFunction(*fn); + llvm::verifyFunction(*global_ctor_block->getParent()); } void llvm_processor::create_function() @@ -43,7 +46,6 @@ namespace jank::codegen llvm::Function::ExternalLinkage, munged_name.c_str(), *module); - llvm::verifyFunction(*fn); auto const entry(llvm::BasicBlock::Create(*context, "entry", fn)); builder->SetInsertPoint(entry); @@ -74,7 +76,6 @@ namespace jank::codegen { auto const ref(gen_var(expr.name)); - /* TODO: Move vars into globals. 
*/ if(expr.value.is_some()) { auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), @@ -155,77 +156,50 @@ namespace jank::codegen [&](auto const typed_o) -> llvm::Value * { using T = typename decltype(typed_o)::value_type; - if constexpr(std::same_as) - { - return nil_global(); - } - else if constexpr(std::same_as) - { - } - else if constexpr(std::same_as) - { - } - else if constexpr(std::same_as) - { - } - else if constexpr(std::same_as) - { - } - else if constexpr(std::same_as) - { - } - else if constexpr(std::same_as) - { - } - else if constexpr(std::same_as) - { - return string_global(typed_o); - } - else if constexpr(std::same_as) - { - } - else if constexpr(std::same_as) - { - } - else if constexpr(std::same_as) + if constexpr(std::same_as || std::same_as + || std::same_as + || std::same_as || std::same_as + || std::same_as + || std::same_as + || std::same_as) { + return gen_global(typed_o); } - else if constexpr(std::same_as) - { - } - else if constexpr(std::same_as) - { - } - /* Cons, etc. */ - else if constexpr(runtime::behavior::seqable) + else if constexpr(std::same_as + || std::same_as + || std::same_as + || std::same_as + || std::same_as + /* Cons, etc. 
*/ + || runtime::behavior::seqable) { + return gen_global_from_read_string(typed_o); } else { throw std::runtime_error{ fmt::format("unimplemented constant codegen: {}\n", typed_o->to_string()) }; } - return nullptr; }, expr.data); } - llvm::Value *llvm_processor::gen(analyze::expr::vector const &, + llvm::Value *llvm_processor::gen(analyze::expr::vector const &expr, analyze::expr::function_arity const &) { - return nullptr; + return gen_global_from_read_string(expr.data); } - llvm::Value *llvm_processor::gen(analyze::expr::map const &, + llvm::Value *llvm_processor::gen(analyze::expr::map const &expr, analyze::expr::function_arity const &) { - return nullptr; + return gen_global_from_read_string(expr.data); } - llvm::Value *llvm_processor::gen(analyze::expr::set const &, + llvm::Value *llvm_processor::gen(analyze::expr::set const &expr, analyze::expr::function_arity const &) { - return nullptr; + return gen_global_from_read_string(expr.data); } llvm::Value *llvm_processor::gen(analyze::expr::local_reference const &, @@ -272,13 +246,12 @@ namespace jank::codegen { if(!last) { - builder->CreateRet(nil_global()); + return builder->CreateRet(gen_global(obj::nil::nil_const())); } else { - builder->CreateRet(last); + return builder->CreateRet(last); } - return nullptr; } } } @@ -319,15 +292,6 @@ namespace jank::codegen auto const global(module->getOrInsertGlobal(name, builder->getPtrTy())); var_globals[qualified_name] = global; - //llvm::IRBuilder<> builder{ *context }; - //auto const init_type(llvm::FunctionType::get(builder.getVoidTy(), false)); - //auto const init(llvm::Function::Create(init_type, - // llvm::Function::InternalLinkage, - // "jank_global_var_init", - // *module)); - //auto const entry(llvm::BasicBlock::Create(*context, "entry", init)); - //builder.SetInsertPoint(entry); - { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -336,63 +300,130 @@ namespace jank::codegen false)); auto const 
fn(module->getOrInsertFunction("jank_var_intern", fn_type)); - llvm::SmallVector args{ - builder->CreateGlobalStringPtr(qualified_name->ns.c_str()), - builder->CreateGlobalStringPtr(qualified_name->name.c_str()) - }; + llvm::SmallVector args{ gen_c_string(qualified_name->ns.c_str()), + gen_c_string(qualified_name->name.c_str()) }; auto const call(builder->CreateCall(fn, args)); builder->CreateStore(call, global); } - //builder.CreateRet(call); + return global; + } + + llvm::Value *llvm_processor::gen_c_string(native_persistent_string const &s) + { + auto const found(c_string_globals.find(s)); + if(found != c_string_globals.end()) + { + return found->second; + } + return c_string_globals[s] = builder->CreateGlobalStringPtr(s.c_str()); + } + + llvm::Value *llvm_processor::gen_global(obj::nil_ptr const nil) + { + auto const found(literal_globals.find(nil)); + if(found != literal_globals.end()) + { + return found->second; + } + + auto &global(literal_globals[nil]); + global = module->getOrInsertGlobal("nil", builder->getPtrTy()); - //llvm::verifyFunction(*init); + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); - //global_ctors.emplace_back(init); + auto const create_fn_type(llvm::FunctionType::get(builder->getPtrTy(), false)); + auto const create_fn(module->getOrInsertFunction("jank_nil", create_fn_type)); + auto const call(builder->CreateCall(create_fn)); + builder->CreateStore(call, global); + } return global; } - llvm::Value *llvm_processor::nil_global() + llvm::Value *llvm_processor::gen_global(obj::boolean_ptr const b) { - auto const found(literal_globals.find(obj::nil::nil_const())); + auto const found(literal_globals.find(b)); if(found != literal_globals.end()) { return found->second; } - auto &global(literal_globals[obj::nil::nil_const()]); - global = module->getOrInsertGlobal("nil", builder->getPtrTy()); + auto &global(literal_globals[b]); + auto const name(b->data ? 
"true" : "false"); + global = module->getOrInsertGlobal(name, builder->getPtrTy()); - //llvm::IRBuilder<> builder{ *context }; - //auto const init_type(llvm::FunctionType::get(builder.getVoidTy(), false)); - //auto const init(llvm::Function::Create(init_type, - // llvm::Function::InternalLinkage, - // "jank_global_nil_init", - // *module)); - //auto const entry(llvm::BasicBlock::Create(*context, "entry", init)); - //builder.SetInsertPoint(entry); + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); - auto const create_fn_type(llvm::FunctionType::get(builder->getPtrTy(), false)); - auto const create_fn(module->getOrInsertFunction("jank_nil", create_fn_type)); + auto const create_fn_type(llvm::FunctionType::get(builder->getPtrTy(), false)); + auto const create_fn( + module->getOrInsertFunction(fmt::format("jank_{}", name), create_fn_type)); + auto const call(builder->CreateCall(create_fn)); + builder->CreateStore(call, global); + } + + return global; + } + + llvm::Value *llvm_processor::gen_global(obj::integer_ptr const i) + { + auto const found(literal_globals.find(i)); + if(found != literal_globals.end()) + { + return found->second; + } + + auto &global(literal_globals[i]); + auto const name(fmt::format("int_{}", i->data)); + global = module->getOrInsertGlobal(name, builder->getPtrTy()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); - auto const call(builder->CreateCall(create_fn)); + + auto const create_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); + auto const create_fn(module->getOrInsertFunction("jank_create_integer", create_fn_type)); + auto const arg(llvm::ConstantInt::getSigned(builder->getInt64Ty(), i->data)); + auto const call(builder->CreateCall(create_fn, { arg })); builder->CreateStore(call, global); } - //builder.CreateRetVoid(); + return global; + } + + llvm::Value 
*llvm_processor::gen_global(obj::real_ptr const r) + { + auto const found(literal_globals.find(r)); + if(found != literal_globals.end()) + { + return found->second; + } - //llvm::verifyFunction(*init); + auto &global(literal_globals[r]); + auto const name(fmt::format("real_{}", r->to_hash())); + global = module->getOrInsertGlobal(name, builder->getPtrTy()); - //global_ctors.emplace_back(init); + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); + + auto const create_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); + auto const create_fn(module->getOrInsertFunction("jank_create_integer", create_fn_type)); + auto const arg(llvm::ConstantFP::get(builder->getDoubleTy(), r->data)); + auto const call(builder->CreateCall(create_fn, { arg })); + builder->CreateStore(call, global); + } return global; } - llvm::Value *llvm_processor::string_global(obj::persistent_string_ptr const s) + llvm::Value *llvm_processor::gen_global(obj::persistent_string_ptr const s) { auto const found(literal_globals.find(s)); if(found != literal_globals.end()) @@ -404,15 +435,6 @@ namespace jank::codegen auto const name(fmt::format("string_{}", s->to_hash())); global = module->getOrInsertGlobal(name, builder->getPtrTy()); - //llvm::IRBuilder<> builder{ *context }; - //auto const init_type(llvm::FunctionType::get(builder.getVoidTy(), false)); - //auto const init(llvm::Function::Create(init_type, - // llvm::Function::InternalLinkage, - // "jank_global_string_init", - // *module)); - //auto const entry(llvm::BasicBlock::Create(*context, "entry", init)); - //builder.SetInsertPoint(entry); - { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -421,45 +443,143 @@ namespace jank::codegen llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); auto const create_fn(module->getOrInsertFunction("jank_create_string", 
create_fn_type)); - llvm::SmallVector args{ builder->CreateGlobalStringPtr(s->data.c_str()) }; + llvm::SmallVector args{ gen_c_string(s->data.c_str()) }; auto const call(builder->CreateCall(create_fn, args)); builder->CreateStore(call, global); } - //builder.CreateRetVoid(); - //llvm::verifyFunction(*init); + return global; + } - //global_ctors.emplace_back(init); + llvm::Value *llvm_processor::gen_global(obj::symbol_ptr const s) + { + auto const found(literal_globals.find(s)); + if(found != literal_globals.end()) + { + return found->second; + } + + auto &global(literal_globals[s]); + auto const name(fmt::format("symbol_{}", s->to_hash())); + global = module->getOrInsertGlobal(name, builder->getPtrTy()); + + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); + + auto const create_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), + { builder->getPtrTy(), builder->getPtrTy() }, + false)); + auto const create_fn(module->getOrInsertFunction("jank_create_symbol", create_fn_type)); + + llvm::SmallVector args{ gen_c_string(s->ns.c_str()), + gen_c_string(s->name.c_str()) }; + auto const call(builder->CreateCall(create_fn, args)); + builder->CreateStore(call, global); + } return global; } - void llvm_processor::install_global_ctors() + llvm::Value *llvm_processor::gen_global(obj::keyword_ptr const k) { - //if(global_ctors.empty()) - //{ - // return; - //} + auto const found(literal_globals.find(k)); + if(found != literal_globals.end()) + { + return found->second; + } + + auto &global(literal_globals[k]); + auto const name(fmt::format("keyword_{}", k->to_hash())); + global = module->getOrInsertGlobal(name, builder->getPtrTy()); + + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); + + auto const create_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), + { builder->getPtrTy(), builder->getPtrTy() }, + false)); + auto const 
create_fn(module->getOrInsertFunction("jank_create_keyword", create_fn_type)); - //return; + llvm::SmallVector args{ gen_c_string(k->sym.ns.c_str()), + gen_c_string(k->sym.name.c_str()) }; + auto const call(builder->CreateCall(create_fn, args)); + builder->CreateStore(call, global); + } + return global; + } + + llvm::Value *llvm_processor::gen_global(obj::character_ptr const c) + { + auto const found(literal_globals.find(c)); + if(found != literal_globals.end()) + { + return found->second; + } + + auto &global(literal_globals[c]); + auto const name(fmt::format("char_{}", c->to_hash())); + global = module->getOrInsertGlobal(name, builder->getPtrTy()); + + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); + + auto const create_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); + auto const create_fn(module->getOrInsertFunction("jank_create_character", create_fn_type)); + + llvm::SmallVector args{ gen_c_string(c->to_string()) }; + auto const call(builder->CreateCall(create_fn, args)); + builder->CreateStore(call, global); + } + + return global; + } + + llvm::Value *llvm_processor::gen_global_from_read_string(object_ptr const o) + { + auto const found(literal_globals.find(o)); + if(found != literal_globals.end()) + { + return found->second; + } + + auto &global(literal_globals[o]); + auto const name(fmt::format("data_{}", to_hash(o))); + global = module->getOrInsertGlobal(name, builder->getPtrTy()); + + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); + + auto const create_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); + auto const create_fn(module->getOrInsertFunction("jank_read_string", create_fn_type)); + + llvm::SmallVector args{ gen_c_string(runtime::to_string(o)) }; + auto const call(builder->CreateCall(create_fn, args)); + builder->CreateStore(call, global); + 
} + + return global; + } + + void llvm_processor::create_global_ctor() + { llvm::IRBuilder<> builder{ *context }; auto const init_type(llvm::FunctionType::get(builder.getVoidTy(), false)); auto const init(llvm::Function::Create(init_type, llvm::Function::InternalLinkage, "jank_global_init", *module)); - //auto const entry(llvm::BasicBlock::Create(*context, "entry", init)); - //builder.SetInsertPoint(entry); global_ctor_block->insertInto(init); - //for(auto const ctor : global_ctors) - //{ - // builder.CreateCall(ctor); - //} llvm::appendToGlobalCtors(*module, init, 65535); - - llvm::verifyFunction(*init); } native_persistent_string llvm_processor::to_string() From eab9d0c76a1296263d059e2a070d7e0b7dd3717f Mon Sep 17 00:00:00 2001 From: jeaye Date: Sun, 6 Oct 2024 21:49:58 -0700 Subject: [PATCH 003/101] Use double over long double --- compiler+runtime/include/cpp/jank/type.hpp | 2 +- compiler+runtime/src/cpp/jank/read/lex.cpp | 2 +- compiler+runtime/test/cpp/jank/read/lex.cpp | 14 +++++++------- compiler+runtime/test/cpp/jank/read/parse.cpp | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/compiler+runtime/include/cpp/jank/type.hpp b/compiler+runtime/include/cpp/jank/type.hpp index 9eba65c7..44c39331 100644 --- a/compiler+runtime/include/cpp/jank/type.hpp +++ b/compiler+runtime/include/cpp/jank/type.hpp @@ -17,7 +17,7 @@ namespace jank false>; using native_integer = long long; - using native_real = long double; + using native_real = double; using native_bool = bool; using native_hash = uint32_t; using native_persistent_string_view = std::string_view; diff --git a/compiler+runtime/src/cpp/jank/read/lex.cpp b/compiler+runtime/src/cpp/jank/read/lex.cpp index adb9d17d..c0fb46ab 100644 --- a/compiler+runtime/src/cpp/jank/read/lex.cpp +++ b/compiler+runtime/src/cpp/jank/read/lex.cpp @@ -415,7 +415,7 @@ namespace jank::read return ok(token{ token_start, pos - token_start, token_kind::real, - std::strtold(file.data() + token_start, nullptr) }); + 
std::strtod(file.data() + token_start, nullptr) }); } else { diff --git a/compiler+runtime/test/cpp/jank/read/lex.cpp b/compiler+runtime/test/cpp/jank/read/lex.cpp index b0b995df..0bcbd4f0 100644 --- a/compiler+runtime/test/cpp/jank/read/lex.cpp +++ b/compiler+runtime/test/cpp/jank/read/lex.cpp @@ -453,7 +453,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_tokens({ - { 0, 2, token_kind::real, 0.0l } + { 0, 2, token_kind::real, 0.0 } })); } @@ -463,7 +463,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_tokens({ - { 0, 3, token_kind::real, 0.0l } + { 0, 3, token_kind::real, 0.0 } })); } @@ -473,7 +473,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_tokens({ - { 0, 3, token_kind::real, -1.0l } + { 0, 3, token_kind::real, -1.0 } })); } @@ -483,7 +483,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_tokens({ - { 0, 4, token_kind::real, -1.5l } + { 0, 4, token_kind::real, -1.5 } })); } @@ -493,7 +493,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_tokens({ - { 0, 10, token_kind::real, -1234.1234l } + { 0, 10, token_kind::real, -1234.1234 } })); } @@ -562,7 +562,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_results({ - token{ 0, 5, token_kind::real, 12.34l }, + token{ 0, 5, token_kind::real, 12.34 }, error{ 5, "expected whitespace before next token" }, token{ 5, 3, token_kind::symbol, "abc"sv }, })); @@ -575,7 +575,7 @@ namespace jank::read::lex CHECK(tokens == make_results({ token{ 0, token_kind::open_paren }, - token{ 1, 5, token_kind::real, 12.34l }, + token{ 1, 5, token_kind::real, 12.34 }, token{ 6, token_kind::close_paren }, })); } diff --git a/compiler+runtime/test/cpp/jank/read/parse.cpp b/compiler+runtime/test/cpp/jank/read/parse.cpp index 158ca8ff..02b0e612 100644 --- 
a/compiler+runtime/test/cpp/jank/read/parse.cpp +++ b/compiler+runtime/test/cpp/jank/read/parse.cpp @@ -79,8 +79,8 @@ namespace jank::read::parse lex::processor lp{ "12.34" }; processor p{ lp.begin(), lp.end() }; auto const r(p.next()); - CHECK(equal(r.expect_ok().unwrap().ptr, make_box(12.34l))); - CHECK(r.expect_ok().unwrap().start == lex::token{ 0, 5, lex::token_kind::real, 12.34l }); + CHECK(equal(r.expect_ok().unwrap().ptr, make_box(12.34))); + CHECK(r.expect_ok().unwrap().start == lex::token{ 0, 5, lex::token_kind::real, 12.34 }); CHECK(r.expect_ok().unwrap().end == r.expect_ok().unwrap().start); } From 4046c0013a50d60a5e2055f1b4d53896f03f2844 Mon Sep 17 00:00:00 2001 From: jeaye Date: Fri, 11 Oct 2024 15:20:29 -0700 Subject: [PATCH 004/101] Start JIT compiling our LLVM IR --- compiler+runtime/include/cpp/jank/c_api.h | 2 +- .../cpp/jank/codegen/llvm_processor.hpp | 3 +- .../cpp/jank/runtime/core/make_box.hpp | 7 +- .../src/cpp/jank/codegen/llvm_processor.cpp | 186 +++++++++++++----- .../src/cpp/jank/runtime/context.cpp | 18 +- 5 files changed, 162 insertions(+), 54 deletions(-) diff --git a/compiler+runtime/include/cpp/jank/c_api.h b/compiler+runtime/include/cpp/jank/c_api.h index bf8a8d46..fe7b50bf 100644 --- a/compiler+runtime/include/cpp/jank/c_api.h +++ b/compiler+runtime/include/cpp/jank/c_api.h @@ -10,7 +10,7 @@ extern "C" /* NOLINTNEXTLINE(modernize-use-using) */ typedef long long jank_native_integer; /* NOLINTNEXTLINE(modernize-use-using) */ - typedef long double jank_native_real; + typedef double jank_native_real; /* NOLINTNEXTLINE(modernize-use-using) */ typedef char jank_native_bool; /* NOLINTNEXTLINE(modernize-use-using) */ diff --git a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp index a8bc34f7..ebc0e65f 100644 --- a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp +++ b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp @@ -68,6 +68,7 @@ 
namespace jank::codegen void create_function(); void create_global_ctor(); + llvm::GlobalVariable *create_global_var(native_persistent_string const &name); llvm::Value *gen_global(obj::nil_ptr); llvm::Value *gen_global(obj::boolean_ptr b); @@ -85,7 +86,7 @@ namespace jank::codegen native_persistent_string module_name; compilation_target target{}; native_persistent_string struct_name; - native_persistent_string expression_fn_name; + native_persistent_string ctor_name; std::unique_ptr context; std::unique_ptr module; diff --git a/compiler+runtime/include/cpp/jank/runtime/core/make_box.hpp b/compiler+runtime/include/cpp/jank/runtime/core/make_box.hpp index 8a4217dd..b780fa0f 100644 --- a/compiler+runtime/include/cpp/jank/runtime/core/make_box.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/core/make_box.hpp @@ -67,12 +67,9 @@ namespace jank::runtime } [[gnu::always_inline, gnu::flatten, gnu::hot]] - inline runtime::object_ptr make_box(char const * const s) + inline runtime::obj::persistent_string_ptr make_box(char const * const s) { - if(!s) [[unlikely]] - { - return runtime::obj::nil::nil_const(); - } + assert(s); return make_box(s); } diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 91d8017d..2bbe5441 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -3,8 +3,6 @@ #include -/* https://www.youtube.com/watch?v=Nw9YmNuJhJ4 */ - namespace jank::codegen { llvm_processor::llvm_processor(analyze::expression_ptr const &expr, @@ -22,9 +20,11 @@ namespace jank::codegen : root_fn{ expr } , module_name{ module_name } , target{ target } - , struct_name{ root_fn.unique_name } + , struct_name{ runtime::munge(root_fn.unique_name) } + , ctor_name{ runtime::munge(runtime::context::unique_string("jank_global_init")) } , context{ std::make_unique() } - , module{ std::make_unique(module_name.c_str(), *context) } + 
, module{ std::make_unique(runtime::context::unique_string(module_name).c_str(), + *context) } , builder{ std::make_unique>(*context) } , global_ctor_block{ llvm::BasicBlock::Create(*context, "entry") } { @@ -34,6 +34,12 @@ namespace jank::codegen create_function(); gen(); + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); + builder->CreateRetVoid(); + } + llvm::verifyFunction(*fn); llvm::verifyFunction(*global_ctor_block->getParent()); } @@ -41,10 +47,9 @@ namespace jank::codegen void llvm_processor::create_function() { auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), false)); - auto const munged_name(runtime::munge(struct_name)); fn = llvm::Function::Create(fn_type, llvm::Function::ExternalLinkage, - munged_name.c_str(), + struct_name.c_str(), *module); auto const entry(llvm::BasicBlock::Create(*context, "entry", fn)); @@ -285,13 +290,16 @@ namespace jank::codegen auto const found(var_globals.find(qualified_name)); if(found != var_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), found->second); } + auto &global(literal_globals[qualified_name]); auto const name(fmt::format("var_{}", munge(qualified_name->to_string()))); - auto const global(module->getOrInsertGlobal(name, builder->getPtrTy())); - var_globals[qualified_name] = global; + auto const var(create_global_var(name)); + module->insertGlobalVariable(var); + global = var; + auto const prev_block(builder->GetInsertBlock()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -300,13 +308,18 @@ namespace jank::codegen false)); auto const fn(module->getOrInsertFunction("jank_var_intern", fn_type)); - llvm::SmallVector args{ gen_c_string(qualified_name->ns.c_str()), - gen_c_string(qualified_name->name.c_str()) }; + llvm::SmallVector args{ gen_global(make_box(qualified_name->ns)), + gen_global(make_box(qualified_name->name)) }; auto const 
call(builder->CreateCall(fn, args)); builder->CreateStore(call, global); + + if(prev_block == global_ctor_block) + { + return call; + } } - return global; + return builder->CreateLoad(builder->getPtrTy(), global); } llvm::Value *llvm_processor::gen_c_string(native_persistent_string const &s) @@ -314,7 +327,7 @@ namespace jank::codegen auto const found(c_string_globals.find(s)); if(found != c_string_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), found->second); } return c_string_globals[s] = builder->CreateGlobalStringPtr(s.c_str()); } @@ -324,12 +337,16 @@ namespace jank::codegen auto const found(literal_globals.find(nil)); if(found != literal_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), found->second); } auto &global(literal_globals[nil]); - global = module->getOrInsertGlobal("nil", builder->getPtrTy()); + auto const name("nil"); + auto const var(create_global_var(name)); + module->insertGlobalVariable(var); + global = var; + auto const prev_block(builder->GetInsertBlock()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -338,9 +355,14 @@ namespace jank::codegen auto const create_fn(module->getOrInsertFunction("jank_nil", create_fn_type)); auto const call(builder->CreateCall(create_fn)); builder->CreateStore(call, global); + + if(prev_block == global_ctor_block) + { + return call; + } } - return global; + return builder->CreateLoad(builder->getPtrTy(), global); } llvm::Value *llvm_processor::gen_global(obj::boolean_ptr const b) @@ -348,13 +370,16 @@ namespace jank::codegen auto const found(literal_globals.find(b)); if(found != literal_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), found->second); } auto &global(literal_globals[b]); auto const name(b->data ? 
"true" : "false"); - global = module->getOrInsertGlobal(name, builder->getPtrTy()); + auto const var(create_global_var(name)); + module->insertGlobalVariable(var); + global = var; + auto const prev_block(builder->GetInsertBlock()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -364,9 +389,14 @@ namespace jank::codegen module->getOrInsertFunction(fmt::format("jank_{}", name), create_fn_type)); auto const call(builder->CreateCall(create_fn)); builder->CreateStore(call, global); + + if(prev_block == global_ctor_block) + { + return call; + } } - return global; + return builder->CreateLoad(builder->getPtrTy(), global); } llvm::Value *llvm_processor::gen_global(obj::integer_ptr const i) @@ -374,13 +404,16 @@ namespace jank::codegen auto const found(literal_globals.find(i)); if(found != literal_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), found->second); } auto &global(literal_globals[i]); auto const name(fmt::format("int_{}", i->data)); - global = module->getOrInsertGlobal(name, builder->getPtrTy()); + auto const var(create_global_var(name)); + module->insertGlobalVariable(var); + global = var; + auto const prev_block(builder->GetInsertBlock()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -391,9 +424,14 @@ namespace jank::codegen auto const arg(llvm::ConstantInt::getSigned(builder->getInt64Ty(), i->data)); auto const call(builder->CreateCall(create_fn, { arg })); builder->CreateStore(call, global); + + if(prev_block == global_ctor_block) + { + return call; + } } - return global; + return builder->CreateLoad(builder->getPtrTy(), global); } llvm::Value *llvm_processor::gen_global(obj::real_ptr const r) @@ -401,13 +439,16 @@ namespace jank::codegen auto const found(literal_globals.find(r)); if(found != literal_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), 
found->second); } auto &global(literal_globals[r]); auto const name(fmt::format("real_{}", r->to_hash())); - global = module->getOrInsertGlobal(name, builder->getPtrTy()); + auto const var(create_global_var(name)); + module->insertGlobalVariable(var); + global = var; + auto const prev_block(builder->GetInsertBlock()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -418,9 +459,14 @@ namespace jank::codegen auto const arg(llvm::ConstantFP::get(builder->getDoubleTy(), r->data)); auto const call(builder->CreateCall(create_fn, { arg })); builder->CreateStore(call, global); + + if(prev_block == global_ctor_block) + { + return call; + } } - return global; + return builder->CreateLoad(builder->getPtrTy(), global); } llvm::Value *llvm_processor::gen_global(obj::persistent_string_ptr const s) @@ -428,13 +474,16 @@ namespace jank::codegen auto const found(literal_globals.find(s)); if(found != literal_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), found->second); } auto &global(literal_globals[s]); auto const name(fmt::format("string_{}", s->to_hash())); - global = module->getOrInsertGlobal(name, builder->getPtrTy()); + auto const var(create_global_var(name)); + module->insertGlobalVariable(var); + global = var; + auto const prev_block(builder->GetInsertBlock()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -446,9 +495,14 @@ namespace jank::codegen llvm::SmallVector args{ gen_c_string(s->data.c_str()) }; auto const call(builder->CreateCall(create_fn, args)); builder->CreateStore(call, global); + + if(prev_block == global_ctor_block) + { + return call; + } } - return global; + return builder->CreateLoad(builder->getPtrTy(), global); } llvm::Value *llvm_processor::gen_global(obj::symbol_ptr const s) @@ -456,13 +510,16 @@ namespace jank::codegen auto const found(literal_globals.find(s)); if(found != 
literal_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), found->second); } auto &global(literal_globals[s]); auto const name(fmt::format("symbol_{}", s->to_hash())); - global = module->getOrInsertGlobal(name, builder->getPtrTy()); + auto const var(create_global_var(name)); + module->insertGlobalVariable(var); + global = var; + auto const prev_block(builder->GetInsertBlock()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -473,13 +530,18 @@ namespace jank::codegen false)); auto const create_fn(module->getOrInsertFunction("jank_create_symbol", create_fn_type)); - llvm::SmallVector args{ gen_c_string(s->ns.c_str()), - gen_c_string(s->name.c_str()) }; + llvm::SmallVector args{ gen_global(make_box(s->ns)), + gen_global(make_box(s->name)) }; auto const call(builder->CreateCall(create_fn, args)); builder->CreateStore(call, global); + + if(prev_block == global_ctor_block) + { + return call; + } } - return global; + return builder->CreateLoad(builder->getPtrTy(), global); } llvm::Value *llvm_processor::gen_global(obj::keyword_ptr const k) @@ -487,13 +549,16 @@ namespace jank::codegen auto const found(literal_globals.find(k)); if(found != literal_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), found->second); } auto &global(literal_globals[k]); auto const name(fmt::format("keyword_{}", k->to_hash())); - global = module->getOrInsertGlobal(name, builder->getPtrTy()); + auto const var(create_global_var(name)); + module->insertGlobalVariable(var); + global = var; + auto const prev_block(builder->GetInsertBlock()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -508,9 +573,14 @@ namespace jank::codegen gen_c_string(k->sym.name.c_str()) }; auto const call(builder->CreateCall(create_fn, args)); builder->CreateStore(call, global); + + if(prev_block == global_ctor_block) + { 
+ return call; + } } - return global; + return builder->CreateLoad(builder->getPtrTy(), global); } llvm::Value *llvm_processor::gen_global(obj::character_ptr const c) @@ -518,13 +588,16 @@ namespace jank::codegen auto const found(literal_globals.find(c)); if(found != literal_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), found->second); } auto &global(literal_globals[c]); auto const name(fmt::format("char_{}", c->to_hash())); - global = module->getOrInsertGlobal(name, builder->getPtrTy()); + auto const var(create_global_var(name)); + module->insertGlobalVariable(var); + global = var; + auto const prev_block(builder->GetInsertBlock()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -536,9 +609,14 @@ namespace jank::codegen llvm::SmallVector args{ gen_c_string(c->to_string()) }; auto const call(builder->CreateCall(create_fn, args)); builder->CreateStore(call, global); + + if(prev_block == global_ctor_block) + { + return call; + } } - return global; + return builder->CreateLoad(builder->getPtrTy(), global); } llvm::Value *llvm_processor::gen_global_from_read_string(object_ptr const o) @@ -546,13 +624,16 @@ namespace jank::codegen auto const found(literal_globals.find(o)); if(found != literal_globals.end()) { - return found->second; + return builder->CreateLoad(builder->getPtrTy(), found->second); } auto &global(literal_globals[o]); auto const name(fmt::format("data_{}", to_hash(o))); - global = module->getOrInsertGlobal(name, builder->getPtrTy()); + auto const var(create_global_var(name)); + module->insertGlobalVariable(var); + global = var; + auto const prev_block(builder->GetInsertBlock()); { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; builder->SetInsertPoint(global_ctor_block); @@ -564,24 +645,37 @@ namespace jank::codegen llvm::SmallVector args{ gen_c_string(runtime::to_string(o)) }; auto const call(builder->CreateCall(create_fn, args)); 
builder->CreateStore(call, global); + + if(prev_block == global_ctor_block) + { + return call; + } } - return global; + return builder->CreateLoad(builder->getPtrTy(), global); } void llvm_processor::create_global_ctor() { - llvm::IRBuilder<> builder{ *context }; - auto const init_type(llvm::FunctionType::get(builder.getVoidTy(), false)); + auto const init_type(llvm::FunctionType::get(builder->getVoidTy(), false)); auto const init(llvm::Function::Create(init_type, - llvm::Function::InternalLinkage, - "jank_global_init", + llvm::Function::ExternalLinkage, + ctor_name.c_str(), *module)); global_ctor_block->insertInto(init); llvm::appendToGlobalCtors(*module, init, 65535); } + llvm::GlobalVariable *llvm_processor::create_global_var(native_persistent_string const &name) + { + return new llvm::GlobalVariable{ builder->getPtrTy(), + false, + llvm::GlobalVariable::InternalLinkage, + builder->getInt64(0), + name.c_str() }; + } + native_persistent_string llvm_processor::to_string() { module->print(llvm::outs(), nullptr); diff --git a/compiler+runtime/src/cpp/jank/runtime/context.cpp b/compiler+runtime/src/cpp/jank/runtime/context.cpp index f89ebcfb..7eead9cc 100644 --- a/compiler+runtime/src/cpp/jank/runtime/context.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/context.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include @@ -572,7 +574,21 @@ namespace jank::runtime ->to_string()); codegen::llvm_processor cg_prc{ wrapped_expr, module, codegen::compilation_target::repl }; - return make_box(cg_prc.to_string()); + fmt::println("{}\n", cg_prc.to_string()); + llvm::cantFail(jit_prc.interpreter->getExecutionEngine().get().addIRModule( + llvm::orc::ThreadSafeModule{ std::move(cg_prc.module), std::move(cg_prc.context) })); + + /* TODO: Why isn't this being run as a global ctor? 
*/ + auto const init(jit_prc.interpreter->getSymbolAddress(cg_prc.ctor_name.c_str()).get()); + //fmt::println("calling ctor"); + init.toPtr()(); + + auto const fn(jit_prc.interpreter->getSymbolAddress(cg_prc.struct_name.c_str()).get()); + //fmt::println("calling fn"); + auto const ret(fn.toPtr()()); + //fmt::println("ret {}", fmt::ptr(ret)); + //fmt::println("ret type {}", static_cast(ret->type)); + return make_box(to_string(ret)); //codegen::processor cg_prc{ *this, wrapped_expr, module, codegen::compilation_target::repl }; //return make_box(util::format_cpp_source(cg_prc.declaration_str()).expect_ok()); From c7372b97f0b34234f7b87259be071f08a08ab670 Mon Sep 17 00:00:00 2001 From: jeaye Date: Fri, 11 Oct 2024 16:45:33 -0700 Subject: [PATCH 005/101] Add IR codegen for if --- .../include/cpp/jank/runtime/core/truthy.hpp | 1 + .../include/cpp/jank/runtime/erasure.hpp | 9 ++-- .../src/cpp/jank/codegen/llvm_processor.cpp | 42 +++++++++++++++++-- .../src/cpp/jank/runtime/core/truthy.cpp | 7 +++- 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/compiler+runtime/include/cpp/jank/runtime/core/truthy.hpp b/compiler+runtime/include/cpp/jank/runtime/core/truthy.hpp index b6c4fda7..56a4f741 100644 --- a/compiler+runtime/include/cpp/jank/runtime/core/truthy.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/core/truthy.hpp @@ -5,6 +5,7 @@ namespace jank::runtime { + native_bool truthy(object const *o); native_bool truthy(object_ptr o); native_bool truthy(obj::nil_ptr); native_bool truthy(obj::boolean_ptr const o); diff --git a/compiler+runtime/include/cpp/jank/runtime/erasure.hpp b/compiler+runtime/include/cpp/jank/runtime/erasure.hpp index fa4c9a33..c99fbb2a 100644 --- a/compiler+runtime/include/cpp/jank/runtime/erasure.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/erasure.hpp @@ -105,7 +105,8 @@ namespace jank::runtime if(o->type != detail::object_type_to_enum::value) { /* TODO: Use fmt when possible. 
*/ - throw std::runtime_error{ "invalid object type" }; + throw std::runtime_error{ "invalid object type: " + + std::to_string(static_cast(o->type)) }; //throw std::runtime_error{ fmt::format( // "invalid object type (expected {}, found {})", // magic_enum::enum_name(detail::object_type_to_enum::value), @@ -405,7 +406,8 @@ namespace jank::runtime default: { /* TODO: Use fmt when possible. */ - throw std::runtime_error{ "invalid object type" }; + throw std::runtime_error{ "invalid object type: " + + std::to_string(static_cast(const_erased->type)) }; //throw std::runtime_error //{ // fmt::format @@ -431,7 +433,8 @@ namespace jank::runtime } else { - throw std::runtime_error{ "invalid object type" }; + throw std::runtime_error{ "invalid object type: " + + std::to_string(static_cast(const_erased->type)) }; } } diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 2bbe5441..19314cf3 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -261,10 +261,44 @@ namespace jank::codegen } } - llvm::Value *llvm_processor::gen(analyze::expr::if_ const &, - analyze::expr::function_arity const &) + llvm::Value *llvm_processor::gen(analyze::expr::if_ const &expr, + analyze::expr::function_arity const &arity) { - return nullptr; + auto const condition(gen(expr.condition, arity)); + auto const truthy_fn_type( + llvm::FunctionType::get(builder->getInt8Ty(), { builder->getPtrTy() }, false)); + auto const fn(module->getOrInsertFunction("jank_truthy", truthy_fn_type)); + llvm::SmallVector args{ condition }; + auto const call(builder->CreateCall(fn, args)); + auto const cmp(builder->CreateICmpEQ(call, builder->getInt8(1), "iftmp")); + + auto const current_fn(builder->GetInsertBlock()->getParent()); + auto then_block(llvm::BasicBlock::Create(*context, "then", current_fn)); + auto else_block(llvm::BasicBlock::Create(*context, 
"else")); + auto const merge_block(llvm::BasicBlock::Create(*context, "ifcont")); + + builder->CreateCondBr(cmp, then_block, else_block); + builder->SetInsertPoint(then_block); + + auto const then(gen(expr.then, arity)); + builder->CreateBr(merge_block); + /* Codegen for `then` can change the current block, so track that. */ + then_block = builder->GetInsertBlock(); + current_fn->insert(current_fn->end(), else_block); + builder->SetInsertPoint(else_block); + + auto const else_(gen(expr.else_.unwrap(), arity)); + builder->CreateBr(merge_block); + /* Codegen for `else` can change the current block, so track that. */ + else_block = builder->GetInsertBlock(); + current_fn->insert(current_fn->end(), merge_block); + + builder->SetInsertPoint(merge_block); + auto const phi(builder->CreatePHI(builder->getPtrTy(), 2, "iftmp")); + phi->addIncoming(then, then_block); + phi->addIncoming(else_, else_block); + + return phi; } llvm::Value *llvm_processor::gen(analyze::expr::throw_ const &, @@ -642,7 +676,7 @@ namespace jank::codegen llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); auto const create_fn(module->getOrInsertFunction("jank_read_string", create_fn_type)); - llvm::SmallVector args{ gen_c_string(runtime::to_string(o)) }; + llvm::SmallVector args{ gen_global(make_box(runtime::to_code_string(o))) }; auto const call(builder->CreateCall(create_fn, args)); builder->CreateStore(call, global); diff --git a/compiler+runtime/src/cpp/jank/runtime/core/truthy.cpp b/compiler+runtime/src/cpp/jank/runtime/core/truthy.cpp index 7f79585f..35c79baa 100644 --- a/compiler+runtime/src/cpp/jank/runtime/core/truthy.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/core/truthy.cpp @@ -2,7 +2,7 @@ namespace jank::runtime { - native_bool truthy(object_ptr const o) + native_bool truthy(object const *o) { if(!o) { @@ -29,6 +29,11 @@ namespace jank::runtime o); } + native_bool truthy(object_ptr const o) + { + return truthy(o.data); + } + native_bool truthy(obj::nil_ptr) 
{ return false; From 17449a63bfe4d4e8b189c4c7272dc8348121fb01 Mon Sep 17 00:00:00 2001 From: jeaye Date: Fri, 11 Oct 2024 17:27:17 -0700 Subject: [PATCH 006/101] Add throw IR codegen --- compiler+runtime/include/cpp/jank/c_api.h | 2 ++ compiler+runtime/src/cpp/jank/c_api.cpp | 5 +++++ .../src/cpp/jank/codegen/llvm_processor.cpp | 13 ++++++++++--- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/compiler+runtime/include/cpp/jank/c_api.h b/compiler+runtime/include/cpp/jank/c_api.h index fe7b50bf..8edcdec1 100644 --- a/compiler+runtime/include/cpp/jank/c_api.h +++ b/compiler+runtime/include/cpp/jank/c_api.h @@ -51,6 +51,8 @@ extern "C" jank_native_bool jank_equal(jank_object_ptr l, jank_object_ptr r); jank_native_hash jank_to_hash(jank_object_ptr o); + void jank_throw(jank_object_ptr o); + #ifdef __cplusplus } #endif diff --git a/compiler+runtime/src/cpp/jank/c_api.cpp b/compiler+runtime/src/cpp/jank/c_api.cpp index 2b44c064..213c2e2e 100644 --- a/compiler+runtime/src/cpp/jank/c_api.cpp +++ b/compiler+runtime/src/cpp/jank/c_api.cpp @@ -138,4 +138,9 @@ extern "C" auto const o_obj(reinterpret_cast(o)); return to_hash(o_obj); } + + void jank_throw(jank_object_ptr o) + { + throw runtime::object_ptr{ reinterpret_cast(o) }; + } } diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 19314cf3..a4318296 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -301,10 +301,17 @@ namespace jank::codegen return phi; } - llvm::Value *llvm_processor::gen(analyze::expr::throw_ const &, - analyze::expr::function_arity const &) + llvm::Value *llvm_processor::gen(analyze::expr::throw_ const &expr, + analyze::expr::function_arity const &arity) { - return nullptr; + auto const value(gen(expr.value, arity)); + auto const fn_type( + llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); + auto 
const fn(module->getOrInsertFunction("jank_throw", fn_type)); + + llvm::SmallVector args{ value }; + auto const call(builder->CreateCall(fn, args)); + return call; } llvm::Value *llvm_processor::gen(analyze::expr::try_ const &, From 98346d0b9526ed0dd9b2cfa896bb30b9d8084c66 Mon Sep 17 00:00:00 2001 From: jeaye Date: Fri, 11 Oct 2024 18:28:02 -0700 Subject: [PATCH 007/101] Add basic let IR codegen No support for captures yet. --- .../cpp/jank/codegen/llvm_processor.hpp | 1 + .../src/cpp/jank/codegen/llvm_processor.cpp | 25 +++++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp index ebc0e65f..64327010 100644 --- a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp +++ b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp @@ -97,5 +97,6 @@ namespace jank::codegen native_unordered_map var_globals; native_unordered_map c_string_globals; llvm::BasicBlock *global_ctor_block{}; + native_unordered_map locals; }; } diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index a4318296..5697f2d8 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -207,10 +207,10 @@ namespace jank::codegen return gen_global_from_read_string(expr.data); } - llvm::Value *llvm_processor::gen(analyze::expr::local_reference const &, + llvm::Value *llvm_processor::gen(analyze::expr::local_reference const &expr, analyze::expr::function_arity const &) { - return nullptr; + return locals[expr.binding.name]; } llvm::Value *llvm_processor::gen(analyze::expr::function const &, @@ -225,10 +225,25 @@ namespace jank::codegen return nullptr; } - llvm::Value *llvm_processor::gen(analyze::expr::let const &, - analyze::expr::function_arity const &) + llvm::Value 
*llvm_processor::gen(analyze::expr::let const &expr, + analyze::expr::function_arity const &arity) { - return nullptr; + auto old_locals(locals); + for(auto const &pair : expr.pairs) + { + auto const local(expr.frame->find_local_or_capture(pair.first)); + if(local.is_none()) + { + throw std::runtime_error{ fmt::format("ICE: unable to find local: {}", + pair.first->to_string()) }; + } + + locals[pair.first] = gen(pair.second, arity); + } + + auto const ret(gen(expr.body, arity)); + locals = std::move(old_locals); + return ret; } llvm::Value *llvm_processor::gen(analyze::expr::do_ const &expr, From aaaa2dccea888239db727e261e5fbfb9873ece2e Mon Sep 17 00:00:00 2001 From: jeaye Date: Sat, 12 Oct 2024 13:10:32 -0700 Subject: [PATCH 008/101] Add a todo --- compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 5697f2d8..0cbe4c43 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -216,6 +216,13 @@ namespace jank::codegen llvm::Value *llvm_processor::gen(analyze::expr::function const &, analyze::expr::function_arity const &) { + /* TODO: + * 1. Copy this into a local llvm_processor + * 2. Move module, context, and builder in + * 3. Do generation + * 4. Move module, context, builder back out + * 5. 
Reference fn names and gen call to build fn object + */ return nullptr; } From 174361170b70b15788afc1ad8dc4279340f6a79f Mon Sep 17 00:00:00 2001 From: jeaye Date: Thu, 17 Oct 2024 17:44:08 -0700 Subject: [PATCH 009/101] Add initial nested fn codegen --- compiler+runtime/include/cpp/jank/c_api.h | 29 +- .../cpp/jank/codegen/llvm_processor.hpp | 10 + compiler+runtime/src/cpp/jank/c_api.cpp | 73 ++++-- .../src/cpp/jank/codegen/llvm_processor.cpp | 248 +++++++++++++++--- .../src/cpp/jank/runtime/context.cpp | 4 +- 5 files changed, 299 insertions(+), 65 deletions(-) diff --git a/compiler+runtime/include/cpp/jank/c_api.h b/compiler+runtime/include/cpp/jank/c_api.h index 8edcdec1..e000d3e8 100644 --- a/compiler+runtime/include/cpp/jank/c_api.h +++ b/compiler+runtime/include/cpp/jank/c_api.h @@ -15,6 +15,8 @@ extern "C" typedef char jank_native_bool; /* NOLINTNEXTLINE(modernize-use-using) */ typedef uint32_t jank_native_hash; + /* NOLINTNEXTLINE(modernize-use-using) */ + typedef uint8_t jank_arity_flags; jank_object_ptr jank_eval(jank_object_ptr s); jank_object_ptr jank_read_string(jank_object_ptr s); @@ -35,17 +37,24 @@ extern "C" jank_object_ptr jank_nil(); jank_object_ptr jank_true(); jank_object_ptr jank_false(); - jank_object_ptr jank_create_integer(jank_native_integer i); - jank_object_ptr jank_create_real(jank_native_real r); - jank_object_ptr jank_create_string(char const *s); - jank_object_ptr jank_create_symbol(jank_object_ptr ns, jank_object_ptr name); - jank_object_ptr jank_create_character(char const *s); - - jank_object_ptr jank_create_function0(jank_object_ptr (*f)()); - jank_object_ptr jank_create_function1(jank_object_ptr (*f)(jank_object_ptr)); - jank_object_ptr jank_create_function2(jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr)); + jank_object_ptr jank_integer_create(jank_native_integer i); + jank_object_ptr jank_real_create(jank_native_real r); + jank_object_ptr jank_string_create(char const *s); + jank_object_ptr 
jank_symbol_create(jank_object_ptr ns, jank_object_ptr name); + jank_object_ptr jank_character_create(char const *s); + + jank_arity_flags jank_function_build_arity_flags(uint8_t highest_fixed_arity, + jank_native_bool is_variadic, + jank_native_bool is_variadic_ambiguous); + jank_object_ptr jank_function_create(jank_arity_flags arity_flags); + jank_object_ptr jank_function_set_arity0(jank_object_ptr fn, jank_object_ptr (*f)()); + jank_object_ptr + jank_function_set_arity1(jank_object_ptr fn, jank_object_ptr (*f)(jank_object_ptr)); + jank_object_ptr jank_function_set_arity2(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr)); jank_object_ptr - jank_create_function3(jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr)); + jank_function_set_arity3(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr)); jank_native_bool jank_truthy(jank_object_ptr o); jank_native_bool jank_equal(jank_object_ptr l, jank_object_ptr r); diff --git a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp index 64327010..3db7a9af 100644 --- a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp +++ b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp @@ -11,6 +11,10 @@ namespace jank::codegen { using namespace jank::runtime; + struct nested_tag + { + }; + struct llvm_processor { llvm_processor() = delete; @@ -20,9 +24,14 @@ namespace jank::codegen llvm_processor(analyze::expr::function const &expr, native_persistent_string const &module, compilation_target target); + llvm_processor(nested_tag, + analyze::expr::function const &expr, + llvm_processor &&); llvm_processor(llvm_processor const &) = delete; llvm_processor(llvm_processor &&) noexcept = default; + void release(llvm_processor &into) &&; + void gen(); llvm::Value *gen(analyze::expression_ptr const &, analyze::expr::function_arity const &); @@ -67,6 +76,7 @@ 
namespace jank::codegen native_persistent_string to_string(); void create_function(); + void create_function(analyze::expr::function_arity const &); void create_global_ctor(); llvm::GlobalVariable *create_global_var(native_persistent_string const &name); diff --git a/compiler+runtime/src/cpp/jank/c_api.cpp b/compiler+runtime/src/cpp/jank/c_api.cpp index 213c2e2e..6207e153 100644 --- a/compiler+runtime/src/cpp/jank/c_api.cpp +++ b/compiler+runtime/src/cpp/jank/c_api.cpp @@ -5,52 +5,53 @@ using namespace jank::runtime; extern "C" { - jank_object_ptr jank_eval(jank_object_ptr s) + jank_object_ptr jank_eval(jank_object_ptr const s) { auto const s_obj(try_object(reinterpret_cast(s))); return __rt_ctx->eval_string(s_obj->data); } - jank_object_ptr jank_read_string(jank_object_ptr s) + jank_object_ptr jank_read_string(jank_object_ptr const s) { auto const s_obj(try_object(reinterpret_cast(s))); return __rt_ctx->read_string(s_obj->data); } - jank_object_ptr jank_var_intern(jank_object_ptr ns, jank_object_ptr name) + jank_object_ptr jank_var_intern(jank_object_ptr const ns, jank_object_ptr const name) { auto const ns_obj(try_object(reinterpret_cast(ns))); auto const name_obj(try_object(reinterpret_cast(name))); return erase(__rt_ctx->intern_var(ns_obj->data, name_obj->data).expect_ok()); } - jank_object_ptr jank_var_bind_root(jank_object_ptr var, jank_object_ptr val) + jank_object_ptr jank_var_bind_root(jank_object_ptr const var, jank_object_ptr const val) { auto const var_obj(try_object(reinterpret_cast(var))); auto const val_obj(reinterpret_cast(val)); return erase(var_obj->bind_root(val_obj)); } - jank_object_ptr jank_deref(jank_object_ptr o) + jank_object_ptr jank_deref(jank_object_ptr const o) { auto const o_obj(reinterpret_cast(o)); return behavior::deref(o_obj); } - jank_object_ptr jank_call0(jank_object_ptr f) + jank_object_ptr jank_call0(jank_object_ptr const f) { auto const f_obj(reinterpret_cast(f)); return dynamic_call(f_obj); } - jank_object_ptr 
jank_call1(jank_object_ptr f, jank_object_ptr a1) + jank_object_ptr jank_call1(jank_object_ptr const f, jank_object_ptr const a1) { auto const f_obj(reinterpret_cast(f)); auto const a1_obj(reinterpret_cast(a1)); return dynamic_call(f_obj, a1_obj); } - jank_object_ptr jank_call2(jank_object_ptr f, jank_object_ptr a1, jank_object_ptr a2) + jank_object_ptr + jank_call2(jank_object_ptr const f, jank_object_ptr const a1, jank_object_ptr const a2) { auto const f_obj(reinterpret_cast(f)); auto const a1_obj(reinterpret_cast(a1)); @@ -58,8 +59,10 @@ extern "C" return dynamic_call(f_obj, a1_obj, a2_obj); } - jank_object_ptr - jank_call3(jank_object_ptr f, jank_object_ptr a1, jank_object_ptr a2, jank_object_ptr a3) + jank_object_ptr jank_call3(jank_object_ptr const f, + jank_object_ptr const a1, + jank_object_ptr const a2, + jank_object_ptr const a3) { auto const f_obj(reinterpret_cast(f)); auto const a1_obj(reinterpret_cast(a1)); @@ -83,57 +86,79 @@ extern "C" return erase(obj::boolean::false_const()); } - jank_object_ptr jank_create_integer(jank_native_integer i) + jank_object_ptr jank_integer_create(jank_native_integer const i) { return erase(make_box(i)); } - jank_object_ptr jank_create_real(jank_native_real r) + jank_object_ptr jank_real_create(jank_native_real const r) { return erase(make_box(r)); } - jank_object_ptr jank_create_string(char const *s) + jank_object_ptr jank_string_create(char const *s) { assert(s); return erase(make_box(s)); } - jank_object_ptr jank_create_function0(jank_object_ptr (*f)()) + jank_arity_flags jank_function_build_arity_flags(uint8_t const highest_fixed_arity, + jank_native_bool const is_variadic, + jank_native_bool const is_variadic_ambiguous) + { + return (is_variadic << 7) | (is_variadic_ambiguous << 6) | highest_fixed_arity; + } + + jank_object_ptr jank_function_create(jank_arity_flags const arity_flags) { - return erase(make_box(f)); + fmt::println("jank_function_create"); + return erase(obj::nil::nil_const()); } - jank_object_ptr 
jank_create_function1(jank_object_ptr (*f)(jank_object_ptr)) + jank_object_ptr jank_function_set_arity0(jank_object_ptr const fn, jank_object_ptr (* const f)()) { - return erase(make_box(f)); + fmt::println("jank_function_set_arity0"); + return nullptr; } - jank_object_ptr jank_create_function2(jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr)) + jank_object_ptr + jank_function_set_arity1(jank_object_ptr const fn, jank_object_ptr (* const f)(jank_object_ptr)) { - return erase(make_box(f)); + fmt::println("jank_function_set_arity1"); + return nullptr; } jank_object_ptr - jank_create_function3(jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr)) + jank_function_set_arity2(jank_object_ptr const fn, + jank_object_ptr (* const f)(jank_object_ptr, jank_object_ptr)) + { + fmt::println("jank_function_set_arity2"); + return nullptr; + } + + jank_object_ptr jank_function_set_arity3(jank_object_ptr const fn, + jank_object_ptr (* const f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr)) { - return erase(make_box(f)); + fmt::println("jank_function_set_arity3"); + return nullptr; } - jank_native_bool jank_truthy(jank_object_ptr o) + jank_native_bool jank_truthy(jank_object_ptr const o) { auto const o_obj(reinterpret_cast(o)); return static_cast(truthy(o_obj)); } - jank_native_bool jank_equal(jank_object_ptr l, jank_object_ptr r) + jank_native_bool jank_equal(jank_object_ptr const l, jank_object_ptr const r) { auto const l_obj(reinterpret_cast(l)); auto const r_obj(reinterpret_cast(r)); return static_cast(equal(l_obj, r_obj)); } - jank_native_hash jank_to_hash(jank_object_ptr o) + jank_native_hash jank_to_hash(jank_object_ptr const o) { auto const o_obj(reinterpret_cast(o)); return to_hash(o_obj); diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 0cbe4c43..3283fff0 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ 
b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -29,19 +29,28 @@ namespace jank::codegen , global_ctor_block{ llvm::BasicBlock::Create(*context, "entry") } { assert(root_fn.frame.data); + } - create_global_ctor(); - create_function(); - gen(); - - { - llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; - builder->SetInsertPoint(global_ctor_block); - builder->CreateRetVoid(); - } + llvm_processor::llvm_processor(nested_tag, + analyze::expr::function const &expr, + llvm_processor &&from) + : root_fn{ expr } + , module_name(from.module_name) + , target{ compilation_target::function } + , struct_name{ runtime::munge(root_fn.unique_name) } + , ctor_name{ runtime::munge(runtime::context::unique_string("jank_global_init")) } + , context{ std::move(from.context) } + , module{ std::move(from.module) } + , builder{ std::move(from.builder) } + , global_ctor_block{ from.global_ctor_block } + { + } - llvm::verifyFunction(*fn); - llvm::verifyFunction(*global_ctor_block->getParent()); + void llvm_processor::release(llvm_processor &into) && + { + into.context = std::move(context); + into.module = std::move(module); + into.builder = std::move(builder); } void llvm_processor::create_function() @@ -56,12 +65,44 @@ namespace jank::codegen builder->SetInsertPoint(entry); } + void + llvm_processor::create_function(analyze::expr::function_arity const &arity) + { + std::vector const arg_types{ arity.params.size(), builder->getPtrTy() }; + auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); + fn = llvm::Function::Create(fn_type, + llvm::Function::ExternalLinkage, + fmt::format("{}_{}", struct_name, arity.params.size()), + *module); + + auto const entry(llvm::BasicBlock::Create(*context, "entry", fn)); + builder->SetInsertPoint(entry); + } + void llvm_processor::gen() { - auto const &arity(root_fn.arities[0]); - for(auto const &form : arity.body.values) + if(target == compilation_target::repl) + { + create_global_ctor(); + } + + 
for(auto const &arity : root_fn.arities) + { + create_function(arity); + for(auto const &form : arity.body.values) + { + gen(form, arity); + } + } + + if(target != compilation_target::function) { - gen(form, arity); + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + builder->SetInsertPoint(global_ctor_block); + builder->CreateRetVoid(); + + llvm::verifyFunction(*fn); + llvm::verifyFunction(*global_ctor_block->getParent()); } } @@ -92,6 +133,11 @@ namespace jank::codegen builder->CreateCall(fn, args); } + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(ref); + } + return ref; } @@ -105,13 +151,26 @@ namespace jank::codegen llvm::SmallVector args{ ref }; auto const call(builder->CreateCall(fn, args)); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(call); + } + return call; } llvm::Value *llvm_processor::gen(analyze::expr::var_ref const &expr, analyze::expr::function_arity const &) { - return gen_var(expr.qualified_name); + auto const var(gen_var(expr.qualified_name)); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(var); + } + + return var; } static native_persistent_string arity_to_call_fn(size_t const arity) @@ -150,6 +209,11 @@ namespace jank::codegen auto const fn(module->getOrInsertFunction(call_fn_name.c_str(), fn_type)); auto const call(builder->CreateCall(fn, arg_handles)); + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(call); + } + return call; } @@ -157,7 +221,7 @@ namespace jank::codegen llvm_processor::gen(analyze::expr::primitive_literal const &expr, analyze::expr::function_arity const &) { - return runtime::visit_object( + auto const ret(runtime::visit_object( [&](auto const typed_o) -> llvm::Value * { using T = typename decltype(typed_o)::value_type; @@ -186,34 +250,69 @@ namespace jank::codegen typed_o->to_string()) }; } }, - 
expr.data); + expr.data)); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(ret); + } + + return ret; } llvm::Value *llvm_processor::gen(analyze::expr::vector const &expr, analyze::expr::function_arity const &) { - return gen_global_from_read_string(expr.data); + auto const ret(gen_global_from_read_string(expr.data)); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(ret); + } + + return ret; } llvm::Value *llvm_processor::gen(analyze::expr::map const &expr, analyze::expr::function_arity const &) { - return gen_global_from_read_string(expr.data); + auto const ret(gen_global_from_read_string(expr.data)); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(ret); + } + + return ret; } llvm::Value *llvm_processor::gen(analyze::expr::set const &expr, analyze::expr::function_arity const &) { - return gen_global_from_read_string(expr.data); + auto const ret(gen_global_from_read_string(expr.data)); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(ret); + } + + return ret; } llvm::Value *llvm_processor::gen(analyze::expr::local_reference const &expr, analyze::expr::function_arity const &) { - return locals[expr.binding.name]; + auto const ret(locals[expr.binding.name]); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(ret); + } + + return ret; } - llvm::Value *llvm_processor::gen(analyze::expr::function const &, + llvm::Value *llvm_processor::gen(analyze::expr::function const &expr, analyze::expr::function_arity const &) { /* TODO: @@ -223,12 +322,84 @@ namespace jank::codegen * 4. Move module, context, builder back out * 5. 
Reference fn names and gen call to build fn object */ - return nullptr; + + { + llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; + + llvm_processor nested{ nested_tag{}, expr, std::move(*this) }; + nested.gen(); + + std::move(nested).release(*this); + } + + analyze::expr::function_arity const *variadic_arity{}; + analyze::expr::function_arity const *highest_fixed_arity{}; + for(auto const &arity : expr.arities) + { + if(arity.fn_ctx->is_variadic) + { + variadic_arity = &arity; + } + else if(!highest_fixed_arity + || highest_fixed_arity->fn_ctx->param_count < arity.fn_ctx->param_count) + { + highest_fixed_arity = &arity; + } + } + native_bool const variadic_ambiguous{ highest_fixed_arity && variadic_arity + && highest_fixed_arity->fn_ctx->param_count + == variadic_arity->fn_ctx->param_count - 1 }; + + auto const arity_flags_fn_type( + llvm::FunctionType::get(builder->getInt8Ty(), + { builder->getInt8Ty(), builder->getInt8Ty(), builder->getInt8Ty() }, + false)); + auto const arity_flags_fn( + module->getOrInsertFunction("jank_function_build_arity_flags", arity_flags_fn_type)); + auto const arity_flags(builder->CreateCall( + arity_flags_fn, + { builder->getInt8(highest_fixed_arity ? 
highest_fixed_arity->params.size() : 0), + builder->getInt8(!!variadic_arity), + builder->getInt8(variadic_ambiguous) })); + + auto const create_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), { builder->getInt8Ty() }, false)); + auto const create_fn(module->getOrInsertFunction("jank_function_create", create_fn_type)); + auto const fn_obj(builder->CreateCall(create_fn, { arity_flags })); + + for(auto const &arity : expr.arities) + { + std::vector const arg_types{ arity.params.size() + 1, builder->getPtrTy() }; + auto const set_arity_fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); + auto const set_arity_fn( + module->getOrInsertFunction(fmt::format("jank_function_set_arity{}", arity.params.size()), + set_arity_fn_type)); + + std::vector const target_arg_types{ arity.params.size(), builder->getPtrTy() }; + auto const target_fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); + auto target_fn(module->getOrInsertFunction( + fmt::format("{}_{}", munge(expr.unique_name), arity.params.size()), + target_fn_type)); + + builder->CreateCall(set_arity_fn, { fn_obj, target_fn.getCallee() }); + } + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(fn_obj); + } + + return fn_obj; } - llvm::Value *llvm_processor::gen(analyze::expr::recur const &, + llvm::Value *llvm_processor::gen(analyze::expr::recur const &expr, analyze::expr::function_arity const &) { + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(nullptr); + } + return nullptr; } @@ -250,6 +421,12 @@ namespace jank::codegen auto const ret(gen(expr.body, arity)); locals = std::move(old_locals); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(ret); + } + return ret; } @@ -320,6 +497,11 @@ namespace jank::codegen phi->addIncoming(then, then_block); phi->addIncoming(else_, else_block); + if(expr.expr_type == 
analyze::expression_type::return_statement) + { + return builder->CreateRet(phi); + } + return phi; } @@ -333,6 +515,12 @@ namespace jank::codegen llvm::SmallVector args{ value }; auto const call(builder->CreateCall(fn, args)); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(call); + } + return call; } @@ -483,7 +671,7 @@ namespace jank::codegen auto const create_fn_type( llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); - auto const create_fn(module->getOrInsertFunction("jank_create_integer", create_fn_type)); + auto const create_fn(module->getOrInsertFunction("jank_integer_create", create_fn_type)); auto const arg(llvm::ConstantInt::getSigned(builder->getInt64Ty(), i->data)); auto const call(builder->CreateCall(create_fn, { arg })); builder->CreateStore(call, global); @@ -518,7 +706,7 @@ namespace jank::codegen auto const create_fn_type( llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); - auto const create_fn(module->getOrInsertFunction("jank_create_integer", create_fn_type)); + auto const create_fn(module->getOrInsertFunction("jank_integer_create", create_fn_type)); auto const arg(llvm::ConstantFP::get(builder->getDoubleTy(), r->data)); auto const call(builder->CreateCall(create_fn, { arg })); builder->CreateStore(call, global); @@ -553,7 +741,7 @@ namespace jank::codegen auto const create_fn_type( llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); - auto const create_fn(module->getOrInsertFunction("jank_create_string", create_fn_type)); + auto const create_fn(module->getOrInsertFunction("jank_string_create", create_fn_type)); llvm::SmallVector args{ gen_c_string(s->data.c_str()) }; auto const call(builder->CreateCall(create_fn, args)); @@ -591,7 +779,7 @@ namespace jank::codegen llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy(), builder->getPtrTy() }, false)); - auto const 
create_fn(module->getOrInsertFunction("jank_create_symbol", create_fn_type)); + auto const create_fn(module->getOrInsertFunction("jank_symbol_create", create_fn_type)); llvm::SmallVector args{ gen_global(make_box(s->ns)), gen_global(make_box(s->name)) }; @@ -630,7 +818,7 @@ namespace jank::codegen llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy(), builder->getPtrTy() }, false)); - auto const create_fn(module->getOrInsertFunction("jank_create_keyword", create_fn_type)); + auto const create_fn(module->getOrInsertFunction("jank_keyword_intern", create_fn_type)); llvm::SmallVector args{ gen_c_string(k->sym.ns.c_str()), gen_c_string(k->sym.name.c_str()) }; @@ -667,7 +855,7 @@ namespace jank::codegen auto const create_fn_type( llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); - auto const create_fn(module->getOrInsertFunction("jank_create_character", create_fn_type)); + auto const create_fn(module->getOrInsertFunction("jank_character_create", create_fn_type)); llvm::SmallVector args{ gen_c_string(c->to_string()) }; auto const call(builder->CreateCall(create_fn, args)); diff --git a/compiler+runtime/src/cpp/jank/runtime/context.cpp b/compiler+runtime/src/cpp/jank/runtime/context.cpp index 7eead9cc..b978d29e 100644 --- a/compiler+runtime/src/cpp/jank/runtime/context.cpp +++ b/compiler+runtime/src/cpp/jank/runtime/context.cpp @@ -574,6 +574,7 @@ namespace jank::runtime ->to_string()); codegen::llvm_processor cg_prc{ wrapped_expr, module, codegen::compilation_target::repl }; + cg_prc.gen(); fmt::println("{}\n", cg_prc.to_string()); llvm::cantFail(jit_prc.interpreter->getExecutionEngine().get().addIRModule( llvm::orc::ThreadSafeModule{ std::move(cg_prc.module), std::move(cg_prc.context) })); @@ -583,7 +584,8 @@ namespace jank::runtime //fmt::println("calling ctor"); init.toPtr()(); - auto const fn(jit_prc.interpreter->getSymbolAddress(cg_prc.struct_name.c_str()).get()); + auto const fn( + 
jit_prc.interpreter->getSymbolAddress(fmt::format("{}_0", cg_prc.struct_name)).get()); //fmt::println("calling fn"); auto const ret(fn.toPtr()()); //fmt::println("ret {}", fmt::ptr(ret)); From 966e91261a598ef37af05bcd5a2f61b9d04c767e Mon Sep 17 00:00:00 2001 From: jeaye Date: Sat, 19 Oct 2024 13:15:46 -0700 Subject: [PATCH 010/101] Add all IR necessary for closures Still missing the runtime object, though. --- .../cpp/jank/analyze/expr/function.hpp | 17 +++ compiler+runtime/include/cpp/jank/c_api.h | 1 + .../cpp/jank/codegen/llvm_processor.hpp | 5 +- compiler+runtime/src/cpp/jank/c_api.cpp | 7 ++ .../src/cpp/jank/codegen/llvm_processor.cpp | 118 ++++++++++++++---- 5 files changed, 124 insertions(+), 24 deletions(-) diff --git a/compiler+runtime/include/cpp/jank/analyze/expr/function.hpp b/compiler+runtime/include/cpp/jank/analyze/expr/function.hpp index a450b963..13548c23 100644 --- a/compiler+runtime/include/cpp/jank/analyze/expr/function.hpp +++ b/compiler+runtime/include/cpp/jank/analyze/expr/function.hpp @@ -70,6 +70,23 @@ namespace jank::analyze::expr native_vector> arities; obj::persistent_hash_map_ptr meta{}; + /* Aggregates all `frame->captures` from each arity so that we can know the overall + * captures for all arities of this fn. This is necessary for codegen to IR, since we + * generate a context struct which is shared across all arities, even if one arity + * doesn't use any captures. 
*/ + native_unordered_map captures() const + { + native_unordered_map ret; + for(auto const &arity : arities) + { + for(auto const &capture : arity.frame->captures) + { + ret.emplace(capture.first, &capture.second); + } + } + return ret; + } + object_ptr to_runtime_data() const { object_ptr arity_maps(make_box()); diff --git a/compiler+runtime/include/cpp/jank/c_api.h b/compiler+runtime/include/cpp/jank/c_api.h index e000d3e8..5d13605a 100644 --- a/compiler+runtime/include/cpp/jank/c_api.h +++ b/compiler+runtime/include/cpp/jank/c_api.h @@ -47,6 +47,7 @@ extern "C" jank_native_bool is_variadic, jank_native_bool is_variadic_ambiguous); jank_object_ptr jank_function_create(jank_arity_flags arity_flags); + jank_object_ptr jank_function_create_closure(jank_arity_flags arity_flags, void *context); jank_object_ptr jank_function_set_arity0(jank_object_ptr fn, jank_object_ptr (*f)()); jank_object_ptr jank_function_set_arity1(jank_object_ptr fn, jank_object_ptr (*f)(jank_object_ptr)); diff --git a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp index 3db7a9af..38c9ed0a 100644 --- a/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp +++ b/compiler+runtime/include/cpp/jank/codegen/llvm_processor.hpp @@ -76,7 +76,7 @@ namespace jank::codegen native_persistent_string to_string(); void create_function(); - void create_function(analyze::expr::function_arity const &); + void create_function(analyze::expr::function_arity const &arity); void create_global_ctor(); llvm::GlobalVariable *create_global_var(native_persistent_string const &name); @@ -90,6 +90,9 @@ namespace jank::codegen llvm::Value *gen_global(obj::character_ptr c); llvm::Value *gen_global_from_read_string(object_ptr o); + llvm::StructType * + get_or_insert_struct_type(std::string const &name, std::vector const &fields); + /* This is stored just to keep the expression alive. 
*/ analyze::expression_ptr root_expr{}; analyze::expr::function const &root_fn; diff --git a/compiler+runtime/src/cpp/jank/c_api.cpp b/compiler+runtime/src/cpp/jank/c_api.cpp index 6207e153..010731c6 100644 --- a/compiler+runtime/src/cpp/jank/c_api.cpp +++ b/compiler+runtime/src/cpp/jank/c_api.cpp @@ -115,6 +115,13 @@ extern "C" return erase(obj::nil::nil_const()); } + jank_object_ptr + jank_function_create_closure(jank_arity_flags const arity_flags, void * const context) + { + fmt::println("jank_function_create_closure {}", fmt::ptr(context)); + return erase(obj::nil::nil_const()); + } + jank_object_ptr jank_function_set_arity0(jank_object_ptr const fn, jank_object_ptr (* const f)()) { fmt::println("jank_function_set_arity0"); diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 3283fff0..0128ae8d 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -68,7 +68,17 @@ namespace jank::codegen void llvm_processor::create_function(analyze::expr::function_arity const &arity) { - std::vector const arg_types{ arity.params.size(), builder->getPtrTy() }; + auto const captures(root_fn.captures()); + auto const is_closure(!captures.empty()); + fmt::println("Creating fn {} with arities: {} captures: {}", + root_fn.unique_name, + root_fn.arities.size(), + captures.size()); + + /* Closures get one extra parameter, the first one, which is a pointer to the closure's + * context. The context is a struct containing all captured values. 
*/ + std::vector const arg_types{ arity.params.size() + is_closure, + builder->getPtrTy() }; auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); fn = llvm::Function::Create(fn_type, llvm::Function::ExternalLinkage, @@ -77,6 +87,28 @@ namespace jank::codegen auto const entry(llvm::BasicBlock::Create(*context, "entry", fn)); builder->SetInsertPoint(entry); + + for(size_t i{}; i < arity.params.size(); ++i) + { + locals[arity.params[i]] = fn->getArg(i + is_closure); + } + + if(is_closure) + { + auto const context(fn->getArg(0)); + for(auto const &capture : arity.frame->captures) + { + auto const captures(root_fn.captures()); + std::vector const capture_types{ captures.size(), builder->getPtrTy() }; + auto const closure_ctx_type( + get_or_insert_struct_type(fmt::format("{}_context", munge(root_fn.unique_name)), + capture_types)); + auto const field_ptr(builder->CreateStructGEP(closure_ctx_type, context, 0)); + locals[capture.first] = builder->CreateLoad(builder->getPtrTy(), + field_ptr, + munge(capture.first->to_string()).c_str()); + } + } } void llvm_processor::gen() @@ -312,17 +344,10 @@ namespace jank::codegen return ret; } - llvm::Value *llvm_processor::gen(analyze::expr::function const &expr, - analyze::expr::function_arity const &) + llvm::Value * + llvm_processor::gen(analyze::expr::function const &expr, + analyze::expr::function_arity const &fn_arity) { - /* TODO: - * 1. Copy this into a local llvm_processor - * 2. Move module, context, and builder in - * 3. Do generation - * 4. Move module, context, builder back out - * 5. 
Reference fn names and gen call to build fn object - */ - { llvm::IRBuilder<>::InsertPointGuard const guard{ *builder }; @@ -334,6 +359,7 @@ namespace jank::codegen analyze::expr::function_arity const *variadic_arity{}; analyze::expr::function_arity const *highest_fixed_arity{}; + auto const captures(expr.captures()); for(auto const &arity : expr.arities) { if(arity.fn_ctx->is_variadic) @@ -362,10 +388,47 @@ namespace jank::codegen builder->getInt8(!!variadic_arity), builder->getInt8(variadic_ambiguous) })); - auto const create_fn_type( - llvm::FunctionType::get(builder->getPtrTy(), { builder->getInt8Ty() }, false)); - auto const create_fn(module->getOrInsertFunction("jank_function_create", create_fn_type)); - auto const fn_obj(builder->CreateCall(create_fn, { arity_flags })); + llvm::Value *fn_obj{}; + + if(captures.empty()) + { + auto const create_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), { builder->getInt8Ty() }, false)); + auto const create_fn(module->getOrInsertFunction("jank_function_create", create_fn_type)); + fn_obj = builder->CreateCall(create_fn, { arity_flags }); + } + else + { + std::vector const capture_types{ captures.size(), builder->getPtrTy() }; + auto const closure_ctx_type( + get_or_insert_struct_type(fmt::format("{}_context", munge(expr.unique_name)), + capture_types)); + + auto const malloc_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), { builder->getInt64Ty() }, false)); + auto const malloc_fn(module->getOrInsertFunction("malloc", malloc_fn_type)); + auto const closure_obj( + builder->CreateCall(malloc_fn, { llvm::ConstantExpr::getSizeOf(closure_ctx_type) })); + + for(auto const &capture : captures) + { + auto const field_ptr(builder->CreateStructGEP(closure_ctx_type, closure_obj, 0)); + analyze::expr::local_reference const local_ref{ + analyze::expression_base{ {}, expr.expr_type, expr.frame }, + capture.first, + *capture.second + }; + builder->CreateStore(gen(local_ref, fn_arity), field_ptr); + } + + auto const 
create_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), + { builder->getInt8Ty(), builder->getPtrTy() }, + false)); + auto const create_fn( + module->getOrInsertFunction("jank_function_create_closure", create_fn_type)); + fn_obj = builder->CreateCall(create_fn, { arity_flags, closure_obj }); + } for(auto const &arity : expr.arities) { @@ -514,14 +577,8 @@ namespace jank::codegen auto const fn(module->getOrInsertFunction("jank_throw", fn_type)); llvm::SmallVector args{ value }; - auto const call(builder->CreateCall(fn, args)); - - if(expr.expr_type == analyze::expression_type::return_statement) - { - return builder->CreateRet(call); - } - - return call; + builder->CreateCall(fn, args); + return builder->CreateUnreachable(); } llvm::Value *llvm_processor::gen(analyze::expr::try_ const &, @@ -927,6 +984,21 @@ namespace jank::codegen name.c_str() }; } + llvm::StructType * + llvm_processor::get_or_insert_struct_type(std::string const &name, + std::vector const &fields) + { + auto const found(llvm::StructType::getTypeByName(*context, name)); + if(found) + { + return found; + } + + std::vector const field_types{ fields.size(), builder->getPtrTy() }; + auto const struct_type(llvm::StructType::create(field_types, name)); + return struct_type; + } + native_persistent_string llvm_processor::to_string() { module->print(llvm::outs(), nullptr); From 1aadea4909605054321031a7207d084a2c876580 Mon Sep 17 00:00:00 2001 From: jeaye Date: Sat, 19 Oct 2024 23:16:19 -0700 Subject: [PATCH 011/101] Add jit_closure object --- compiler+runtime/CMakeLists.txt | 1 + compiler+runtime/include/cpp/jank/c_api.h | 21 +- .../include/cpp/jank/runtime/erasure.hpp | 6 + .../cpp/jank/runtime/obj/jit_closure.hpp | 125 ++++ .../include/cpp/jank/runtime/object.hpp | 1 + compiler+runtime/src/cpp/clang/cc1_main.cpp | 320 ++++++++ compiler+runtime/src/cpp/clang/cc1as_main.cpp | 703 ++++++++++++++++++ .../src/cpp/clang/cc1gen_reproducer_main.cpp | 204 +++++ compiler+runtime/src/cpp/clang/driver.cpp | 
454 +++++++++++ compiler+runtime/src/cpp/jank/c_api.cpp | 101 ++- .../src/cpp/jank/codegen/llvm_processor.cpp | 23 +- .../src/cpp/jank/runtime/obj/jit_closure.cpp | 212 ++++++ 12 files changed, 2131 insertions(+), 40 deletions(-) create mode 100644 compiler+runtime/include/cpp/jank/runtime/obj/jit_closure.hpp create mode 100644 compiler+runtime/src/cpp/clang/cc1_main.cpp create mode 100644 compiler+runtime/src/cpp/clang/cc1as_main.cpp create mode 100644 compiler+runtime/src/cpp/clang/cc1gen_reproducer_main.cpp create mode 100644 compiler+runtime/src/cpp/clang/driver.cpp create mode 100644 compiler+runtime/src/cpp/jank/runtime/obj/jit_closure.cpp diff --git a/compiler+runtime/CMakeLists.txt b/compiler+runtime/CMakeLists.txt index aae32492..129d15f1 100644 --- a/compiler+runtime/CMakeLists.txt +++ b/compiler+runtime/CMakeLists.txt @@ -161,6 +161,7 @@ add_library( src/cpp/jank/runtime/obj/number.cpp src/cpp/jank/runtime/obj/native_function_wrapper.cpp src/cpp/jank/runtime/obj/jit_function.cpp + src/cpp/jank/runtime/obj/jit_closure.cpp src/cpp/jank/runtime/obj/multi_function.cpp src/cpp/jank/runtime/obj/symbol.cpp src/cpp/jank/runtime/obj/keyword.cpp diff --git a/compiler+runtime/include/cpp/jank/c_api.h b/compiler+runtime/include/cpp/jank/c_api.h index 5d13605a..b3fbf19c 100644 --- a/compiler+runtime/include/cpp/jank/c_api.h +++ b/compiler+runtime/include/cpp/jank/c_api.h @@ -47,16 +47,23 @@ extern "C" jank_native_bool is_variadic, jank_native_bool is_variadic_ambiguous); jank_object_ptr jank_function_create(jank_arity_flags arity_flags); - jank_object_ptr jank_function_create_closure(jank_arity_flags arity_flags, void *context); - jank_object_ptr jank_function_set_arity0(jank_object_ptr fn, jank_object_ptr (*f)()); - jank_object_ptr - jank_function_set_arity1(jank_object_ptr fn, jank_object_ptr (*f)(jank_object_ptr)); - jank_object_ptr jank_function_set_arity2(jank_object_ptr fn, - jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr)); - jank_object_ptr + void 
jank_function_set_arity0(jank_object_ptr fn, jank_object_ptr (*f)()); + void jank_function_set_arity1(jank_object_ptr fn, jank_object_ptr (*f)(jank_object_ptr)); + void jank_function_set_arity2(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr)); + void jank_function_set_arity3(jank_object_ptr fn, jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr)); + jank_object_ptr jank_closure_create(jank_arity_flags arity_flags, void *context); + void jank_closure_set_arity0(jank_object_ptr fn, jank_object_ptr (*f)()); + void jank_closure_set_arity1(jank_object_ptr fn, jank_object_ptr (*f)(jank_object_ptr)); + void jank_closure_set_arity2(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr)); + void + jank_closure_set_arity3(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr)); + jank_native_bool jank_truthy(jank_object_ptr o); jank_native_bool jank_equal(jank_object_ptr l, jank_object_ptr r); jank_native_hash jank_to_hash(jank_object_ptr o); diff --git a/compiler+runtime/include/cpp/jank/runtime/erasure.hpp b/compiler+runtime/include/cpp/jank/runtime/erasure.hpp index c99fbb2a..2f1a53cb 100644 --- a/compiler+runtime/include/cpp/jank/runtime/erasure.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/erasure.hpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -363,6 +364,11 @@ namespace jank::runtime return fn(expect_object(erased), std::forward(args)...); } break; + case object_type::jit_closure: + { + return fn(expect_object(erased), std::forward(args)...); + } + break; case object_type::multi_function: { return fn(expect_object(erased), std::forward(args)...); diff --git a/compiler+runtime/include/cpp/jank/runtime/obj/jit_closure.hpp b/compiler+runtime/include/cpp/jank/runtime/obj/jit_closure.hpp new file mode 100644 index 00000000..2c7202d4 --- /dev/null +++ b/compiler+runtime/include/cpp/jank/runtime/obj/jit_closure.hpp @@ 
-0,0 +1,125 @@ +#pragma once + +#include +#include + +namespace jank::runtime +{ + template <> + struct static_object + : gc + , behavior::callable + { + static constexpr native_bool pointer_free{ false }; + + static_object() = default; + static_object(static_object &&) = default; + static_object(static_object const &) = default; + static_object(arity_flag_t arity_flags); + static_object(arity_flag_t arity_flags, void *context); + static_object(object_ptr meta); + + /* behavior::object_like */ + native_bool equal(object const &) const; + native_persistent_string to_string(); + void to_string(fmt::memory_buffer &buff); + native_persistent_string to_code_string(); + native_hash to_hash() const; + + /* behavior::metadatable */ + native_box with_meta(object_ptr m); + + /* behavior::callable */ + object_ptr call() final; + object_ptr call(object_ptr) final; + object_ptr call(object_ptr, object_ptr) final; + object_ptr call(object_ptr, object_ptr, object_ptr) final; + object_ptr call(object_ptr, object_ptr, object_ptr, object_ptr) final; + object_ptr call(object_ptr, object_ptr, object_ptr, object_ptr, object_ptr) final; + object_ptr call(object_ptr, object_ptr, object_ptr, object_ptr, object_ptr, object_ptr) final; + object_ptr + call(object_ptr, object_ptr, object_ptr, object_ptr, object_ptr, object_ptr, object_ptr) + final; + object_ptr call(object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr) final; + object_ptr call(object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr) final; + object_ptr call(object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr, + object_ptr) final; + + arity_flag_t get_arity_flags() const final; + + object_ptr this_object_ptr() final; + + object base{ object_type::jit_closure }; + void *context{}; + object *(*arity_0)(void *){}; + object *(*arity_1)(void 
*, object *){}; + object *(*arity_2)(void *, object *, object *){}; + object *(*arity_3)(void *, object *, object *, object *){}; + object *(*arity_4)(void *, object *, object *, object *, object *){}; + object *(*arity_5)(void *, object *, object *, object *, object *, object *){}; + object *(*arity_6)(void *, object *, object *, object *, object *, object *, object *){}; + object *( + *arity_7)(void *, object *, object *, object *, object *, object *, object *, object *){}; + object *(*arity_8)(void *, + object *, + object *, + object *, + object *, + object *, + object *, + object *, + object *){}; + object *(*arity_9)(void *, + object *, + object *, + object *, + object *, + object *, + object *, + object *, + object *, + object *){}; + object *(*arity_10)(void *, + object *, + object *, + object *, + object *, + object *, + object *, + object *, + object *, + object *, + object *){}; + option meta; + arity_flag_t arity_flags{}; + }; + + namespace obj + { + using jit_closure = static_object; + using jit_closure_ptr = native_box; + } +} diff --git a/compiler+runtime/include/cpp/jank/runtime/object.hpp b/compiler+runtime/include/cpp/jank/runtime/object.hpp index 701b141a..f053d23d 100644 --- a/compiler+runtime/include/cpp/jank/runtime/object.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/object.hpp @@ -62,6 +62,7 @@ namespace jank::runtime native_function_wrapper, jit_function, + jit_closure, multi_function, atom, diff --git a/compiler+runtime/src/cpp/clang/cc1_main.cpp b/compiler+runtime/src/cpp/clang/cc1_main.cpp new file mode 100644 index 00000000..3c0599c2 --- /dev/null +++ b/compiler+runtime/src/cpp/clang/cc1_main.cpp @@ -0,0 +1,320 @@ +//===-- cc1_main.cpp - Clang CC1 Compiler Frontend ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the entry point to the clang -cc1 functionality, which implements the +// core compiler functionality along with a number of additional tools for +// demonstration and testing purposes. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Stack.h" +#include "clang/Basic/TargetOptions.h" +#include "clang/CodeGen/ObjectFilePCHContainerOperations.h" +#include "clang/Config/config.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/FrontendDiagnostic.h" +#include "clang/Frontend/TextDiagnosticBuffer.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "clang/Frontend/Utils.h" +#include "clang/FrontendTool/Utils.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/LinkAllPasses.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/BuryPointer.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/TimeProfiler.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/TargetParser/AArch64TargetParser.h" +#include "llvm/TargetParser/ARMTargetParser.h" +#include "llvm/TargetParser/RISCVISAInfo.h" +#include + +#ifdef CLANG_HAVE_RLIMITS +#include +#endif + +using namespace clang; +using 
namespace llvm::opt; + +//===----------------------------------------------------------------------===// +// Main driver +//===----------------------------------------------------------------------===// + +static void LLVMErrorHandler(void *UserData, const char *Message, + bool GenCrashDiag) { + DiagnosticsEngine &Diags = *static_cast(UserData); + + Diags.Report(diag::err_fe_error_backend) << Message; + + // Run the interrupt handlers to make sure any special cleanups get done, in + // particular that we remove files registered with RemoveFileOnSignal. + llvm::sys::RunInterruptHandlers(); + + // We cannot recover from llvm errors. When reporting a fatal error, exit + // with status 70 to generate crash diagnostics. For BSD systems this is + // defined as an internal software error. Otherwise, exit with status 1. + llvm::sys::Process::Exit(GenCrashDiag ? 70 : 1); +} + +#ifdef CLANG_HAVE_RLIMITS +/// Attempt to ensure that we have at least 8MiB of usable stack space. +static void ensureSufficientStack() { + struct rlimit rlim; + if (getrlimit(RLIMIT_STACK, &rlim) != 0) + return; + + // Increase the soft stack limit to our desired level, if necessary and + // possible. + if (rlim.rlim_cur != RLIM_INFINITY && + rlim.rlim_cur < rlim_t(DesiredStackSize)) { + // Try to allocate sufficient stack. + if (rlim.rlim_max == RLIM_INFINITY || + rlim.rlim_max >= rlim_t(DesiredStackSize)) + rlim.rlim_cur = DesiredStackSize; + else if (rlim.rlim_cur == rlim.rlim_max) + return; + else + rlim.rlim_cur = rlim.rlim_max; + + if (setrlimit(RLIMIT_STACK, &rlim) != 0 || + rlim.rlim_cur != DesiredStackSize) + return; + } +} +#else +static void ensureSufficientStack() {} +#endif + +/// Print supported cpus of the given target. 
+static int PrintSupportedCPUs(std::string TargetStr) { + std::string Error; + const llvm::Target *TheTarget = + llvm::TargetRegistry::lookupTarget(TargetStr, Error); + if (!TheTarget) { + llvm::errs() << Error; + return 1; + } + + // the target machine will handle the mcpu printing + llvm::TargetOptions Options; + std::unique_ptr TheTargetMachine( + TheTarget->createTargetMachine(TargetStr, "", "+cpuhelp", Options, + std::nullopt)); + return 0; +} + +static int PrintSupportedExtensions(std::string TargetStr) { + std::string Error; + const llvm::Target *TheTarget = + llvm::TargetRegistry::lookupTarget(TargetStr, Error); + if (!TheTarget) { + llvm::errs() << Error; + return 1; + } + + llvm::TargetOptions Options; + std::unique_ptr TheTargetMachine( + TheTarget->createTargetMachine(TargetStr, "", "", Options, std::nullopt)); + const llvm::Triple &MachineTriple = TheTargetMachine->getTargetTriple(); + const llvm::MCSubtargetInfo *MCInfo = TheTargetMachine->getMCSubtargetInfo(); + const llvm::ArrayRef Features = + MCInfo->getAllProcessorFeatures(); + + llvm::StringMap DescMap; + for (const llvm::SubtargetFeatureKV &feature : Features) + DescMap.insert({feature.Key, feature.Desc}); + + if (MachineTriple.isRISCV()) + llvm::riscvExtensionsHelp(DescMap); + else if (MachineTriple.isAArch64()) + llvm::AArch64::PrintSupportedExtensions(); + else if (MachineTriple.isARM()) + llvm::ARM::PrintSupportedExtensions(DescMap); + else { + // The option was already checked in Driver::HandleImmediateArgs, + // so we do not expect to get here if we are not a supported architecture. 
+ assert(0 && "Unhandled triple for --print-supported-extensions option."); + return 1; + } + + return 0; +} + +static int PrintEnabledExtensions(const TargetOptions& TargetOpts) { + std::string Error; + const llvm::Target *TheTarget = + llvm::TargetRegistry::lookupTarget(TargetOpts.Triple, Error); + if (!TheTarget) { + llvm::errs() << Error; + return 1; + } + + // Create a target machine using the input features, the triple information + // and a dummy instance of llvm::TargetOptions. Note that this is _not_ the + // same as the `clang::TargetOptions` instance we have access to here. + llvm::TargetOptions BackendOptions; + std::string FeaturesStr = llvm::join(TargetOpts.FeaturesAsWritten, ","); + std::unique_ptr TheTargetMachine( + TheTarget->createTargetMachine(TargetOpts.Triple, TargetOpts.CPU, FeaturesStr, BackendOptions, std::nullopt)); + const llvm::Triple &MachineTriple = TheTargetMachine->getTargetTriple(); + const llvm::MCSubtargetInfo *MCInfo = TheTargetMachine->getMCSubtargetInfo(); + + // Extract the feature names that are enabled for the given target. + // We do that by capturing the key from the set of SubtargetFeatureKV entries + // provided by MCSubtargetInfo, which match the '-target-feature' values. + const std::vector Features = + MCInfo->getEnabledProcessorFeatures(); + std::set EnabledFeatureNames; + for (const llvm::SubtargetFeatureKV &feature : Features) + EnabledFeatureNames.insert(feature.Key); + + if (!MachineTriple.isAArch64()) { + // The option was already checked in Driver::HandleImmediateArgs, + // so we do not expect to get here if we are not a supported architecture. 
+ assert(0 && "Unhandled triple for --print-enabled-extensions option."); + return 1; + } + llvm::AArch64::printEnabledExtensions(EnabledFeatureNames); + + return 0; +} + +int cc1_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { + ensureSufficientStack(); + + std::unique_ptr Clang(new CompilerInstance()); + IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); + + // Register the support for object-file-wrapped Clang modules. + auto PCHOps = Clang->getPCHContainerOperations(); + PCHOps->registerWriter(std::make_unique()); + PCHOps->registerReader(std::make_unique()); + + // Initialize targets first, so that --version shows registered targets. + llvm::InitializeAllTargets(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmPrinters(); + llvm::InitializeAllAsmParsers(); + + // Buffer diagnostics from argument parsing so that we can output them using a + // well formed diagnostic object. + IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); + TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer; + DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagsBuffer); + + // Setup round-trip remarks for the DiagnosticsEngine used in CreateFromArgs. + if (find(Argv, StringRef("-Rround-trip-cc1-args")) != Argv.end()) + Diags.setSeverity(diag::remark_cc1_round_trip_generated, + diag::Severity::Remark, {}); + + bool Success = CompilerInvocation::CreateFromArgs(Clang->getInvocation(), + Argv, Diags, Argv0); + + if (!Clang->getFrontendOpts().TimeTracePath.empty()) { + llvm::timeTraceProfilerInitialize( + Clang->getFrontendOpts().TimeTraceGranularity, Argv0); + } + // --print-supported-cpus takes priority over the actual compilation. + if (Clang->getFrontendOpts().PrintSupportedCPUs) + return PrintSupportedCPUs(Clang->getTargetOpts().Triple); + + // --print-supported-extensions takes priority over the actual compilation. 
+ if (Clang->getFrontendOpts().PrintSupportedExtensions) + return PrintSupportedExtensions(Clang->getTargetOpts().Triple); + + // --print-enabled-extensions takes priority over the actual compilation. + if (Clang->getFrontendOpts().PrintEnabledExtensions) + return PrintEnabledExtensions(Clang->getTargetOpts()); + + // Infer the builtin include path if unspecified. + if (Clang->getHeaderSearchOpts().UseBuiltinIncludes && + Clang->getHeaderSearchOpts().ResourceDir.empty()) + Clang->getHeaderSearchOpts().ResourceDir = + CompilerInvocation::GetResourcesPath(Argv0, MainAddr); + + // Create the actual diagnostics engine. + Clang->createDiagnostics(); + if (!Clang->hasDiagnostics()) + return 1; + + // Set an error handler, so that any LLVM backend diagnostics go through our + // error handler. + llvm::install_fatal_error_handler(LLVMErrorHandler, + static_cast(&Clang->getDiagnostics())); + + DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics()); + if (!Success) { + Clang->getDiagnosticClient().finish(); + return 1; + } + + // Execute the frontend actions. + { + llvm::TimeTraceScope TimeScope("ExecuteCompiler"); + Success = ExecuteCompilerInvocation(Clang.get()); + } + + // If any timers were active but haven't been destroyed yet, print their + // results now. This happens in -disable-free mode. + llvm::TimerGroup::printAll(llvm::errs()); + llvm::TimerGroup::clearAll(); + + if (llvm::timeTraceProfilerEnabled()) { + // It is possible that the compiler instance doesn't own a file manager here + // if we're compiling a module unit. Since the file manager are owned by AST + // when we're compiling a module unit. So the file manager may be invalid + // here. + // + // It should be fine to create file manager here since the file system + // options are stored in the compiler invocation and we can recreate the VFS + // from the compiler invocation. 
+ if (!Clang->hasFileManager()) + Clang->createFileManager(createVFSFromCompilerInvocation( + Clang->getInvocation(), Clang->getDiagnostics())); + + if (auto profilerOutput = Clang->createOutputFile( + Clang->getFrontendOpts().TimeTracePath, /*Binary=*/false, + /*RemoveFileOnSignal=*/false, + /*useTemporary=*/false)) { + llvm::timeTraceProfilerWrite(*profilerOutput); + profilerOutput.reset(); + llvm::timeTraceProfilerCleanup(); + Clang->clearOutputFiles(false); + } + } + + // Our error handler depends on the Diagnostics object, which we're + // potentially about to delete. Uninstall the handler now so that any + // later errors use the default handling behavior instead. + llvm::remove_fatal_error_handler(); + + // When running with -disable-free, don't do any destruction or shutdown. + if (Clang->getFrontendOpts().DisableFree) { + llvm::BuryPointer(std::move(Clang)); + return !Success; + } + + return !Success; +} diff --git a/compiler+runtime/src/cpp/clang/cc1as_main.cpp b/compiler+runtime/src/cpp/clang/cc1as_main.cpp new file mode 100644 index 00000000..4e0aa145 --- /dev/null +++ b/compiler+runtime/src/cpp/clang/cc1as_main.cpp @@ -0,0 +1,703 @@ +//===-- cc1as_main.cpp - Clang Assembler ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the entry point to the clang -cc1as functionality, which implements +// the direct interface to the LLVM MC based assembler. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" +#include "clang/Frontend/FrontendDiagnostic.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "clang/Frontend/Utils.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/Host.h" +#include "llvm/TargetParser/Triple.h" +#include +#include +#include +using namespace clang; +using namespace clang::driver; +using namespace clang::driver::options; +using namespace llvm; +using namespace llvm::opt; + +namespace { + +/// Helper class for representing a single invocation of the assembler. 
+struct AssemblerInvocation { + /// @name Target Options + /// @{ + + /// The name of the target triple to assemble for. + std::string Triple; + + /// If given, the name of the target CPU to determine which instructions + /// are legal. + std::string CPU; + + /// The list of target specific features to enable or disable -- this should + /// be a list of strings starting with '+' or '-'. + std::vector Features; + + /// The list of symbol definitions. + std::vector SymbolDefs; + + /// @} + /// @name Language Options + /// @{ + + std::vector IncludePaths; + LLVM_PREFERRED_TYPE(bool) + unsigned NoInitialTextSection : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned SaveTemporaryLabels : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned GenDwarfForAssembly : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned RelaxELFRelocations : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned Dwarf64 : 1; + unsigned DwarfVersion; + std::string DwarfDebugFlags; + std::string DwarfDebugProducer; + std::string DebugCompilationDir; + llvm::SmallVector, 0> DebugPrefixMap; + llvm::DebugCompressionType CompressDebugSections = + llvm::DebugCompressionType::None; + std::string MainFileName; + std::string SplitDwarfOutput; + + /// @} + /// @name Frontend Options + /// @{ + + std::string InputFile; + std::vector LLVMArgs; + std::string OutputPath; + enum FileType { + FT_Asm, ///< Assembly (.s) output, transliterate mode. + FT_Null, ///< No output, for timing purposes. + FT_Obj ///< Object file output. 
+ }; + FileType OutputType; + LLVM_PREFERRED_TYPE(bool) + unsigned ShowHelp : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned ShowVersion : 1; + + /// @} + /// @name Transliterate Options + /// @{ + + unsigned OutputAsmVariant; + LLVM_PREFERRED_TYPE(bool) + unsigned ShowEncoding : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned ShowInst : 1; + + /// @} + /// @name Assembler Options + /// @{ + + LLVM_PREFERRED_TYPE(bool) + unsigned RelaxAll : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned NoExecStack : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned FatalWarnings : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned NoWarn : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned NoTypeCheck : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned IncrementalLinkerCompatible : 1; + LLVM_PREFERRED_TYPE(bool) + unsigned EmbedBitcode : 1; + + /// Whether to emit DWARF unwind info. + EmitDwarfUnwindType EmitDwarfUnwind; + + // Whether to emit compact-unwind for non-canonical entries. + // Note: maybe overriden by other constraints. + LLVM_PREFERRED_TYPE(bool) + unsigned EmitCompactUnwindNonCanonical : 1; + + LLVM_PREFERRED_TYPE(bool) + unsigned Crel : 1; + + /// The name of the relocation model to use. + std::string RelocationModel; + + /// The ABI targeted by the backend. Specified using -target-abi. Empty + /// otherwise. + std::string TargetABI; + + /// Darwin target variant triple, the variant of the deployment target + /// for which the code is being compiled. + std::optional DarwinTargetVariantTriple; + + /// The version of the darwin target variant SDK which was used during the + /// compilation + llvm::VersionTuple DarwinTargetVariantSDKVersion; + + /// The name of a file to use with \c .secure_log_unique directives. 
+ std::string AsSecureLogFile; + /// @} + +public: + AssemblerInvocation() { + Triple = ""; + NoInitialTextSection = 0; + InputFile = "-"; + OutputPath = "-"; + OutputType = FT_Asm; + OutputAsmVariant = 0; + ShowInst = 0; + ShowEncoding = 0; + RelaxAll = 0; + NoExecStack = 0; + FatalWarnings = 0; + NoWarn = 0; + NoTypeCheck = 0; + IncrementalLinkerCompatible = 0; + Dwarf64 = 0; + DwarfVersion = 0; + EmbedBitcode = 0; + EmitDwarfUnwind = EmitDwarfUnwindType::Default; + EmitCompactUnwindNonCanonical = false; + Crel = false; + } + + static bool CreateFromArgs(AssemblerInvocation &Res, + ArrayRef Argv, + DiagnosticsEngine &Diags); +}; + +} + +bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts, + ArrayRef Argv, + DiagnosticsEngine &Diags) { + bool Success = true; + + // Parse the arguments. + const OptTable &OptTbl = getDriverOptTable(); + + llvm::opt::Visibility VisibilityMask(options::CC1AsOption); + unsigned MissingArgIndex, MissingArgCount; + InputArgList Args = + OptTbl.ParseArgs(Argv, MissingArgIndex, MissingArgCount, VisibilityMask); + + // Check for missing argument error. + if (MissingArgCount) { + Diags.Report(diag::err_drv_missing_argument) + << Args.getArgString(MissingArgIndex) << MissingArgCount; + Success = false; + } + + // Issue errors on unknown arguments. + for (const Arg *A : Args.filtered(OPT_UNKNOWN)) { + auto ArgString = A->getAsString(Args); + std::string Nearest; + if (OptTbl.findNearest(ArgString, Nearest, VisibilityMask) > 1) + Diags.Report(diag::err_drv_unknown_argument) << ArgString; + else + Diags.Report(diag::err_drv_unknown_argument_with_suggestion) + << ArgString << Nearest; + Success = false; + } + + // Construct the invocation. 
+ + // Target Options + Opts.Triple = llvm::Triple::normalize(Args.getLastArgValue(OPT_triple)); + if (Arg *A = Args.getLastArg(options::OPT_darwin_target_variant_triple)) + Opts.DarwinTargetVariantTriple = llvm::Triple(A->getValue()); + if (Arg *A = Args.getLastArg(OPT_darwin_target_variant_sdk_version_EQ)) { + VersionTuple Version; + if (Version.tryParse(A->getValue())) + Diags.Report(diag::err_drv_invalid_value) + << A->getAsString(Args) << A->getValue(); + else + Opts.DarwinTargetVariantSDKVersion = Version; + } + + Opts.CPU = std::string(Args.getLastArgValue(OPT_target_cpu)); + Opts.Features = Args.getAllArgValues(OPT_target_feature); + + // Use the default target triple if unspecified. + if (Opts.Triple.empty()) + Opts.Triple = llvm::sys::getDefaultTargetTriple(); + + // Language Options + Opts.IncludePaths = Args.getAllArgValues(OPT_I); + Opts.NoInitialTextSection = Args.hasArg(OPT_n); + Opts.SaveTemporaryLabels = Args.hasArg(OPT_msave_temp_labels); + // Any DebugInfoKind implies GenDwarfForAssembly. 
+ Opts.GenDwarfForAssembly = Args.hasArg(OPT_debug_info_kind_EQ); + + if (const Arg *A = Args.getLastArg(OPT_compress_debug_sections_EQ)) { + Opts.CompressDebugSections = + llvm::StringSwitch(A->getValue()) + .Case("none", llvm::DebugCompressionType::None) + .Case("zlib", llvm::DebugCompressionType::Zlib) + .Case("zstd", llvm::DebugCompressionType::Zstd) + .Default(llvm::DebugCompressionType::None); + } + + Opts.RelaxELFRelocations = !Args.hasArg(OPT_mrelax_relocations_no); + if (auto *DwarfFormatArg = Args.getLastArg(OPT_gdwarf64, OPT_gdwarf32)) + Opts.Dwarf64 = DwarfFormatArg->getOption().matches(OPT_gdwarf64); + Opts.DwarfVersion = getLastArgIntValue(Args, OPT_dwarf_version_EQ, 2, Diags); + Opts.DwarfDebugFlags = + std::string(Args.getLastArgValue(OPT_dwarf_debug_flags)); + Opts.DwarfDebugProducer = + std::string(Args.getLastArgValue(OPT_dwarf_debug_producer)); + if (const Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ, + options::OPT_fdebug_compilation_dir_EQ)) + Opts.DebugCompilationDir = A->getValue(); + Opts.MainFileName = std::string(Args.getLastArgValue(OPT_main_file_name)); + + for (const auto &Arg : Args.getAllArgValues(OPT_fdebug_prefix_map_EQ)) { + auto Split = StringRef(Arg).split('='); + Opts.DebugPrefixMap.emplace_back(Split.first, Split.second); + } + + // Frontend Options + if (Args.hasArg(OPT_INPUT)) { + bool First = true; + for (const Arg *A : Args.filtered(OPT_INPUT)) { + if (First) { + Opts.InputFile = A->getValue(); + First = false; + } else { + Diags.Report(diag::err_drv_unknown_argument) << A->getAsString(Args); + Success = false; + } + } + } + Opts.LLVMArgs = Args.getAllArgValues(OPT_mllvm); + Opts.OutputPath = std::string(Args.getLastArgValue(OPT_o)); + Opts.SplitDwarfOutput = + std::string(Args.getLastArgValue(OPT_split_dwarf_output)); + if (Arg *A = Args.getLastArg(OPT_filetype)) { + StringRef Name = A->getValue(); + unsigned OutputType = StringSwitch(Name) + .Case("asm", FT_Asm) + .Case("null", FT_Null) + .Case("obj", 
FT_Obj) + .Default(~0U); + if (OutputType == ~0U) { + Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; + Success = false; + } else + Opts.OutputType = FileType(OutputType); + } + Opts.ShowHelp = Args.hasArg(OPT_help); + Opts.ShowVersion = Args.hasArg(OPT_version); + + // Transliterate Options + Opts.OutputAsmVariant = + getLastArgIntValue(Args, OPT_output_asm_variant, 0, Diags); + Opts.ShowEncoding = Args.hasArg(OPT_show_encoding); + Opts.ShowInst = Args.hasArg(OPT_show_inst); + + // Assemble Options + Opts.RelaxAll = Args.hasArg(OPT_mrelax_all); + Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack); + Opts.FatalWarnings = Args.hasArg(OPT_massembler_fatal_warnings); + Opts.NoWarn = Args.hasArg(OPT_massembler_no_warn); + Opts.NoTypeCheck = Args.hasArg(OPT_mno_type_check); + Opts.RelocationModel = + std::string(Args.getLastArgValue(OPT_mrelocation_model, "pic")); + Opts.TargetABI = std::string(Args.getLastArgValue(OPT_target_abi)); + Opts.IncrementalLinkerCompatible = + Args.hasArg(OPT_mincremental_linker_compatible); + Opts.SymbolDefs = Args.getAllArgValues(OPT_defsym); + + // EmbedBitcode Option. If -fembed-bitcode is enabled, set the flag. + // EmbedBitcode behaves the same for all embed options for assembly files. 
+ if (auto *A = Args.getLastArg(OPT_fembed_bitcode_EQ)) { + Opts.EmbedBitcode = llvm::StringSwitch(A->getValue()) + .Case("all", 1) + .Case("bitcode", 1) + .Case("marker", 1) + .Default(0); + } + + if (auto *A = Args.getLastArg(OPT_femit_dwarf_unwind_EQ)) { + Opts.EmitDwarfUnwind = + llvm::StringSwitch(A->getValue()) + .Case("always", EmitDwarfUnwindType::Always) + .Case("no-compact-unwind", EmitDwarfUnwindType::NoCompactUnwind) + .Case("default", EmitDwarfUnwindType::Default); + } + + Opts.EmitCompactUnwindNonCanonical = + Args.hasArg(OPT_femit_compact_unwind_non_canonical); + Opts.Crel = Args.hasArg(OPT_crel); + + Opts.AsSecureLogFile = Args.getLastArgValue(OPT_as_secure_log_file); + + return Success; +} + +static std::unique_ptr +getOutputStream(StringRef Path, DiagnosticsEngine &Diags, bool Binary) { + // Make sure that the Out file gets unlinked from the disk if we get a + // SIGINT. + if (Path != "-") + sys::RemoveFileOnSignal(Path); + + std::error_code EC; + auto Out = std::make_unique( + Path, EC, (Binary ? sys::fs::OF_None : sys::fs::OF_TextWithCRLF)); + if (EC) { + Diags.Report(diag::err_fe_unable_to_open_output) << Path << EC.message(); + return nullptr; + } + + return Out; +} + +static bool ExecuteAssemblerImpl(AssemblerInvocation &Opts, + DiagnosticsEngine &Diags) { + // Get the target specific parser. + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(Opts.Triple, Error); + if (!TheTarget) + return Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; + + ErrorOr> Buffer = + MemoryBuffer::getFileOrSTDIN(Opts.InputFile, /*IsText=*/true); + + if (std::error_code EC = Buffer.getError()) { + return Diags.Report(diag::err_fe_error_reading) + << Opts.InputFile << EC.message(); + } + + SourceMgr SrcMgr; + + // Tell SrcMgr about this buffer, which is what the parser will pick up. 
+ unsigned BufferIndex = SrcMgr.AddNewSourceBuffer(std::move(*Buffer), SMLoc()); + + // Record the location of the include directories so that the lexer can find + // it later. + SrcMgr.setIncludeDirs(Opts.IncludePaths); + + std::unique_ptr MRI(TheTarget->createMCRegInfo(Opts.Triple)); + assert(MRI && "Unable to create target register info!"); + + MCTargetOptions MCOptions; + MCOptions.MCRelaxAll = Opts.RelaxAll; + MCOptions.EmitDwarfUnwind = Opts.EmitDwarfUnwind; + MCOptions.EmitCompactUnwindNonCanonical = Opts.EmitCompactUnwindNonCanonical; + MCOptions.MCSaveTempLabels = Opts.SaveTemporaryLabels; + MCOptions.Crel = Opts.Crel; + MCOptions.X86RelaxRelocations = Opts.RelaxELFRelocations; + MCOptions.CompressDebugSections = Opts.CompressDebugSections; + MCOptions.AsSecureLogFile = Opts.AsSecureLogFile; + + std::unique_ptr MAI( + TheTarget->createMCAsmInfo(*MRI, Opts.Triple, MCOptions)); + assert(MAI && "Unable to create target asm info!"); + + // Ensure MCAsmInfo initialization occurs before any use, otherwise sections + // may be created with a combination of default and explicit settings. + + + bool IsBinary = Opts.OutputType == AssemblerInvocation::FT_Obj; + if (Opts.OutputPath.empty()) + Opts.OutputPath = "-"; + std::unique_ptr FDOS = + getOutputStream(Opts.OutputPath, Diags, IsBinary); + if (!FDOS) + return true; + std::unique_ptr DwoOS; + if (!Opts.SplitDwarfOutput.empty()) + DwoOS = getOutputStream(Opts.SplitDwarfOutput, Diags, IsBinary); + + // Build up the feature string from the target feature list. 
+ std::string FS = llvm::join(Opts.Features, ","); + + std::unique_ptr STI( + TheTarget->createMCSubtargetInfo(Opts.Triple, Opts.CPU, FS)); + assert(STI && "Unable to create subtarget info!"); + + MCContext Ctx(Triple(Opts.Triple), MAI.get(), MRI.get(), STI.get(), &SrcMgr, + &MCOptions); + + bool PIC = false; + if (Opts.RelocationModel == "static") { + PIC = false; + } else if (Opts.RelocationModel == "pic") { + PIC = true; + } else { + assert(Opts.RelocationModel == "dynamic-no-pic" && + "Invalid PIC model!"); + PIC = false; + } + + // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and + // MCObjectFileInfo needs a MCContext reference in order to initialize itself. + std::unique_ptr MOFI( + TheTarget->createMCObjectFileInfo(Ctx, PIC)); + if (Opts.DarwinTargetVariantTriple) + MOFI->setDarwinTargetVariantTriple(*Opts.DarwinTargetVariantTriple); + if (!Opts.DarwinTargetVariantSDKVersion.empty()) + MOFI->setDarwinTargetVariantSDKVersion(Opts.DarwinTargetVariantSDKVersion); + Ctx.setObjectFileInfo(MOFI.get()); + + if (Opts.GenDwarfForAssembly) + Ctx.setGenDwarfForAssembly(true); + if (!Opts.DwarfDebugFlags.empty()) + Ctx.setDwarfDebugFlags(StringRef(Opts.DwarfDebugFlags)); + if (!Opts.DwarfDebugProducer.empty()) + Ctx.setDwarfDebugProducer(StringRef(Opts.DwarfDebugProducer)); + if (!Opts.DebugCompilationDir.empty()) + Ctx.setCompilationDir(Opts.DebugCompilationDir); + else { + // If no compilation dir is set, try to use the current directory. + SmallString<128> CWD; + if (!sys::fs::current_path(CWD)) + Ctx.setCompilationDir(CWD); + } + if (!Opts.DebugPrefixMap.empty()) + for (const auto &KV : Opts.DebugPrefixMap) + Ctx.addDebugPrefixMapEntry(KV.first, KV.second); + if (!Opts.MainFileName.empty()) + Ctx.setMainFileName(StringRef(Opts.MainFileName)); + Ctx.setDwarfFormat(Opts.Dwarf64 ? 
dwarf::DWARF64 : dwarf::DWARF32); + Ctx.setDwarfVersion(Opts.DwarfVersion); + if (Opts.GenDwarfForAssembly) + Ctx.setGenDwarfRootFile(Opts.InputFile, + SrcMgr.getMemoryBuffer(BufferIndex)->getBuffer()); + + std::unique_ptr Str; + + std::unique_ptr MCII(TheTarget->createMCInstrInfo()); + assert(MCII && "Unable to create instruction info!"); + + raw_pwrite_stream *Out = FDOS.get(); + std::unique_ptr BOS; + + MCOptions.MCNoWarn = Opts.NoWarn; + MCOptions.MCFatalWarnings = Opts.FatalWarnings; + MCOptions.MCNoTypeCheck = Opts.NoTypeCheck; + MCOptions.ABIName = Opts.TargetABI; + + // FIXME: There is a bit of code duplication with addPassesToEmitFile. + if (Opts.OutputType == AssemblerInvocation::FT_Asm) { + MCInstPrinter *IP = TheTarget->createMCInstPrinter( + llvm::Triple(Opts.Triple), Opts.OutputAsmVariant, *MAI, *MCII, *MRI); + + std::unique_ptr CE; + if (Opts.ShowEncoding) + CE.reset(TheTarget->createMCCodeEmitter(*MCII, Ctx)); + std::unique_ptr MAB( + TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); + + auto FOut = std::make_unique(*Out); + Str.reset(TheTarget->createAsmStreamer( + Ctx, std::move(FOut), /*asmverbose*/ true, + /*useDwarfDirectory*/ true, IP, std::move(CE), std::move(MAB), + Opts.ShowInst)); + } else if (Opts.OutputType == AssemblerInvocation::FT_Null) { + Str.reset(createNullStreamer(Ctx)); + } else { + assert(Opts.OutputType == AssemblerInvocation::FT_Obj && + "Invalid file type!"); + if (!FDOS->supportsSeeking()) { + BOS = std::make_unique(*FDOS); + Out = BOS.get(); + } + + std::unique_ptr CE( + TheTarget->createMCCodeEmitter(*MCII, Ctx)); + std::unique_ptr MAB( + TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions)); + assert(MAB && "Unable to create asm backend!"); + + std::unique_ptr OW = + DwoOS ? 
MAB->createDwoObjectWriter(*Out, *DwoOS) + : MAB->createObjectWriter(*Out); + + Triple T(Opts.Triple); + Str.reset(TheTarget->createMCObjectStreamer( + T, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI, + Opts.RelaxAll, Opts.IncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ true)); + Str.get()->initSections(Opts.NoExecStack, *STI); + } + + // When -fembed-bitcode is passed to clang_as, a 1-byte marker + // is emitted in __LLVM,__asm section if the object file is MachO format. + if (Opts.EmbedBitcode && Ctx.getObjectFileType() == MCContext::IsMachO) { + MCSection *AsmLabel = Ctx.getMachOSection( + "__LLVM", "__asm", MachO::S_REGULAR, 4, SectionKind::getReadOnly()); + Str.get()->switchSection(AsmLabel); + Str.get()->emitZeros(1); + } + + bool Failed = false; + + std::unique_ptr Parser( + createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI)); + + // FIXME: init MCTargetOptions from sanitizer flags here. + std::unique_ptr TAP( + TheTarget->createMCAsmParser(*STI, *Parser, *MCII, MCOptions)); + if (!TAP) + Failed = Diags.Report(diag::err_target_unknown_triple) << Opts.Triple; + + // Set values for symbols, if any. + for (auto &S : Opts.SymbolDefs) { + auto Pair = StringRef(S).split('='); + auto Sym = Pair.first; + auto Val = Pair.second; + int64_t Value; + // We have already error checked this in the driver. + Val.getAsInteger(0, Value); + Ctx.setSymbolValue(Parser->getStreamer(), Sym, Value); + } + + if (!Failed) { + Parser->setTargetParser(*TAP.get()); + Failed = Parser->Run(Opts.NoInitialTextSection); + } + + return Failed; +} + +static bool ExecuteAssembler(AssemblerInvocation &Opts, + DiagnosticsEngine &Diags) { + bool Failed = ExecuteAssemblerImpl(Opts, Diags); + + // Delete output file if there were errors. 
+ if (Failed) { + if (Opts.OutputPath != "-") + sys::fs::remove(Opts.OutputPath); + if (!Opts.SplitDwarfOutput.empty() && Opts.SplitDwarfOutput != "-") + sys::fs::remove(Opts.SplitDwarfOutput); + } + + return Failed; +} + +static void LLVMErrorHandler(void *UserData, const char *Message, + bool GenCrashDiag) { + DiagnosticsEngine &Diags = *static_cast(UserData); + + Diags.Report(diag::err_fe_error_backend) << Message; + + // We cannot recover from llvm errors. + sys::Process::Exit(1); +} + +int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { + // Initialize targets and assembly printers/parsers. + InitializeAllTargetInfos(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + + // Construct our diagnostic client. + IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); + TextDiagnosticPrinter *DiagClient + = new TextDiagnosticPrinter(errs(), &*DiagOpts); + DiagClient->setPrefix("clang -cc1as"); + IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); + DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); + + // Set an error handler, so that any LLVM backend diagnostics go through our + // error handler. + ScopedFatalErrorHandler FatalErrorHandler + (LLVMErrorHandler, static_cast(&Diags)); + + // Parse the arguments. + AssemblerInvocation Asm; + if (!AssemblerInvocation::CreateFromArgs(Asm, Argv, Diags)) + return 1; + + if (Asm.ShowHelp) { + getDriverOptTable().printHelp( + llvm::outs(), "clang -cc1as [options] file...", + "Clang Integrated Assembler", /*ShowHidden=*/false, + /*ShowAllAliases=*/false, + llvm::opt::Visibility(driver::options::CC1AsOption)); + + return 0; + } + + // Honor -version. + // + // FIXME: Use a better -version message? + if (Asm.ShowVersion) { + llvm::cl::PrintVersionMessage(); + return 0; + } + + // Honor -mllvm. + // + // FIXME: Remove this, one day. 
+ if (!Asm.LLVMArgs.empty()) { + unsigned NumArgs = Asm.LLVMArgs.size(); + auto Args = std::make_unique(NumArgs + 2); + Args[0] = "clang (LLVM option parsing)"; + for (unsigned i = 0; i != NumArgs; ++i) + Args[i + 1] = Asm.LLVMArgs[i].c_str(); + Args[NumArgs + 1] = nullptr; + llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get()); + } + + // Execute the invocation, unless there were parsing errors. + bool Failed = Diags.hasErrorOccurred() || ExecuteAssembler(Asm, Diags); + + // If any timers were active but haven't been destroyed yet, print their + // results now. + TimerGroup::printAll(errs()); + TimerGroup::clearAll(); + + return !!Failed; +} diff --git a/compiler+runtime/src/cpp/clang/cc1gen_reproducer_main.cpp b/compiler+runtime/src/cpp/clang/cc1gen_reproducer_main.cpp new file mode 100644 index 00000000..e97fa3d2 --- /dev/null +++ b/compiler+runtime/src/cpp/clang/cc1gen_reproducer_main.cpp @@ -0,0 +1,204 @@ +//===-- cc1gen_reproducer_main.cpp - Clang reproducer generator ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the entry point to the clang -cc1gen-reproducer functionality, which +// generates reproducers for invocations for clang-based tools. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LLVM.h" +#include "clang/Driver/Compilation.h" +#include "clang/Driver/Driver.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/LLVMDriver.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/Host.h" +#include + +using namespace clang; + +namespace { + +struct UnsavedFileHash { + std::string Name; + std::string MD5; +}; + +struct ClangInvocationInfo { + std::string Toolchain; + std::string LibclangOperation; + std::string LibclangOptions; + std::vector Arguments; + std::vector InvocationArguments; + std::vector UnsavedFileHashes; + bool Dump = false; +}; + +} // end anonymous namespace + +LLVM_YAML_IS_SEQUENCE_VECTOR(UnsavedFileHash) + +namespace llvm { +namespace yaml { + +template <> struct MappingTraits { + static void mapping(IO &IO, UnsavedFileHash &Info) { + IO.mapRequired("name", Info.Name); + IO.mapRequired("md5", Info.MD5); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, ClangInvocationInfo &Info) { + IO.mapRequired("toolchain", Info.Toolchain); + IO.mapOptional("libclang.operation", Info.LibclangOperation); + IO.mapOptional("libclang.opts", Info.LibclangOptions); + IO.mapRequired("args", Info.Arguments); + IO.mapOptional("invocation-args", Info.InvocationArguments); + IO.mapOptional("unsaved_file_hashes", Info.UnsavedFileHashes); + } +}; + +} // end namespace yaml +} // end namespace llvm + +static std::string generateReproducerMetaInfo(const ClangInvocationInfo &Info) { + std::string Result; + llvm::raw_string_ostream OS(Result); + OS << '{'; + bool NeedComma = false; + auto EmitKey = [&](StringRef Key) { + if (NeedComma) + OS << ", "; + NeedComma = true; + OS << 
'"' << Key << "\": "; + }; + auto EmitStringKey = [&](StringRef Key, StringRef Value) { + if (Value.empty()) + return; + EmitKey(Key); + OS << '"' << Value << '"'; + }; + EmitStringKey("libclang.operation", Info.LibclangOperation); + EmitStringKey("libclang.opts", Info.LibclangOptions); + if (!Info.InvocationArguments.empty()) { + EmitKey("invocation-args"); + OS << '['; + for (const auto &Arg : llvm::enumerate(Info.InvocationArguments)) { + if (Arg.index()) + OS << ','; + OS << '"' << Arg.value() << '"'; + } + OS << ']'; + } + OS << '}'; + // FIXME: Compare unsaved file hashes and report mismatch in the reproducer. + if (Info.Dump) + llvm::outs() << "REPRODUCER METAINFO: " << OS.str() << "\n"; + return std::move(OS.str()); +} + +/// Generates a reproducer for a set of arguments from a specific invocation. +static std::optional +generateReproducerForInvocationArguments(ArrayRef Argv, + const ClangInvocationInfo &Info, + const llvm::ToolContext &ToolContext) { + using namespace driver; + auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(Argv[0]); + + IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions; + + IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); + DiagnosticsEngine Diags(DiagID, &*DiagOpts, new IgnoringDiagConsumer()); + ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); + Driver TheDriver(ToolContext.Path, llvm::sys::getDefaultTargetTriple(), + Diags); + TheDriver.setTargetAndMode(TargetAndMode); + if (ToolContext.NeedsPrependArg) + TheDriver.setPrependArg(ToolContext.PrependArg); + + std::unique_ptr C(TheDriver.BuildCompilation(Argv)); + if (C && !C->containsError()) { + for (const auto &J : C->getJobs()) { + if (const Command *Cmd = dyn_cast(&J)) { + Driver::CompilationDiagnosticReport Report; + TheDriver.generateCompilationDiagnostics( + *C, *Cmd, generateReproducerMetaInfo(Info), &Report); + return Report; + } + } + } + + return std::nullopt; +} + +std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes); + 
+static void printReproducerInformation( + llvm::raw_ostream &OS, const ClangInvocationInfo &Info, + const driver::Driver::CompilationDiagnosticReport &Report) { + OS << "REPRODUCER:\n"; + OS << "{\n"; + OS << R"("files":[)"; + for (const auto &File : llvm::enumerate(Report.TemporaryFiles)) { + if (File.index()) + OS << ','; + OS << '"' << File.value() << '"'; + } + OS << "]\n}\n"; +} + +int cc1gen_reproducer_main(ArrayRef Argv, const char *Argv0, + void *MainAddr, + const llvm::ToolContext &ToolContext) { + if (Argv.size() < 1) { + llvm::errs() << "error: missing invocation file\n"; + return 1; + } + // Parse the invocation descriptor. + StringRef Input = Argv[0]; + llvm::ErrorOr> Buffer = + llvm::MemoryBuffer::getFile(Input, /*IsText=*/true); + if (!Buffer) { + llvm::errs() << "error: failed to read " << Input << ": " + << Buffer.getError().message() << "\n"; + return 1; + } + llvm::yaml::Input YAML(Buffer.get()->getBuffer()); + ClangInvocationInfo InvocationInfo; + YAML >> InvocationInfo; + if (Argv.size() > 1 && Argv[1] == StringRef("-v")) + InvocationInfo.Dump = true; + + // Create an invocation that will produce the reproducer. + std::vector DriverArgs; + for (const auto &Arg : InvocationInfo.Arguments) + DriverArgs.push_back(Arg.c_str()); + std::string Path = GetExecutablePath(Argv0, /*CanonicalPrefixes=*/true); + DriverArgs[0] = Path.c_str(); + std::optional Report = + generateReproducerForInvocationArguments(DriverArgs, InvocationInfo, + ToolContext); + + // Emit the information about the reproduce files to stdout. + int Result = 1; + if (Report) { + printReproducerInformation(llvm::outs(), InvocationInfo, *Report); + Result = 0; + } + + // Remove the input file. 
+ llvm::sys::fs::remove(Input); + return Result; +} diff --git a/compiler+runtime/src/cpp/clang/driver.cpp b/compiler+runtime/src/cpp/clang/driver.cpp new file mode 100644 index 00000000..83b5bbb7 --- /dev/null +++ b/compiler+runtime/src/cpp/clang/driver.cpp @@ -0,0 +1,454 @@ +//===-- driver.cpp - Clang GCC-Compatible Driver --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the entry point to the clang driver; it is a thin wrapper +// for functionality in the Driver clang library. +// +//===----------------------------------------------------------------------===// + +#include "clang/Driver/Driver.h" +#include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/HeaderInclude.h" +#include "clang/Basic/Stack.h" +#include "clang/Config/config.h" +#include "clang/Driver/Compilation.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" +#include "clang/Driver/ToolChain.h" +#include "clang/Frontend/ChainedDiagnosticConsumer.h" +#include "clang/Frontend/CompilerInvocation.h" +#include "clang/Frontend/SerializedDiagnosticPrinter.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "clang/Frontend/Utils.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/BuryPointer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CrashRecoveryContext.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/LLVMDriver.h" +#include "llvm/Support/Path.h" +#include 
"llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/Host.h" +#include +#include +#include +#include +using namespace clang; +using namespace clang::driver; +using namespace llvm::opt; + +std::string GetExecutablePath(const char *Argv0, bool CanonicalPrefixes) { + if (!CanonicalPrefixes) { + SmallString<128> ExecutablePath(Argv0); + // Do a PATH lookup if Argv0 isn't a valid path. + if (!llvm::sys::fs::exists(ExecutablePath)) + if (llvm::ErrorOr P = + llvm::sys::findProgramByName(ExecutablePath)) + ExecutablePath = *P; + return std::string(ExecutablePath); + } + + // This just needs to be some symbol in the binary; C++ doesn't + // allow taking the address of ::main however. + void *P = (void*) (intptr_t) GetExecutablePath; + return llvm::sys::fs::getMainExecutable(Argv0, P); +} + +static const char *GetStableCStr(llvm::StringSet<> &SavedStrings, StringRef S) { + return SavedStrings.insert(S).first->getKeyData(); +} + +extern int cc1_main(ArrayRef Argv, const char *Argv0, + void *MainAddr); +extern int cc1as_main(ArrayRef Argv, const char *Argv0, + void *MainAddr); +extern int cc1gen_reproducer_main(ArrayRef Argv, + const char *Argv0, void *MainAddr, + const llvm::ToolContext &); + +static void insertTargetAndModeArgs(const ParsedClangName &NameParts, + SmallVectorImpl &ArgVector, + llvm::StringSet<> &SavedStrings) { + // Put target and mode arguments at the start of argument list so that + // arguments specified in command line could override them. Avoid putting + // them at index 0, as an option like '-cc1' must remain the first. + int InsertionPoint = 0; + if (ArgVector.size() > 0) + ++InsertionPoint; + + if (NameParts.DriverMode) { + // Add the mode flag to the arguments. 
+ ArgVector.insert(ArgVector.begin() + InsertionPoint, + GetStableCStr(SavedStrings, NameParts.DriverMode)); + } + + if (NameParts.TargetIsValid) { + const char *arr[] = {"-target", GetStableCStr(SavedStrings, + NameParts.TargetPrefix)}; + ArgVector.insert(ArgVector.begin() + InsertionPoint, + std::begin(arr), std::end(arr)); + } +} + +static void getCLEnvVarOptions(std::string &EnvValue, llvm::StringSaver &Saver, + SmallVectorImpl &Opts) { + llvm::cl::TokenizeWindowsCommandLine(EnvValue, Saver, Opts); + // The first instance of '#' should be replaced with '=' in each option. + for (const char *Opt : Opts) + if (char *NumberSignPtr = const_cast(::strchr(Opt, '#'))) + *NumberSignPtr = '='; +} + +template +static T checkEnvVar(const char *EnvOptSet, const char *EnvOptFile, + std::string &OptFile) { + const char *Str = ::getenv(EnvOptSet); + if (!Str) + return T{}; + + T OptVal = Str; + if (const char *Var = ::getenv(EnvOptFile)) + OptFile = Var; + return OptVal; +} + +static bool SetBackdoorDriverOutputsFromEnvVars(Driver &TheDriver) { + TheDriver.CCPrintOptions = + checkEnvVar("CC_PRINT_OPTIONS", "CC_PRINT_OPTIONS_FILE", + TheDriver.CCPrintOptionsFilename); + if (checkEnvVar("CC_PRINT_HEADERS", "CC_PRINT_HEADERS_FILE", + TheDriver.CCPrintHeadersFilename)) { + TheDriver.CCPrintHeadersFormat = HIFMT_Textual; + TheDriver.CCPrintHeadersFiltering = HIFIL_None; + } else { + std::string EnvVar = checkEnvVar( + "CC_PRINT_HEADERS_FORMAT", "CC_PRINT_HEADERS_FILE", + TheDriver.CCPrintHeadersFilename); + if (!EnvVar.empty()) { + TheDriver.CCPrintHeadersFormat = + stringToHeaderIncludeFormatKind(EnvVar.c_str()); + if (!TheDriver.CCPrintHeadersFormat) { + TheDriver.Diag(clang::diag::err_drv_print_header_env_var) + << 0 << EnvVar; + return false; + } + + const char *FilteringStr = ::getenv("CC_PRINT_HEADERS_FILTERING"); + HeaderIncludeFilteringKind Filtering; + if (!stringToHeaderIncludeFiltering(FilteringStr, Filtering)) { + 
TheDriver.Diag(clang::diag::err_drv_print_header_env_var) + << 1 << FilteringStr; + return false; + } + + if ((TheDriver.CCPrintHeadersFormat == HIFMT_Textual && + Filtering != HIFIL_None) || + (TheDriver.CCPrintHeadersFormat == HIFMT_JSON && + Filtering != HIFIL_Only_Direct_System)) { + TheDriver.Diag(clang::diag::err_drv_print_header_env_var_combination) + << EnvVar << FilteringStr; + return false; + } + TheDriver.CCPrintHeadersFiltering = Filtering; + } + } + + TheDriver.CCLogDiagnostics = + checkEnvVar("CC_LOG_DIAGNOSTICS", "CC_LOG_DIAGNOSTICS_FILE", + TheDriver.CCLogDiagnosticsFilename); + TheDriver.CCPrintProcessStats = + checkEnvVar("CC_PRINT_PROC_STAT", "CC_PRINT_PROC_STAT_FILE", + TheDriver.CCPrintStatReportFilename); + TheDriver.CCPrintInternalStats = + checkEnvVar("CC_PRINT_INTERNAL_STAT", "CC_PRINT_INTERNAL_STAT_FILE", + TheDriver.CCPrintInternalStatReportFilename); + + return true; +} + +static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient, + const std::string &Path) { + // If the clang binary happens to be named cl.exe for compatibility reasons, + // use clang-cl.exe as the prefix to avoid confusion between clang and MSVC. + StringRef ExeBasename(llvm::sys::path::stem(Path)); + if (ExeBasename.equals_insensitive("cl")) + ExeBasename = "clang-cl"; + DiagClient->setPrefix(std::string(ExeBasename)); +} + +static int ExecuteCC1Tool(SmallVectorImpl &ArgV, + const llvm::ToolContext &ToolContext) { + // If we call the cc1 tool from the clangDriver library (through + // Driver::CC1Main), we need to clean up the options usage count. The options + // are currently global, and they might have been used previously by the + // driver. 
+ llvm::cl::ResetAllOptionOccurrences(); + + llvm::BumpPtrAllocator A; + llvm::cl::ExpansionContext ECtx(A, llvm::cl::TokenizeGNUCommandLine); + if (llvm::Error Err = ECtx.expandResponseFiles(ArgV)) { + llvm::errs() << toString(std::move(Err)) << '\n'; + return 1; + } + StringRef Tool = ArgV[1]; + void *GetExecutablePathVP = (void *)(intptr_t)GetExecutablePath; + if (Tool == "-cc1") + return cc1_main(ArrayRef(ArgV).slice(1), ArgV[0], GetExecutablePathVP); + if (Tool == "-cc1as") + return cc1as_main(ArrayRef(ArgV).slice(2), ArgV[0], GetExecutablePathVP); + if (Tool == "-cc1gen-reproducer") + return cc1gen_reproducer_main(ArrayRef(ArgV).slice(2), ArgV[0], + GetExecutablePathVP, ToolContext); + // Reject unknown tools. + llvm::errs() + << "error: unknown integrated tool '" << Tool << "'. " + << "Valid tools include '-cc1', '-cc1as' and '-cc1gen-reproducer'.\n"; + return 1; +} + +int clang_main(int Argc, char **Argv, const llvm::ToolContext &ToolContext) { + noteBottomOfStack(); + llvm::setBugReportMsg("PLEASE submit a bug report to " BUG_REPORT_URL + " and include the crash backtrace, preprocessed " + "source, and associated run script.\n"); + SmallVector Args(Argv, Argv + Argc); + + if (llvm::sys::Process::FixupStandardFileDescriptors()) + return 1; + + llvm::InitializeAllTargets(); + + llvm::BumpPtrAllocator A; + llvm::StringSaver Saver(A); + + const char *ProgName = + ToolContext.NeedsPrependArg ? ToolContext.PrependArg : ToolContext.Path; + + bool ClangCLMode = + IsClangCL(getDriverMode(ProgName, llvm::ArrayRef(Args).slice(1))); + + if (llvm::Error Err = expandResponseFiles(Args, ClangCLMode, A)) { + llvm::errs() << toString(std::move(Err)) << '\n'; + return 1; + } + + // Handle -cc1 integrated tools. 
+ if (Args.size() >= 2 && StringRef(Args[1]).starts_with("-cc1")) + return ExecuteCC1Tool(Args, ToolContext); + + // Handle options that need handling before the real command line parsing in + // Driver::BuildCompilation() + bool CanonicalPrefixes = true; + for (int i = 1, size = Args.size(); i < size; ++i) { + // Skip end-of-line response file markers + if (Args[i] == nullptr) + continue; + if (StringRef(Args[i]) == "-canonical-prefixes") + CanonicalPrefixes = true; + else if (StringRef(Args[i]) == "-no-canonical-prefixes") + CanonicalPrefixes = false; + } + + // Handle CL and _CL_ which permits additional command line options to be + // prepended or appended. + if (ClangCLMode) { + // Arguments in "CL" are prepended. + std::optional OptCL = llvm::sys::Process::GetEnv("CL"); + if (OptCL) { + SmallVector PrependedOpts; + getCLEnvVarOptions(*OptCL, Saver, PrependedOpts); + + // Insert right after the program name to prepend to the argument list. + Args.insert(Args.begin() + 1, PrependedOpts.begin(), PrependedOpts.end()); + } + // Arguments in "_CL_" are appended. + std::optional Opt_CL_ = llvm::sys::Process::GetEnv("_CL_"); + if (Opt_CL_) { + SmallVector AppendedOpts; + getCLEnvVarOptions(*Opt_CL_, Saver, AppendedOpts); + + // Insert at the end of the argument list to append. + Args.append(AppendedOpts.begin(), AppendedOpts.end()); + } + } + + llvm::StringSet<> SavedStrings; + // Handle CCC_OVERRIDE_OPTIONS, used for editing a command line behind the + // scenes. + if (const char *OverrideStr = ::getenv("CCC_OVERRIDE_OPTIONS")) { + // FIXME: Driver shouldn't take extra initial argument. + driver::applyOverrideOptions(Args, OverrideStr, SavedStrings, + &llvm::errs()); + } + + std::string Path = GetExecutablePath(ToolContext.Path, CanonicalPrefixes); + + // Whether the cc1 tool should be called inside the current process, or if we + // should spawn a new clang subprocess (old behavior). 
+ // Not having an additional process saves some execution time of Windows, + // and makes debugging and profiling easier. + bool UseNewCC1Process = CLANG_SPAWN_CC1; + for (const char *Arg : Args) + UseNewCC1Process = llvm::StringSwitch(Arg) + .Case("-fno-integrated-cc1", true) + .Case("-fintegrated-cc1", false) + .Default(UseNewCC1Process); + + IntrusiveRefCntPtr DiagOpts = + CreateAndPopulateDiagOpts(Args); + + TextDiagnosticPrinter *DiagClient + = new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts); + FixupDiagPrefixExeName(DiagClient, ProgName); + + IntrusiveRefCntPtr DiagID(new DiagnosticIDs()); + + DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient); + + if (!DiagOpts->DiagnosticSerializationFile.empty()) { + auto SerializedConsumer = + clang::serialized_diags::create(DiagOpts->DiagnosticSerializationFile, + &*DiagOpts, /*MergeChildRecords=*/true); + Diags.setClient(new ChainedDiagnosticConsumer( + Diags.takeClient(), std::move(SerializedConsumer))); + } + + ProcessWarningOptions(Diags, *DiagOpts, /*ReportDiags=*/false); + + Driver TheDriver(Path, llvm::sys::getDefaultTargetTriple(), Diags); + auto TargetAndMode = ToolChain::getTargetAndModeFromProgramName(ProgName); + TheDriver.setTargetAndMode(TargetAndMode); + // If -canonical-prefixes is set, GetExecutablePath will have resolved Path + // to the llvm driver binary, not clang. In this case, we need to use + // PrependArg which should be clang-*. Checking just CanonicalPrefixes is + // safe even in the normal case because PrependArg will be null so + // setPrependArg will be a no-op. 
+ if (ToolContext.NeedsPrependArg || CanonicalPrefixes) + TheDriver.setPrependArg(ToolContext.PrependArg); + + insertTargetAndModeArgs(TargetAndMode, Args, SavedStrings); + + if (!SetBackdoorDriverOutputsFromEnvVars(TheDriver)) + return 1; + + if (!UseNewCC1Process) { + TheDriver.CC1Main = [ToolContext](SmallVectorImpl &ArgV) { + return ExecuteCC1Tool(ArgV, ToolContext); + }; + // Ensure the CC1Command actually catches cc1 crashes + llvm::CrashRecoveryContext::Enable(); + } + + std::unique_ptr C(TheDriver.BuildCompilation(Args)); + + Driver::ReproLevel ReproLevel = Driver::ReproLevel::OnCrash; + if (Arg *A = C->getArgs().getLastArg(options::OPT_gen_reproducer_eq)) { + auto Level = + llvm::StringSwitch>(A->getValue()) + .Case("off", Driver::ReproLevel::Off) + .Case("crash", Driver::ReproLevel::OnCrash) + .Case("error", Driver::ReproLevel::OnError) + .Case("always", Driver::ReproLevel::Always) + .Default(std::nullopt); + if (!Level) { + llvm::errs() << "Unknown value for " << A->getSpelling() << ": '" + << A->getValue() << "'\n"; + return 1; + } + ReproLevel = *Level; + } + if (!!::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) + ReproLevel = Driver::ReproLevel::Always; + + int Res = 1; + bool IsCrash = false; + Driver::CommandStatus CommandStatus = Driver::CommandStatus::Ok; + // Pretend the first command failed if ReproStatus is Always. + const Command *FailingCommand = nullptr; + if (!C->getJobs().empty()) + FailingCommand = &*C->getJobs().begin(); + if (C && !C->containsError()) { + SmallVector, 4> FailingCommands; + Res = TheDriver.ExecuteCompilation(*C, FailingCommands); + + for (const auto &P : FailingCommands) { + int CommandRes = P.first; + FailingCommand = P.second; + if (!Res) + Res = CommandRes; + + // If result status is < 0, then the driver command signalled an error. + // If result status is 70, then the driver command reported a fatal error. + // On Windows, abort will return an exit code of 3. 
In these cases, + // generate additional diagnostic information if possible. + IsCrash = CommandRes < 0 || CommandRes == 70; +#ifdef _WIN32 + IsCrash |= CommandRes == 3; +#endif +#if LLVM_ON_UNIX + // When running in integrated-cc1 mode, the CrashRecoveryContext returns + // the same codes as if the program crashed. See section "Exit Status for + // Commands": + // https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xcu_chap02.html + IsCrash |= CommandRes > 128; +#endif + CommandStatus = + IsCrash ? Driver::CommandStatus::Crash : Driver::CommandStatus::Error; + if (IsCrash) + break; + } + } + + // Print the bug report message that would be printed if we did actually + // crash, but only if we're crashing due to FORCE_CLANG_DIAGNOSTICS_CRASH. + if (::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH")) + llvm::dbgs() << llvm::getBugReportMsg(); + if (FailingCommand != nullptr && + TheDriver.maybeGenerateCompilationDiagnostics(CommandStatus, ReproLevel, + *C, *FailingCommand)) + Res = 1; + + Diags.getClient()->finish(); + + if (!UseNewCC1Process && IsCrash) { + // When crashing in -fintegrated-cc1 mode, bury the timer pointers, because + // the internal linked list might point to already released stack frames. + llvm::BuryPointer(llvm::TimerGroup::aquireDefaultGroup()); + } else { + // If any timers were active but haven't been destroyed yet, print their + // results now. This happens in -disable-free mode. + llvm::TimerGroup::printAll(llvm::errs()); + llvm::TimerGroup::clearAll(); + } + +#ifdef _WIN32 + // Exit status should not be negative on Win32, unless abnormal termination. + // Once abnormal termination was caught, negative status should not be + // propagated. + if (Res < 0) + Res = 1; +#endif + + // If we have multiple failing commands, we return the result of the first + // failing command. 
+ return Res; +} diff --git a/compiler+runtime/src/cpp/jank/c_api.cpp b/compiler+runtime/src/cpp/jank/c_api.cpp index 010731c6..b2797a8c 100644 --- a/compiler+runtime/src/cpp/jank/c_api.cpp +++ b/compiler+runtime/src/cpp/jank/c_api.cpp @@ -3,6 +3,30 @@ using namespace jank; using namespace jank::runtime; +template +struct make_closure_arity; + +template +struct make_closure_arity_arg +{ + using type = object *; +}; + +template +struct make_closure_arity> +{ + using type = object *(*)(void *, typename make_closure_arity_arg::type...); +}; + +template <> +struct make_closure_arity> +{ + using type = object *(*)(void *); +}; + +template +using closure_arity = typename make_closure_arity>::type; + extern "C" { jank_object_ptr jank_eval(jank_object_ptr const s) @@ -111,45 +135,74 @@ extern "C" jank_object_ptr jank_function_create(jank_arity_flags const arity_flags) { - fmt::println("jank_function_create"); - return erase(obj::nil::nil_const()); + return erase(make_box(arity_flags)); } - jank_object_ptr - jank_function_create_closure(jank_arity_flags const arity_flags, void * const context) + void jank_function_set_arity0(jank_object_ptr const fn, jank_object_ptr (* const f)()) { - fmt::println("jank_function_create_closure {}", fmt::ptr(context)); - return erase(obj::nil::nil_const()); } - jank_object_ptr jank_function_set_arity0(jank_object_ptr const fn, jank_object_ptr (* const f)()) + void + jank_function_set_arity1(jank_object_ptr const fn, jank_object_ptr (* const f)(jank_object_ptr)) { - fmt::println("jank_function_set_arity0"); - return nullptr; } - jank_object_ptr - jank_function_set_arity1(jank_object_ptr const fn, jank_object_ptr (* const f)(jank_object_ptr)) + void jank_function_set_arity2(jank_object_ptr const fn, + jank_object_ptr (* const f)(jank_object_ptr, jank_object_ptr)) { - fmt::println("jank_function_set_arity1"); - return nullptr; } - jank_object_ptr - jank_function_set_arity2(jank_object_ptr const fn, - jank_object_ptr (* const f)(jank_object_ptr, 
jank_object_ptr)) + void jank_function_set_arity3(jank_object_ptr const fn, + jank_object_ptr (* const f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr)) + { + } + + jank_object_ptr jank_closure_create(jank_arity_flags const arity_flags, void * const context) + { + return erase(make_box(arity_flags, context)); + } + + void jank_closure_set_arity0(jank_object_ptr const fn, jank_object_ptr (* const f)()) + { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-function-type-mismatch" + auto const fn_obj(reinterpret_cast(fn)); + try_object(fn_obj)->arity_0 = reinterpret_cast>(f); +#pragma clang diagnostic pop + } + + void + jank_closure_set_arity1(jank_object_ptr const fn, jank_object_ptr (* const f)(jank_object_ptr)) + { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-function-type-mismatch" + auto const fn_obj(reinterpret_cast(fn)); + try_object(fn_obj)->arity_1 = reinterpret_cast>(f); +#pragma clang diagnostic pop + } + + void jank_closure_set_arity2(jank_object_ptr const fn, + jank_object_ptr (* const f)(jank_object_ptr, jank_object_ptr)) { - fmt::println("jank_function_set_arity2"); - return nullptr; +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-function-type-mismatch" + auto const fn_obj(reinterpret_cast(fn)); + try_object(fn_obj)->arity_2 = reinterpret_cast>(f); +#pragma clang diagnostic pop } - jank_object_ptr jank_function_set_arity3(jank_object_ptr const fn, - jank_object_ptr (* const f)(jank_object_ptr, - jank_object_ptr, - jank_object_ptr)) + void jank_closure_set_arity3(jank_object_ptr const fn, + jank_object_ptr (* const f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr)) { - fmt::println("jank_function_set_arity3"); - return nullptr; +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-function-type-mismatch" + auto const fn_obj(reinterpret_cast(fn)); + try_object(fn_obj)->arity_3 = reinterpret_cast>(f); +#pragma clang diagnostic pop } 
jank_native_bool jank_truthy(jank_object_ptr const o) diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 0128ae8d..4bc51e83 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -390,7 +390,9 @@ namespace jank::codegen llvm::Value *fn_obj{}; - if(captures.empty()) + auto const is_closure(!captures.empty()); + + if(!is_closure) { auto const create_fn_type( llvm::FunctionType::get(builder->getPtrTy(), { builder->getInt8Ty() }, false)); @@ -425,21 +427,24 @@ namespace jank::codegen llvm::FunctionType::get(builder->getPtrTy(), { builder->getInt8Ty(), builder->getPtrTy() }, false)); - auto const create_fn( - module->getOrInsertFunction("jank_function_create_closure", create_fn_type)); + auto const create_fn(module->getOrInsertFunction("jank_closure_create", create_fn_type)); fn_obj = builder->CreateCall(create_fn, { arity_flags, closure_obj }); } for(auto const &arity : expr.arities) { - std::vector const arg_types{ arity.params.size() + 1, builder->getPtrTy() }; - auto const set_arity_fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); - auto const set_arity_fn( - module->getOrInsertFunction(fmt::format("jank_function_set_arity{}", arity.params.size()), - set_arity_fn_type)); + auto const set_arity_fn_type( + llvm::FunctionType::get(builder->getVoidTy(), + { builder->getPtrTy(), builder->getPtrTy() }, + false)); + auto const set_arity_fn(module->getOrInsertFunction( + is_closure ? 
fmt::format("jank_closure_set_arity{}", arity.params.size()) + : fmt::format("jank_function_set_arity{}", arity.params.size()), + set_arity_fn_type)); std::vector const target_arg_types{ arity.params.size(), builder->getPtrTy() }; - auto const target_fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); + auto const target_fn_type( + llvm::FunctionType::get(builder->getPtrTy(), target_arg_types, false)); auto target_fn(module->getOrInsertFunction( fmt::format("{}_{}", munge(expr.unique_name), arity.params.size()), target_fn_type)); diff --git a/compiler+runtime/src/cpp/jank/runtime/obj/jit_closure.cpp b/compiler+runtime/src/cpp/jank/runtime/obj/jit_closure.cpp new file mode 100644 index 00000000..8368900a --- /dev/null +++ b/compiler+runtime/src/cpp/jank/runtime/obj/jit_closure.cpp @@ -0,0 +1,212 @@ +#include + +namespace jank::runtime +{ + obj::jit_closure::static_object(arity_flag_t const arity_flags) + { + } + + obj::jit_closure::static_object(arity_flag_t const arity_flags, void * const context) + : context{ context } + , arity_flags{ arity_flags } + { + } + + obj::jit_closure::static_object(object_ptr const meta) + : meta{ meta } + { + } + + native_bool obj::jit_closure::equal(object const &rhs) const + { + return &base == &rhs; + } + + native_persistent_string obj::jit_closure::to_string() + { + fmt::memory_buffer buff; + to_string(buff); + return native_persistent_string{ buff.data(), buff.size() }; + } + + void obj::jit_closure::to_string(fmt::memory_buffer &buff) + { + auto const name( + get(meta.unwrap_or(obj::nil::nil_const()), __rt_ctx->intern_keyword("name").expect_ok())); + fmt::format_to(std::back_inserter(buff), + "{} ({}@{})", + (name->type == object_type::nil + ? 
"unknown" + : expect_object(name)->data), + magic_enum::enum_name(base.type), + fmt::ptr(&base)); + } + + native_persistent_string obj::jit_closure::to_code_string() + { + return to_string(); + } + + native_hash obj::jit_closure::to_hash() const + { + return static_cast(reinterpret_cast(this)); + } + + obj::jit_closure_ptr obj::jit_closure::with_meta(object_ptr const m) + { + auto const new_meta(behavior::detail::validate_meta(m)); + meta = new_meta; + return this; + } + + object_ptr obj::jit_closure::call() + { + if(!arity_0) + { + throw invalid_arity<0>{ runtime::to_string(this_object_ptr()) }; + } + return arity_0(context); + } + + object_ptr obj::jit_closure::call(object_ptr const a1) + { + if(!arity_1) + { + throw invalid_arity<1>{ runtime::to_string(this_object_ptr()) }; + } + return arity_1(context, a1); + } + + object_ptr obj::jit_closure::call(object_ptr const a1, object_ptr const a2) + { + if(!arity_2) + { + throw invalid_arity<2>{ runtime::to_string(this_object_ptr()) }; + } + return arity_2(context, a1, a2); + } + + object_ptr obj::jit_closure::call(object_ptr const a1, object_ptr const a2, object_ptr const a3) + { + if(!arity_3) + { + throw invalid_arity<3>{ runtime::to_string(this_object_ptr()) }; + } + return arity_3(context, a1, a2, a3); + } + + object_ptr obj::jit_closure::call(object_ptr const a1, + object_ptr const a2, + object_ptr const a3, + object_ptr const a4) + { + if(!arity_4) + { + throw invalid_arity<4>{ runtime::to_string(this_object_ptr()) }; + } + return arity_4(context, a1, a2, a3, a4); + } + + object_ptr obj::jit_closure::call(object_ptr const a1, + object_ptr const a2, + object_ptr const a3, + object_ptr const a4, + object_ptr const a5) + { + if(!arity_5) + { + throw invalid_arity<5>{ runtime::to_string(this_object_ptr()) }; + } + return arity_5(context, a1, a2, a3, a4, a5); + } + + object_ptr obj::jit_closure::call(object_ptr const a1, + object_ptr const a2, + object_ptr const a3, + object_ptr const a4, + object_ptr const a5, + 
object_ptr const a6) + { + if(!arity_6) + { + throw invalid_arity<6>{ runtime::to_string(this_object_ptr()) }; + } + return arity_6(context, a1, a2, a3, a4, a5, a6); + } + + object_ptr obj::jit_closure::call(object_ptr const a1, + object_ptr const a2, + object_ptr const a3, + object_ptr const a4, + object_ptr const a5, + object_ptr const a6, + object_ptr const a7) + { + if(!arity_7) + { + throw invalid_arity<7>{ runtime::to_string(this_object_ptr()) }; + } + return arity_7(context, a1, a2, a3, a4, a5, a6, a7); + } + + object_ptr obj::jit_closure::call(object_ptr const a1, + object_ptr const a2, + object_ptr const a3, + object_ptr const a4, + object_ptr const a5, + object_ptr const a6, + object_ptr const a7, + object_ptr const a8) + { + if(!arity_8) + { + throw invalid_arity<8>{ runtime::to_string(this_object_ptr()) }; + } + return arity_8(context, a1, a2, a3, a4, a5, a6, a7, a8); + } + + object_ptr obj::jit_closure::call(object_ptr const a1, + object_ptr const a2, + object_ptr const a3, + object_ptr const a4, + object_ptr const a5, + object_ptr const a6, + object_ptr const a7, + object_ptr const a8, + object_ptr const a9) + { + if(!arity_9) + { + throw invalid_arity<9>{ runtime::to_string(this_object_ptr()) }; + } + return arity_9(context, a1, a2, a3, a4, a5, a6, a7, a8, a9); + } + + object_ptr obj::jit_closure::call(object_ptr const a1, + object_ptr const a2, + object_ptr const a3, + object_ptr const a4, + object_ptr const a5, + object_ptr const a6, + object_ptr const a7, + object_ptr const a8, + object_ptr const a9, + object_ptr const a10) + { + if(!arity_10) + { + throw invalid_arity<10>{ runtime::to_string(this_object_ptr()) }; + } + return arity_10(context, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10); + } + + behavior::callable::arity_flag_t obj::jit_closure::get_arity_flags() const + { + return arity_flags; + } + + object_ptr obj::jit_closure::this_object_ptr() + { + return &this->base; + } +} From 9ef039b52c95d553ee9a88843405b60297031f06 Mon Sep 17 00:00:00 
2001 From: jeaye Date: Mon, 21 Oct 2024 16:33:27 -0700 Subject: [PATCH 012/101] Fix reduced visit --- compiler+runtime/include/cpp/jank/runtime/erasure.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler+runtime/include/cpp/jank/runtime/erasure.hpp b/compiler+runtime/include/cpp/jank/runtime/erasure.hpp index 2f1a53cb..283c0351 100644 --- a/compiler+runtime/include/cpp/jank/runtime/erasure.hpp +++ b/compiler+runtime/include/cpp/jank/runtime/erasure.hpp @@ -386,7 +386,7 @@ namespace jank::runtime break; case object_type::reduced: { - return fn(expect_object(erased), std::forward(args)...); + return fn(expect_object(erased), std::forward(args)...); } break; case object_type::ns: From 1ac6623e77460a2a300c2453a55e2fbdf18943ed Mon Sep 17 00:00:00 2001 From: jeaye Date: Fri, 1 Nov 2024 23:08:19 -0700 Subject: [PATCH 013/101] Fix lex tests with new real size --- compiler+runtime/test/cpp/jank/read/lex.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/compiler+runtime/test/cpp/jank/read/lex.cpp b/compiler+runtime/test/cpp/jank/read/lex.cpp index b0283e1f..6fd8e0d1 100644 --- a/compiler+runtime/test/cpp/jank/read/lex.cpp +++ b/compiler+runtime/test/cpp/jank/read/lex.cpp @@ -658,12 +658,12 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_results({ - token{ 0, 3, token_kind::real, 1000.0l }, - token{ 4, 4, token_kind::real, -100.0l }, - token{ 9, 5, token_kind::real, 0.002l }, - token{ 15, 7, token_kind::real, 2.23e-07l }, - token{ 23, 7, token_kind::real, -1.2e+19l }, - token{ 30, 2, token_kind::character, "\\a"sv }, + token{ 0, 3, token_kind::real, 1000.0 }, + token{ 4, 4, token_kind::real, -100.0 }, + token{ 9, 5, token_kind::real, 0.002 }, + token{ 15, 7, token_kind::real, 2.23e-07 }, + token{ 23, 7, token_kind::real, -1.2e+19 }, + token{ 30, 2, token_kind::character, "\\a"sv }, })); } @@ -674,7 +674,7 @@ namespace jank::read::lex CHECK(tokens == 
make_results({ error{ 0, 2, "unexpected end of real, expecting exponent" }, - token{ 3, 5, token_kind::real, 2.3l }, + token{ 3, 5, token_kind::real, 2.3 }, error{ 9, 13, "unexpected end of real, expecting exponent" }, error{ 14, 19, "unexpected end of real, expecting exponent" }, })); @@ -686,7 +686,7 @@ namespace jank::read::lex native_vector> tokens(p.begin(), p.end()); CHECK(tokens == make_results({ - token{ 0, 4, token_kind::real, 12.3l }, + token{ 0, 4, token_kind::real, 12.3 }, error{ 5, 9, "invalid number" }, error{ 9, "expected whitespace before next token" }, token{ 9, token_kind::symbol, "-"sv }, @@ -702,7 +702,7 @@ namespace jank::read::lex == make_results({ error{ 0, 3, "invalid number" }, error{ 3, "unexpected character: ." }, - token{ 5, 4, token_kind::real, 12.3l }, + token{ 5, 4, token_kind::real, 12.3 }, error{ 10, 14, "invalid number" }, error{ 14, "unexpected character: ." }, error{ 15, "expected whitespace before next token" }, @@ -724,7 +724,7 @@ namespace jank::read::lex error{ 13, 16, "unexpected end of real, expecting exponent" }, error{ 16, "expected whitespace before next token" }, token{ 16, 3, token_kind::symbol, "Foo"sv }, - token{ 20, 3, token_kind::real, 300000.0l }, + token{ 20, 3, token_kind::real, 300000.0 }, error{ 23, "expected whitespace before next token" }, token{ 23, 3, token_kind::symbol, "fOo"sv }, })); From c5d6aaf29f1f289b13d909a3a3e1ae6337b8b021 Mon Sep 17 00:00:00 2001 From: jeaye Date: Sat, 2 Nov 2024 13:16:13 -0700 Subject: [PATCH 014/101] Add named recursion support to IR codegen --- .../src/cpp/jank/codegen/llvm_processor.cpp | 103 +++++++++++++----- 1 file changed, 73 insertions(+), 30 deletions(-) diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 4bc51e83..4d9808f4 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -70,20 +70,16 @@ namespace 
jank::codegen { auto const captures(root_fn.captures()); auto const is_closure(!captures.empty()); - fmt::println("Creating fn {} with arities: {} captures: {}", - root_fn.unique_name, - root_fn.arities.size(), - captures.size()); /* Closures get one extra parameter, the first one, which is a pointer to the closure's * context. The context is a struct containing all captured values. */ std::vector const arg_types{ arity.params.size() + is_closure, builder->getPtrTy() }; auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); - fn = llvm::Function::Create(fn_type, - llvm::Function::ExternalLinkage, - fmt::format("{}_{}", struct_name, arity.params.size()), - *module); + auto fn_value( + module->getOrInsertFunction(fmt::format("{}_{}", struct_name, arity.params.size()), fn_type)); + fn = llvm::dyn_cast(fn_value.getCallee()); + fn->setLinkage(llvm::Function::ExternalLinkage); auto const entry(llvm::BasicBlock::Create(*context, "entry", fn)); builder->SetInsertPoint(entry); @@ -219,34 +215,78 @@ namespace jank::codegen llvm::Value *llvm_processor::gen(analyze::expr::call const &expr, analyze::expr::function_arity const &arity) { - auto const callee(gen(expr.source_expr, arity)); + /* Named recursion is a special kind of call. We can't go always through a var, since there + * may not be one. We can't just use the fn's name, since we could be recursing into a + * different arity. Finally, we need to keep in account whether or not this fn is a closure. + * + * For named recursion calls, we don't use dynamic_call. We just call the generated C fn + * directly. This doesn't impede interactivity, since the whole thing will be redefined + * if a new fn is created. 
*/ + if(expr.is_named_recur) + { + auto const is_closure(!root_fn.captures().empty()); - llvm::SmallVector arg_handles; - llvm::SmallVector arg_types; - arg_handles.reserve(expr.arg_exprs.size() + 1); - arg_types.reserve(expr.arg_exprs.size() + 1); + llvm::SmallVector arg_handles; + llvm::SmallVector arg_types; + arg_handles.reserve(expr.arg_exprs.size() + is_closure); + arg_types.reserve(expr.arg_exprs.size() + is_closure); - arg_handles.emplace_back(callee); - arg_types.emplace_back(builder->getPtrTy()); + if(is_closure) + { + arg_handles.emplace_back(builder->GetInsertBlock()->getParent()->getArg(0)); + arg_types.emplace_back(builder->getPtrTy()); + } + + for(auto const &arg_expr : expr.arg_exprs) + { + arg_handles.emplace_back(gen(arg_expr, arity)); + arg_types.emplace_back(builder->getPtrTy()); + } - for(auto const &arg_expr : expr.arg_exprs) + auto const call_fn_name( + fmt::format("{}_{}", munge(root_fn.unique_name), expr.arg_exprs.size())); + auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); + auto const fn(module->getOrInsertFunction(call_fn_name, fn_type)); + auto const call(builder->CreateCall(fn, arg_handles)); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(call); + } + + return call; + } + else { - arg_handles.emplace_back(gen(arg_expr, arity)); + auto const callee(gen(expr.source_expr, arity)); + + llvm::SmallVector arg_handles; + llvm::SmallVector arg_types; + arg_handles.reserve(expr.arg_exprs.size() + 1); + arg_types.reserve(expr.arg_exprs.size() + 1); + + arg_handles.emplace_back(callee); arg_types.emplace_back(builder->getPtrTy()); - } - auto const call_fn_name(arity_to_call_fn(expr.arg_exprs.size())); + for(auto const &arg_expr : expr.arg_exprs) + { + arg_handles.emplace_back(gen(arg_expr, arity)); + arg_types.emplace_back(builder->getPtrTy()); + } - auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); - auto const 
fn(module->getOrInsertFunction(call_fn_name.c_str(), fn_type)); - auto const call(builder->CreateCall(fn, arg_handles)); + auto const call_fn_name(arity_to_call_fn(expr.arg_exprs.size())); - if(expr.expr_type == analyze::expression_type::return_statement) - { - return builder->CreateRet(call); - } + auto const fn_type(llvm::FunctionType::get(builder->getPtrTy(), arg_types, false)); + auto const fn(module->getOrInsertFunction(call_fn_name.c_str(), fn_type)); + auto const call(builder->CreateCall(fn, arg_handles)); - return call; + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(call); + } + + return call; + } } llvm::Value * @@ -586,15 +626,18 @@ namespace jank::codegen return builder->CreateUnreachable(); } - llvm::Value *llvm_processor::gen(analyze::expr::try_ const &, - analyze::expr::function_arity const &) + /* TODO: Remove arity from gen */ + llvm::Value *llvm_processor::gen(analyze::expr::try_ const &expr, + analyze::expr::function_arity const &arity) { - return nullptr; + /* TODO: Implement try. 
*/ + return gen(expr.body, arity); } llvm::Value *llvm_processor::gen(analyze::expr::native_raw const &, analyze::expr::function_arity const &) { + throw std::runtime_error{ "no ir codegen for native/raw" }; return nullptr; } From 83056cd71c53ce934a273c3bc58799c16f2c98b1 Mon Sep 17 00:00:00 2001 From: jeaye Date: Sat, 2 Nov 2024 15:02:10 -0700 Subject: [PATCH 015/101] Fix codegen for if exprs in return position --- .../src/cpp/jank/codegen/llvm_processor.cpp | 65 ++++++++++++------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 4d9808f4..7edb7e8e 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -129,6 +129,7 @@ namespace jank::codegen builder->SetInsertPoint(global_ctor_block); builder->CreateRetVoid(); + /* TODO: Verify module? */ llvm::verifyFunction(*fn); llvm::verifyFunction(*global_ctor_block->getParent()); } @@ -501,14 +502,17 @@ namespace jank::codegen } llvm::Value *llvm_processor::gen(analyze::expr::recur const &expr, - analyze::expr::function_arity const &) + analyze::expr::function_arity const &arity) { - if(expr.expr_type == analyze::expression_type::return_statement) - { - return builder->CreateRet(nullptr); - } - - return nullptr; + analyze::expr::call call_expr{ + analyze::expression_base{ {}, expr.expr_type, expr.frame }, + nullptr, + expr.args, + expr.arg_exprs, + true + }; + auto const call(gen(call_expr, arity)); + return call; } llvm::Value *llvm_processor::gen(analyze::expr::let const &expr, @@ -530,10 +534,7 @@ namespace jank::codegen auto const ret(gen(expr.body, arity)); locals = std::move(old_locals); - if(expr.expr_type == analyze::expression_type::return_statement) - { - return builder->CreateRet(ret); - } + /* XXX: No return creation, since we rely on the body to do that. 
*/ return ret; } @@ -571,6 +572,10 @@ namespace jank::codegen llvm::Value *llvm_processor::gen(analyze::expr::if_ const &expr, analyze::expr::function_arity const &arity) { + /* If we're in return position, our then/else branches will generate return instructions + * for us. Since LLVM basic blocks can only have one terminating instruction, we need + * to take care to not generate our own, too. */ + auto const is_return(expr.expr_type == analyze::expression_type::return_statement); auto const condition(gen(expr.condition, arity)); auto const truthy_fn_type( llvm::FunctionType::get(builder->getInt8Ty(), { builder->getPtrTy() }, false)); @@ -585,32 +590,42 @@ namespace jank::codegen auto const merge_block(llvm::BasicBlock::Create(*context, "ifcont")); builder->CreateCondBr(cmp, then_block, else_block); - builder->SetInsertPoint(then_block); + builder->SetInsertPoint(then_block); auto const then(gen(expr.then, arity)); - builder->CreateBr(merge_block); + + if(!is_return) + { + builder->CreateBr(merge_block); + } + /* Codegen for `then` can change the current block, so track that. */ then_block = builder->GetInsertBlock(); current_fn->insert(current_fn->end(), else_block); - builder->SetInsertPoint(else_block); + builder->SetInsertPoint(else_block); auto const else_(gen(expr.else_.unwrap(), arity)); - builder->CreateBr(merge_block); + + if(!is_return) + { + builder->CreateBr(merge_block); + } + /* Codegen for `else` can change the current block, so track that. 
*/ else_block = builder->GetInsertBlock(); - current_fn->insert(current_fn->end(), merge_block); - - builder->SetInsertPoint(merge_block); - auto const phi(builder->CreatePHI(builder->getPtrTy(), 2, "iftmp")); - phi->addIncoming(then, then_block); - phi->addIncoming(else_, else_block); - if(expr.expr_type == analyze::expression_type::return_statement) + if(!is_return) { - return builder->CreateRet(phi); + current_fn->insert(current_fn->end(), merge_block); + builder->SetInsertPoint(merge_block); + auto const phi( + builder->CreatePHI(is_return ? builder->getVoidTy() : builder->getPtrTy(), 2, "iftmp")); + phi->addIncoming(then, then_block); + phi->addIncoming(else_, else_block); + + return phi; } - - return phi; + return nullptr; } llvm::Value *llvm_processor::gen(analyze::expr::throw_ const &expr, From dd318c555de3aab0b7eb764d01eb817cd87c3289 Mon Sep 17 00:00:00 2001 From: jeaye Date: Sat, 2 Nov 2024 16:31:50 -0700 Subject: [PATCH 016/101] Handle IR codegen for if with no else --- .../src/cpp/jank/codegen/llvm_processor.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 7edb7e8e..89079466 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -604,7 +604,16 @@ namespace jank::codegen current_fn->insert(current_fn->end(), else_block); builder->SetInsertPoint(else_block); - auto const else_(gen(expr.else_.unwrap(), arity)); + llvm::Value *else_{}; + + if(expr.else_.is_some()) + { + else_ = gen(expr.else_.unwrap(), arity); + } + else + { + else_ = gen_global(obj::nil::nil_const()); + } if(!is_return) { From 408ec8d64664d687072ec251da25ca01c40be830 Mon Sep 17 00:00:00 2001 From: jeaye Date: Sat, 2 Nov 2024 18:07:33 -0700 Subject: [PATCH 017/101] Fix a few IR codegen issues --- .../src/cpp/jank/codegen/llvm_processor.cpp | 20 
++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp index 89079466..a33f00a4 100644 --- a/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp +++ b/compiler+runtime/src/cpp/jank/codegen/llvm_processor.cpp @@ -613,6 +613,10 @@ namespace jank::codegen else { else_ = gen_global(obj::nil::nil_const()); + if(expr.expr_type == analyze::expression_type::return_statement) + { + else_ = builder->CreateRet(else_); + } } if(!is_return) @@ -643,11 +647,17 @@ namespace jank::codegen auto const value(gen(expr.value, arity)); auto const fn_type( llvm::FunctionType::get(builder->getPtrTy(), { builder->getPtrTy() }, false)); - auto const fn(module->getOrInsertFunction("jank_throw", fn_type)); + auto fn(module->getOrInsertFunction("jank_throw", fn_type)); + llvm::dyn_cast(fn.getCallee())->setDoesNotReturn(); llvm::SmallVector args{ value }; - builder->CreateCall(fn, args); - return builder->CreateUnreachable(); + auto const call(builder->CreateCall(fn, args)); + + if(expr.expr_type == analyze::expression_type::return_statement) + { + return builder->CreateRet(call); + } + return call; } /* TODO: Remove arity from gen */ @@ -949,8 +959,8 @@ namespace jank::codegen false)); auto const create_fn(module->getOrInsertFunction("jank_keyword_intern", create_fn_type)); - llvm::SmallVector args{ gen_c_string(k->sym.ns.c_str()), - gen_c_string(k->sym.name.c_str()) }; + llvm::SmallVector args{ gen_global(make_box(k->sym.ns)), + gen_global(make_box(k->sym.name)) }; auto const call(builder->CreateCall(create_fn, args)); builder->CreateStore(call, global); From 16a98b36dd5463ed9194a73e3e53b26e3bbff34a Mon Sep 17 00:00:00 2001 From: jeaye Date: Sat, 2 Nov 2024 22:07:57 -0700 Subject: [PATCH 018/101] Snapshot IR port --- compiler+runtime/CMakeLists.txt | 1 + .../cpp/clojure/core_native/phase_1.hpp | 3 + compiler+runtime/include/cpp/jank/c_api.h | 108 
+ .../include/cpp/jank/runtime/convert.hpp | 36 + .../include/cpp/jank/runtime/convert/from.hpp | 7 + .../cpp/jank/runtime/convert/function.hpp | 29 + .../include/cpp/jank/runtime/convert/into.hpp | 112 + .../include/cpp/jank/runtime/core.hpp | 14 + .../include/cpp/jank/runtime/core/seq.hpp | 4 +- .../cpp/jank/runtime/obj/jit_closure.hpp | 1 - .../cpp/jank/runtime/obj/jit_function.hpp | 71 +- .../include/cpp/jank/runtime/obj/symbol.hpp | 1 + .../src/cpp/clojure/core_native/phase_1.cpp | 155 + .../src/cpp/jank/analyze/processor.cpp | 1 + compiler+runtime/src/cpp/jank/c_api.cpp | 282 +- .../src/cpp/jank/codegen/llvm_processor.cpp | 3 +- compiler+runtime/src/cpp/jank/evaluate.cpp | 27 +- .../src/cpp/jank/runtime/core.cpp | 61 +- .../src/cpp/jank/runtime/core/seq.cpp | 22 +- .../src/cpp/jank/runtime/obj/jit_closure.cpp | 4 - .../src/cpp/jank/runtime/obj/jit_function.cpp | 151 + .../src/cpp/jank/runtime/obj/symbol.cpp | 6 + compiler+runtime/src/cpp/main.cpp | 4 +- compiler+runtime/src/jank/clojure/core.jank | 7281 ++++++++--------- 24 files changed, 4667 insertions(+), 3717 deletions(-) create mode 100644 compiler+runtime/include/cpp/clojure/core_native/phase_1.hpp create mode 100644 compiler+runtime/include/cpp/jank/runtime/convert.hpp create mode 100644 compiler+runtime/include/cpp/jank/runtime/convert/from.hpp create mode 100644 compiler+runtime/include/cpp/jank/runtime/convert/function.hpp create mode 100644 compiler+runtime/include/cpp/jank/runtime/convert/into.hpp create mode 100644 compiler+runtime/src/cpp/clojure/core_native/phase_1.cpp diff --git a/compiler+runtime/CMakeLists.txt b/compiler+runtime/CMakeLists.txt index 1aeef962..2822a9cc 100644 --- a/compiler+runtime/CMakeLists.txt +++ b/compiler+runtime/CMakeLists.txt @@ -218,6 +218,7 @@ add_library( src/cpp/jank/codegen/processor.cpp src/cpp/jank/codegen/llvm_processor.cpp src/cpp/jank/jit/processor.cpp + src/cpp/clojure/core_native/phase_1.cpp ) target_include_directories( diff --git 
a/compiler+runtime/include/cpp/clojure/core_native/phase_1.hpp b/compiler+runtime/include/cpp/clojure/core_native/phase_1.hpp new file mode 100644 index 00000000..7423fbdd --- /dev/null +++ b/compiler+runtime/include/cpp/clojure/core_native/phase_1.hpp @@ -0,0 +1,3 @@ +#pragma once + +jank_object_ptr jank_load_clojure_core_native_phase_1(); diff --git a/compiler+runtime/include/cpp/jank/c_api.h b/compiler+runtime/include/cpp/jank/c_api.h index b3fbf19c..a96c3616 100644 --- a/compiler+runtime/include/cpp/jank/c_api.h +++ b/compiler+runtime/include/cpp/jank/c_api.h @@ -54,6 +54,60 @@ extern "C" void jank_function_set_arity3(jank_object_ptr fn, jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr)); + void jank_function_set_arity4( + jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr, jank_object_ptr)); + void jank_function_set_arity5(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); + void jank_function_set_arity6(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); + void jank_function_set_arity7(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); + void jank_function_set_arity8(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); + void jank_function_set_arity9(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); + void jank_function_set_arity10(jank_object_ptr fn, + jank_object_ptr 
(*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); jank_object_ptr jank_closure_create(jank_arity_flags arity_flags, void *context); void jank_closure_set_arity0(jank_object_ptr fn, jank_object_ptr (*f)()); @@ -63,6 +117,60 @@ extern "C" void jank_closure_set_arity3(jank_object_ptr fn, jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr)); + void jank_closure_set_arity4( + jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, jank_object_ptr, jank_object_ptr, jank_object_ptr)); + void jank_closure_set_arity5(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); + void jank_closure_set_arity6(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); + void jank_closure_set_arity7(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); + void jank_closure_set_arity8(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); + void jank_closure_set_arity9(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); + void jank_closure_set_arity10(jank_object_ptr fn, + jank_object_ptr (*f)(jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr, + jank_object_ptr)); jank_native_bool jank_truthy(jank_object_ptr 
o); jank_native_bool jank_equal(jank_object_ptr l, jank_object_ptr r); diff --git a/compiler+runtime/include/cpp/jank/runtime/convert.hpp b/compiler+runtime/include/cpp/jank/runtime/convert.hpp new file mode 100644 index 00000000..5d21f5c0 --- /dev/null +++ b/compiler+runtime/include/cpp/jank/runtime/convert.hpp @@ -0,0 +1,36 @@ +#pragma once + +namespace jank::runtime +{ + template + struct convert; + + template + concept convertible = requires(Input const &t) { + { convert::call(t) } -> std::same_as; + }; + + /* Given any T, getting a T from it requires no conversion. */ + template + struct convert + { + static T &call(T &t) + { + return t; + } + + static T call(T const &t) + { + return t; + } + + static T call(T &&t) + { + return std::move(t); + } + }; +} + +#include +#include +#include diff --git a/compiler+runtime/include/cpp/jank/runtime/convert/from.hpp b/compiler+runtime/include/cpp/jank/runtime/convert/from.hpp new file mode 100644 index 00000000..793a3749 --- /dev/null +++ b/compiler+runtime/include/cpp/jank/runtime/convert/from.hpp @@ -0,0 +1,7 @@ +#pragma once + +#include + +namespace jank::runtime +{ +} diff --git a/compiler+runtime/include/cpp/jank/runtime/convert/function.hpp b/compiler+runtime/include/cpp/jank/runtime/convert/function.hpp new file mode 100644 index 00000000..6e971430 --- /dev/null +++ b/compiler+runtime/include/cpp/jank/runtime/convert/function.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include + +namespace jank::runtime +{ + template + struct always_object_ptr + { + using type = object_ptr; + }; + + template + auto convert_function(R (* const fn)(Args...)) + { + if constexpr(std::conjunction_v, std::is_same...>) + { + return fn; + } + else + { + return std::function::type...)>{ + [fn](Args &&...args) -> object_ptr { + return convert::call(fn(convert::call(args)...)); + } + }; + } + } +} diff --git a/compiler+runtime/include/cpp/jank/runtime/convert/into.hpp b/compiler+runtime/include/cpp/jank/runtime/convert/into.hpp new file mode 
100644 index 00000000..85731099 --- /dev/null +++ b/compiler+runtime/include/cpp/jank/runtime/convert/into.hpp @@ -0,0 +1,112 @@ +#pragma once + +#include + +namespace jank::runtime +{ + template + concept same_as_any = (std::same_as || ...); + + template + requires behavior::object_like + struct convert, object_ptr> + { + static object_ptr call(native_box const o) + { + return o; + } + }; + + template + requires behavior::object_like + struct convert + { + static object_ptr call(T * const o) + { + return o; + } + }; + + template <> + struct convert + { + static object_ptr call(object * const o) + { + return o; + } + }; + + template <> + struct convert + { + static object_ptr call(object const * const o) + { + return const_cast(o); + } + }; + + template <> + struct convert + { + static object_ptr call(bool const o) + { + return make_box(o); + } + }; + + /* Native integer primitives. */ + template + requires(std::is_integral_v + && !same_as_any) + struct convert + { + static object_ptr call(Input const o) + { + return make_box(o); + } + }; + + /* Native floating point primitives. */ + template + requires(std::is_floating_point_v) + struct convert + { + static object_ptr call(Input const o) + { + return make_box(o); + } + }; + + /* Native strings. */ + template + requires(same_as_any) + struct convert + { + static object_ptr call(Input const &o) + { + return make_box(o); + } + + static object_ptr call(Input &&o) + { + return make_box(std::move(o)); + } + }; + + template