Skip to content

Commit

Permalink
add executor singleton and remove tbb
Browse files Browse the repository at this point in the history
  • Loading branch information
samayala22 committed Feb 8, 2024
1 parent 5914b0b commit cb061ac
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 51 deletions.
32 changes: 3 additions & 29 deletions vlm/backends/avx2/src/vlm_backend_avx2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,13 @@
#include "vlm_mesh.hpp"
#include "vlm_data.hpp"
#include "vlm_utils.hpp"
#include "vlm_executor.hpp" // includes taskflow/taskflow.hpp

#include <algorithm>
#include <cstdio>
#include <fstream>
#include <immintrin.h>

// TODO: evaluate possible replacement of TBB with TaskFlow or OMP
#include <oneapi/tbb/global_control.h>
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_for.h>

#include <taskflow/taskflow.hpp>
#include <taskflow/algorithm/for_each.hpp>

#include <lapacke.h>
Expand All @@ -27,7 +22,6 @@ using namespace vlm;
BackendAVX2::~BackendAVX2() = default; // Destructor definition

BackendAVX2::BackendAVX2(Mesh& mesh) : Backend(mesh) {
//tbb::global_control global_limit(oneapi::tbb::global_control::max_allowed_parallelism, 1);
lhs.resize((u64)mesh.nb_panels_wing() * (u64)mesh.nb_panels_wing());
rhs.resize(mesh.nb_panels_wing());
ipiv.resize(mesh.nb_panels_wing());
Expand Down Expand Up @@ -283,34 +277,13 @@ void BackendAVX2::compute_lhs(const FlowData& flow) {
SimpleTimer timer("LHS");
Mesh& m = mesh;
const f32 sigma_p4 = pow<4>(flow.sigma_vatistas); // Vatistas coefficient (^2n with n=2)
tbb::affinity_partitioner ap;

const u32 start_wing = 0;
const u32 end_wing = (m.nc - 1) * m.ns;
// tbb::parallel_for(tbb::blocked_range<u32>(start_wing, end_wing),[&](const tbb::blocked_range<u32> &r) {
// for (u32 i = r.begin(); i < r.end(); i++) {
// macro_kernel_avx2<true>(m, lhs, i, i, sigma_p4);
// macro_kernel_remainder_scalar<true>(m, lhs, i, i);
// }
// }, ap);

// for (u32 i = m.nc - 1; i < m.nc + m.nw; i++) {
// tbb::parallel_for(tbb::blocked_range<u32>(0, m.ns),[&](const tbb::blocked_range<u32> &r) {
// for (u32 j = r.begin(); j < r.end(); j++) {
// const u32 ia = (m.nc - 1) * m.ns + j;
// const u32 lidx = i * m.ns + j;
// macro_kernel_avx2<false>(m, lhs, ia, lidx, sigma_p4);
// macro_kernel_remainder_scalar<false>(m, lhs, i, i);
// }
// }, ap);
// }

tf::Executor executor{};
tf::Taskflow taskflow;

auto init = taskflow.placeholder();
auto sync = taskflow.placeholder();

auto wing_pass = taskflow.for_each_index(start_wing, end_wing, [&] (u32 i) {
macro_kernel_avx2<true>(m, lhs, i, i, sigma_p4);
macro_kernel_remainder_scalar<true>(m, lhs, i, i);
Expand All @@ -330,14 +303,15 @@ void BackendAVX2::compute_lhs(const FlowData& flow) {
idx++;
return 0; // 0 means continue
});
auto sync = taskflow.placeholder();

init.precede(wing_pass, cond);
wing_pass.precede(sync);
cond.precede(wake_pass, sync);
wake_pass.precede(back);
back.precede(cond);

executor.run(taskflow).wait();
Executor::get().run(taskflow).wait();
}

void BackendAVX2::compute_rhs(const FlowData& flow) {
Expand Down
5 changes: 0 additions & 5 deletions vlm/backends/avx2/xmake.lua
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
includes("../../../packages/taskflow.lua")

add_requires("tbb")

add_requires("openblas")
add_requires("taskflow_custom")

target("backend-avx2")
set_kind("static")
set_default(false)
add_vectorexts("avx2", "fma")
add_packages("tbb")
add_packages("taskflow_custom")
add_defines("HAVE_LAPACK_CONFIG_H")
add_packages("openblas", { public = true })
Expand Down
2 changes: 2 additions & 0 deletions vlm/dev/main.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "vlm.hpp"
#include "parser.hpp"
#include "vlm_executor.hpp"
#include "vlm_types.hpp"
#include <iostream>
#include <cstdio>
Expand Down Expand Up @@ -53,6 +54,7 @@ int main(int argc, char **argv) {
});

try {
//vlm::Executor::instance(1); // 1 thread
LinearVLM solver(cfg);
std::vector<f32> alphas = cfg().section("solver").get_vector<f32>("alphas", {0.0f});
std::transform(alphas.begin(), alphas.end(), alphas.begin(),
Expand Down
27 changes: 27 additions & 0 deletions vlm/include/vlm_executor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// This header should not be included by another header file, only by source files.
#pragma once

#include <mutex>
#include <taskflow/taskflow.hpp> // includes <thread>, <mutex>, <memory>

// Taskflow executor singleton wrapper (not thread-safe by design)
namespace vlm {
/// Static accessor for one shared tf::Executor.
///
/// The class itself is never instantiated (private constructor, deleted copy
/// operations); it only exposes static functions that lazily create the
/// executor and hand out a reference to it. Creation is not synchronized, so
/// the first call to instance() or get() must not race with another call.
class Executor {
    public:
        Executor(const Executor&) = delete;
        Executor& operator=(const Executor&) = delete;

        /// Returns the shared executor, creating it on the first call.
        ///
        /// @param num_threads Worker count used when the executor is created;
        ///        0 selects std::thread::hardware_concurrency(). The value is
        ///        ignored when the executor already exists.
        /// @return Reference to the shared tf::Executor.
        static tf::Executor& instance(size_t num_threads) {
            if (!_instance) {
                size_t workers = num_threads ? num_threads : std::thread::hardware_concurrency();
                // hardware_concurrency() is allowed to return 0 when the value
                // cannot be determined; never request an empty worker pool.
                if (workers == 0) workers = 1;
                _instance = std::make_unique<tf::Executor>(workers);
            }
            return *_instance;
        }

        /// Returns the shared executor, creating it with the default worker
        /// count if instance() has not been called yet.
        static tf::Executor& get() {
            return instance(0);
        }

    private:
        // Defined out of line in a source file; null until the first instance()/get().
        static std::unique_ptr<tf::Executor> _instance;

        Executor() = default;
        ~Executor() = default;
};
}
3 changes: 3 additions & 0 deletions vlm/include/vlm_fwd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,8 @@ namespace vlm {

class FlowData;
class Mesh;
class Backend;
template<typename Interpolator> class Database2D;
template<typename Interpolator> class WingProfile;

}
5 changes: 5 additions & 0 deletions vlm/src/vlm_executor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "vlm_executor.hpp"

// Out-of-line definition of the singleton's storage. It starts out null;
// Executor::instance() / Executor::get() create the tf::Executor on first use.
namespace vlm {
std::unique_ptr<tf::Executor> Executor::_instance{};
} // namespace vlm
13 changes: 4 additions & 9 deletions vlm/xmake.lua
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
-- add_requires("openmp")
add_requires("taskflow_custom")

target("libvlm")
set_kind("static")
add_packages("taskflow_custom", {public = true})

for _,name in ipairs(backends) do
if has_config(backend_option(name)) then
add_defines(backend_defines(name))
Expand All @@ -15,16 +17,9 @@ target("libvlm")
target("vlm")
set_kind("binary")
set_default(true)
add_rpathdirs("$ORIGIN") -- tbb dll must be in same dir as exe
-- add_packages("openmp") -- needed for gcc linker (for eigen)
add_rpathdirs("$ORIGIN")
add_deps("libvlm") -- core library
set_runargs({"-i"}, {"../../../../config/elliptic.vlm"}, {"-m"}, {"../../../../mesh/elliptic_64x64.x"}, {"-o"}, {"../../../../results/elliptic.vtu"})
add_files("dev/main.cpp")

-- xmake run vlm -i ../../../../config/elliptic.vlm -m ../../../../mesh/elliptic_128x128.x -o ../../../../results/rec.vtu

-- target("vlm-tests")
-- set_kind("binary")
-- set_default(false)
-- add_deps("libvlm")
-- add_files("dev/tests.cpp")
9 changes: 1 addition & 8 deletions xmake.lua
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,6 @@ set_warnings("all")
set_languages("c++17", "c99")
set_runtimes("MD") -- msvc runtime library (MD/MT/MDd/MTd)

-- TBB macro for profiling parallel objects
if is_mode("debug", "releasedbg") then
add_defines("TBB_USE_THREADING_TOOLS")
end

-- Define backends and helper functions
backends = {"cuda", "avx2"}
backend_includes = function(name) return "vlm/backends/" .. name .. "/xmake.lua" end
Expand All @@ -45,8 +40,7 @@ for _,name in ipairs(backends) do
end
end

-- add_requires("openmp")

includes("packages/taskflow.lua")
includes("vlm/xmake.lua") -- library and main driver

-- Create tests
Expand All @@ -55,7 +49,6 @@ for _, file in ipairs(os.files("tests/test_*.cpp")) do
target(name)
set_kind("binary")
set_default(false)
-- add_packages("openmp") -- needed for gcc linker (for eigen)
add_deps("libvlm")
add_files("tests/" .. name .. ".cpp")
add_tests("default")
Expand Down

0 comments on commit cb061ac

Please sign in to comment.