diff --git a/packages/taskflow.lua b/packages/taskflow.lua new file mode 100644 index 0000000..6954642 --- /dev/null +++ b/packages/taskflow.lua @@ -0,0 +1,30 @@ +package("taskflow_custom", function() + set_urls("https://github.com/samayala22/taskflow.git") + add_urls("https://github.com/samayala22/taskflow/archive/$(version).tar.gz") + add_versions("v3.6.2", "0a1d306f90e8e17cb98b95eaae9e8b9455beeeca0f0a72afee7719b27706c68c") + + if is_plat("linux") then + add_syslinks("pthread") + end + + on_install("linux", "macosx", "windows", "iphoneos", "android", "cross", "mingw", "bsd", function (package) + os.cp("taskflow", package:installdir("include")) + end) + + on_test(function (package) + assert(package:check_cxxsnippets({test = [[ + #include <taskflow/taskflow.hpp> + #include <taskflow/algorithm/for_each.hpp> + static void test() { + tf::Executor executor; + tf::Taskflow taskflow; + std::vector<int> range(10); + std::iota(range.begin(), range.end(), 0); + taskflow.for_each(range.begin(), range.end(), [&] (int i) { + std::printf("for_each on container item: %d\n", i); + }); + executor.run(taskflow).wait(); + } + ]]}, {configs = {languages = "c++17"}})) + end) +end) \ No newline at end of file diff --git a/vlm/backends/avx2/src/vlm_backend_avx2.cpp b/vlm/backends/avx2/src/vlm_backend_avx2.cpp index f95edce..f1e4269 100644 --- a/vlm/backends/avx2/src/vlm_backend_avx2.cpp +++ b/vlm/backends/avx2/src/vlm_backend_avx2.cpp @@ -311,7 +311,7 @@ void BackendAVX2::compute_lhs(const FlowData& flow) { auto init = taskflow.placeholder(); auto sync = taskflow.placeholder(); - auto wing_pass = taskflow.for_each_index(start_wing, end_wing, (u32)1, [&] (u32 i) { + auto wing_pass = taskflow.for_each_index(start_wing, end_wing, [&] (u32 i) { macro_kernel_avx2(m, lhs, i, i, sigma_p4); macro_kernel_remainder_scalar(m, lhs, i, i); }, tf::GuidedPartitioner()); @@ -320,7 +320,7 @@ void BackendAVX2::compute_lhs(const FlowData& flow) { auto cond = taskflow.emplace([&]{ return idx < m.nc + m.nw ? 
0 : 1; // 0 means continue, 1 means break }); - auto wake_pass = taskflow.for_each_index(0u, m.ns, (u32)1, [&] (u32 j) { + auto wake_pass = taskflow.for_each_index(0u, m.ns, [&] (u32 j) { const u32 ia = (m.nc - 1) * m.ns + j; const u32 lidx = idx * m.ns + j; macro_kernel_avx2(m, lhs, ia, lidx, sigma_p4); diff --git a/vlm/backends/avx2/xmake.lua b/vlm/backends/avx2/xmake.lua index 6cbe172..e68ccb5 100644 --- a/vlm/backends/avx2/xmake.lua +++ b/vlm/backends/avx2/xmake.lua @@ -1,18 +1,19 @@ +includes("../../../packages/taskflow.lua") + add_requires("tbb") add_requires("openblas") -add_requires("taskflow") +add_requires("taskflow_custom") target("backend-avx2") set_kind("static") set_default(false) add_vectorexts("avx2", "fma") add_packages("tbb") - add_packages("taskflow") + add_packages("taskflow_custom") add_defines("HAVE_LAPACK_CONFIG_H") add_packages("openblas", { public = true }) add_includedirs("../../include") add_files("src/*.cpp") add_includedirs("include", {public = true}) -