Skip to content

Commit

Permalink
parallel wake rollup & fix taskflow
Browse files Browse the repository at this point in the history
  • Loading branch information
samayala22 committed May 7, 2024
1 parent c6bb1c0 commit c8864c9
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 30 deletions.
2 changes: 0 additions & 2 deletions packages/taskflow.lua
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package("taskflow_custom")
set_urls("https://github.com/samayala22/taskflow.git")
add_urls("https://github.com/samayala22/taskflow/archive/$(version).tar.gz")
add_versions("v3.6.2", "0a1d306f90e8e17cb98b95eaae9e8b9455beeeca0f0a72afee7719b27706c68c")

if is_plat("linux") then
add_syslinks("pthread")
Expand Down
6 changes: 3 additions & 3 deletions tests/test_uvlm_theodorsen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ int main() {

mesh->resize_wake(vec_t.size()-1); // +1 for the initial pose
const std::unique_ptr<Backend> backend = create_backend(backend_name, *mesh); // create after mesh has been resized

// Precompute the LHS since wing geometry is constant
backend->compute_lhs();
backend->lu_factor();
Expand Down Expand Up @@ -219,8 +219,8 @@ int main() {
velocities.z[idx] = local_velocity.z;

if (idx == 0) {
f32 analytical_vel = - amplitude * omega * std::cos(omega * t);
f32 rel_error = 100.0f * std::abs((analytical_vel - local_velocity.z) / analytical_vel);
const f32 analytical_vel = - amplitude * omega * std::cos(omega * t);
const f32 rel_error = 100.0f * std::abs((analytical_vel - local_velocity.z) / analytical_vel);
std::cout << "vel error:" << rel_error << "%\n";
avg_vel_error += rel_error;
}
Expand Down
1 change: 1 addition & 0 deletions vlm/backends/cpu/include/vlm_backend_cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class BackendCPU : public Backend {
std::vector<f32> panel_uw;
std::vector<f32> panel_vw;
std::vector<f32> panel_ww;
SoA_3D_t<f32> rollup_vertices;
std::vector<f32> wake_buffer; // (ns*nc) * ns
std::vector<f32> rhs;
std::vector<i32> ipiv;
Expand Down
54 changes: 37 additions & 17 deletions vlm/backends/cpu/src/vlm_backend_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ BackendCPU::BackendCPU(Mesh& mesh) : Backend(mesh) {
lhs.resize(mesh.nb_panels_wing() * mesh.nb_panels_wing());
wake_buffer.resize(mesh.nb_panels_wing() * mesh.ns);

rollup_vertices.resize(mesh.nb_vertices_total());
uw.resize(mesh.nb_panels_wing() * mesh.ns);
vw.resize(mesh.nb_panels_wing() * mesh.ns);
ww.resize(mesh.nb_panels_wing() * mesh.ns);
Expand Down Expand Up @@ -131,7 +132,6 @@ void BackendCPU::compute_rhs(const FlowData& flow) {
kernel_cpu_rhs(m.nb_panels_wing(), m.normal.x.data(), m.normal.y.data(), m.normal.z.data(), flow.freestream.x, flow.freestream.y, flow.freestream.z, rhs.data());
}

// TODO: consider changing FlowData to SolverData
void BackendCPU::add_wake_influence() {
// const tiny::ScopedTimer timer("Wake Influence");

Expand Down Expand Up @@ -214,10 +214,30 @@ void BackendCPU::wake_rollup(float dt) {
const u64 wake_vertices_begin = (mesh.nc + mesh.nw - mesh.current_nw + 1) * (mesh.ns+1);
const u64 wake_vertices_end = (mesh.nc + mesh.nw + 1) * (mesh.ns + 1);

// parallel for
for (u64 vidx = wake_vertices_begin; vidx < wake_vertices_end; vidx++) {
ispc::kernel_induced_vel(mesh_view, vidx, dt, gamma.data(), sigma_vatistas);
}
tf::Taskflow taskflow;

auto init = taskflow.placeholder();
auto rollup = taskflow.for_each_index(wake_vertices_begin, wake_vertices_end, [&] (u64 vidx) {
ispc::kernel_induced_vel(mesh_view, dt, rollup_vertices.x.data(), rollup_vertices.y.data(), rollup_vertices.z.data(), vidx, gamma.data(), sigma_vatistas);
});
auto copy = taskflow.emplace([&]{
std::copy(rollup_vertices.x.data() + wake_vertices_begin, rollup_vertices.x.data() + wake_vertices_end, mesh.v.x.data() + wake_vertices_begin);
std::copy(rollup_vertices.y.data() + wake_vertices_begin, rollup_vertices.y.data() + wake_vertices_end, mesh.v.y.data() + wake_vertices_begin);
std::copy(rollup_vertices.z.data() + wake_vertices_begin, rollup_vertices.z.data() + wake_vertices_end, mesh.v.z.data() + wake_vertices_begin);
});
auto sync = taskflow.placeholder();
init.precede(rollup);
rollup.precede(copy);
copy.precede(sync);

Executor::get().run(taskflow).wait();

// for (u64 vidx = wake_vertices_begin; vidx < wake_vertices_end; vidx++) {
// ispc::kernel_induced_vel(mesh_view, dt, rollup_vertices.x.data(), rollup_vertices.y.data(), rollup_vertices.z.data(), vidx, gamma.data(), sigma_vatistas);
// }
// std::copy(rollup_vertices.x.data() + wake_vertices_begin, rollup_vertices.x.data() + rollup_vertices.size, mesh.v.x.data() + wake_vertices_begin);
// std::copy(rollup_vertices.y.data() + wake_vertices_begin, rollup_vertices.y.data() + rollup_vertices.size, mesh.v.y.data() + wake_vertices_begin);
// std::copy(rollup_vertices.z.data() + wake_vertices_begin, rollup_vertices.z.data() + rollup_vertices.size, mesh.v.z.data() + wake_vertices_begin);
}

void BackendCPU::shed_gamma() {
Expand Down Expand Up @@ -298,9 +318,9 @@ f32 BackendCPU::compute_coefficient_unsteady_cl(const SoA_3D_t<f32>& vel, f32 dt

linalg::alias::float3 freestream{vel.x[li], vel.y[li], vel.z[li]};
//std::cout << "Without: " << freestream << "\n";
freestream.x += panel_uw[li];
freestream.y += panel_vw[li];
freestream.z += panel_ww[li];
// freestream.x += panel_uw[li];
// freestream.y += panel_vw[li];
// freestream.z += panel_ww[li];
// std::cout << "With: " << freestream << "\n";

const linalg::alias::float3 lift_axis = compute_lift_axis(freestream);
Expand All @@ -315,17 +335,17 @@ f32 BackendCPU::compute_coefficient_unsteady_cl(const SoA_3D_t<f32>& vel, f32 dt
const f32 gamma_dt = (gamma[li] - gamma_prev[li]) / dt; // backward difference

// Joukowski method
// force += rho * delta_gamma[li] * linalg::cross(freestream, v1 - v0);
// force += rho * gamma_dt * mesh.area[li] * normal;
force += rho * delta_gamma[li] * linalg::cross(freestream, v1 - v0);
force += rho * gamma_dt * mesh.area[li] * normal;

// Katz Plotkin method
linalg::alias::float3 delta_p = {0.0f, 0.0f, 0.0f};
const f32 delta_gamma_i = (u == 0) ? gamma[li] : gamma[li] - gamma[(u-1) * mesh.ns + v];
const f32 delta_gamma_j = (v == 0) ? gamma[li] : gamma[li] - gamma[u * mesh.ns + v - 1];
delta_p += rho * linalg::dot(freestream, linalg::normalize(v1 - v0)) * delta_gamma_j / mesh.panel_width_y(u, v);
delta_p += rho * linalg::dot(freestream, linalg::normalize(v3 - v0)) * delta_gamma_i / mesh.panel_length(u, v);
delta_p += gamma_dt;
force = (delta_p * mesh.area[li]) * normal;
// linalg::alias::float3 delta_p = {0.0f, 0.0f, 0.0f};
// const f32 delta_gamma_i = (u == 0) ? gamma[li] : gamma[li] - gamma[(u-1) * mesh.ns + v];
// const f32 delta_gamma_j = (v == 0) ? gamma[li] : gamma[li] - gamma[u * mesh.ns + v - 1];
// delta_p += rho * linalg::dot(freestream, linalg::normalize(v1 - v0)) * delta_gamma_j / mesh.panel_width_y(u, v);
// delta_p += rho * linalg::dot(freestream, linalg::normalize(v3 - v0)) * delta_gamma_i / mesh.panel_length(u, v);
// delta_p += gamma_dt;
// force = (delta_p * mesh.area[li]) * normal;

// force /= linalg::length2(freestream);

Expand Down
11 changes: 6 additions & 5 deletions vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,9 @@ export void kernel_influence2(
}

export void kernel_induced_vel(
uniform const MeshView& m, uniform uint64 vidx, uniform float dt,
uniform float* uniform gamma, uniform float sigma
uniform MeshView& m, uniform float dt,
uniform float* uniform rollup_vx, uniform float* uniform rollup_vy, uniform float* uniform rollup_vz,
uniform uint64 vidx, uniform float* uniform gamma, uniform float sigma
) {

const uniform float3 vertex_influenced = {m.v.x[vidx], m.v.y[vidx], m.v.z[vidx]};
Expand Down Expand Up @@ -237,9 +238,9 @@ export void kernel_induced_vel(
}
}

m.v.x[vidx] += reduce_add(induced_vel.x) * dt;
m.v.y[vidx] += reduce_add(induced_vel.y) * dt;
m.v.z[vidx] += reduce_add(induced_vel.z) * dt;
rollup_vx[vidx] = m.v.x[vidx] + reduce_add(induced_vel.x) * dt;
rollup_vy[vidx] = m.v.y[vidx] + reduce_add(induced_vel.y) * dt;
rollup_vz[vidx] = m.v.z[vidx] + reduce_add(induced_vel.z) * dt;
}

export uniform float kernel_trefftz_cd(
Expand Down
1 change: 1 addition & 0 deletions vlm/include/vlm_backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "vlm_fwd.hpp"
#include "vlm_types.hpp"
#include "vlm_mesh.hpp"

namespace vlm {

Expand Down
6 changes: 3 additions & 3 deletions vlm/include/vlm_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ struct SoA_3D_t {

void resize(u64 size_) {
size = size_;
x.resize(size);
y.resize(size);
z.resize(size);
x.resize(size, 0.f);
y.resize(size, 0.f);
z.resize(size, 0.f);
}

void reserve(u64 size_) {
Expand Down

0 comments on commit c8864c9

Please sign in to comment.