Skip to content

Commit

Permalink
wip - begin backend refactor
Browse files Browse the repository at this point in the history
- data is now defined in the simulation class -> more granular control on data transfers and allocation
- backend is now stateless -> simpler to maintain since data is now passed as arguments (instead of being stored there)
- multimesh support wip
- todo: fix and cleanup everything
  • Loading branch information
samayala22 committed Jul 14, 2024
1 parent 82397ba commit 47c53f0
Show file tree
Hide file tree
Showing 11 changed files with 767 additions and 902 deletions.
13 changes: 9 additions & 4 deletions tests/test_vlm_elliptic_coeffs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,18 @@ int main(int /*argc*/, char ** /*argv*/) {
const std::vector<std::string> backends = get_available_backends();
std::vector<f32> test_alphas = {0, 1, 2, 3, 4, 5, 10, 15};
std::transform(test_alphas.begin(), test_alphas.end(), test_alphas.begin(), to_radians);

MeshGeom mesh_geom{};

const MeshIO mesh_io{"plot3d"};
MeshGeom mesh_geom{}; // DEPRECATE
auto solvers = tiny::make_combination(meshes, backends);
for (const auto& [mesh_name, backend_name] : solvers) {
std::printf(">>> MESH: %s | BACKEND: %s\n", mesh_name.get().c_str(), backend_name.get().c_str());
mesh_io_read_file(mesh_name, &mesh_geom);
mesh_quarterchord(&mesh_geom);

auto dims = mesh_io.get_dims(mesh_name);
// LinearVLM solver{backend_name, }

mesh_io_read_file(mesh_name, &mesh_geom); // DEPRECATE
mesh_quarterchord(&mesh_geom); // DEPRECATE
LinearVLM solver{create_backend(backend_name, &mesh_geom, 1)};

for (u64 i = 0; i < test_alphas.size(); i++) {
Expand Down
39 changes: 19 additions & 20 deletions vlm/backends/cpu/include/vlm_backend_cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,28 @@ namespace vlm {

class BackendCPU final : public Backend {
public:
BackendCPU(MeshGeom* mesh, u64 timesteps);
BackendCPU();
~BackendCPU();
void reset() override;
void lhs_assemble() override;
void compute_rhs() override;
void add_wake_influence() override;
void wake_rollup(float dt) override;
void shed_gamma() override;
void lu_factor() override;
void lu_solve() override;
f32 compute_coefficient_cl(const FlowData& flow, const f32 area, const u64 j, const u64 n) override;
f32 compute_coefficient_unsteady_cl(const linalg::alias::float3& freestream, const SoA_3D_t<f32>& vel, f32 dt, const f32 area, const u64 j, const u64 n) override;
linalg::alias::float3 compute_coefficient_cm(const FlowData& flow, const f32 area, const f32 chord, const u64 j, const u64 n) override;
f32 compute_coefficient_cd(const FlowData& flow, const f32 area, const u64 j, const u64 n) override;
void compute_delta_gamma() override;
void set_velocities(const linalg::alias::float3& vel) override;
void set_velocities(const f32* vels) override;
void lhs_assemble(f32* lhs) override;
void rhs_assemble_velocities(f32* rhs, const f32* normals, const f32* velocities) override;
void rhs_assemble_wake_influence(f32* rhs, const f32* gamma) override;
void displace_wake_rollup(float dt, f32* rollup_vertices, f32* verts_wake, const f32* verts_wing, const f32* gamma) override;
// void displace_wing(const linalg::alias::float4x4& transform) override;
void displace_wing_and_shed(const std::vector<linalg::alias::float4x4>& transform, f32* verts_wing, f32* verts_wake) override;
void gamma_shed(f32* gamma, f32* gamma_prev) override;
void gamma_delta(f32* gamma_delta, const f32* gamma) override;
void lu_factor(f32* lhs) override;
void lu_solve(f32* lhs, f32* rhs, f32* gamma) override;

// Per mesh kernels
f32 coeff_steady_cl(const MeshParams& param, const f32* verts_wing, const f32* gamma_delta, const FlowData& flow, const f32 area, const u64 j, const u64 n) override;
f32 coeff_unsteady_cl(const MeshParams& param, const f32* verts_wing, const f32* gamma_delta, const f32* gamma, const f32* gamma_prev, const f32* local_velocities, const f32* areas, const f32* normals, const linalg::alias::float3& freestream, f32 dt, const f32 area, const u64 j, const u64 n) override;
linalg::alias::float3 coeff_steady_cm(const MeshParams& param, const FlowData& flow, const f32 area, const f32 chord, const u64 j, const u64 n) override;
f32 coeff_steady_cd(const MeshParams& param, const FlowData& flow, const f32 area, const u64 j, const u64 n) override;

void mesh_metrics(const f32 alpha) override;
void mesh_move(const linalg::alias::float4x4& transform) override;
void update_wake(const linalg::alias::float3& freestream) override;
f32 mesh_mac(u64 j, u64 n) override; // mean chord
f32 mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) override; // mean span
f32 mesh_mac(const MeshParams& param, const u64 j, const u64 n) override;
f32 mesh_area(const MeshParams& param, const u64 i, const u64 j, const u64 m, const u64 n) override;
};

} // namespace vlm
150 changes: 79 additions & 71 deletions vlm/backends/cpu/src/vlm_backend_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,67 +22,75 @@
using namespace vlm;
using namespace linalg::ostream_overloads;

ispc::Mesh2 mesh_to_ispc(Mesh2* mesh) {
ispc::Mesh2 mesh_ispc;
mesh_ispc.nc = mesh->nc;
mesh_ispc.ns = mesh->ns;
mesh_ispc.nw = mesh->nw;
mesh_ispc.nwa = mesh->nwa;
mesh_ispc.vertices = mesh->vertices;
mesh_ispc.normals = mesh->normals;
mesh_ispc.colloc = mesh->colloc;
mesh_ispc.area = mesh->area;
return mesh_ispc;
}

BackendCPU::~BackendCPU() = default; // Destructor definition

BackendCPU::BackendCPU(MeshGeom* mesh, u64 timesteps) {
allocator.h_malloc = std::malloc;
allocator.d_malloc = std::malloc;
allocator.h_free = std::free;
allocator.d_free = std::free;
allocator.hh_memcpy = std::memcpy;
allocator.hd_memcpy = std::memcpy;
allocator.dh_memcpy = std::memcpy;
allocator.dd_memcpy = std::memcpy;
allocator.h_memset = std::memset;
allocator.d_memset = std::memset;
init(mesh, timesteps);

d_solver_info = (i32*)allocator.d_malloc(sizeof(i32));
d_solver_ipiv = (i32*)allocator.d_malloc(hd_mesh->nc*hd_mesh->ns*sizeof(i32));
}

void BackendCPU::reset() {
const u64 nb_panels_wing = hd_mesh->nc * hd_mesh->ns;
const u64 nb_vertices_wing = (hd_mesh->nc+1)*(hd_mesh->ns+1);


// std::fill(gamma.begin(), gamma.end(), 0.0f);
std::fill(hd_data->lhs, hd_data->lhs + nb_panels_wing*nb_panels_wing, 0.0f); // influence kernel is +=
// std::fill(rhs.begin(), rhs.end(), 0.0f);
allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 0), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 0), nb_vertices_wing*sizeof(f32));
allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 1), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 1), nb_vertices_wing*sizeof(f32));
allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 2), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 2), nb_vertices_wing*sizeof(f32));

dd_mesh->nwa = 0;
}
const Allocator allocator_cpu{
std::malloc,
std::malloc,
std::free,
std::free,
std::memcpy,
std::memcpy,
std::memcpy,
std::memcpy,
std::memset,
std::memset
};

BackendCPU::BackendCPU() : Backend(allocator_cpu) {}
BackendCPU::~BackendCPU() = default;

// void BackendCPU::reset() {
// const u64 nb_panels_wing = hd_mesh->nc * hd_mesh->ns;
// const u64 nb_vertices_wing = (hd_mesh->nc+1)*(hd_mesh->ns+1);


// // std::fill(gamma.begin(), gamma.end(), 0.0f);
// std::fill(hd_data->lhs, hd_data->lhs + nb_panels_wing*nb_panels_wing, 0.0f); // influence kernel is +=
// // std::fill(rhs.begin(), rhs.end(), 0.0f);
// allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 0), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 0), nb_vertices_wing*sizeof(f32));
// allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 1), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 1), nb_vertices_wing*sizeof(f32));
// allocator.dd_memcpy(PTR_MESH_V(hd_mesh, 0, 0, 2), PTR_MESHGEOM_V(hd_mesh_geom, 0, 0, 2), nb_vertices_wing*sizeof(f32));

// dd_mesh->nwa = 0;
// }

void BackendCPU::compute_delta_gamma() {
const u64 nc = dd_mesh->nc;
const u64 ns = dd_mesh->ns;
const u64 nw = dd_mesh->nw;
const u64 nwa = dd_mesh->nwa;
void BackendCPU::gamma_delta(f32* gamma_delta, const f32* gamma) {
// const tiny::ScopedTimer timer("Delta Gamma");
std::copy(dd_data->gamma, dd_data->gamma+ns, dd_data->delta_gamma);

// note: this is efficient as the memory is contiguous
for (u64 i = 1; i < nc; i++) {
for (u64 j = 0; j < ns; j++) {
dd_data->delta_gamma[i*ns + j] = dd_data->gamma[i*ns + j] - dd_data->gamma[(i-1)*ns + j];
}
}
tf::Taskflow graph;

auto begin = graph.placeholder();
auto end = graph.placeholder();

for (const auto& p : prm) {
f32* p_gamma_delta = gamma_delta + p.poff;
const f32* p_gamma = gamma + p.poff;
tf::Task first_row = graph.emplace([&]{
allocator.dd_memcpy(p_gamma_delta, p_gamma, p.ns * sizeof(f32));
});
tf::Task remaining_rows = graph.for_each_index((u64)1,p.nc, [&] (u64 b, u64 e) {
for (u64 i = b; i < e; i++) {
for (u64 j = 0; j < p.ns; j++) {
p_gamma_delta[i*p.ns + j] = p_gamma[i*p.ns + j] - p_gamma[(i-1)*p.ns + j];
}
}
});

begin.precede(first_row);
begin.precede(remaining_rows);
first_row.precede(end);
remaining_rows.precede(end);
};

Executor::get().run(graph).wait();

// std::copy(gamma,gamma+ns, delta_gamma);

// // note: this is efficient as the memory is contiguous
// for (u64 i = 1; i < nc; i++) {
// for (u64 j = 0; j < ns; j++) {
// delta_gamma[i*ns + j] = gamma[i*ns + j] - gamma[(i-1)*ns + j];
// }
// }
}

void BackendCPU::lhs_assemble() {
Expand All @@ -95,7 +103,7 @@ void BackendCPU::lhs_assemble() {
const u64 zero = 0;
const u64 end_wing = (nc - 1) * ns;

ispc::Mesh2 mesh = mesh_to_ispc(dd_mesh);
ispc::Mesh mesh = mesh_to_ispc(dd_mesh);

tf::Taskflow taskflow;

Expand Down Expand Up @@ -128,7 +136,7 @@ void BackendCPU::lhs_assemble() {
Executor::get().run(taskflow).wait();
}

void BackendCPU::compute_rhs() {
void BackendCPU::rhs_assemble_velocities() {
const tiny::ScopedTimer timer("RHS");
const u64 nc = dd_mesh->nc;
const u64 ns = dd_mesh->ns;
Expand All @@ -143,13 +151,13 @@ void BackendCPU::compute_rhs() {
}
}

void BackendCPU::add_wake_influence() {
void BackendCPU::rhs_assemble_wake_influence() {
// const tiny::ScopedTimer timer("Wake Influence");
const u64 nc = dd_mesh->nc;
const u64 ns = dd_mesh->ns;
const u64 nw = dd_mesh->nw;

ispc::Mesh2 mesh = mesh_to_ispc(dd_mesh);
ispc::Mesh mesh = mesh_to_ispc(dd_mesh);

tf::Taskflow taskflow;

Expand All @@ -165,7 +173,7 @@ void BackendCPU::add_wake_influence() {
Executor::get().run(taskflow).wait();
}

void BackendCPU::wake_rollup(float dt) {
void BackendCPU::displace_wake_rollup(float dt) {
const tiny::ScopedTimer timer("Wake Rollup");

const u64 nc = dd_mesh->nc;
Expand All @@ -180,7 +188,7 @@ void BackendCPU::wake_rollup(float dt) {
const f32* ry = dd_data->rollup_vertices + (nc+nw+1)*(ns+1)*1;
const f32* rz = dd_data->rollup_vertices + (nc+nw+1)*(ns+1)*2;

ispc::Mesh2 mesh = mesh_to_ispc(dd_mesh);
ispc::Mesh mesh = mesh_to_ispc(dd_mesh);

tf::Taskflow taskflow;

Expand All @@ -201,7 +209,7 @@ void BackendCPU::wake_rollup(float dt) {
Executor::get().run(taskflow).wait();
}

void BackendCPU::shed_gamma() {
void BackendCPU::gamma_shed() {
// const tiny::ScopedTimer timer("Shed Gamma");
const u64 nc = dd_mesh->nc;
const u64 ns = dd_mesh->ns;
Expand Down Expand Up @@ -230,7 +238,7 @@ void BackendCPU::lu_solve() {
LAPACKE_sgetrs(LAPACK_COL_MAJOR, 'N', n, 1, dd_data->lhs, n, d_solver_ipiv, dd_data->gamma, n);
}

f32 BackendCPU::compute_coefficient_cl(const FlowData& flow, const f32 area, const u64 j, const u64 n) {
f32 BackendCPU::coeff_steady_cl(const FlowData& flow, const f32 area, const u64 j, const u64 n) {
// const tiny::ScopedTimer timer("Compute CL");

const u64 nc = dd_mesh->nc;
Expand Down Expand Up @@ -264,7 +272,7 @@ f32 BackendCPU::compute_coefficient_cl(const FlowData& flow, const f32 area, con
return cl;
}

f32 BackendCPU::compute_coefficient_unsteady_cl(const linalg::alias::float3& freestream, const SoA_3D_t<f32>& vel, f32 dt, const f32 area, const u64 j, const u64 n) {
f32 BackendCPU::coeff_unsteady_cl(const linalg::alias::float3& freestream, const SoA_3D_t<f32>& vel, f32 dt, const f32 area, const u64 j, const u64 n) {
assert(n > 0);
assert(j >= 0 && j+n <= dd_mesh->ns);

Expand Down Expand Up @@ -315,7 +323,7 @@ f32 BackendCPU::compute_coefficient_unsteady_cl(const linalg::alias::float3& fre
return cl;
}

linalg::alias::float3 BackendCPU::compute_coefficient_cm(
linalg::alias::float3 BackendCPU::coeff_steady_cm(
const FlowData& flow,
const f32 area,
const f32 chord,
Expand Down Expand Up @@ -352,7 +360,7 @@ linalg::alias::float3 BackendCPU::compute_coefficient_cm(
return cm;
}

f32 BackendCPU::compute_coefficient_cd(
f32 BackendCPU::coeff_steady_cd(
const FlowData& flow,
const f32 area,
const u64 j,
Expand All @@ -362,7 +370,7 @@ f32 BackendCPU::compute_coefficient_cd(
assert(n > 0);
assert(j >= 0 && j+n <= dd_mesh->ns);
// std::fill(dd_data->trefftz_buffer, dd_data->trefftz_buffer + hd_mesh->ns, 0.0f);
ispc::Mesh2 mesh = mesh_to_ispc(dd_mesh);
ispc::Mesh mesh = mesh_to_ispc(dd_mesh);
f32 cd = ispc::kernel_trefftz_cd2(&mesh, dd_data->gamma, j, n, sigma_vatistas);
cd /= linalg::length2(flow.freestream) * area;
return cd;
Expand Down Expand Up @@ -519,7 +527,7 @@ f32 BackendCPU::mesh_area(const u64 i, const u64 j, const u64 m, const u64 n) {
return area_sum;
}

void BackendCPU::mesh_move(const linalg::alias::float4x4& transform) {
void BackendCPU::displace_wing_and_shed(const linalg::alias::float4x4& transform) {
// const tiny::ScopedTimer t("Mesh::move");
assert(dd_mesh->nwa < dd_mesh->nw); // check if we have capacity

Expand Down
14 changes: 7 additions & 7 deletions vlm/backends/cpu/src/vlm_backend_cpu_kernels.ispc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ inline F3 quad_normal(const F3& v0, const F3& v1, const F3& v2, const F3& v3) {
return normalize(cross(v3-v1, v2-v0));
}

export struct Mesh2 {
export struct Mesh {
float* vertices; // (nc+nw+1)*(ns+1)*3
// TODO: evaluate if we really need those values or we can just compute on the fly in the kernels
float* normals; // nc*ns*3
Expand Down Expand Up @@ -109,7 +109,7 @@ inline void kernel_symmetry(float3& inf, C colloc, const V& vertex0, const V& ve
}

export void kernel_influence(
uniform const Mesh2* uniform m,
uniform const Mesh* uniform m,
uniform float* uniform lhs,
uniform uint64 ia, uniform uint64 lidx, uniform float sigma
) {
Expand Down Expand Up @@ -153,7 +153,7 @@ export void kernel_influence(
}

export void kernel_wake_influence(
uniform const Mesh2* uniform m,
uniform const Mesh* uniform m,
uniform uint64 lidx, uniform float* uniform gamma, uniform float* uniform rhs, uniform float sigma
) {
const uniform uint64 nb_panels = m->ns * m->nc;
Expand Down Expand Up @@ -199,7 +199,7 @@ export void kernel_wake_influence(
}

export void kernel_rollup(
uniform const Mesh2* uniform m, uniform float dt,
uniform const Mesh* uniform m, uniform float dt,
uniform float* uniform rollup,
uniform uint64 vidx, uniform float* uniform gamma, uniform float sigma
) {
Expand Down Expand Up @@ -269,7 +269,7 @@ export void kernel_rollup(
}

// export uniform float kernel_trefftz_cd(
// uniform const Mesh2* uniform m,
// uniform const Mesh* uniform m,
// uniform float* uniform gamma,
// uniform float* uniform trefftz_buffer,
// uniform uint64 j, uniform uint64 n, uniform float sigma
Expand Down Expand Up @@ -325,7 +325,7 @@ export void kernel_rollup(
// }

export uniform float kernel_trefftz_cd2(
uniform const Mesh2* uniform m,
uniform const Mesh* uniform m,
uniform float* uniform gamma,
uniform uint64 jp, uniform uint64 n, uniform float sigma
) {
Expand Down Expand Up @@ -382,7 +382,7 @@ export uniform float kernel_trefftz_cd2(
}

export uniform float kernel_trefftz_cl(
uniform const Mesh2* uniform m,
uniform const Mesh* uniform m,
uniform float* uniform gamma,
uniform uint64 j, uniform uint64 n
) {
Expand Down
Loading

0 comments on commit 47c53f0

Please sign in to comment.